## Backend

*The code that runs PoeML*

### This cell block contains the function I call from within 'views.py'

In [239]:
def ModelIt(url):

    import flickrapi
    import json
    import re
    import io
    from google.cloud import vision
    from google.cloud.vision import types
    from PIL import Image, ImageDraw
    import os
    import pandas as pd
    import spacy
    from sklearn.metrics.pairwise import euclidean_distances, cosine_distances, cosine_similarity
    import pandas as pd
    import sqlalchemy # pandas-mysql interface library
    import sqlalchemy.exc # exception handling
    import numpy as np

    ##################################
    # parameters
    n = 4 #number of images/sonnets to return

    #######################################
    # functions

    # connect to data base
    def connect_db():
        from sqlalchemy import create_engine
        dbname = 'poetry_db'
        username = 'ctoews'
        engine = create_engine('postgres://%s@localhost/%s'%(username,dbname))
        return engine

    def assemble_urls(photoset):
        urls = []
        for photo in photoset['photoset']['photo']:
            url = "https://farm" + str(photo['farm']) + ".staticflickr.com/" + photo['server'] + "/" + \
                  photo['id'] + "_" + photo['secret'] + ".jpg"
            urls.append(url)
        return urls

    def parse_url(url):

        try:
            userid = re.search('photos/(.+?)/', url).group(1)
        except AttributeError:
            # AAA, ZZZ not found in the original string
            userid = '' # apply your error handling

        try:
            albumid = re.search('albums/(.*)', url).group(1)
        except AttributeError:
            # AAA, ZZZ not found in the original string
            albumid = '' # apply your error handling

        return userid, albumid

    def explicit():
        from google.cloud import storage

        # Explicitly use service account credentials by specifying the private key
        # file.
        storage_client = storage.Client.from_service_account_json(
            '/Users/ctoews/Documents/Insight/Project/googleAPI/MyFirstProject-76680dcd1ad6.json')

        # Make an authenticated API request
        buckets = list(storage_client.list_buckets())
        print(buckets)

    #############################################
    # main

    #import flickr_keys
    api_key = u'37528c980c419716e0879a417ef8211c'
    api_secret = u'41075654a535c203'

    # establish connection
    flickr = flickrapi.FlickrAPI(api_key, api_secret, format='parsed-json')

    # extract user and album id
    userid, albumid = parse_url(url)

    #fetch album info
    albuminfo  = flickr.photosets.getPhotos(user_id=userid,photoset_id=albumid)

    # extract individual photo urls
    photo_urls = assemble_urls(albuminfo)

    # authenticate google
    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = \
    "/Users/ctoews/Documents/Insight/Project/googleAPI/MyFirstProject-76680dcd1ad6.json"

    explicit()

    # connect to Google api
    client = vision.ImageAnnotatorClient()
    image = types.Image()

    # feed photo url to Google, extract label
    all_labels = []
    for url in photo_urls:
        image.source.image_uri = url
        response = client.label_detection(image=image)
        labels = response.label_annotations
        these_labels = ''
        for label in labels:
            these_labels += (label.description + ' ')
        all_labels.append(these_labels)

    # store labels as dataframe
    all_labels = pd.DataFrame(all_labels,columns=['labels'])

    # load parser
    parser = spacy.load('en')

    # embed the set of all labels via word2vec
    all_vecs = []
    for l in all_labels.values:
        v=parser(l[0])
        all_vecs.append(v.vector)
    all_vecs = np.array(all_vecs)

    # find the average embedding (could play with weighting schemes)
    pic_vec = np.mean(all_vecs,axis=0).reshape(1,-1)

    # connect to database
    engine = connect_db()

    # extract poem embeddings
    query = "select * from poem_embeddings order by index;"
    poem_embeddings = pd.read_sql(query,engine)
    pv = poem_embeddings.iloc[:,1:].values

    # calculate cosine similarities
    s=cosine_similarity(pic_vec,pv)

    # rank the distances
    idx=np.argsort(s)
    sims = list(s[0,idx[0][:-n-1:-1]])
    print("similarities:  ", sims)

    # extract sonnet sentences from database
    query = "select * from sonnet_sentences order by index;"
    sonnet_sentences = pd.read_sql(query,engine)

    # extract relevant snippets
    best_matches = sonnet_sentences.iloc[idx[0][:-n-1:-1],:]

    # combine into single dataframe
    best_matches['similarity'] = sims
    best_matches['url']=['dog' for i in np.arange(n)] #photo_urls[0:n]
    # return as list
    best_matches = best_matches.iloc[:,1:].to_dict('records')

    return best_matches

### Everything below here are diagnostic snippets.  All can be deleted, but I'll keep them around to have a record of where I've been.

In [240]:
url = "https://www.flickr.com/photos/138072685@N02/albums/72157690932695551"
out = ModelIt(url)

[<Bucket: toews-images>]
similarities:   [0.97782495337635023, 0.96658523949610142, 0.96286365796018725, 0.95403124316771681]


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [245]:
photo_urls[0]

'https://farm5.staticflickr.com/4623/39834715572_1559b597ec.jpg'

In [182]:
def assemble_urls(photoset):
    urls = []
    for photo in photoset['photoset']['photo']:
        url = "https://farm" + str(photo['farm']) + ".staticflickr.com/" + photo['server'] + "/" + \
              photo['id'] + "_" + photo['secret'] + ".jpg"
        urls.append(url)    
    return urls

def parse_url(url):

    try:
        userid = re.search('photos/(.+?)/', url).group(1)
    except AttributeError:
        # AAA, ZZZ not found in the original string
        userid = '' # apply your error handling

    try:
        albumid = re.search('albums/(.*)', url).group(1)
    except AttributeError:
        # AAA, ZZZ not found in the original string
        albumid = '' # apply your error handling

    return userid, albumid

def explicit():
    from google.cloud import storage

    # Explicitly use service account credentials by specifying the private key
    # file.
    storage_client = storage.Client.from_service_account_json(
        '/Users/ctoews/Documents/Insight/Project/googleAPI/MyFirstProject-76680dcd1ad6.json')

    # Make an authenticated API request
    buckets = list(storage_client.list_buckets())
    print(buckets)    

#############################################
# main

#import flickr_keys
api_key = u'37528c980c419716e0879a417ef8211c'
api_secret = u'41075654a535c203'

# establish connection
flickr = flickrapi.FlickrAPI(api_key, api_secret, format='parsed-json')

# extract user and album id
userid, albumid = parse_url(url)    

#fetch album info 
albuminfo  = flickr.photosets.getPhotos(user_id=userid,photoset_id=albumid)

# extract individual photo urls
photo_urls = assemble_urls(albuminfo)


In [184]:
photo_urls[0:4]

['https://farm5.staticflickr.com/4623/39834715572_1559b597ec.jpg',
 'https://farm5.staticflickr.com/4605/39834715692_e499c7d71f.jpg',
 'https://farm5.staticflickr.com/4630/39834715602_3314a7eaf4.jpg',
 'https://farm5.staticflickr.com/4653/39834716592_efe5420940.jpg']

In [210]:
x = np.arange(5)
print(x[:-4:-1])
print(x[0:4])

[4 3 2]
[0 1 2 3]


In [211]:
photo_urls[0]

'https://farm5.staticflickr.com/4623/39834715572_1559b597ec.jpg'

In [130]:
best_matches

Unnamed: 0,index,title,sentence
20,20,V,"Then were not summer's distillation left, A l..."
255,255,LXIII,"Against my love shall be as I am now, With Tim..."
85,85,XX,"A woman's face with nature's own hand painted,..."
102,102,XXIV,Mine eye hath play'd the painter and hath stel...


In [107]:
from sklearn.metrics.pairwise import euclidean_distances, cosine_distances, cosine_similarity


In [143]:
d=cosine_similarity(pic_vec,pv)

In [146]:
# rank the distances
idx=np.argsort(d)
list(d[0,idx[0][:-5:-1]] )

[0.97782495337635023,
 0.96658523949610142,
 0.96286365796018725,
 0.95403124316771681]

In [188]:
best_matches = best_matches.iloc[:,1:].to_dict('records')

In [191]:
best_matches[0]['sentence']

" Then were not summer's distillation left, A liquid prisoner pent in walls of glass, Beauty's effect with beauty were bereft, Nor it, nor no remembrance what it was"

In [212]:
from markupsafe import Markup, escape

In [220]:
x=Markup(photo_urls[0])
x

Markup('https://farm5.staticflickr.com/4623/39834715572_1559b597ec.jpg')

In [204]:
x.__next__()


({'sentence': " Then were not summer's distillation left, A liquid prisoner pent in walls of glass, Beauty's effect with beauty were bereft, Nor it, nor no remembrance what it was",
  'title': 'V'},
 'https://farm5.staticflickr.com/4623/39834715572_1559b597ec.jpg')

In [249]:
x = ['https://farm5.staticflickr.com/4623/39834715572_1559b597ec.jpg' for i in np.arange(5)]
y=photo_urls[0:5]
print(x)
print(y)

['https://farm5.staticflickr.com/4623/39834715572_1559b597ec.jpg', 'https://farm5.staticflickr.com/4623/39834715572_1559b597ec.jpg', 'https://farm5.staticflickr.com/4623/39834715572_1559b597ec.jpg', 'https://farm5.staticflickr.com/4623/39834715572_1559b597ec.jpg', 'https://farm5.staticflickr.com/4623/39834715572_1559b597ec.jpg']
['https://farm5.staticflickr.com/4623/39834715572_1559b597ec.jpg', 'https://farm5.staticflickr.com/4605/39834715692_e499c7d71f.jpg', 'https://farm5.staticflickr.com/4630/39834715602_3314a7eaf4.jpg', 'https://farm5.staticflickr.com/4653/39834716592_efe5420940.jpg', 'https://farm5.staticflickr.com/4674/39834715812_c9b8157bc5.jpg']


In [251]:
y?

In [253]:
x==y

False

In [3]:
quotes.iloc[1362,:]

NameError: name 'quotes' is not defined

In [3]:
import spacy
parser = spacy.load('en')

In [12]:
from sklearn.metrics.pairwise import euclidean_distances, cosine_distances, cosine_similarity
a = (parser('dog').vector +parser('cat').vector)/2.0 
b = parser('dog cat').vector
cosine_similarity(a.reshape(1,-1),b.reshape(1,-1))

array([[0.8345199]], dtype=float32)