# Recommender System

In [37]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import requests
import pickle
from tensorflow import keras
plt.rcParams["figure.figsize"] = (12,7)
sns.set_style("darkgrid")

# Helperfunctions for gridline plots
from helperfunctions import get_country_specific_information, get_geojson_grid, generateBaseMap#, save_to_png
NY_location, L_location, NY_num, L_num = get_country_specific_information()

In [2]:
# Load data used for modelling
df = pd.read_csv("data/space_embedding_data_NLP.csv", index_col=[0])
places = pd.read_csv("data/places_final.csv")

In [3]:
# Get embeddings from model
#model = keras.models.load_model("models/continuous_model")
#embeddings = pd.DataFrame(model.layers[1].get_weights()[0], index=df['Grid'])

In [4]:
embeddings=pd.read_pickle("deployment/serialized/discrete_embeddings.pkl")

In [5]:
embeddings=pd.DataFrame(embeddings).T
embeddings.head()

Unnamed: 0,0,1,2,3,4,5
L0,0.001433,-0.012677,0.006213,-0.000165,0.008206,0.011099
L1,0.269152,3.460942,-1.458247,-2.876282,-1.317084,-5.724437
L10,-0.002178,0.019951,0.0365,-0.011969,0.013564,0.027723
L100,-0.032761,-0.034071,-0.022443,-0.023177,-0.04993,-0.015488
L101,0.00777,-0.049722,0.018263,-0.047544,0.024548,-0.039448


In [6]:
# Assumes that user only have ratings in either London or NY
from sklearn.metrics.pairwise import cosine_similarity

def PushIndex(df):

    #0-18 = 22-40
    #19-37 = 43-61
    #38-56 = 64-82

    old_matrix = np.array([np.array([i for i in range(22,41)]) for j in range(19)])
    new_matrix = []
    col = 21

    for idx, x_i in enumerate(old_matrix):
        new_matrix.append(x_i + idx * col)

    mapping = dict(zip([i for i in range(0,21*21)], np.array(new_matrix).flatten().tolist()))
    df['new_index'] = df.clean_index.map(mapping)
    #similiarities = [1 if i in df.new_index.values else 0 for i in range(0,21*21)]
    similiarities = [df.loc[df.new_index==num]['cosine_similarity'].values[0] if num in df.new_index.values else 0 for num in range(0,21*21)]
    similiarities = [i if i>0.5 else 0 for i in similiarities]
    return similiarities, df



def RecommenderSystem(IDs, Ratings):

    # Define user df
    user = pd.DataFrame({'IDs':IDs, 'Rating':Ratings})
    # Save df of visited establishments
    visited = places.loc[places.gPlusPlaceId.isin(IDs)][['gPlusPlaceId','city','Grid']]
    # Add grid cell, city to user
    user = user.merge(visited, left_on='IDs', right_on='gPlusPlaceId')
    # Save location user is from
    city = user.city.unique()[0]

    # Calculate average rating for user based on average rating user have given for each grid 
    avg_grid_rating = pd.DataFrame(user.groupby('Grid').Rating.mean())
    avg_user_rating = embeddings.loc[avg_grid_rating.index].values.T @ avg_grid_rating.values

    # Calculate the cosine similiary
    recommendation = pd.DataFrame(index = [i for i in embeddings.index if city[0] not in i])
    for grid in recommendation.index:
        recommendation.loc[grid, 'cosine_similarity'] = cosine_similarity(avg_user_rating.reshape(1,-1),embeddings.loc[grid].values.reshape(1,-1))

    if city == "London":
        recommendation['clean_index'] = [int(i[2:]) for i in recommendation.index] 

    else:
        recommendation['clean_index'] = [int(i[1:]) for i in recommendation.index] 

    
    # Return df similiarity to other grids
    return PushIndex(recommendation) 
    

In [7]:
import json
import matplotlib as mpl
import folium

def Plot(city, similiarity_list, title):

    if city == "London":
        grid = get_geojson_grid("New York", n=NY_num)
        default_location = NY_location

    else:
        grid = get_geojson_grid("London", n=L_num)
        default_location = L_location


    m =  generateBaseMap(default_location)

    for i, box in enumerate(grid):
        geo_json = json.dumps(box)

        #print(similiarity_list[i])

        if similiarity_list[i] == 0:
            color = mpl.colors.to_hex("white")
            gj = folium.GeoJson(geo_json, style_function=lambda feature, color=color: {'color':"grey", 'weight': 0.6,'dashArray': '1, 1', 'fillOpacity': 0.1,})

        else:
            value =  (similiarity_list[i]-0.5) / (1-0.5) 
            #color = plt.cm.Greens(similiarity_list[i])
            #color = plt.cm.PuRd(similiarity_list[i])
            color = plt.cm.PuRd(value+0.5)
            color = mpl.colors.to_hex(color)

            gj = folium.GeoJson(geo_json,
                                style_function=lambda feature, color=color: {
                                                                                'fillColor': color,
                                                                                'color':"grey",
                                                                                'weight': 0.6,
                                                                                'dashArray': '1, 1',
                                                                                'fillOpacity': 0.9,
                                                                            })

        m.add_child(gj)

    title_html = '''
            <h3 align="center" style="font-size:16px"><b>{}</b></h3>
            '''.format(title)   
    m.get_root().html.add_child(folium.Element(title_html))   

    return m

### Example with user seeking places in New York

In [8]:
#import io
#from PIL import Image
import os
import time
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
import imageio

def save_to_png(name,m):
        delay = 3

        path=os.getcwd()
        m.save(path+'/plots/'+name+'.html')

        #Save the map as an HTML file
        fn='/plots/{}.html'.format(name)
        tmpurl= 'file://{path}/{mapfile}'.format(path=path,mapfile=fn)
        #Open a browser window...
        browser = webdriver.Chrome(ChromeDriverManager().install())
        #..that displays the map...
        browser.get(tmpurl)
        #Give the map tiles some time to load
        time.sleep(delay)
        #Grab the screenshot
        browser.save_screenshot(path+ '/plots/{}.png'.format(name))
        #Close the browser
        browser.quit()

In [9]:
#Persona 1 from london - middle age man
persona_L1 = ['117894493394086195117', '104063119900400467553', '109257478465059465681', '108826432794361150085', '108474869308709310130']
L1_ratings = [4, 5, 5, 1, 3]

# persona 2 from london, hipster in 20's
persona_L2 =  ['101414550408078459025', '101222111970032450108',  '110867761419823189329', '102526759936228897422', '105899272211447947388']
L2_ratings = [5, 4, 4, 2 ,1 ]

# persona 3 from london, working class guy
persona_L3  = ['117142684046048778874', '114202385542308064803', '111040899007592704996', '116048702018782286233', '101959599981305942585']
L3_ratings = [5, 4, 4, 1, 1]

### Persona 1 

In [10]:
IDs =  persona_L1
Ratings = L1_ratings
similiarities, df = RecommenderSystem(IDs, Ratings)
m = Plot('London',similiarities, 'Persona 1 - London to New York')
#save_to_png("Persona1_NY",m)
m

If the folium map did not render, click this [link](img/Persona1_NY.png) to see the map.

### Persona 2 

In [11]:
IDs =  persona_L2
Ratings = L2_ratings
similiarities, df = RecommenderSystem(IDs, Ratings)
m = Plot('London',similiarities, 'Persona 2 - London to New York')
#save_to_png("Persona2_NY",m)
m

If the folium map did not render, click this [link](img/Persona2_NY.png) to see the map.

### Persona 3 

In [12]:
IDs =  persona_L3
Ratings = L3_ratings
similiarities, df = RecommenderSystem(IDs, Ratings)
m = Plot('London',similiarities, 'Persona 3 - London to New York')
#save_to_png("Persona3_NY",m)
m

If the folium map did not render, click this [link](img/Persona3_NY.png) to see the map.

### NY version of persona 2

In [13]:
#New Yorker version of the hipster
NY1 = ['113977789460220411709',  '101274427290837781011', '106861653242718638090', '103027305365171139191', '113219169863257979653', '113312551325632714336', '106431782820267132017']
Rating_N1 = [4,5, 1, 2, 1,1, 2]

In [14]:
IDs = NY1
Ratings = Rating_N1
similiarities, recommendation = RecommenderSystem(IDs, Ratings)
m = Plot('New York',similiarities, 'Persona 2 - New York to London')
#save_to_png("Persona2_L",m)
m

If the folium map did not render, click this [link](img/Persona2_L.png) to see the map.

### Calling Grace for Persona 1

In [32]:
feature_names = ['IDs', 'Rating']
IDs = persona_L1
Ratings = L1_ratings

# Pythonic way to transpose list of lists 
data = list(map(list, zip(IDs, Ratings)))
# Construct data for endpoint
endpoint = "https://models.grace-dtu.2021.services/seldon/project-spaceembeddings/recommender-system/api/v0.1/predictions"
headers = {'Grace-Client-Secret': 'c0de6747-ffb6-4023-913f-53c8222435bb'}
payload = {"data": {"names": feature_names,
                    "ndarray": data}}

# Request response from endpoint
response = requests.post(endpoint, json=payload, headers=headers)

print(response.status_code)
#print(response.json())

200


In [33]:
# Get index of result
grids = response.json()['data']['ndarray']
Grace = pd.DataFrame({'clean_index':[int(i[2:]) for i in grids], "cosine_similarity": np.ones(len(grids))}, index=grids)
similiarities, df = PushIndex(Grace)

In [38]:
# Plot result
m = Plot('New York',similiarities, 'Persona 1 - London to New York using Grace')
save_to_png("Persona1_NY_Grace",m)
m

FileNotFoundError: [Errno 2] No such file or directory: '/Users/akterminsprove/Desktop/DTU/Kandidat - Business Analytics/2. Semester/42578 Advanced Business Analytics/GoogleLocalReviews/plots/Persona1_NY_Grace.html'