# Recommender System

In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
from tensorflow import keras
# Default figure size and seaborn theme for the plots in this notebook
plt.rcParams["figure.figsize"] = (12,7)
sns.set_style("darkgrid")

# Helper functions for the grid-line maps
from helperfunctions import get_country_specific_information, get_geojson_grid, generateBaseMap, draw_grid
# The *_location values are passed to generateBaseMap and the *_num values are passed as
# `n` to get_geojson_grid further down (NY = New York, L = London).
# NOTE(review): presumably map centre coordinates and grid resolution — confirm in helperfunctions.py
NY_location, L_location, NY_num, L_num = get_country_specific_information()

In [2]:
# Load data used for modelling
df = pd.read_csv("data/space_embedding_data.csv", index_col=[0])
# Read the place IDs as strings: they are 21-digit numbers (too large for 64-bit integers)
# and RecommenderSystem matches them against the string IDs supplied by the user.
places = pd.read_csv("data/places_final.csv", dtype={"gPlusPlaceId": str})

In [3]:
# Preview the first rows of the places table
places.head()

Unnamed: 0,gPlusPlaceId,name,price,lat,lon,city,address,Grid,category
0,101742583391038750118,Carpo London,,51.509499,-0.135762,London,"16 Piccadilly, London W1J 0DE, United Kingdom",L159,Retail
1,100574642292837870712,Premium Cars,,51.514637,-0.06498,London,"10 Commercial Road Premium Cars First Floor, S...",L186,Other
2,105185983265572241970,eSpares Ltd,,51.479416,-0.179209,London,"Chelsea Wharf, 15 Lots Rd, London, Chelsea SW1...",L40,Wholesale
3,104500852703501308358,Superdrug,,51.494537,-0.145769,London,"Unit 35, Victoria Railway Station, London SW1V...",L101,Retail
4,107519298595557659572,Kura,2.0,51.502122,-0.163029,London,"3-4 Park Close, London SW1X 7PQ, United Kingdom",L137,Restaurant


In [4]:
# Get embeddings from model
model = keras.models.load_model("models/continuous_model")
# First weight array of the model's second layer, labelled row by row with df['Grid'] so
# that a grid cell's vector can be looked up by its ID.
# NOTE(review): assumes layers[1] is the grid embedding layer and that its rows are in the
# same order as df['Grid'] — confirm against the training code.
embeddings = pd.DataFrame(model.layers[1].get_weights()[0], index=df['Grid'])


In [5]:
# Peek at the first embedding vectors (one row per grid cell ID)
embeddings.head(3)

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,10,11
Grid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
L0,0.001091,0.001297,0.000842,-0.001794,0.003415,-0.003326,-0.000462,0.003105,-0.001701,0.00238,-0.00292,0.002473
L1,0.572294,-0.959839,-0.426572,0.937521,-1.11575,1.063627,0.6889,-1.082502,0.818715,0.560528,1.058209,-0.848285
L10,-0.049071,0.037713,-0.01724,0.028672,-0.032671,0.010605,-0.007951,0.014423,0.039272,0.032083,0.018711,0.007981


In [61]:
# Assumes that each user only has ratings in one city: either London or New York
from sklearn.metrics.pairwise import cosine_similarity

def PushIndex(df, grid_size=19, padding=1, threshold=0.5):
    """Spread per-cell similarities of an inner grid over a larger, padded grid.

    The scored cells form a ``grid_size`` x ``grid_size`` grid numbered row by row
    (``clean_index``). They are re-numbered into a grid that has ``padding`` extra
    cells on every side, and a flat list with one entry per cell of that padded
    grid is built.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns ``clean_index`` (row-major cell number in the inner grid)
        and ``cosine_similarity``. A ``new_index`` column is added to this frame
        in place.
    grid_size : int, default 19
        Side length of the inner grid.
    padding : int, default 1
        Number of border cells added on each side (19 + 2 * 1 = 21 by default).
    threshold : float, default 0.5
        Similarities that are not strictly greater than this are reported as 0.

    Returns
    -------
    tuple (list, pandas.DataFrame)
        The similarity of every cell of the padded grid (0 for border cells, cells
        without a score and scores at or below ``threshold``), and ``df`` itself.
    """
    padded_size = grid_size + 2 * padding

    # Inner cell (row, col) sits at (row + padding, col + padding) of the padded grid.
    # With the defaults: 0-18 -> 22-40, 19-37 -> 43-61, 38-56 -> 64-82, ...
    mapping = {
        row * grid_size + col: (row + padding) * padded_size + (col + padding)
        for row in range(grid_size)
        for col in range(grid_size)
    }
    df['new_index'] = df.clean_index.map(mapping)

    # Collect the first similarity seen for each padded cell in one pass, instead of
    # filtering the whole frame again for every cell of the padded grid.
    placed = df.loc[df['new_index'].notna()]
    first_similarity = {}
    if not placed.empty:
        for cell, similarity in zip(placed['new_index'], placed['cosine_similarity']):
            first_similarity.setdefault(int(cell), similarity)

    similiarities = []
    for cell in range(padded_size * padded_size):
        similarity = first_similarity.get(cell, 0)
        similiarities.append(similarity if similarity > threshold else 0)
    return similiarities, df



def RecommenderSystem(IDs, Ratings):
    """Score the grid cells of the other city against the places a user has rated.

    The user is represented by the rating-weighted sum of the embeddings of the grid
    cells they rated places in (ratings are averaged per grid cell first). Every grid
    cell whose ID does not contain the first letter of the user's city is then scored
    by its cosine similarity to that vector.

    Uses the notebook globals ``places`` and ``embeddings`` and the function
    ``PushIndex``.

    Parameters
    ----------
    IDs : list
        ``gPlusPlaceId`` values of the places the user has rated. All of them are
        assumed to lie in the same city; the city of the first match is used.
    Ratings : list
        The user's ratings, aligned with ``IDs``.

    Returns
    -------
    tuple
        The result of ``PushIndex`` for the recommendation frame: the list of
        per-cell similarities and the frame itself (columns ``cosine_similarity``,
        ``clean_index`` and ``new_index``, indexed by grid ID).

    Raises
    ------
    ValueError
        If none of ``IDs`` is present in ``places``.
    """
    # Define user df
    user = pd.DataFrame({'IDs': IDs, 'Rating': Ratings})
    # Visited establishments with their city and grid cell
    visited = places.loc[places.gPlusPlaceId.isin(IDs), ['gPlusPlaceId', 'city', 'Grid']]
    # Add grid cell, city to user
    user = user.merge(visited, left_on='IDs', right_on='gPlusPlaceId')
    if user.empty:
        raise ValueError("None of the given IDs were found in `places`.")
    # Save location user is from
    city = user.city.unique()[0]

    # User vector: embeddings of the rated grid cells weighted by the user's mean rating there
    avg_grid_rating = user.groupby('Grid').Rating.mean()
    user_vector = embeddings.loc[avg_grid_rating.index].values.T @ avg_grid_rating.values

    # Candidate cells: every grid ID that does not contain the initial of the user's city
    is_candidate = [city[0] not in grid for grid in embeddings.index]
    candidates = embeddings.loc[is_candidate]

    # Score all candidates in one call instead of enlarging the frame one cell at a time
    recommendation = pd.DataFrame(index=list(candidates.index))
    if len(candidates) > 0:
        recommendation['cosine_similarity'] = cosine_similarity(
            user_vector.reshape(1, -1), candidates.values
        )[0]

    # Strip the city prefix from the grid ID to get the numeric cell index: two characters
    # for the cells recommended to a London user, one character otherwise.
    # NOTE(review): presumably New York IDs look like "NY<n>" and London IDs like "L<n>" — confirm
    prefix_length = 2 if city == "London" else 1
    recommendation['clean_index'] = [int(grid[prefix_length:]) for grid in recommendation.index]

    # Return df similiarity to other grids
    return PushIndex(recommendation)
    

In [118]:
import json
import matplotlib as mpl
import folium

def Plot(city, similiarity_list, title=None):
    """Draw grid-cell similarities on a folium map of the city the user is seeking places in.

    Parameters
    ----------
    city : str
        The city the user's ratings come from. ``"London"`` draws the New York grid,
        any other value draws the London grid.
    similiarity_list : sequence of numbers
        One value per grid cell, indexed in the order ``get_geojson_grid`` yields the
        cells. A value of 0 leaves the cell unfilled; any other value fills it with a
        ``PuRd`` colour.
    title : str, optional
        Heading placed above the map. No heading is added when it is ``None``.

    Returns
    -------
    The map created by ``generateBaseMap`` with all grid cells (and the heading) added.
    """
    if city == "London":
        grid = get_geojson_grid("New York", n=NY_num)
        default_location = NY_location
    else:
        grid = get_geojson_grid("London", n=L_num)
        default_location = L_location

    m = generateBaseMap(default_location)

    for i, box in enumerate(grid):
        similarity = similiarity_list[i]

        if similarity == 0:
            # Not recommended: outline only
            style = {'color': "grey", 'weight': 0.6, 'dashArray': '1, 1', 'fillOpacity': 0.1}
        else:
            # Rescale from the (0.5, 1] range left by the similarity threshold to (0, 1]
            value = (similarity - 0.5) / (1 - 0.5)
            # NOTE(review): value + 0.5 is above 1 for similarities over 0.75, so those cells
            # presumably all get the darkest colour — confirm this is intended.
            fill_color = mpl.colors.to_hex(plt.cm.PuRd(value + 0.5))
            style = {
                'fillColor': fill_color,
                'color': "grey",
                'weight': 0.6,
                'dashArray': '1, 1',
                'fillOpacity': 0.9,
            }

        # Bind the dict as a default argument so each cell keeps its own style
        gj = folium.GeoJson(json.dumps(box), style_function=lambda feature, style=style: style)
        m.add_child(gj)

    if title is not None:
        title_html = '''
            <h3 align="center" style="font-size:16px"><b>{}</b></h3>
            '''.format(title)
        m.get_root().html.add_child(folium.Element(title_html))

    return m

In [120]:



def SaveHTML(m, filename='image.png'):
    """Render the map ``m`` to a PNG image and write it to ``filename``.

    Despite its name, this function writes an image, not an HTML file.

    Parameters
    ----------
    m : map object
        Must provide the ``_to_png`` method used below.
    filename : str, default 'image.png'
        Path of the image file to write.
    """
    # Imported locally: the notebook-level imports of io and PIL are commented out, so
    # calling this function used to fail with a NameError.
    import io
    from PIL import Image

    # NOTE(review): 5 is presumably the number of seconds to wait for rendering — confirm
    img_data = m._to_png(5)
    img = Image.open(io.BytesIO(img_data))
    img.save(filename)
        

### Example with user seeking places in New York

In [150]:
#import io
#from PIL import Image
import os
import time
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
import imageio

def save_to_png(name, m, delay=3):
    """Save the map ``m`` as ``plots/<name>.html`` and a screenshot of it as ``plots/<name>.png``.

    Both files are written to the ``plots`` folder of the current working directory,
    which is created if it does not exist. The screenshot is taken by opening the
    saved HTML file in a Chrome window.

    Parameters
    ----------
    name : str
        File name (without extension) used for both output files.
    m : map object
        Must provide ``save(path)`` to write itself as HTML.
    delay : int or float, default 3
        Seconds to wait after opening the page so the map tiles can load.
    """
    from pathlib import Path
    from selenium.webdriver.chrome.service import Service

    plots_dir = Path(os.getcwd()) / 'plots'
    plots_dir.mkdir(parents=True, exist_ok=True)

    # Save the map as an HTML file
    html_path = plots_dir / '{}.html'.format(name)
    m.save(str(html_path))

    # Open a browser window; the driver path goes through a Service object rather than
    # being passed positionally to webdriver.Chrome.
    browser = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
    try:
        # ..that displays the map (as_uri builds a well-formed file:// URL)...
        browser.get(html_path.as_uri())
        # Give the map tiles some time to load
        time.sleep(delay)
        # Grab the screenshot
        browser.save_screenshot(str(plots_dir / '{}.png'.format(name)))
    finally:
        # Close the browser even if loading the page or taking the screenshot failed
        browser.quit()

In [151]:
# NOTE: `m` is the map returned by Plot(...) in the New York example cell further down;
# that cell has to be run before this one.
save_to_png("test",m)



Current google-chrome version is 101.0.4951
Get LATEST chromedriver version for 101.0.4951 google-chrome
Driver [/Users/akterminsprove/.wdm/drivers/chromedriver/mac64/101.0.4951.41/chromedriver] found in cache
  browser = webdriver.Chrome(ChromeDriverManager().install())


In [135]:
# Two London places from `places`, rated 1 and 5
IDs = ['101742583391038750118','100574642292837870712']
Ratings = [1,5]
# Do not unpack into `df`: that name holds the embedding data loaded at the top of the
# notebook, which is needed again when the embeddings cell is re-run.
similiarities, recommendation = RecommenderSystem(IDs, Ratings)
m = Plot('London',similiarities, 'Example of user seeking places in New York')
m

In [None]:
Click [here](./Typologi_folium.PNG)  to see the plot if it did not render. 

### Example with user seeking places in London

In [101]:
IDs = ["110741528261561607331", "101406384283536798894"]
Ratings = [4,5]
similiarities, recommendation = RecommenderSystem(IDs, Ratings)
# Pass the map title explicitly, as in the New York example
Plot('New York',similiarities, 'Example of user seeking places in London')