In [5]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
from tensorflow import keras
# Default figure size and seaborn theme for the matplotlib/seaborn plots in this notebook
plt.rcParams["figure.figsize"] = (12,7)
sns.set_style("darkgrid")

# Helper functions for grid-line plots
from helperfunctions import get_country_specific_information, get_geojson_grid, generateBaseMap
# NOTE(review): presumably NY_* / L_* hold the map centre and grid size for New York and London;
# confirm against helperfunctions.get_country_specific_information
NY_location, L_location, NY_num, L_num = get_country_specific_information()

In [6]:
# Load data used for modelling
# `df` takes the CSV's first column as its index; its 'Grid' column is later used to label the embedding rows
df = pd.read_csv("data/space_embedding_data.csv", index_col=[0])
# Place metadata; the cells below read its gPlusPlaceId, city and Grid columns.
# NOTE(review): read without index_col, so an "Unnamed: 0" column appears in the frame
# (presumably the index saved with the CSV) — consider index_col=0 if it is not needed
places = pd.read_csv("data/places_final.csv")

In [7]:
# Preview the first three rows of the places table
places.head(3)

Unnamed: 0,gPlusPlaceId,name,price,lat,lon,city,address,Grid,category
0,101742583391038750118,Carpo London,,51.509499,-0.135762,London,"16 Piccadilly, London W1J 0DE, United Kingdom",L159,Retail
1,100574642292837870712,Premium Cars,,51.514637,-0.06498,London,"10 Commercial Road Premium Cars First Floor, S...",L186,Other
2,105185983265572241970,eSpares Ltd,,51.479416,-0.179209,London,"Chelsea Wharf, 15 Lots Rd, London, Chelsea SW1...",L40,Wholesale


In [8]:
# Get embeddings from model
model = keras.models.load_model("models/continuous_model")
# First weight array of the model's second layer, with each row labelled by the matching entry of df['Grid'].
# NOTE(review): assumes layers[1] is the grid embedding layer and that its rows follow the row order of df — confirm
embeddings = pd.DataFrame(model.layers[1].get_weights()[0], index=df['Grid'])

In [9]:
# Preview the first three embedding rows (one row per grid label)
embeddings.head(3)

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,10,11
Grid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
L0,0.001091,0.001297,0.000842,-0.001794,0.003415,-0.003326,-0.000462,0.003105,-0.001701,0.00238,-0.00292,0.002473
L1,0.572294,-0.959839,-0.426572,0.937521,-1.11575,1.063627,0.6889,-1.082502,0.818715,0.560528,1.058209,-0.848285
L10,-0.049071,0.037713,-0.01724,0.028672,-0.032671,0.010605,-0.007951,0.014423,0.039272,0.032083,0.018711,0.007981


In [13]:
# Assumes that the user only has ratings in one city (either London or NY)
from sklearn.metrics.pairwise import cosine_similarity
# Example input: gPlusPlaceId values taken from `places`, and the rating given to each (paired by position)
IDs = ['101742583391038750118','100574642292837870712']
Ratings = [4,2]

def RecommenderSystem(IDs, Ratings):
    """Score the grid cells of the city the user has NOT rated and plot them.

    The user's taste vector is the rating-weighted sum of the embeddings of the
    grid cells they have rated (ratings are first averaged per grid cell).
    Every grid cell of the other city is then scored by cosine similarity
    against that vector, and the scores are handed to ``Plot``.

    Relies on the notebook-level ``places`` and ``embeddings`` frames, the
    imported ``cosine_similarity`` and the ``Plot`` function.

    Parameters
    ----------
    IDs : list
        ``gPlusPlaceId`` values of the places the user rated.
    Ratings : list
        Rating given to each place, in the same order as ``IDs``.

    Returns
    -------
    Whatever ``Plot(city, similiarity_list)`` returns, where ``city`` is the
    user's own city and ``similiarity_list`` has one entry per cell number
    0..399 (0 where the other city has no embedding for that cell number).

    Raises
    ------
    ValueError
        If none of ``IDs`` is present in ``places`` (previously this surfaced
        as an opaque ``IndexError`` from ``unique()[0]``).

    Notes
    -----
    Only the first city found among the rated places is used; ratings are
    assumed to come from a single city. Grid labels are assumed to be a city
    prefix followed by the cell number ("L159" for London; presumably
    "NY<number>" for New York — confirm against the data).
    """
    # Length of the flat similarity list consumed by Plot (20 x 20 cells).
    n_cells = 20 * 20

    # Define user df and attach city / grid cell of every rated place
    user = pd.DataFrame({'IDs': IDs, 'Rating': Ratings})
    visited = places.loc[places.gPlusPlaceId.isin(IDs), ['gPlusPlaceId', 'city', 'Grid']]
    user = user.merge(visited, left_on='IDs', right_on='gPlusPlaceId')
    if user.empty:
        raise ValueError("None of the given IDs were found in `places`; cannot build a user profile.")

    # Save location user is from
    city = user.city.unique()[0]

    # Rating-weighted sum of the embeddings of the grid cells the user rated
    avg_grid_rating = user.groupby('Grid').Rating.mean()
    user_vector = embeddings.loc[avg_grid_rating.index].values.T @ avg_grid_rating.values

    # Candidate cells are the ones that do not carry the user's own city prefix.
    # A boolean mask (rather than a label list) keeps each embedding row exactly once.
    own_prefix = city[0]
    in_other_city = [not label.startswith(own_prefix) for label in embeddings.index]
    candidates = embeddings.loc[in_other_city]

    # One vectorised call instead of a per-row loop; result row 0 holds one score per candidate
    if len(candidates):
        scores = cosine_similarity(user_vector.reshape(1, -1), candidates.values)[0]
    else:
        scores = []

    # Candidates belong to the *other* city: a London user is scored against
    # two-letter-prefixed labels, anyone else against one-letter-prefixed labels.
    prefix_len = 2 if city == "London" else 1
    score_by_cell = {}
    for label, score in zip(candidates.index, scores):
        # setdefault keeps the first score if a cell number occurs more than once
        score_by_cell.setdefault(int(label[prefix_len:]), score)

    similiarity_list = [score_by_cell.get(cell, 0) for cell in range(n_cells)]

    # Return map of similarity to the other city's grid cells
    return Plot(city, similiarity_list)
    

In [16]:
import json
import matplotlib as mpl
import folium

def Plot(city, similiarity_list):
    """Draw the other city's grid on a map, shading each cell by its similarity.

    Parameters
    ----------
    city : str
        The user's own city. "London" selects the New York grid; any other
        value selects the London grid.
    similiarity_list : sequence of numbers
        Value for each grid cell, indexed by the cell's position in the grid
        returned by ``get_geojson_grid``; passed to the ``Greens`` colormap.

    Returns
    -------
    The map object created by ``generateBaseMap`` with one GeoJson layer
    added per grid cell.
    """
    # The map shows the city the user has not rated.
    target_city, n, default_location = (
        ("New York", NY_num-1, NY_location)
        if city == "London"
        else ("London", L_num-1, L_location)
    )
    cells = get_geojson_grid(target_city, n=n)
    base_map = generateBaseMap(default_location)

    def _style_with_fill(fill_hex):
        # Bind the colour per cell so every layer keeps its own fill.
        def _style(feature):
            return {
                'fillColor': fill_hex,
                'color':"grey",
                'weight': 2,
                'dashArray': '1, 1',
                'fillOpacity': 0.8,
            }
        return _style

    # Add one GeoJson layer per grid cell
    for position, cell in enumerate(cells):
        rgba = plt.cm.Greens(similiarity_list[position])
        layer = folium.GeoJson(
            json.dumps(cell),
            style_function=_style_with_fill(mpl.colors.to_hex(rgba)),
        )
        base_map.add_child(layer)

    return base_map




In [18]:
# Build the similarity map for the example user; the bare `m` displays it as the cell output
m = RecommenderSystem(IDs, Ratings)
m

In [24]:
# Scratch check: distinct (city, Grid) pairs for London; reset_index() keeps the old row labels in an 'index' column
test = places.loc[places.city=='London'][['city','Grid']].drop_duplicates().reset_index()

In [26]:
# Drop the first character (the city letter) of each grid label; the values stay strings, not ints
test['clean_index'] = [i[1:] for i in test.Grid]

In [35]:
# Dummy similarity vector for trying out the map: 400 cells, all 0 except cell 7 which is 0.9
similiarity_list = [0.9 if cell == 7 else 0 for cell in range(400)]

In [36]:
# Inspect the dummy vector (prints the whole list as cell output)
similiarity_list

[0,
 0,
 0,
 0,
 0,
 0,
 0,
 0.9,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0

In [37]:
# Render the dummy vector as a London user would see it: Plot draws the New York grid for city == "London"
Plot("London", similiarity_list)
