In [1]:
!conda install -c conda-forge folium --yes

import folium
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe
import pandas as pd
import numpy as np
import requests
from sklearn import preprocessing
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors
import math
import branca;

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/DSX-Python35

  added / updated specs: 
    - folium


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    branca-0.3.1               |             py_0          25 KB  conda-forge
    ca-certificates-2019.3.9   |       hecc5488_0         146 KB  conda-forge
    altair-2.2.2               |           py35_1         462 KB  conda-forge
    openssl-1.0.2r             |       h14c3975_0         3.1 MB  conda-forge
    vincent-0.4.4              |             py_1          28 KB  conda-forge
    certifi-2018.8.24          |        py35_1001         139 KB  conda-forge
    folium-0.8.3               |             py_0          54 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         4.0 MB

The following NEW packages will be IN

In [18]:
import os  # imported here so this configuration cell is self-contained

# SECURITY: the literal credentials below were committed with the notebook and
# must be treated as leaked. Rotate them and provide the new values through the
# FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET environment variables; the
# literals are kept only as a fallback so existing runs keep working.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', 'QEOHAZ20G1SBI4542GBI5XH34WBTLFEDLW5ZWC0UFLRV2VY2')  # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', 'ZFOJPHKUTDJFR5FH0UFM5UTI0B4TO1NBOK40V1JI33CNL4ND')  # your Foursquare Secret
VERSION = '20180605'  # Foursquare API version
LIMIT = 250  # sent as the `limit` query parameter of every explore request

# NOTE(review): no function in this notebook reads this global;
# getNearbyVenues uses its own `radius` parameter (default 500).
radius = 1000

# Grid of sample points built by createLocationDF around the chosen centre:
# number of steps per axis and the offset added per step to latitude/longitude.
LAT_STEPS = 10
LON_STEPS = 10
LAT_STEPSIZE = 0.03
LON_STEPSIZE = 0.03


In [32]:
def get_category_type(row):
    """Return the name of the first category attached to a venue record.

    Parameters
    ----------
    row : mapping
        Record indexable by ``'categories'`` or, failing that, by
        ``'venue.categories'``. The value must be a sequence of dicts that
        each carry a ``'name'`` key.

    Returns
    -------
    str or None
        Name of the first category, or ``None`` when the list is empty.

    Raises
    ------
    KeyError
        If neither key is present in ``row``.
    """
    try:
        categories_list = row['categories']
    except KeyError:
        # Only a missing key triggers the fallback; any other error propagates
        # instead of being silently swallowed.
        categories_list = row['venue.categories']

    if len(categories_list) == 0:
        return None
    return categories_list[0]['name']
    
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    """Query the Foursquare ``venues/explore`` endpoint for every given point.

    Parameters
    ----------
    names, latitudes, longitudes : iterables of equal length
        Label and coordinates of each point to explore; iterated in lockstep.
    radius : number, optional
        Value sent as the ``radius`` query parameter (default 500).

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns ``Neighborhood``,
        ``Neighborhood Latitude``, ``Neighborhood Longitude``, ``Venue``,
        ``Venue Latitude``, ``Venue Longitude`` and ``Venue Category``.
        The frame is empty (but has these columns) when nothing was found.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status code.
    KeyError, IndexError
        If the JSON payload lacks ``response.groups[0].items``.

    Notes
    -----
    Reads the notebook-level ``CLIENT_ID``, ``CLIENT_SECRET``, ``VERSION`` and
    ``LIMIT`` settings and prints each point name as a progress indicator.
    """
    columns = ['Neighborhood',
               'Neighborhood Latitude',
               'Neighborhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']

    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # Let requests build and escape the query string, and never wait
        # forever on a stalled connection.
        response = requests.get(
            'https://api.foursquare.com/v2/venues/explore',
            params={
                'client_id': CLIENT_ID,
                'client_secret': CLIENT_SECRET,
                'v': VERSION,
                'll': '{},{}'.format(lat, lng),
                'radius': radius,
                'limit': LIMIT,
            },
            timeout=30,
        )
        response.raise_for_status()
        items = response.json()['response']['groups'][0]['items']

        # Keep only the relevant information for each nearby venue.
        for item in items:
            venue = item['venue']
            # A venue without categories must not abort the whole run.
            categories = venue.get('categories') or []
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                categories[0]['name'] if categories else None,
            ))

    # Passing the column names to the constructor also works for zero rows,
    # where assigning ``.columns`` afterwards raises a length mismatch.
    return pd.DataFrame(rows, columns=columns)

def createLocationDF(CENTER_LABEL, CENTER_LAT, CENTER_LON,
                     lat_steps=None, lon_steps=None,
                     lat_stepsize=None, lon_stepsize=None):
    """Build a rectangular grid of sample points around a centre coordinate.

    Parameters
    ----------
    CENTER_LABEL : str
        Prefix of every point label; labels look like ``'<label> <i>,<j>'``.
    CENTER_LAT, CENTER_LON : float
        Coordinates the grid is laid out around.
    lat_steps, lon_steps : int, optional
        Number of grid points along each axis. Default to the notebook-level
        ``LAT_STEPS`` / ``LON_STEPS``.
    lat_stepsize, lon_stepsize : float, optional
        Offset between neighbouring grid points along each axis. Default to
        the notebook-level ``LAT_STEPSIZE`` / ``LON_STEPSIZE``.

    Returns
    -------
    pandas.DataFrame
        Columns ``Neighborhood``, ``Latitude`` and ``Longitude`` with one row
        per grid point, ordered by latitude index and then longitude index.
    """
    # Fall back to the notebook-level grid configuration when not overridden.
    lat_steps = LAT_STEPS if lat_steps is None else lat_steps
    lon_steps = LON_STEPS if lon_steps is None else lon_steps
    lat_stepsize = LAT_STEPSIZE if lat_stepsize is None else lat_stepsize
    lon_stepsize = LON_STEPSIZE if lon_stepsize is None else lon_stepsize

    # The longitude axis iterates over lon_steps (the earlier version reused
    # the latitude count, which is only correct for square grids).
    rows = [
        ('%s %s,%s' % (CENTER_LABEL, lat, lon),
         CENTER_LAT + ((lat - (lat_steps / 2.0)) * lat_stepsize),
         CENTER_LON + ((lon - (lon_steps / 2.0)) * lon_stepsize))
        for lat in range(lat_steps)
        for lon in range(lon_steps)
    ]

    # Build the frame in one go rather than appending row by row.
    neighborhoods = pd.DataFrame(rows, columns=['Neighborhood', 'Latitude', 'Longitude'])
    return neighborhoods.astype({'Latitude': 'float64', 'Longitude': 'float64'})

def createNormalizeWeightDF(toronto_venues):
    """Turn a venue listing into per-neighborhood, min-max scaled category totals.

    Parameters
    ----------
    toronto_venues : pandas.DataFrame
        Needs the columns ``'Venue Category'`` and ``'Neighborhood'``.

    Returns
    -------
    pandas.DataFrame
        ``Neighborhood`` first, followed by one column per venue category
        holding the summed indicator values after scaling with a default
        ``MinMaxScaler``.
    """
    # One indicator column per venue category, named exactly like the category.
    category_flags = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")
    category_flags['Neighborhood'] = toronto_venues['Neighborhood']

    # Rotate the column that currently sits last to the front.
    column_names = list(category_flags.columns)
    category_flags = category_flags[column_names[-1:] + column_names[:-1]]

    # Sum the indicators per neighborhood.
    per_neighborhood = category_flags.groupby('Neighborhood').sum().reset_index()

    # Scale every category column; the Neighborhood column is left untouched.
    value_columns = per_neighborhood.columns[1:]
    per_neighborhood[value_columns] = preprocessing.MinMaxScaler().fit_transform(per_neighborhood[value_columns])
    return per_neighborhood

def createMap(latitude, longitude,neighborhoods_merged):
    """Draw one coloured rectangle per grid point on a folium map.

    Parameters
    ----------
    latitude, longitude : float
        Centre of the initial map view.
    neighborhoods_merged : pandas.DataFrame
        Needs the columns ``Latitude``, ``Longitude``, ``Neighborhood`` and
        ``Score``. A NaN score is drawn as 0.

    Returns
    -------
    folium.Map
        Map with one rectangle per row, coloured by its score.

    Notes
    -----
    Rectangle sizes come from the notebook-level ``LAT_STEPSIZE`` and
    ``LON_STEPSIZE``. Unused colour-palette locals were removed; the drawn
    output is unchanged.
    """
    map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

    # Colour scale covering the value range 0..maxColors.
    maxColors = 101
    colorscale = branca.colormap.linear.YlOrRd_09.scale(0, maxColors)

    # Half the grid spacing: each rectangle is centred on its grid point.
    half_lat = LAT_STEPSIZE/2
    half_lon = LON_STEPSIZE/2

    for lat, lon, poi, cluster in zip(neighborhoods_merged['Latitude'], neighborhoods_merged['Longitude'], neighborhoods_merged['Neighborhood'], neighborhoods_merged['Score']):
        # The popup text is built from the raw score, before NaN handling.
        label = folium.Popup(str(poi) + ' Cluster ' + str(cluster) + '(' + str(lat) + ',' + str(lon) + ')' , parse_html=True)
        if math.isnan(cluster):
            cluster = 0
        cluster = int(cluster)
        shade = colorscale(cluster)
        # NOTE(review): `radius` is kept as originally passed; confirm whether
        # it has any effect on a rectangle.
        folium.Rectangle(
            [[float(lat)-half_lat, float(lon)-half_lon], [float(lat)+half_lat, float(lon)+half_lon]],
            radius=5,
            popup=label,
            color=shade,
            fill=True,
            fill_color=shade,
            fill_opacity=0.7).add_to(map_clusters)
    return map_clusters

def prepareUserProfile (df_normalized):
    """Create an all-zero preference profile, one row per category column.

    Parameters
    ----------
    df_normalized : pandas.DataFrame
        Frame whose first column is skipped and whose remaining column names
        become the ``LocationType`` entries.

    Returns
    -------
    pandas.DataFrame
        Columns ``LocationType`` and ``Score`` (in that order) on a default
        integer index, with every score set to 0.0.
    """
    entries = df_normalized.columns[1:].tolist()
    # Pin the column order explicitly: the scoring helpers address the score
    # as column position 1, and the order derived from a dict differs between
    # Python/pandas versions.
    return pd.DataFrame(
        {'LocationType': entries, 'Score': np.zeros(len(entries))},
        columns=['LocationType', 'Score'],
    )

def addScoreToUserProfile(userProfile,locationType,score):
    """Set the score of every profile row whose type contains ``locationType``.

    Parameters
    ----------
    userProfile : pandas.DataFrame
        Frame with the columns ``LocationType`` and ``Score``; modified in
        place.
    locationType : str
        Case-sensitive text matched literally as a substring, so
        ``'Restaurant'`` also selects ``'Thai Restaurant'``.
    score : number
        Value written to the ``Score`` column of each matching row.

    Returns
    -------
    None
    """
    # regex=False: user text such as 'C++' must not be parsed as a pattern.
    matches = userProfile['LocationType'].str.contains(locationType, regex=False)
    # Label-based write: independent of column order and index values.
    userProfile.loc[matches, 'Score'] = score

def finalizeScoredProfile(userProfile, df_normalized):
    """Add a 0-100 ``Score`` column weighting each category by user preference.

    Parameters
    ----------
    userProfile : pandas.DataFrame
        Frame with a ``Score`` column; row *i* holds the weight applied to the
        *i*-th category column of ``df_normalized``.
    df_normalized : pandas.DataFrame
        First column is an identifier, the rest are numeric category columns.
        Modified in place: ``Score`` is inserted as the second column.

    Returns
    -------
    pandas.DataFrame
        The same ``df_normalized`` object. Scores are divided by their maximum
        and multiplied by 100; when the maximum is 0 they are left at 0
        instead of becoming NaN.

    Raises
    ------
    ValueError
        If ``df_normalized`` already has a ``Score`` column.
    """
    category_values = df_normalized.iloc[:, 1:].values.astype(float)
    # Read the weights by column name, not position, and use as many as there
    # are category columns.
    weights = userProfile['Score'].values.astype(float)[:category_values.shape[1]]

    # Weighted sum over all category columns for every row.
    scores = category_values.dot(weights)
    df_normalized.insert(1, 'Score', scores)

    top = df_normalized['Score'].max()
    if top != 0:
        df_normalized['Score'] = (df_normalized['Score'] / top) * 100
    return df_normalized

def fetchDataFromFQ(neighborhoods, radius=500):
    """Fetch the venues around every grid point via ``getNearbyVenues``.

    Parameters
    ----------
    neighborhoods : pandas.DataFrame
        Needs the columns ``Neighborhood``, ``Latitude`` and ``Longitude``.
    radius : number, optional
        Search radius forwarded to ``getNearbyVenues``. The default of 500 is
        that function's own default, so existing calls behave as before.

    Returns
    -------
    pandas.DataFrame
        Whatever ``getNearbyVenues`` returns for these points.
    """
    return getNearbyVenues(
        names=neighborhoods['Neighborhood'],
        latitudes=neighborhoods['Latitude'],
        longitudes=neighborhoods['Longitude'],
        radius=radius,
    )

def createPreferenceMap(neighborhoods, venues, userPreferences,CENTER_LABEL,CENTER_LAT,CENTER_LON):
    """Score every grid point against the user's preferences and map the result.

    Parameters
    ----------
    neighborhoods : pandas.DataFrame
        Grid points; joined with the scores on the ``Neighborhood`` column.
    venues : pandas.DataFrame
        Venue listing passed to ``createNormalizeWeightDF``.
    userPreferences : iterable
        Entries whose element 0 is a location-type text and element 1 its score.
    CENTER_LABEL : str
        Accepted but not used by this function.
    CENTER_LAT, CENTER_LON : float
        Centre of the map passed to ``createMap``.

    Returns
    -------
    The map object returned by ``createMap``.
    """
    category_weights = createNormalizeWeightDF(venues)
    profile = prepareUserProfile(category_weights)

    # Transfer every stated preference into the profile.
    for preference in userPreferences:
        addScoreToUserProfile(profile, preference[0], preference[1])

    scored = finalizeScoredProfile(profile, category_weights)

    # Attach the scores to the grid points, matching on the neighborhood label.
    scores_by_label = scored.set_index('Neighborhood')
    grid_with_scores = neighborhoods.join(scores_by_label, on='Neighborhood')
    return createMap(CENTER_LAT, CENTER_LON, grid_with_scores)


## Next, fetch the location-based data from Foursquare

In [19]:
# City centres as obtained from https://simplemaps.com/data/ca-cities:
#   ['Toronto', '43.65', '-79.38']
#   ['Montréal', '45.5', '-73.58']
#   ['Vancouver', '49.25', '-123.13']
#
# Alternative centre (Toronto):
#   CENTER_LABEL, CENTER_LAT, CENTER_LON = 'Toronto', 43.70, -79.43

CENTER_LABEL, CENTER_LAT, CENTER_LON = 'Montréal', 45.5, -73.58

# Build the grid of sample points around the centre, then fetch the venues
# around each of them (one API request per grid point).
neighborhoods = createLocationDF(CENTER_LABEL, CENTER_LAT, CENTER_LON)
venues = fetchDataFromFQ(neighborhoods)
venues

Montréal 0,0
Montréal 0,1
Montréal 0,2
Montréal 0,3
Montréal 0,4
Montréal 0,5
Montréal 0,6
Montréal 0,7
Montréal 0,8
Montréal 0,9
Montréal 1,0
Montréal 1,1
Montréal 1,2
Montréal 1,3
Montréal 1,4
Montréal 1,5
Montréal 1,6
Montréal 1,7
Montréal 1,8
Montréal 1,9
Montréal 2,0
Montréal 2,1
Montréal 2,2
Montréal 2,3
Montréal 2,4
Montréal 2,5
Montréal 2,6
Montréal 2,7
Montréal 2,8
Montréal 2,9
Montréal 3,0
Montréal 3,1
Montréal 3,2
Montréal 3,3
Montréal 3,4
Montréal 3,5
Montréal 3,6
Montréal 3,7
Montréal 3,8
Montréal 3,9
Montréal 4,0
Montréal 4,1
Montréal 4,2
Montréal 4,3
Montréal 4,4
Montréal 4,5
Montréal 4,6
Montréal 4,7
Montréal 4,8
Montréal 4,9
Montréal 5,0
Montréal 5,1
Montréal 5,2
Montréal 5,3
Montréal 5,4
Montréal 5,5
Montréal 5,6
Montréal 5,7
Montréal 5,8
Montréal 5,9
Montréal 6,0
Montréal 6,1
Montréal 6,2
Montréal 6,3
Montréal 6,4
Montréal 6,5
Montréal 6,6
Montréal 6,7
Montréal 6,8
Montréal 6,9
Montréal 7,0
Montréal 7,1
Montréal 7,2
Montréal 7,3
Montréal 7,4
Montréal 7,5
Montréal 7,6

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Montréal 0,0",45.35,-73.73,Morsels By Mark,45.351379,-73.734732,Dessert Shop
1,"Montréal 0,0",45.35,-73.73,Plomberie Michel Labelle Inc,45.350510,-73.735576,Construction & Landscaping
2,"Montréal 0,8",45.35,-73.49,Montreal South KOA,45.345979,-73.489062,Campground
3,"Montréal 1,0",45.38,-73.73,Salsa Verdun - Bord de l'eau,45.377136,-73.731081,Dance Studio
4,"Montréal 1,4",45.38,-73.61,Boutique dollar et plus,45.378564,-73.614433,Department Store
5,"Montréal 1,6",45.38,-73.55,Cinéma Cineplex Odeon Delson,45.381242,-73.550163,Multiplex
6,"Montréal 1,6",45.38,-73.55,Tim Hortons,45.383611,-73.548364,Coffee Shop
7,"Montréal 1,6",45.38,-73.55,Pharmaprix,45.384264,-73.549216,Pharmacy
8,"Montréal 1,6",45.38,-73.55,SAQ,45.382287,-73.548842,Liquor Store
9,"Montréal 1,7",45.38,-73.52,Parc Montcalm,45.378834,-73.520790,Soccer Field


## Let the user express their preferences as numeric scores and create the matching map
### The helper function `createPreferenceMap` hides all the internal data cleaning, so it is not directly visible to the user

In [33]:

# Each entry is [location-type text, score]; the text is matched against the
# venue category names by addScoreToUserProfile.
userPreferences = [
    ['Bar', 7],
    ['Café', 4],
    ['Coffee Shop', 6],
    ['Athletics & Sports', 3],
    ['Burger Joint', 4],
    ['Burrito Place', 5],
    ['Restaurant', 5],
    ['Bus', 2],
]

map_scored = createPreferenceMap(neighborhoods, venues, userPreferences, CENTER_LABEL, CENTER_LAT, CENTER_LON)


In [34]:
# Display the generated map: as the last expression of the cell, the map
# returned by createPreferenceMap is rendered inline.
map_scored


In [8]:
# Show the preference list currently held in the kernel.
# NOTE(review): the saved output of this cell does not match the list built in
# the preference cell above; re-run the notebook top to bottom to refresh it.
userPreferences

[['American Restaurant', 4], ['Bar', 11], ['Coffee Shop', 10]]

In [9]:
# Reporting: repeat the steps of createPreferenceMap one by one so that the
# intermediate frames (df_normalized, userProfile, df_scored) can be inspected.
df_normalized = createNormalizeWeightDF(venues)
userProfile = prepareUserProfile(df_normalized)

# Transfer every [location type, score] entry into the profile.
for userPrefernce in userPreferences:
    addScoreToUserProfile(userProfile, userPrefernce[0], userPrefernce[1])

df_scored = finalizeScoredProfile(userProfile, df_normalized)


In [11]:
# Preview the first rows of the scored frame: Neighborhood, Score, then one
# column per venue category.
df_scored.head()

Unnamed: 0,Neighborhood,Score,American Restaurant,Art Gallery,Asian Restaurant,Athletics & Sports,Auto Dealership,Automotive Shop,BBQ Joint,Bagel Shop,...,Theme Park Ride / Attraction,Toy / Game Store,Trail,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Weight Loss Center,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,"Toronto 0,0",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,"Toronto 1,0",5.91716,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Toronto 2,1",5.91716,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"Toronto 2,2",20.56213,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"Toronto 3,0",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [16]:
# Show the preference profile: one row per venue category with its score.
userProfile

Unnamed: 0,LocationType,Score
0,American Restaurant,4.0
1,Art Gallery,0.0
2,Asian Restaurant,0.0
3,Athletics & Sports,0.0
4,Auto Dealership,0.0
5,Automotive Shop,0.0
6,BBQ Joint,0.0
7,Bagel Shop,0.0
8,Bakery,0.0
9,Bank,0.0
