Create neighborhood dataframe from Excel – neighborhood, lat, long

In [1]:
# Import pandas library using an alias
import pandas as pd
# library to handle data in a vectorized manner
import numpy as np

# library to handle JSON files
import json
# library to handle requests
import requests
# tranform JSON file into a pandas dataframe
from pandas.io.json import json_normalize

# import geocoder
import geocoder
# convert an address into latitude and longitude values
from geopy.geocoders import Nominatim

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# map rendering library
import folium

In [2]:
df_chi_data = pd.read_excel('CCASF12010CMAP.xlsx')
df_chi_data.head()

Unnamed: 0,GEOGKEYX,GEOGNAME,LATITUDE,UNSIGNED LONGITUDE,LONGITUDE
0,GeogKey,Geog,,,
1,1,Rogers Park,42.016667,87.666667,-87.666667
2,2,West Ridge,42.0,87.683333,-87.683333
3,3,Uptown,41.966667,87.666667,-87.666667
4,4,Lincoln Square,41.966667,87.683333,-87.683333


In [3]:
df_chi_data.drop([0], axis = 0, inplace=True)
df_chi_data.drop(['UNSIGNED LONGITUDE'], axis=1, inplace=True)
df_chi_data.drop(['GEOGKEYX'], axis=1, inplace=True)
df_chi_data.head()

Unnamed: 0,GEOGNAME,LATITUDE,LONGITUDE
1,Rogers Park,42.016667,-87.666667
2,West Ridge,42.0,-87.683333
3,Uptown,41.966667,-87.666667
4,Lincoln Square,41.966667,-87.683333
5,North Center,41.95,-87.683333


Create empty venue df – neighborhood, venue name, venue id, venue lat, venue long, distance from neighborhood center, category, price

In [4]:
df_venues = pd.DataFrame(columns=['Neighborhood','VenueName','VenueID','Latitude','Longitude','Distance','Category','Price'])
df_venues

Unnamed: 0,Neighborhood,VenueName,VenueID,Latitude,Longitude,Distance,Category,Price


Loop through neighborhoods

search for venues by long/lat
use json to populate venue df

In [5]:
# define Foursquare variables
CLIENT_ID = 'CT3K4Z2AEBTWGOKQLQKZ135JJ3B44KOQTB4BMEJ4R0AXXWSD' # your Foursquare ID
CLIENT_SECRET = 'TSJKVMWFPKG2ZV4NYZQDVIR5FIYOSRICDTHHKHWSMV5JHDMZ' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

In [6]:
# create a function to repeat the same process to all the neighborhoods
def getNearbyVenues(names, latitudes, longitudes, radius, LIMIT, CATEGORY, INTENT):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        #print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&v={}&categoryId={}&radius={}&limit={}&intent={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            lat, 
            lng,
            VERSION,
            CATEGORY, 
            radius, 
            LIMIT, 
            INTENT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['venues']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            v['name'], 
            v['id'], 
            v['location']['lat'], 
            v['location']['lng'],  
            v['location']['distance'],  
            v['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Venue', 
                  'Venue ID', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Distance',
                  'Venue Category']
    
    return(nearby_venues)

In [7]:
chi_venues = getNearbyVenues(names=df_chi_data['GEOGNAME'],
                                   latitudes=df_chi_data['LATITUDE'],
                                   longitudes=df_chi_data['LONGITUDE'],
                                   radius=500,
                                   LIMIT=100, 
                                   CATEGORY = '4d4b7105d754a06374d81259',
                                   INTENT = 'browse'
                                  )
chi_venues.head()

Unnamed: 0,Neighborhood,Venue,Venue ID,Venue Latitude,Venue Longitude,Distance,Venue Category
0,Rogers Park,Charmers Cafe,5710dcf0498e87c71d20b69d,42.016164,-87.66825,142,Café
1,Rogers Park,Caribbean American Bakery,4b5dde6ef964a520fa7029e3,42.019371,-87.669705,392,Bakery
2,Rogers Park,Tjam Kitchen,5a2071ca47f876422319a3b6,42.01931,-87.66692,294,Restaurant
3,Rogers Park,Jarvis Grill,4c117c7e17002d7f4755e609,42.015989,-87.66888,198,Fast Food Restaurant
4,Rogers Park,Jamaican Bakery,51cf7f97498ee7d50a505393,42.018398,-87.669414,297,Bakery


In [8]:
chi_venues.shape

(527, 7)

Remove chain restaurants (those with more than 5 locations)

In [9]:
chi_venues_grouped = chi_venues.groupby('Venue').filter(lambda x: len(x) > 5)
chi_venues_grouped

Unnamed: 0,Neighborhood,Venue,Venue ID,Venue Latitude,Venue Longitude,Distance,Venue Category
90,Uptown,Subway,4b5b3875f964a52031ec28e3,41.965282,-87.661418,461,Sandwich Place
121,Lincoln Square,Starbucks,4aa3dfaaf964a520384420e3,41.964799,-87.685861,294,Coffee Shop
128,Lincoln Square,Potbelly Sandwich Shop,49f4c21ff964a5204b6b1fe3,41.966985,-87.687272,327,Sandwich Place
143,Lincoln Square,Dunkin',4c52d9412543a593290bfc85,41.966271,-87.688664,443,Donut Shop
166,North Center,Starbucks,54273f56498e550c0584a8bb,41.947936,-87.688509,486,Coffee Shop
169,North Center,Potbelly Sandwich Shop,542eed7c498e15b89f63ad00,41.948428,-87.688678,475,Sandwich Place
179,Lake View,Dunkin',4b5b6256f964a520aef928e3,41.954298,-87.650165,478,Donut Shop
180,Lake View,Dunkin',532488dc498e89b38c11821c,41.947083,-87.653812,452,Donut Shop
185,Lake View,Subway,4bd4d3736798ef3bbf80628d,41.94722,-87.653939,449,Sandwich Place
202,Lake View,Subway,4a42e581f964a5205da61fe3,41.951745,-87.649436,199,Sandwich Place


In [10]:
chi_venues = chi_venues[chi_venues.Venue != "Dunkin'"]
chi_venues = chi_venues[chi_venues.Venue != "Potbelly Sandwich Shop"]
chi_venues = chi_venues[chi_venues.Venue != "Starbucks"]
chi_venues = chi_venues[chi_venues.Venue != "Subway"]
chi_venues.shape

(484, 7)

Remove duplicates from venue df based on distance from neighborhood center (maybe sort by distance then keep first)

In [11]:
chi_venues.duplicated('Venue ID')
chi_venues_duplicates = chi_venues[chi_venues.duplicated(['Venue ID'])]
 
print("Duplicate venues are:", chi_venues_duplicates, sep='\n')

Duplicate venues are:
Empty DataFrame
Columns: [Neighborhood, Venue, Venue ID, Venue Latitude, Venue Longitude, Distance, Venue Category]
Index: []


Reset index after removing rows

In [15]:
chi_venues = chi_venues.reset_index(drop=True)
chi_venues.head()

Unnamed: 0,Neighborhood,Venue,Venue ID,Venue Latitude,Venue Longitude,Distance,Venue Category
0,Rogers Park,Charmers Cafe,5710dcf0498e87c71d20b69d,42.016164,-87.66825,142,Café
1,Rogers Park,Caribbean American Bakery,4b5dde6ef964a520fa7029e3,42.019371,-87.669705,392,Bakery
2,Rogers Park,Tjam Kitchen,5a2071ca47f876422319a3b6,42.01931,-87.66692,294,Restaurant
3,Rogers Park,Jarvis Grill,4c117c7e17002d7f4755e609,42.015989,-87.66888,198,Fast Food Restaurant
4,Rogers Park,Jamaican Bakery,51cf7f97498ee7d50a505393,42.018398,-87.669414,297,Bakery


In [12]:
# how many venues for each neighborhood
chi_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Venue,Venue ID,Venue Latitude,Venue Longitude,Distance,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Edgewater,31,31,31,31,31,31
Humboldt Park,15,15,15,15,15,15
Lake View,26,26,26,26,26,26
Lincoln Park,37,37,37,37,37,37
Lincoln Square,43,43,43,43,43,43
Logan Square,30,30,30,30,30,30
Lower West Side,23,23,23,23,23,23
Near North Side,45,45,45,45,45,45
Near South Side,30,30,30,30,30,30
Near West Side,16,16,16,16,16,16


In [13]:
# how many unique categories
print('There are {} uniques categories.'.format(len(chi_venues['Venue Category'].unique())))

There are 88 uniques categories.


In [65]:
chi_venues['Venue ID'].isnull().sum().sum()

0

In [68]:
chi_venues.to_excel('ChicagoVenues.xlsx')

Use one-hot encoding on categories and load into new df (copy venue df first – 1he)

In [71]:
# one hot encoding
chi_onehot_cat = pd.get_dummies(chi_venues[['Venue Category']], prefix="", prefix_sep="")
chi_onehot_cat.head()

Unnamed: 0,Afghan Restaurant,African Restaurant,American Restaurant,Arcade,Argentinian Restaurant,Asian Restaurant,BBQ Joint,Bagel Shop,Bakery,Bar,...,Taco Place,Tapas Restaurant,Tea Room,Thai Restaurant,Theme Restaurant,Ukrainian Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Whisky Bar,Wings Joint
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0


In [72]:
chi_onehot_cat.shape

(484, 88)

In [73]:
# add venue ID column back to dataframe
chi_onehot_cat['Venue ID'] = chi_venues['Venue ID']
chi_onehot_cat.head()

Unnamed: 0,Afghan Restaurant,African Restaurant,American Restaurant,Arcade,Argentinian Restaurant,Asian Restaurant,BBQ Joint,Bagel Shop,Bakery,Bar,...,Tapas Restaurant,Tea Room,Thai Restaurant,Theme Restaurant,Ukrainian Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Whisky Bar,Wings Joint,Venue ID
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,5710dcf0498e87c71d20b69d
1,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,4b5dde6ef964a520fa7029e3
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,5a2071ca47f876422319a3b6
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,4c117c7e17002d7f4755e609
4,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,51cf7f97498ee7d50a505393


In [74]:
chi_onehot_cat.shape

(484, 89)

In [75]:
# move venue ID to the first column
fixed_columns = [chi_onehot_cat.columns[-1]] + list(chi_onehot_cat.columns[:-1])
chi_onehot_cat = chi_onehot_cat[fixed_columns]
chi_onehot_cat.head()

Unnamed: 0,Venue ID,Afghan Restaurant,African Restaurant,American Restaurant,Arcade,Argentinian Restaurant,Asian Restaurant,BBQ Joint,Bagel Shop,Bakery,...,Taco Place,Tapas Restaurant,Tea Room,Thai Restaurant,Theme Restaurant,Ukrainian Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Whisky Bar,Wings Joint
0,5710dcf0498e87c71d20b69d,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,4b5dde6ef964a520fa7029e3,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
2,5a2071ca47f876422319a3b6,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4c117c7e17002d7f4755e609,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,51cf7f97498ee7d50a505393,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0


In [76]:
chi_onehot_cat.shape

(484, 89)

In [77]:
chi_onehot_cat.to_excel('ChiCatOneHot.xlsx')

In [78]:
chi_onehot_cat['Venue ID'].isnull().sum().sum()

0

In [79]:
null_cat = chi_onehot_cat[chi_onehot_cat['Venue ID'].isnull()]
null_cat

Unnamed: 0,Venue ID,Afghan Restaurant,African Restaurant,American Restaurant,Arcade,Argentinian Restaurant,Asian Restaurant,BBQ Joint,Bagel Shop,Bakery,...,Taco Place,Tapas Restaurant,Tea Room,Thai Restaurant,Theme Restaurant,Ukrainian Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Whisky Bar,Wings Joint


In [70]:
chi_onehot_cat.dropna(axis='index', how='any', subset=['Venue ID'], inplace=True)
chi_onehot_cat.shape

(443, 89)

Use one-hot encoding on neighborhoods

In [18]:
# one hot encoding
chi_onehot_nhood = pd.get_dummies(chi_venues[['Neighborhood']], prefix="", prefix_sep="")
chi_onehot_nhood.head()

Unnamed: 0,Edgewater,Humboldt Park,Lake View,Lincoln Park,Lincoln Square,Logan Square,Lower West Side,Near North Side,Near South Side,Near West Side,North Center,Rogers Park,The Loop,Uptown,West Ridge,West Town
0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0


In [19]:
# add venue ID column back to dataframe
chi_onehot_nhood['Venue ID'] = chi_venues['Venue ID']
# move venue ID to the first column
fixed_columns = [chi_onehot_nhood.columns[-1]] + list(chi_onehot_nhood.columns[:-1])
chi_onehot_nhood = chi_onehot_nhood[fixed_columns]

chi_onehot_nhood.head()

Unnamed: 0,Venue ID,Edgewater,Humboldt Park,Lake View,Lincoln Park,Lincoln Square,Logan Square,Lower West Side,Near North Side,Near South Side,Near West Side,North Center,Rogers Park,The Loop,Uptown,West Ridge,West Town
0,5710dcf0498e87c71d20b69d,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
1,4b5dde6ef964a520fa7029e3,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
2,5a2071ca47f876422319a3b6,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
3,4c117c7e17002d7f4755e609,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
4,51cf7f97498ee7d50a505393,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0


In [59]:
chi_onehot_nhood.shape

(484, 17)

Combine one-hot encoding dfs for categories and neighborhoods

In [80]:
chi_onehot_all = pd.merge(chi_onehot_nhood, chi_onehot_cat, on='Venue ID', how='outer')
chi_onehot_all.head()

Unnamed: 0,Venue ID,Edgewater,Humboldt Park,Lake View,Lincoln Park,Lincoln Square,Logan Square,Lower West Side,Near North Side,Near South Side,...,Taco Place,Tapas Restaurant,Tea Room,Thai Restaurant,Theme Restaurant,Ukrainian Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Whisky Bar,Wings Joint
0,5710dcf0498e87c71d20b69d,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,4b5dde6ef964a520fa7029e3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,5a2071ca47f876422319a3b6,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4c117c7e17002d7f4755e609,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,51cf7f97498ee7d50a505393,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [81]:
chi_onehot_all.shape

(484, 105)

Get user ratings of a venue

Loop thru venue df

search users who liked a venue ??
use json to populate user df

In [25]:
# create a function to get users who liked each venue
def getVenueLikes(venueids):
    
    venues_list=[]
    for venue_id in venueids:
        #print(venue_id)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/{}/likes?client_id={}&client_secret={}&v={}'.format(
            venue_id, 
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION)
            
        # make the GET request
        results = requests.get(url).json()['response']['likes']
        
        # return price for each venue
        if "items" in results:
            for i in results['items']:
                venues_list.append([(venue_id, i['id'], 1) ])
        else:
            print("No likes for venue {}.".format(venue_id))

    venue_likes = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    venue_likes.columns = ['Venue ID', 'User ID', 'Rating']
    
    return(venue_likes)

In [82]:
chi_venue_likes = getVenueLikes(venueids=chi_venues['Venue ID'])
chi_venue_likes.head()

No likes for venue 51cf7f97498ee7d50a505393.
No likes for venue 501c2f2ee4b08947b4b93ad9.
No likes for venue 4f3235f519836c91c7c11167.
No likes for venue 55ad8b02498ee9a4d38f3437.
No likes for venue 4e16e1bdfa76a474496f5683.
No likes for venue 5765f851498ec7182331d0cc.
No likes for venue 53bb6e86498e01e11c501cb4.
No likes for venue 4f32480719836c91c7c83b66.
No likes for venue 5223916d11d27dd32e355ce5.
No likes for venue 50270532e4b0e6861f0acf73.
No likes for venue 56ae52ef498eed4822e4a4e4.
No likes for venue 4d8a63d04757721ee437b669.
No likes for venue 4f2741c3e4b067a32993f36f.
No likes for venue 55b220d4498ec1a2f280dc45.
No likes for venue 5b65327bcb3fd200391472e5.
No likes for venue 4b453428f964a520e10726e3.
No likes for venue 50f22f89e4b01e115acf4e98.
No likes for venue 4f44903019836ed00194e7fa.
No likes for venue 54a35873498e9be465ba3336.
No likes for venue 53d5b1d8498e5d7ed7628d9a.
No likes for venue 5554daea498e2644fc6f6f5c.
No likes for venue 4b8b1e8ff964a520c29332e3.
No likes f

Unnamed: 0,Venue ID,User ID,Rating
0,5710dcf0498e87c71d20b69d,515900,1
1,5710dcf0498e87c71d20b69d,229698,1
2,5710dcf0498e87c71d20b69d,51646211,1
3,4b5dde6ef964a520fa7029e3,50475374,1
4,4b5dde6ef964a520fa7029e3,2967333,1


In [83]:
chi_venue_likes.shape

(901, 3)

In [29]:
chi_venue_likes.groupby('User ID').filter(lambda x: len(x) > 1) 

Unnamed: 0,Venue ID,User ID,Rating
2,5710dcf0498e87c71d20b69d,51646211,1
8,4c117c7e17002d7f4755e609,15833163,1
13,561155b9498ec959cc2c09f1,38216300,1
14,574d9666498ea9ee06354ffb,2118380,1
15,5150cea5e4b0b55ccb5056f8,2118380,1
28,4c43b571429a0f4769d6471e,439239999,1
33,4b08e160f964a520171323e3,26541006,1
34,4b08e160f964a520171323e3,158962747,1
36,4c3921280a71c9b62a0e42c9,407913448,1
40,4c584204d12a20a13a8a68bd,439239999,1


Pick a test user.
Save test user ratings from user ratings to new df.

In [30]:
test_user_ratings = chi_venue_likes[chi_venue_likes['User ID'] == '30596207'].drop('User ID', 1)
test_user_ratings

Unnamed: 0,Venue ID,Rating
173,5786fed8498e6ad5339dc258,1
339,4c6899d9428a0f47867e001b,1
862,4c8cedadb118b7131c921d01,1
894,56f1adc5498eeb31c9ecfdef,1


In [31]:
#Resetting the index to avoid future issues
test_user_ratings = test_user_ratings.reset_index(drop=True)

In [32]:
test_user_ratings

Unnamed: 0,Venue ID,Rating
0,5786fed8498e6ad5339dc258,1
1,4c6899d9428a0f47867e001b,1
2,4c8cedadb118b7131c921d01,1
3,56f1adc5498eeb31c9ecfdef,1


Create new venue df of those the test user has rated (based on one-hot encoding df).

In [33]:
test_user_likes =  chi_onehot_all[chi_onehot_all['Venue ID'].isin(test_user_ratings['Venue ID'].tolist())]
test_user_likes

Unnamed: 0,Venue ID,Edgewater,Humboldt Park,Lake View,Lincoln Park,Lincoln Square,Logan Square,Lower West Side,Near North Side,Near South Side,...,Taco Place,Tapas Restaurant,Tea Room,Thai Restaurant,Theme Restaurant,Ukrainian Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Whisky Bar,Wings Joint
132,5786fed8498e6ad5339dc258,0,0,0,0,1,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
191,4c6899d9428a0f47867e001b,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
428,4c8cedadb118b7131c921d01,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
440,56f1adc5498eeb31c9ecfdef,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


Drop unnecessary columns

In [34]:
#Resetting the index to avoid future issues
test_user_likes = test_user_likes.reset_index(drop=True)
#Dropping unnecessary issues due to save memory and to avoid issues
test_user_features = test_user_likes.drop('Venue ID', 1)
test_user_features

Unnamed: 0,Edgewater,Humboldt Park,Lake View,Lincoln Park,Lincoln Square,Logan Square,Lower West Side,Near North Side,Near South Side,Near West Side,...,Taco Place,Tapas Restaurant,Tea Room,Thai Restaurant,Theme Restaurant,Ukrainian Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Whisky Bar,Wings Joint
0,0,0,0,0,1,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
1,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


Create vector of test user’s ratings

In [35]:
test_user_ratings['Rating']

0    1
1    1
2    1
3    1
Name: Rating, dtype: int64

Use dot product of vector and test user’s ratings to build test user profile

In [36]:
#Dot produt to get weights
userProfile = test_user_features.transpose().dot(test_user_ratings['Rating'])
#The user profile
userProfile.shape

Edgewater                          2
Humboldt Park                      0
Lake View                          0
Lincoln Park                       1
Lincoln Square                     1
Logan Square                       0
Lower West Side                    0
Near North Side                    0
Near South Side                    0
Near West Side                     0
North Center                       0
Rogers Park                        0
The Loop                           0
Uptown                             0
West Ridge                         0
West Town                          0
Afghan Restaurant                  0
African Restaurant                 0
American Restaurant                0
Arcade                             0
Argentinian Restaurant             0
Asian Restaurant                   0
BBQ Joint                          0
Bagel Shop                         0
Bakery                             0
Bar                                0
Bookstore                          0
B

Let's start by extracting the category table from the original dataframe:

In [37]:
#Now let's get the features of every restaurant in our original dataframe
features_df = chi_onehot_all.set_index(chi_onehot_all['Venue ID'])
features_df.head()

Unnamed: 0_level_0,Venue ID,Edgewater,Humboldt Park,Lake View,Lincoln Park,Lincoln Square,Logan Square,Lower West Side,Near North Side,Near South Side,...,Taco Place,Tapas Restaurant,Tea Room,Thai Restaurant,Theme Restaurant,Ukrainian Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Whisky Bar,Wings Joint
Venue ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
5710dcf0498e87c71d20b69d,5710dcf0498e87c71d20b69d,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4b5dde6ef964a520fa7029e3,4b5dde6ef964a520fa7029e3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5a2071ca47f876422319a3b6,5a2071ca47f876422319a3b6,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4c117c7e17002d7f4755e609,4c117c7e17002d7f4755e609,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
51cf7f97498ee7d50a505393,51cf7f97498ee7d50a505393,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


Drop unnecessary columns from 1he df

In [38]:
#And drop the unnecessary information
features_df = features_df.drop('Venue ID', 1)
features_df.head()

Unnamed: 0_level_0,Edgewater,Humboldt Park,Lake View,Lincoln Park,Lincoln Square,Logan Square,Lower West Side,Near North Side,Near South Side,Near West Side,...,Taco Place,Tapas Restaurant,Tea Room,Thai Restaurant,Theme Restaurant,Ukrainian Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Whisky Bar,Wings Joint
Venue ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
5710dcf0498e87c71d20b69d,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4b5dde6ef964a520fa7029e3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5a2071ca47f876422319a3b6,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4c117c7e17002d7f4755e609,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
51cf7f97498ee7d50a505393,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


With the input's profile and the complete list of venues and their categories/neighborhoods in hand, we're going to take the weighted average of every venue based on the input profile and recommend the top ten restuarants that most satisfy it.

In [102]:
#Multiply the features by the weights and then take the weighted average
recommendationTable_df = ((features_df*userProfile).sum(axis=1))/(userProfile.sum())
#Sort our recommendations in descending order
recommendationTable_df = recommendationTable_df.sort_values(ascending=False)
recommendationTable_df.head()

Venue ID
4df7f3f118a801cd9f119204    0.25
56316c6a498ebdd0e0e529e4    0.25
5ba6563e33e118002c8eb30a    0.25
4176fc00f964a520b51d1fe3    0.25
4b59c45df964a520fa9628e3    0.25
dtype: float64

In [103]:
#The final recommendation table
recommended_venues = chi_venues.loc[chi_venues['Venue ID'].isin(recommendationTable_df.head(5).keys())]
recommended_venues

Unnamed: 0,Neighborhood,Venue,Venue ID,Venue Latitude,Venue Longitude,Distance,Venue Category
232,Near North Side,Happy Camper,56316c6a498ebdd0e0e529e4,41.904306,-87.63437,486,Pizza Place
239,Near North Side,Domino's Pizza,4b59c45df964a520fa9628e3,41.903763,-87.633175,419,Pizza Place
240,Near North Side,Tavern on Rush,4176fc00f964a520b51d1fe3,41.901684,-87.628,480,American Restaurant
248,Near North Side,Walton Street Kitchen + Bar,5ba6563e33e118002c8eb30a,41.899826,-87.628329,415,American Restaurant
249,Near North Side,Farmhouse Chicago,4df7f3f118a801cd9f119204,41.896726,-87.635361,401,American Restaurant


Add suggested ratings

In [104]:
recommended_ratings = pd.merge(recommended_venues, recommendationTable_df.rename('Rating'), left_on='Venue ID', right_index=True)
recommended_ratings

Unnamed: 0,Neighborhood,Venue,Venue ID,Venue Latitude,Venue Longitude,Distance,Venue Category,Rating
232,Near North Side,Happy Camper,56316c6a498ebdd0e0e529e4,41.904306,-87.63437,486,Pizza Place,0.25
239,Near North Side,Domino's Pizza,4b59c45df964a520fa9628e3,41.903763,-87.633175,419,Pizza Place,0.25
240,Near North Side,Tavern on Rush,4176fc00f964a520b51d1fe3,41.901684,-87.628,480,American Restaurant,0.25
248,Near North Side,Walton Street Kitchen + Bar,5ba6563e33e118002c8eb30a,41.899826,-87.628329,415,American Restaurant,0.25
249,Near North Side,Farmhouse Chicago,4df7f3f118a801cd9f119204,41.896726,-87.635361,401,American Restaurant,0.25


Drop unnecessary columns

In [43]:
recommended_venues = recommended_ratings.drop('Venue ID', 1).drop('Distance', 1)
recommended_venues

Unnamed: 0,Neighborhood,Venue,Venue Latitude,Venue Longitude,Venue Category,Rating
129,Lincoln Square,The Book Cellar,41.967656,-87.687868,Bookstore,0.25
220,Lincoln Park,Nookies Too Restaurant,41.920442,-87.648653,Breakfast Spot,0.25
453,Edgewater,Replay Beer & Bourbon,41.979761,-87.668434,Whisky Bar,0.25
454,Edgewater,Colectivo Coffee Roasters,41.980619,-87.668247,Coffee Shop,0.25
456,Edgewater,Calo Ristorante,41.979427,-87.668237,Italian Restaurant,0.25


Plot the top 5

In [44]:
# get coordinates for Chicago
address = 'Chicago, IL'

geolocator = Nominatim(user_agent="chi_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Chicago are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Chicago are 41.8755616, -87.6244212.


In [45]:
# create map
recommendation_map = folium.Map(location=[latitude, longitude], zoom_start=11)

In [46]:
# add markers to the map
for lat, lon, name, cat, rat in zip(recommended_venues['Venue Latitude'], recommended_venues['Venue Longitude'], recommended_venues['Venue'], recommended_venues['Venue Category'], recommended_venues['Rating']):
    label = folium.Popup(str(name) + ' (' + str(cat) + ', ' + str(rat) + ')', parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        fill=True,
        fill_opacity=0.7).add_to(recommendation_map)
       
recommendation_map

Get recommendations for a different user

In [84]:
test_user_ratings = chi_venue_likes[chi_venue_likes['User ID'] == '26541006'].drop('User ID', 1)
test_user_ratings

Unnamed: 0,Venue ID,Rating
33,4b08e160f964a520171323e3,1
461,4df7f3f118a801cd9f119204,1
522,4aef2171f964a520ded521e3,1
899,4b0c18f9f964a520cf3623e3,1


In [85]:
#Resetting the index to avoid future issues
test_user_ratings = test_user_ratings.reset_index(drop=True)
test_user_ratings

Unnamed: 0,Venue ID,Rating
0,4b08e160f964a520171323e3,1
1,4df7f3f118a801cd9f119204,1
2,4aef2171f964a520ded521e3,1
3,4b0c18f9f964a520cf3623e3,1


In [86]:
test_user_likes =  chi_onehot_all[chi_onehot_all['Venue ID'].isin(test_user_ratings['Venue ID'].tolist())]
test_user_likes

Unnamed: 0,Venue ID,Edgewater,Humboldt Park,Lake View,Lincoln Park,Lincoln Square,Logan Square,Lower West Side,Near North Side,Near South Side,...,Tapas Restaurant,Tea Room,Thai Restaurant,Theme Restaurant,Ukrainian Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Whisky Bar,Wings Joint,Rating
0,4b08e160f964a520171323e3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
1,4df7f3f118a801cd9f119204,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,1
2,4aef2171f964a520ded521e3,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,1
3,4b0c18f9f964a520cf3623e3,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1


In [87]:
#Resetting the index to avoid future issues
test_user_likes = test_user_likes.reset_index(drop=True)
#Dropping unnecessary issues due to save memory and to avoid issues
test_user_features = test_user_likes.drop('Venue ID', 1)
test_user_features

Unnamed: 0,Edgewater,Humboldt Park,Lake View,Lincoln Park,Lincoln Square,Logan Square,Lower West Side,Near North Side,Near South Side,Near West Side,...,Tapas Restaurant,Tea Room,Thai Restaurant,Theme Restaurant,Ukrainian Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Whisky Bar,Wings Joint,Rating
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
1,0,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,1
2,0,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,1
3,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1


In [88]:
test_user_ratings['Rating']

0    1
1    1
2    1
3    1
Name: Rating, dtype: int64

In [101]:
#Dot produt to get weights
userProfile = test_user_features.transpose().dot(test_user_ratings['Rating'])
#The user profile
userProfile.shape

(105,)

In [90]:
#Now let's get the features of every restaurant in our original dataframe
features_df = chi_onehot_all.set_index(chi_onehot_all['Venue ID'])
features_df.head()

Unnamed: 0_level_0,Venue ID,Edgewater,Humboldt Park,Lake View,Lincoln Park,Lincoln Square,Logan Square,Lower West Side,Near North Side,Near South Side,...,Taco Place,Tapas Restaurant,Tea Room,Thai Restaurant,Theme Restaurant,Ukrainian Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Whisky Bar,Wings Joint
Venue ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
5710dcf0498e87c71d20b69d,5710dcf0498e87c71d20b69d,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4b5dde6ef964a520fa7029e3,4b5dde6ef964a520fa7029e3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5a2071ca47f876422319a3b6,5a2071ca47f876422319a3b6,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4c117c7e17002d7f4755e609,4c117c7e17002d7f4755e609,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
51cf7f97498ee7d50a505393,51cf7f97498ee7d50a505393,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [91]:
#And drop the unnecessary information
features_df = features_df.drop('Venue ID', 1)
features_df.head()

Unnamed: 0_level_0,Edgewater,Humboldt Park,Lake View,Lincoln Park,Lincoln Square,Logan Square,Lower West Side,Near North Side,Near South Side,Near West Side,...,Taco Place,Tapas Restaurant,Tea Room,Thai Restaurant,Theme Restaurant,Ukrainian Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Whisky Bar,Wings Joint
Venue ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
5710dcf0498e87c71d20b69d,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4b5dde6ef964a520fa7029e3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5a2071ca47f876422319a3b6,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4c117c7e17002d7f4755e609,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
51cf7f97498ee7d50a505393,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [92]:
#Multiply the features by the weights and then take the weighted average
recommendationTable_df = ((features_df*userProfile).sum(axis=1))/(userProfile.sum())
#Sort our recommendations in descending order
recommendationTable_df = recommendationTable_df.sort_values(ascending=False)
recommendationTable_df.head()

Venue ID
5710dcf0498e87c71d20b69d    0.000000
4b5dde6ef964a520fa7029e3    0.000000
5a2071ca47f876422319a3b6    0.000000
4c117c7e17002d7f4755e609    0.083333
51cf7f97498ee7d50a505393    0.000000
dtype: float64

In [94]:
#The final recommendation table
recommended_venues = chi_venues.loc[chi_venues['Venue ID'].isin(recommendationTable_df.head(5).keys())]
recommended_venues

Unnamed: 0,Neighborhood,Venue,Venue ID,Venue Latitude,Venue Longitude,Distance,Venue Category
232,Near North Side,Happy Camper,56316c6a498ebdd0e0e529e4,41.904306,-87.63437,486,Pizza Place
233,Near North Side,Lou Malnati's Pizzeria,4e026a67e4cdbd9a51745782,41.902591,-87.628703,479,Pizza Place
237,Near North Side,LuxBar,4a176a52f964a52058791fe3,41.901566,-87.627786,491,American Restaurant
239,Near North Side,Domino's Pizza,4b59c45df964a520fa9628e3,41.903763,-87.633175,419,Pizza Place
240,Near North Side,Tavern on Rush,4176fc00f964a520b51d1fe3,41.901684,-87.628,480,American Restaurant
248,Near North Side,Walton Street Kitchen + Bar,5ba6563e33e118002c8eb30a,41.899826,-87.628329,415,American Restaurant
249,Near North Side,Farmhouse Chicago,4df7f3f118a801cd9f119204,41.896726,-87.635361,401,American Restaurant
263,Near North Side,Pizano's Pizza & Pasta,4aff74bff964a520c43822e3,41.898733,-87.628264,443,Pizza Place
267,Near North Side,Local’84,5b43ef85cad1b6002c06623d,41.901709,-87.631601,238,Café
273,Near North Side,BIG & little's Restaurant,4aef2171f964a520ded521e3,41.898356,-87.637094,361,Fast Food Restaurant


In [95]:
recommended_ratings = pd.merge(recommended_venues, recommendationTable_df.rename('Rating'), left_on='Venue ID', right_index=True)
recommended_ratings

Unnamed: 0,Neighborhood,Venue,Venue ID,Venue Latitude,Venue Longitude,Distance,Venue Category,Rating
232,Near North Side,Happy Camper,56316c6a498ebdd0e0e529e4,41.904306,-87.63437,486,Pizza Place,0.25
233,Near North Side,Lou Malnati's Pizzeria,4e026a67e4cdbd9a51745782,41.902591,-87.628703,479,Pizza Place,0.25
237,Near North Side,LuxBar,4a176a52f964a52058791fe3,41.901566,-87.627786,491,American Restaurant,0.25
239,Near North Side,Domino's Pizza,4b59c45df964a520fa9628e3,41.903763,-87.633175,419,Pizza Place,0.25
240,Near North Side,Tavern on Rush,4176fc00f964a520b51d1fe3,41.901684,-87.628,480,American Restaurant,0.25
248,Near North Side,Walton Street Kitchen + Bar,5ba6563e33e118002c8eb30a,41.899826,-87.628329,415,American Restaurant,0.25
249,Near North Side,Farmhouse Chicago,4df7f3f118a801cd9f119204,41.896726,-87.635361,401,American Restaurant,0.25
263,Near North Side,Pizano's Pizza & Pasta,4aff74bff964a520c43822e3,41.898733,-87.628264,443,Pizza Place,0.25
267,Near North Side,Local’84,5b43ef85cad1b6002c06623d,41.901709,-87.631601,238,Café,0.166667
273,Near North Side,BIG & little's Restaurant,4aef2171f964a520ded521e3,41.898356,-87.637094,361,Fast Food Restaurant,0.25


In [96]:
recommended_venues = recommended_ratings.drop('Venue ID', 1).drop('Distance', 1)
recommended_venues

Unnamed: 0,Neighborhood,Venue,Venue Latitude,Venue Longitude,Venue Category,Rating
232,Near North Side,Happy Camper,41.904306,-87.63437,Pizza Place,0.25
233,Near North Side,Lou Malnati's Pizzeria,41.902591,-87.628703,Pizza Place,0.25
237,Near North Side,LuxBar,41.901566,-87.627786,American Restaurant,0.25
239,Near North Side,Domino's Pizza,41.903763,-87.633175,Pizza Place,0.25
240,Near North Side,Tavern on Rush,41.901684,-87.628,American Restaurant,0.25


In [97]:
# create map
recommendation_map = folium.Map(location=[latitude, longitude], zoom_start=11)

In [100]:
# add markers to the map
for lat, lon, name, cat, rat in zip(recommended_venues['Venue Latitude'], recommended_venues['Venue Longitude'], recommended_venues['Venue'], recommended_venues['Venue Category'], recommended_venues['Rating']):
    label = folium.Popup(str(name) + ' (' + str(cat) + ', ' + str(rat) + ')', parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        fill=True,
        fill_opacity=0.7).add_to(recommendation_map)
       
recommendation_map