Create neighborhood dataframe from Excel – neighborhood, lat, long

In [1]:
# Import pandas library using an alias
import pandas as pd
# library to handle data in a vectorized manner
import numpy as np

# library to handle JSON files
import json
# library to handle requests
import requests
# tranform JSON file into a pandas dataframe
from pandas.io.json import json_normalize

# import geocoder
import geocoder
# convert an address into latitude and longitude values
from geopy.geocoders import Nominatim

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# map rendering library
import folium

In [2]:
df_chi_data = pd.read_excel('CCASF12010CMAP.xlsx')
df_chi_data.head()

Unnamed: 0,GEOGKEYX,GEOGNAME,LATITUDE,UNSIGNED LONGITUDE,LONGITUDE
0,GeogKey,Geog,,,
1,1,Rogers Park,42.016667,87.666667,-87.666667
2,2,West Ridge,42.0,87.683333,-87.683333
3,3,Uptown,41.966667,87.666667,-87.666667
4,4,Lincoln Square,41.966667,87.683333,-87.683333


In [3]:
df_chi_data.drop([0], axis = 0, inplace=True)
df_chi_data.drop(['UNSIGNED LONGITUDE'], axis=1, inplace=True)
df_chi_data.drop(['GEOGKEYX'], axis=1, inplace=True)
df_chi_data.head()

Unnamed: 0,GEOGNAME,LATITUDE,LONGITUDE
1,Rogers Park,42.016667,-87.666667
2,West Ridge,42.0,-87.683333
3,Uptown,41.966667,-87.666667
4,Lincoln Square,41.966667,-87.683333
5,North Center,41.95,-87.683333


Create empty venue df – neighborhood, venue name, venue id, venue lat, venue long, distance from neighborhood center, category, price

In [4]:
df_venues = pd.DataFrame(columns=['Neighborhood','VenueName','VenueID','Latitude','Longitude','Distance','Category','Price'])
df_venues

Unnamed: 0,Neighborhood,VenueName,VenueID,Latitude,Longitude,Distance,Category,Price


Loop through neighborhoods

search for venues by long/lat
use json to populate venue df

In [5]:
# define Foursquare variables
CLIENT_ID = 'CT3K4Z2AEBTWGOKQLQKZ135JJ3B44KOQTB4BMEJ4R0AXXWSD' # your Foursquare ID
CLIENT_SECRET = 'TSJKVMWFPKG2ZV4NYZQDVIR5FIYOSRICDTHHKHWSMV5JHDMZ' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

In [6]:
# create a function to repeat the same process to all the neighborhoods
def getNearbyVenues(names, latitudes, longitudes, radius, LIMIT, CATEGORY, INTENT):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&v={}&categoryId={}&radius={}&limit={}&intent={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            lat, 
            lng,
            VERSION,
            CATEGORY, 
            radius, 
            LIMIT, 
            INTENT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['venues']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            v['name'], 
            v['id'], 
            v['location']['lat'], 
            v['location']['lng'],  
            v['location']['distance'],  
            v['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Venue', 
                  'Venue ID', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Distance',
                  'Venue Category']
    
    return(nearby_venues)

In [8]:
chi_venues = getNearbyVenues(names=df_chi_data['GEOGNAME'],
                                   latitudes=df_chi_data['LATITUDE'],
                                   longitudes=df_chi_data['LONGITUDE'],
                                   radius=500,
                                   LIMIT=100, 
                                   CATEGORY = '4d4b7105d754a06374d81259',
                                   INTENT = 'browse'
                                  )
chi_venues.head()

Rogers Park
West Ridge
Uptown
Lincoln Square
North Center
Lake View
Lincoln Park
Near North Side
Edison Park
Norwood Park
Jefferson Park
Forest Glen
North Park
Albany Park
Portage Park
Irving Park
Dunning
Montclare
Belmont Cragin
Hermosa
Avondale
Logan Square
Humboldt Park
West Town
Austin
West Garfield Park
East Garfield Park
Near West Side
North Lawndale
South Lawndale
Lower West Side
The Loop
Near South Side
Armour Square
Douglas
Oakland
Fuller Park
Grand Boulevard
Kenwood
Washington Park
Hyde Park
Woodlawn
South Shore
Chatham
Avalon Park
South Chicago
Burnside
Calumet Heights
Roseland
Pullman
South Deering
East Side
West Pullman
Riverdale
Hegewisch
Garfield Ridge
Archer Heights
Brighton Park
McKinley Park
Bridgeport
New City
West Elsdon
Gage Park
Clearing
West Lawn
Chicago Lawn
West Englewood
Englewood
Greater Grand Crossing
Ashburn
Auburn Gresham
Beverly
Washington Heights
Mount Greenwood
Morgan Park
O'Hare
Edgewater


Unnamed: 0,Neighborhood,Venue,Venue ID,Venue Latitude,Venue Longitude,Distance,Venue Category
0,Rogers Park,Charmers Cafe,5710dcf0498e87c71d20b69d,42.016164,-87.66825,142,Café
1,Rogers Park,Caribbean American Bakery,4b5dde6ef964a520fa7029e3,42.019371,-87.669705,392,Bakery
2,Rogers Park,Tjam Kitchen,5a2071ca47f876422319a3b6,42.01931,-87.66692,294,Restaurant
3,Rogers Park,Jarvis Grill,4c117c7e17002d7f4755e609,42.015989,-87.66888,198,Fast Food Restaurant
4,Rogers Park,Jamaican Bakery,51cf7f97498ee7d50a505393,42.018398,-87.669414,297,Bakery


Remove duplicates from venue df based on distance from neighborhood center (maybe sort by distance then keep first)

In [9]:
chi_venues.duplicated('Venue ID')
chi_venues_duplicates = chi_venues[chi_venues.duplicated(['Venue ID'])]
 
print("Duplicate venues are:", chi_venues_duplicates, sep='\n')

Duplicate venues are:
            Neighborhood                                          Venue  \
313           North Park                                      Starbucks   
314           North Park                                   Coffee Joint   
315           North Park                                  Laschet's Inn   
316           North Park                         Potbelly Sandwich Shop   
317           North Park                                      Mod Pizza   
318           North Park                   Reclaimed Bar and Restaurant   
319           North Park                               Borinquen Lounge   
320           North Park                                   Pete's Pizza   
625   East Garfield Park                               Al's Under the L   
626   East Garfield Park                           Lake's Best Pizzaria   
627   East Garfield Park                                   Vegies Pizza   
628   East Garfield Park                            Supper Club Chicago   
629

Sort venue list by venue ID and distance from neighborhood center

In [11]:
chi_venues.sort_values(by=['Venue ID', 'Distance'], inplace=True)
chi_venues.head()

Unnamed: 0,Neighborhood,Venue,Venue ID,Venue Latitude,Venue Longitude,Distance,Venue Category
746,The Loop,Atwood,3fd66200f964a520c7f01ee3,41.883205,-87.628191,426,New American Restaurant
234,Lincoln Park,Sai Cafe,3fd66200f964a520e1ed1ee3,41.918481,-87.653361,343,Sushi Restaurant
718,The Loop,Monk's Pub,40b28c80f964a52045fb1ee3,41.88564,-87.634339,269,Pub
130,Lincoln Square,Daily Bar & Grill,40b28c80f964a5205ffd1ee3,41.964823,-87.686073,305,Bar
168,North Center,Laschet's Inn,40b28c80f964a520a5fc1ee3,41.954091,-87.681978,469,German Restaurant


In [13]:
chi_venues[chi_venues['Venue ID'] == '4f32849d19836c91c7df997c']

Unnamed: 0,Neighborhood,Venue,Venue ID,Venue Latitude,Venue Longitude,Distance,Venue Category
558,Humboldt Park,Ja's Jerk Chicken,4f32849d19836c91c7df997c,41.884309,-87.696617,300,Food
634,East Garfield Park,Ja's Jerk Chicken,4f32849d19836c91c7df997c,41.884309,-87.696617,300,Food


In [14]:
chi_venues.drop_duplicates(subset="Venue ID", keep='first', inplace=True)
chi_venues[chi_venues['Venue ID'] == '4f32849d19836c91c7df997c']

Unnamed: 0,Neighborhood,Venue,Venue ID,Venue Latitude,Venue Longitude,Distance,Venue Category
558,Humboldt Park,Ja's Jerk Chicken,4f32849d19836c91c7df997c,41.884309,-87.696617,300,Food


In [15]:
print(chi_venues.shape)

(1272, 7)


In [16]:
# how many venues for each neighborhood
chi_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Venue,Venue ID,Venue Latitude,Venue Longitude,Distance,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Albany Park,50,50,50,50,50,50
Archer Heights,6,6,6,6,6,6
Armour Square,50,50,50,50,50,50
Ashburn,11,11,11,11,11,11
Auburn Gresham,16,16,16,16,16,16
Austin,16,16,16,16,16,16
Avalon Park,24,24,24,24,24,24
Avondale,17,17,17,17,17,17
Belmont Cragin,39,39,39,39,39,39
Beverly,34,34,34,34,34,34


In [17]:
# how many unique categories
print('There are {} uniques categories.'.format(len(chi_venues['Venue Category'].unique())))

There are 106 uniques categories.


Use one-hot encoding on categories and load into new df (copy venue df first – 1he)

In [23]:
# one hot encoding
chi_onehot_cat = pd.get_dummies(chi_venues[['Venue Category']], prefix="", prefix_sep="")
chi_onehot_cat.head()

Unnamed: 0,Afghan Restaurant,African Restaurant,American Restaurant,Arcade,Arepa Restaurant,Argentinian Restaurant,Asian Restaurant,BBQ Joint,Bagel Shop,Bakery,...,Taiwanese Restaurant,Tapas Restaurant,Tea Room,Thai Restaurant,Theme Restaurant,Ukrainian Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Whisky Bar,Wings Joint
746,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
234,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
718,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
130,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
168,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [24]:
# add venue ID column back to dataframe
chi_onehot_cat['Venue ID'] = chi_venues['Venue ID']
chi_onehot_cat.head()

Unnamed: 0,Afghan Restaurant,African Restaurant,American Restaurant,Arcade,Arepa Restaurant,Argentinian Restaurant,Asian Restaurant,BBQ Joint,Bagel Shop,Bakery,...,Tapas Restaurant,Tea Room,Thai Restaurant,Theme Restaurant,Ukrainian Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Whisky Bar,Wings Joint,Venue ID
746,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,3fd66200f964a520c7f01ee3
234,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,3fd66200f964a520e1ed1ee3
718,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,40b28c80f964a52045fb1ee3
130,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,40b28c80f964a5205ffd1ee3
168,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,40b28c80f964a520a5fc1ee3


In [25]:
# move venue ID to the first column
fixed_columns = [chi_onehot_cat.columns[-1]] + list(chi_onehot_cat.columns[:-1])
chi_onehot_cat = chi_onehot_cat[fixed_columns]
chi_onehot_cat.head()

Unnamed: 0,Venue ID,Afghan Restaurant,African Restaurant,American Restaurant,Arcade,Arepa Restaurant,Argentinian Restaurant,Asian Restaurant,BBQ Joint,Bagel Shop,...,Taiwanese Restaurant,Tapas Restaurant,Tea Room,Thai Restaurant,Theme Restaurant,Ukrainian Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Whisky Bar,Wings Joint
746,3fd66200f964a520c7f01ee3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
234,3fd66200f964a520e1ed1ee3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
718,40b28c80f964a52045fb1ee3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
130,40b28c80f964a5205ffd1ee3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
168,40b28c80f964a520a5fc1ee3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


Use one-hot encoding on neighborhoods

In [26]:
# one hot encoding
chi_onehot_nhood = pd.get_dummies(chi_venues[['Neighborhood']], prefix="", prefix_sep="")
chi_onehot_nhood.head()

Unnamed: 0,Albany Park,Archer Heights,Armour Square,Ashburn,Auburn Gresham,Austin,Avalon Park,Avondale,Belmont Cragin,Beverly,...,Washington Heights,Washington Park,West Elsdon,West Englewood,West Garfield Park,West Lawn,West Pullman,West Ridge,West Town,Woodlawn
746,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
234,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
718,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
130,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
168,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [27]:
# add venue ID column back to dataframe
chi_onehot_nhood['Venue ID'] = chi_venues['Venue ID']
# move venue ID to the first column
fixed_columns = [chi_onehot_nhood.columns[-1]] + list(chi_onehot_nhood.columns[:-1])
chi_onehot_nhood = chi_onehot_nhood[fixed_columns]

chi_onehot_nhood.head()

Unnamed: 0,Venue ID,Albany Park,Archer Heights,Armour Square,Ashburn,Auburn Gresham,Austin,Avalon Park,Avondale,Belmont Cragin,...,Washington Heights,Washington Park,West Elsdon,West Englewood,West Garfield Park,West Lawn,West Pullman,West Ridge,West Town,Woodlawn
746,3fd66200f964a520c7f01ee3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
234,3fd66200f964a520e1ed1ee3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
718,40b28c80f964a52045fb1ee3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
130,40b28c80f964a5205ffd1ee3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
168,40b28c80f964a520a5fc1ee3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


Create df with price of venues

In [47]:
def is_empty(any_structure):
    if any_structure:
        print('Structure is not empty.')
        return False
    else:
        print('Structure is empty.')
        return True

In [41]:
# create a function to repeat the same process to all venues
def getVenuePrice(venueids):
    
    venues_list=[]
    for venue_id in venueids:
        print(venue_id)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/{}?client_id={}&client_secret={}&v={}'.format(
            venue_id, 
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION)
            
        # make the GET request
        results = requests.get(url).json()
        results
        
        # return price for each venue
        #if "venue" in results['response']:
         #   if "price" in results['response']['venue']:
          #      venues_list.append([(venue_id, results['response']['venue']['price']['message']) ])
           # else:
            #    venues_list.append([(venue_id, "Price Unknown")])
        #else:
         #   results['response']

    #venue_prices = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    #venue_prices.columns = ['Venue ID', 'Price']
    
    #return(venue_prices)
    return(0)

In [49]:
# create a function to repeat the same process to all venues
def getVenuePrice(venueids):
    
    venues_list=[]
    for venue_id in venueids:
        #print(venue_id)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/{}?client_id={}&client_secret={}&v={}'.format(
            venue_id, 
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION)
            
        # make the GET request
        results = requests.get(url).json()
        
        # return price for each venue
        if results['response']:
            if results['response']['venue']:
                if results['response']['venue']['price']:
                    print("The price of {} is {}.".format(venue_id, results['response']['venue']['price']['message']))
                else:
                    print("There is no price for {}.".format(venue_id))
            else:
                print("There is no venue data for {}.".format(venue_id))
        else:
            print("There is no response for {}.".format(venue_id))

    #venue_prices = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    #venue_prices.columns = ['Venue ID', 'Price']
    
    #return(venue_prices)
    return(0)

In [50]:
chi_venues_sm = chi_venues.head()
chi_venues_sm

Unnamed: 0,Neighborhood,Venue,Venue ID,Venue Latitude,Venue Longitude,Distance,Venue Category
746,The Loop,Atwood,3fd66200f964a520c7f01ee3,41.883205,-87.628191,426,New American Restaurant
234,Lincoln Park,Sai Cafe,3fd66200f964a520e1ed1ee3,41.918481,-87.653361,343,Sushi Restaurant
718,The Loop,Monk's Pub,40b28c80f964a52045fb1ee3,41.88564,-87.634339,269,Pub
130,Lincoln Square,Daily Bar & Grill,40b28c80f964a5205ffd1ee3,41.964823,-87.686073,305,Bar
168,North Center,Laschet's Inn,40b28c80f964a520a5fc1ee3,41.954091,-87.681978,469,German Restaurant


In [51]:
dummy = getVenuePrice(venueids=chi_venues_sm['Venue ID'])

There is no response for 3fd66200f964a520c7f01ee3.
There is no response for 3fd66200f964a520e1ed1ee3.
There is no response for 40b28c80f964a52045fb1ee3.
There is no response for 40b28c80f964a5205ffd1ee3.
There is no response for 40b28c80f964a520a5fc1ee3.


In [44]:
venue_id = '3fd66200f964a520c7f01ee3'
url = 'https://api.foursquare.com/v2/venues/{}?client_id={}&client_secret={}&v={}'.format(
            venue_id, 
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION)
            
# make the GET request
results = requests.get(url).json()
results

{'meta': {'code': 429,
  'errorType': 'quota_exceeded',
  'errorDetail': 'Quota exceeded',
  'requestId': '5d0edc44d176e9002579d695'},
 'response': {}}

In [45]:
resp = results['response']
resp

{}

In [48]:
is_empty(resp)

Structure is empty.


True

In [54]:
# create a function to repeat the same process to all venues
def getVenuePrice(venueids):
    
    venues_list=[]
    for venue_id in venueids:
        #print(venue_id)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/{}?client_id={}&client_secret={}&v={}'.format(
            venue_id, 
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION)
            
        # make the GET request
        results = requests.get(url).json()
        
        # return price for each venue
        if results['response']:
            if results['response']['venue']:
                if results['response']['venue']['price']:
                    venues_list.append([(venue_id, results['response']['venue']['price']['message']) ])
                else:
                    venues_list.append([(venue_id, "Price Unknown")])
            else:
                venues_list.append([(venue_id, "Price Unknown")])
        else:
            venues_list.append([(venue_id, "Price Unknown")])

    venue_prices = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    venue_prices.columns = ['Venue ID', 'Price']
    
    return(venue_prices)

In [55]:
chi_venue_prices = getVenuePrice(venueids=chi_venues['Venue ID'])
chi_venue_prices.head()

Unnamed: 0,Venue ID,Price
0,3fd66200f964a520c7f01ee3,Price Unknown
1,3fd66200f964a520e1ed1ee3,Price Unknown
2,40b28c80f964a52045fb1ee3,Price Unknown
3,40b28c80f964a5205ffd1ee3,Price Unknown
4,40b28c80f964a520a5fc1ee3,Price Unknown


In [56]:
chi_venue_prices.groupby('Price').count()

Unnamed: 0_level_0,Venue ID
Price,Unnamed: 1_level_1
Price Unknown,1272


Create one-hot encoding df of prices

In [32]:
# one hot encoding
chi_onehot_prices = pd.get_dummies(chi_venue_prices[['Price']], prefix="", prefix_sep="")
chi_onehot_prices.head()

Unnamed: 0,Price Unknown
0,1
1,1
2,1
3,1
4,1


In [36]:
# add venue ID column back to dataframe
chi_onehot_prices['Venue ID'] = chi_venue_prices['Venue ID']
chi_onehot_prices.head()

Unnamed: 0,Cheap,Expensive,Moderate,Price Unknown,Venue ID
0,1,0,0,0,5710dcf0498e87c71d20b69d
1,1,0,0,0,4b5dde6ef964a520fa7029e3
2,0,0,1,0,5a2071ca47f876422319a3b6
3,1,0,0,0,4c117c7e17002d7f4755e609
4,1,0,0,0,51cf7f97498ee7d50a505393


Combine one-hot encoding dfs for categories, neighborhoods and prices

In [37]:
chi_onehot_all = pd.merge(pd.merge(chi_onehot_prices, chi_onehot_nhood, on='Venue ID'), chi_onehot_cat, on='Venue ID')
chi_onehot_all.head()

Unnamed: 0,Cheap,Expensive,Moderate,Price Unknown,Venue ID,Lincoln Square,North Center,Rogers Park,Uptown,West Ridge,...,Hot Dog Joint,Indian Restaurant,Italian Restaurant,Mexican Restaurant,New American Restaurant,Pizza Place,Restaurant,Sandwich Place,Sushi Restaurant,Taco Place
0,1,0,0,0,5710dcf0498e87c71d20b69d,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1,0,0,0,4b5dde6ef964a520fa7029e3,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,1,0,5a2071ca47f876422319a3b6,0,0,1,0,0,...,0,0,0,0,0,0,1,0,0,0
3,1,0,0,0,4c117c7e17002d7f4755e609,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
4,1,0,0,0,51cf7f97498ee7d50a505393,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0


In [38]:
# move venue ID to the first column
fixed_columns = [chi_onehot_all.columns[5]] + list(chi_onehot_all.columns[:5]) + list(chi_onehot_all.columns[6:])
chi_onehot_all = chi_onehot_all[fixed_columns]
chi_onehot_all.head()

Unnamed: 0,Lincoln Square,Cheap,Expensive,Moderate,Price Unknown,Venue ID,North Center,Rogers Park,Uptown,West Ridge,...,Hot Dog Joint,Indian Restaurant,Italian Restaurant,Mexican Restaurant,New American Restaurant,Pizza Place,Restaurant,Sandwich Place,Sushi Restaurant,Taco Place
0,0,1,0,0,0,5710dcf0498e87c71d20b69d,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,1,0,0,0,4b5dde6ef964a520fa7029e3,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,1,0,5a2071ca47f876422319a3b6,0,1,0,0,...,0,0,0,0,0,0,1,0,0,0
3,0,1,0,0,0,4c117c7e17002d7f4755e609,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,1,0,0,0,51cf7f97498ee7d50a505393,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0


Get user ratings of a venue

In [39]:
venue_id = '51cf7f97498ee7d50a505393'
url = 'https://api.foursquare.com/v2/venues/{}/likes?client_id={}&client_secret={}&v={}'.format(venue_id, CLIENT_ID, CLIENT_SECRET, VERSION)
results = requests.get(url).json()
results['response']['likes']

{'count': 0}

In [40]:
venue_id = '4c117c7e17002d7f4755e609'
url = 'https://api.foursquare.com/v2/venues/{}/likes?client_id={}&client_secret={}&v={}'.format(venue_id, CLIENT_ID, CLIENT_SECRET, VERSION)
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5d0e8d1b1a292500255ac700'},
 'response': {'likes': {'count': 8,
   'summary': '8 Likes',
   'items': [{'id': '15833163',
     'firstName': 'miss eee',
     'gender': 'none',
     'photo': {'prefix': 'https://fastly.4sqi.net/img/user/',
      'suffix': '/YCVD4NKMEL1OLLO3.jpg'}},
    {'id': '2084561',
     'firstName': 'Edwin',
     'lastName': 'C',
     'gender': 'male',
     'photo': {'prefix': 'https://fastly.4sqi.net/img/user/',
      'suffix': '/2084561-EX3ST4XDPWMLS1NU.jpg'}},
    {'id': '101365821',
     'firstName': 'William',
     'lastName': 'G',
     'gender': 'male',
     'photo': {'prefix': 'https://fastly.4sqi.net/img/user/',
      'suffix': '/101365821-QY2VUB4SO4SJIKPK.jpg'}}]}}}

In [41]:
results['response']['likes']['items'][0]['id']

'15833163'

In [42]:
for i in results['response']['likes']['items']:
    print(i['id'])

15833163
2084561
101365821


Loop thru venue df

search users who liked a venue ??
use json to populate user df

In [45]:
# create a function to get users who liked each venue
def getVenueLikes(venueids):
    
    venues_list=[]
    for venue_id in venueids:
        #print(venue_id)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/{}/likes?client_id={}&client_secret={}&v={}'.format(
            venue_id, 
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION)
            
        # make the GET request
        results = requests.get(url).json()['response']['likes']
        
        # return price for each venue
        if "items" in results:
            for i in results['items']:
                venues_list.append([(venue_id, i['id'], 1) ])
        else:
            print("No likes for venue {}.".format(venue_id))

    venue_likes = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    venue_likes.columns = ['Venue ID', 'User ID', 'Rating']
    
    return(venue_likes)

In [46]:
chi_venue_likes = getVenueLikes(venueids=chi_venues['Venue ID'])
chi_venue_likes.head()

No likes for venue 51cf7f97498ee7d50a505393.


Unnamed: 0,Venue ID,User ID,Rating
0,5710dcf0498e87c71d20b69d,515900,1
1,5710dcf0498e87c71d20b69d,229698,1
2,5710dcf0498e87c71d20b69d,51646211,1
3,4b5dde6ef964a520fa7029e3,50475374,1
4,4b5dde6ef964a520fa7029e3,2967333,1


In [49]:
print(chi_venue_likes[chi_venue_likes['User ID'] == '229698'] )

                   Venue ID User ID  Rating
1  5710dcf0498e87c71d20b69d  229698       1


In [50]:
chi_venue_likes.groupby('Venue ID').count()

Unnamed: 0_level_0,User ID,Rating
Venue ID,Unnamed: 1_level_1,Unnamed: 2_level_1
40b28c80f964a520a5fc1ee3,3,3
4aa3dfaaf964a520384420e3,3,3
4af61997f964a5202c0122e3,1,1
4b08e160f964a520171323e3,3,3
4b0b0598f964a520ad2b23e3,3,3
4b35afb9f964a5208a2e25e3,3,3
4b4a7352f964a520038826e3,3,3
4b5dde6ef964a520fa7029e3,2,2
4b7e538af964a52096e92fe3,3,3
4c117c7e17002d7f4755e609,3,3


In [51]:
chi_venue_likes.groupby('User ID').count()

Unnamed: 0_level_0,Venue ID,Rating
User ID,Unnamed: 1_level_1,Unnamed: 2_level_1
101365821,1,1
11377271,1,1
11443522,1,1
11911565,1,1
122886,1,1
124547371,1,1
1300363,1,1
14039436,1,1
148413047,1,1
152200,1,1


In [53]:
chi_venue_likes.groupby('User ID').filter(lambda x: len(x) > 1) 

Unnamed: 0,Venue ID,User ID,Rating
2,5710dcf0498e87c71d20b69d,51646211,1
25,4b35afb9f964a5208a2e25e3,75339681,1
30,4b4a7352f964a520038826e3,75339681,1
38,4aa3dfaaf964a520384420e3,51646211,1


Pick a test user.
Save test user ratings from user ratings to new df.

In [58]:
test_user_ratings = chi_venue_likes[chi_venue_likes['User ID'] == '51646211'].drop('User ID', 1)
test_user_ratings

Unnamed: 0,Venue ID,Rating
2,5710dcf0498e87c71d20b69d,1
38,4aa3dfaaf964a520384420e3,1


In [61]:
#Resetting the index to avoid future issues
test_user_ratings = test_user_ratings.reset_index(drop=True)

In [62]:
test_user_ratings

Unnamed: 0,Venue ID,Rating
0,5710dcf0498e87c71d20b69d,1
1,4aa3dfaaf964a520384420e3,1


Create new venue df of those the test user has rated (based on one-hot encoding df).

In [60]:
test_user_likes =  chi_onehot_all[chi_onehot_all['Venue ID'].isin(test_user_ratings['Venue ID'].tolist())]
test_user_likes

Unnamed: 0,Lincoln Square,Cheap,Expensive,Moderate,Price Unknown,Venue ID,North Center,Rogers Park,Uptown,West Ridge,...,Hot Dog Joint,Indian Restaurant,Italian Restaurant,Mexican Restaurant,New American Restaurant,Pizza Place,Restaurant,Sandwich Place,Sushi Restaurant,Taco Place
0,0,1,0,0,0,5710dcf0498e87c71d20b69d,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
16,1,1,0,0,0,4aa3dfaaf964a520384420e3,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


Drop unnecessary columns

In [63]:
#Resetting the index to avoid future issues
test_user_likes = test_user_likes.reset_index(drop=True)
#Dropping unnecessary issues due to save memory and to avoid issues
test_user_features = test_user_likes.drop('Venue ID', 1)
test_user_features

Unnamed: 0,Lincoln Square,Cheap,Expensive,Moderate,Price Unknown,North Center,Rogers Park,Uptown,West Ridge,Bakery,...,Hot Dog Joint,Indian Restaurant,Italian Restaurant,Mexican Restaurant,New American Restaurant,Pizza Place,Restaurant,Sandwich Place,Sushi Restaurant,Taco Place
0,0,1,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


Create vector of test user’s ratings

In [65]:
test_user_ratings['Rating']

0    1
1    1
Name: Rating, dtype: int64

Use dot product of vector and test user’s ratings to build test user profile

In [66]:
#Dot produt to get weights
userProfile = test_user_features.transpose().dot(test_user_ratings['Rating'])
#The user profile
userProfile

Lincoln Square             1
Cheap                      2
Expensive                  0
Moderate                   0
Price Unknown              0
North Center               0
Rogers Park                1
Uptown                     0
West Ridge                 0
Bakery                     0
Butcher                    0
Café                       1
Coffee Shop                1
Fast Food Restaurant       0
German Restaurant          0
Hot Dog Joint              0
Indian Restaurant          0
Italian Restaurant         0
Mexican Restaurant         0
New American Restaurant    0
Pizza Place                0
Restaurant                 0
Sandwich Place             0
Sushi Restaurant           0
Taco Place                 0
dtype: int64

Let's start by extracting the genre table from the original dataframe:

In [68]:
#Now let's get the features of every restaurant in our original dataframe
features_df = chi_onehot_all.set_index(chi_onehot_all['Venue ID'])
features_df.head()

Unnamed: 0_level_0,Lincoln Square,Cheap,Expensive,Moderate,Price Unknown,Venue ID,North Center,Rogers Park,Uptown,West Ridge,...,Hot Dog Joint,Indian Restaurant,Italian Restaurant,Mexican Restaurant,New American Restaurant,Pizza Place,Restaurant,Sandwich Place,Sushi Restaurant,Taco Place
Venue ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
5710dcf0498e87c71d20b69d,0,1,0,0,0,5710dcf0498e87c71d20b69d,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
4b5dde6ef964a520fa7029e3,0,1,0,0,0,4b5dde6ef964a520fa7029e3,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
5a2071ca47f876422319a3b6,0,0,0,1,0,5a2071ca47f876422319a3b6,0,1,0,0,...,0,0,0,0,0,0,1,0,0,0
4c117c7e17002d7f4755e609,0,1,0,0,0,4c117c7e17002d7f4755e609,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
51cf7f97498ee7d50a505393,0,1,0,0,0,51cf7f97498ee7d50a505393,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0


Drop unnecessary columns from 1he df

In [69]:
#And drop the unnecessary information
features_df = features_df.drop('Venue ID', 1)
features_df.head()

Unnamed: 0_level_0,Lincoln Square,Cheap,Expensive,Moderate,Price Unknown,North Center,Rogers Park,Uptown,West Ridge,Bakery,...,Hot Dog Joint,Indian Restaurant,Italian Restaurant,Mexican Restaurant,New American Restaurant,Pizza Place,Restaurant,Sandwich Place,Sushi Restaurant,Taco Place
Venue ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
5710dcf0498e87c71d20b69d,0,1,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4b5dde6ef964a520fa7029e3,0,1,0,0,0,0,1,0,0,1,...,0,0,0,0,0,0,0,0,0,0
5a2071ca47f876422319a3b6,0,0,0,1,0,0,1,0,0,0,...,0,0,0,0,0,0,1,0,0,0
4c117c7e17002d7f4755e609,0,1,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
51cf7f97498ee7d50a505393,0,1,0,0,0,0,1,0,0,1,...,0,0,0,0,0,0,0,0,0,0


With the input's profile and the complete list of movies and their genres in hand, we're going to take the weighted average of every movie based on the input profile and recommend the top twenty movies that most satisfy it.

In [70]:
#Multiply the features by the weights and then take the weighted average
recommendationTable_df = ((features_df*userProfile).sum(axis=1))/(userProfile.sum())
recommendationTable_df.head()

Venue ID
5710dcf0498e87c71d20b69d    0.666667
4b5dde6ef964a520fa7029e3    0.500000
5a2071ca47f876422319a3b6    0.166667
4c117c7e17002d7f4755e609    0.500000
51cf7f97498ee7d50a505393    0.500000
dtype: float64

Sort it

In [71]:
#Sort our recommendations in descending order
recommendationTable_df = recommendationTable_df.sort_values(ascending=False)
#Just a peek at the values
recommendationTable_df.head()

Venue ID
5710dcf0498e87c71d20b69d    0.666667
560beb93498e42eb6c7f5d02    0.666667
4aa3dfaaf964a520384420e3    0.666667
59d556c4f0b49010357e5b1d    0.500000
4b5dde6ef964a520fa7029e3    0.500000
dtype: float64

In [80]:
#The final recommendation table
recommended_venues = chi_venues.loc[chi_venues['Venue ID'].isin(recommendationTable_df.head(10).keys())]
recommended_venues

Unnamed: 0,Neighborhood,Venue,Venue ID,Venue Latitude,Venue Longitude,Distance,Venue Category
0,Rogers Park,Charmers Cafe,5710dcf0498e87c71d20b69d,42.016164,-87.66825,142,Café
1,Rogers Park,Caribbean American Bakery,4b5dde6ef964a520fa7029e3,42.019371,-87.669705,392,Bakery
3,Rogers Park,Jarvis Grill,4c117c7e17002d7f4755e609,42.015989,-87.66888,198,Fast Food Restaurant
4,Rogers Park,Jamaican Bakery,51cf7f97498ee7d50a505393,42.018398,-87.669414,297,Bakery
5,West Ridge,Ghareeb Nawaz,4b7e538af964a52096e92fe3,41.998019,-87.681569,264,Indian Restaurant
11,Uptown,Ridman’s Coffee,59d556c4f0b49010357e5b1d,41.968856,-87.667612,255,Coffee Shop
16,Lincoln Square,Starbucks,4aa3dfaaf964a520384420e3,41.964799,-87.685861,294,Coffee Shop
19,Lincoln Square,Oromo Cafe,560beb93498e42eb6c7f5d02,41.966883,-87.687252,325,Café
20,North Center,Starbucks,54273f56498e550c0584a8bb,41.947936,-87.688509,486,Coffee Shop
21,North Center,Coffee Joint,59af0feb58002c0bd652ea47,41.954073,-87.681114,489,Coffee Shop


Add suggested ratings

In [81]:
recommended_ratings = pd.merge(recommended_venues, recommendationTable_df.rename('Rating'), left_on='Venue ID', right_index=True)
recommended_ratings

Unnamed: 0,Neighborhood,Venue,Venue ID,Venue Latitude,Venue Longitude,Distance,Venue Category,Rating
0,Rogers Park,Charmers Cafe,5710dcf0498e87c71d20b69d,42.016164,-87.66825,142,Café,0.666667
1,Rogers Park,Caribbean American Bakery,4b5dde6ef964a520fa7029e3,42.019371,-87.669705,392,Bakery,0.5
3,Rogers Park,Jarvis Grill,4c117c7e17002d7f4755e609,42.015989,-87.66888,198,Fast Food Restaurant,0.5
4,Rogers Park,Jamaican Bakery,51cf7f97498ee7d50a505393,42.018398,-87.669414,297,Bakery,0.5
5,West Ridge,Ghareeb Nawaz,4b7e538af964a52096e92fe3,41.998019,-87.681569,264,Indian Restaurant,0.333333
11,Uptown,Ridman’s Coffee,59d556c4f0b49010357e5b1d,41.968856,-87.667612,255,Coffee Shop,0.5
16,Lincoln Square,Starbucks,4aa3dfaaf964a520384420e3,41.964799,-87.685861,294,Coffee Shop,0.666667
19,Lincoln Square,Oromo Cafe,560beb93498e42eb6c7f5d02,41.966883,-87.687252,325,Café,0.666667
20,North Center,Starbucks,54273f56498e550c0584a8bb,41.947936,-87.688509,486,Coffee Shop,0.5
21,North Center,Coffee Joint,59af0feb58002c0bd652ea47,41.954073,-87.681114,489,Coffee Shop,0.5


Drop unnecessary columns

In [87]:
recommended_venues = recommended_ratings.drop('Venue ID', 1).drop('Distance', 1)
recommended_venues.head()

Unnamed: 0,Neighborhood,Venue,Venue Latitude,Venue Longitude,Venue Category,Rating
0,Rogers Park,Charmers Cafe,42.016164,-87.66825,Café,0.666667
1,Rogers Park,Caribbean American Bakery,42.019371,-87.669705,Bakery,0.5
3,Rogers Park,Jarvis Grill,42.015989,-87.66888,Fast Food Restaurant,0.5
4,Rogers Park,Jamaican Bakery,42.018398,-87.669414,Bakery,0.5
5,West Ridge,Ghareeb Nawaz,41.998019,-87.681569,Indian Restaurant,0.333333


Plot the top 5-10

In [85]:
# get coordinates for Toronto
address = 'Chicago, IL'

geolocator = Nominatim(user_agent="chi_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Chicago are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Chicago are 41.8755616, -87.6244212.


In [89]:
# create map
recommendation_map = folium.Map(location=[latitude, longitude], zoom_start=11)

In [90]:
# add markers to the map
for lat, lon, name, rat in zip(recommended_venues['Venue Latitude'], recommended_venues['Venue Longitude'], recommended_venues['Venue'], recommended_venues['Rating']):
    label = folium.Popup(str(name) + ' Rating ' + str(rat), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        fill=True,
        fill_opacity=0.7).add_to(recommendation_map)
       
recommendation_map