In [12]:
# I'm going to do a KNN analysis of the postal code areas in Toronto.
# Given a postcode, I want to be able to determine which other districts are the best to visit for food.

In [1]:
import pandas as pd
import requests
import json
from sklearn.cluster import KMeans
import matplotlib.cm as cm
import matplotlib.colors as colors
import math
import numpy as np

In [14]:
# Load the postcode coordinate table and expose the "Postal Code" column under
# the name "Postcode", which the later cells use as the key.
# NOTE: the CSV location below is an absolute, machine-specific path.
df = pd.read_csv(
    r"C:\Users\BenjaminWeller\Documents\Projects\Notebook\Geospatial_Coordinates.csv"
).rename(columns={"Postal Code": "Postcode"})
df.head()

Unnamed: 0,Postcode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [20]:
def nearby_venues(names, latitudes, longitudes, radius=500, limit=1000):
    """Collect Foursquare "explore" venues around each labelled coordinate.

    The three iterables are walked in lockstep; for each (name, lat, lng)
    triple one request is sent to the ``venues/explore`` endpoint and every
    returned venue becomes one output row.

    Reads the module-level ``CLIENT_ID``, ``CLIENT_SECRET`` and ``VERSION``
    names for authentication, so those must be defined before calling.

    Parameters
    ----------
    names : iterable
        Label stored in the ``Postcode`` column for each search centre.
    latitudes, longitudes : iterable
        Coordinates of each search centre, sent as the ``ll`` parameter.
    radius : int, default 500
        Sent as the ``radius`` query parameter.
    limit : int, default 1000
        Sent as the ``limit`` query parameter.

    Returns
    -------
    pandas.DataFrame
        Columns ``Postcode``, ``Postcode Latitude``, ``Postcode Longitude``,
        ``Venue``, ``Venue_ID``, ``Venue Latitude``, ``Venue Longitude`` and
        ``Venue Category``. The frame is empty (but still has these columns)
        when no venues are found.

    Raises
    ------
    requests.HTTPError
        If the API answers with an HTTP error status.
    KeyError
        If the JSON payload lacks the expected ``response``/``groups`` keys.
    """
    columns = ['Postcode',
               'Postcode Latitude',
               'Postcode Longitude',
               'Venue',
               "Venue_ID",
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']
    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        # Let requests build and encode the query string.
        params = {
            "client_id": CLIENT_ID,
            "client_secret": CLIENT_SECRET,
            "v": VERSION,
            "ll": f"{lat},{lng}",
            "radius": radius,
            "limit": limit,
        }
        response = requests.get("https://api.foursquare.com/v2/venues/explore", params=params)
        # Fail with the HTTP status instead of an opaque KeyError further down.
        response.raise_for_status()
        groups = response.json()["response"]["groups"]
        items = groups[0]["items"] if groups else []
        for item in items:
            venue = item["venue"]
            # Some venues may carry no category; record None rather than crash.
            categories = venue.get("categories") or []
            rows.append((
                name,
                lat,
                lng,
                venue["name"],
                venue["id"],
                venue["location"]["lat"],
                venue["location"]["lng"],
                categories[0]["name"] if categories else None,
            ))
    # Passing the columns to the constructor keeps the schema even with zero rows.
    return pd.DataFrame(rows, columns=columns)

In [21]:
# Query the venues around every postcode coordinate listed in df
# (default radius and limit of nearby_venues).
toronto_venues = nearby_venues(df['Postcode'], df['Latitude'], df['Longitude'])

In [22]:
# Save the fetched venues next to the notebook. The relative file name matches
# the one the read_excel cells load, instead of an absolute user-specific path.
toronto_venues.to_excel("toronto_venues.xlsx")

In [3]:
# Load the venue table from toronto_venues.xlsx (resolved relative to the working directory).
toronto_venues = pd.read_excel("toronto_venues.xlsx")

In [4]:
# Preview the first rows of the venue table.
toronto_venues.head()

Unnamed: 0,Postcode,Postcode Latitude,Postcode Longitude,Venue,Venue_ID,Venue Latitude,Venue Longitude,Venue Category
0,M1B,43.806686,-79.194353,Wendy's,4bb6b9446edc76b0d771311c,43.807448,-79.199056,Fast Food Restaurant
1,M1B,43.806686,-79.194353,Interprovincial Group,5539e7d2498edaf4b02673ca,43.80563,-79.200378,Print Shop
2,M1C,43.784535,-79.160497,Chris Effects Painting,587eee906d349d5759059742,43.784343,-79.163742,Construction & Landscaping
3,M1C,43.784535,-79.160497,Royal Canadian Legion,4c23d3aaf7ced13a5ed7216d,43.782533,-79.163085,Bar
4,M1E,43.763573,-79.188711,Swiss Chalet Rotisserie & Grill,4b6074e3f964a5200fe729e3,43.767697,-79.189914,Pizza Place


In [5]:
def get_likes(venue_ids, max_requests=925):
    """Fetch the Foursquare like count for each venue ID.

    One request per venue is sent to the venue-details endpoint, using the
    module-level ``CLIENT_ID``, ``CLIENT_SECRET`` and ``VERSION`` names.

    Parameters
    ----------
    venue_ids : iterable of str
        Foursquare venue identifiers.
    max_requests : int or None, default 925
        Only the first ``max_requests`` IDs are queried; the rest are ignored.
        ``None`` removes the cap. (The default of 925 is presumably chosen to
        stay under an API quota - confirm.)

    Returns
    -------
    pandas.DataFrame
        Columns ``Venue_ID`` and ``Likes``, one row per queried ID. ``Likes``
        is missing when the payload has no ``response.venue.likes.count``
        entry; a count of 0 is kept as 0. An empty input yields an empty
        frame with the same two columns.
    """
    records = []
    for position, venue in enumerate(venue_ids):
        if max_requests is not None and position >= max_requests:
            break
        # Get the likes for the place.
        response = requests.get(
            f"https://api.foursquare.com/v2/venues/{venue}",
            params={"client_id": CLIENT_ID, "client_secret": CLIENT_SECRET, "v": VERSION},
        )
        payload = response.json()
        # Walk response -> venue -> likes, tolerating any missing/empty level.
        likes = ((payload.get("response") or {}).get("venue") or {}).get("likes") or {}
        records.append([venue, likes.get("count")])
    # Build the frame once, after the loop, so empty input is also handled.
    return pd.DataFrame(records, columns=["Venue_ID", "Likes"])

In [5]:
# Fetch like counts for the venues. get_likes stops after the first 925 IDs,
# so this covers only part of toronto_venues.
partial_venues = get_likes(toronto_venues["Venue_ID"])

In [10]:
# Compare the size of the likes table with the size of the full venue table.
print(partial_venues.shape, toronto_venues.shape, sep="\n")

(925, 2)
(2255, 8)


In [15]:
# Show the venues whose Venue_ID does not appear in partial_venues yet.
toronto_venues.loc[~toronto_venues["Venue_ID"].isin(partial_venues["Venue_ID"])]

Unnamed: 0,Postcode,Postcode Latitude,Postcode Longitude,Venue,Venue_ID,Venue Latitude,Venue Longitude,Venue Category
925,M5C,43.651494,-79.375418,Bannock,4dfe1cf0a809d61e2fc568ce,43.652101,-79.381178,Restaurant
926,M5C,43.651494,-79.375418,St. Urbain Bagel,4b3cf988f964a520ea8a25e3,43.648611,-79.371497,Bagel Shop
927,M5C,43.651494,-79.375418,European Delight,4b9137f3f964a520e2aa33e3,43.648710,-79.371545,Eastern European Restaurant
929,M5C,43.651494,-79.375418,Biff's Bistro,4b0f3cd2f964a520b66023e3,43.647085,-79.376342,French Restaurant
930,M5C,43.651494,-79.375418,CC Lounge,548f8a50498e9d276895f669,43.647917,-79.374520,Cocktail Bar
932,M5E,43.644771,-79.373306,LCBO,4aeb719af964a52020c221e3,43.642944,-79.372440,Liquor Store
933,M5E,43.644771,-79.373306,The Keg Steakhouse + Bar,4b56a44ff964a5206e1728e3,43.646676,-79.374822,Steakhouse
934,M5E,43.644771,-79.373306,Sony Centre for the Performing Arts,4ad4c062f964a520b8f720e3,43.646292,-79.376022,Concert Hall
936,M5E,43.644771,-79.373306,Hockey Hall Of Fame (Hockey Hall of Fame),4ad4c05ef964a520d8f620e3,43.646974,-79.377323,Museum
941,M5E,43.644771,-79.373306,Eggspectation,5b15659b1c062d0024e7cf81,43.646526,-79.375134,Breakfast Spot


In [8]:
# Preview the first rows of the likes table (Venue_ID, Likes).
partial_venues.head()

Unnamed: 0,Venue_ID,Likes
0,4bb6b9446edc76b0d771311c,1.0
1,5539e7d2498edaf4b02673ca,
2,587eee906d349d5759059742,
3,4c23d3aaf7ced13a5ed7216d,1.0
4,4b6074e3f964a5200fe729e3,9.0


In [8]:
# Save the likes fetched so far next to the notebook. The relative file name
# matches the one every other cell reads and writes, instead of an absolute
# user-specific path.
partial_venues.to_excel("partial_venues.xlsx")

In [7]:
# Load the saved likes table from partial_venues.xlsx (relative to the working directory).
partial_venues = pd.read_excel(r"partial_venues.xlsx")

In [None]:
# Re-fetch the venues for every postcode. The helper defined in this notebook
# is `nearby_venues` and the coordinate frame is `df`; the previously used
# names `getNearbyVenues` / `new_df` are not defined in the visible cells.
toronto_venues = nearby_venues(names=df['Postcode'],
                               latitudes=df['Latitude'],
                               longitudes=df['Longitude']
                               )

In [11]:
import pandas as pd
import requests
import json
from sklearn.cluster import KMeans
import matplotlib.cm as cm
import matplotlib.colors as colors
import math
import numpy as np
import keyring
# Load the venue table from toronto_venues.xlsx (relative to the working directory).
toronto_venues = pd.read_excel("toronto_venues.xlsx")


In [None]:
# Load the likes collected so far from partial_venues.xlsx (relative to the working directory).
partial_venues = pd.read_excel("partial_venues.xlsx")

In [12]:
import os
import warnings

# Foursquare API credentials are read from the environment so that secrets are
# not stored in the notebook. Set FOURSQUARE_CLIENT_ID and
# FOURSQUARE_CLIENT_SECRET before starting the kernel.
# NOTE(review): the key pair that used to be hard-coded in this cell should be
# treated as leaked and rotated.
CLIENT_ID = os.environ.get("FOURSQUARE_CLIENT_ID", "")
CLIENT_SECRET = os.environ.get("FOURSQUARE_CLIENT_SECRET", "")
# Sent as the `v` query parameter of every Foursquare request.
VERSION = "20180605"

if not (CLIENT_ID and CLIENT_SECRET):
    warnings.warn(
        "FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET are not set; "
        "Foursquare API requests will fail until they are provided."
    )

In [14]:
def get_likes(venue_ids, max_requests=925):
    """Fetch the Foursquare like count for each venue ID.

    One request per venue is sent to the venue-details endpoint, using the
    module-level ``CLIENT_ID``, ``CLIENT_SECRET`` and ``VERSION`` names.

    Parameters
    ----------
    venue_ids : iterable of str
        Foursquare venue identifiers.
    max_requests : int or None, default 925
        Only the first ``max_requests`` IDs are queried; the rest are ignored.
        ``None`` removes the cap. (The default of 925 is presumably chosen to
        stay under an API quota - confirm.)

    Returns
    -------
    pandas.DataFrame
        Columns ``Venue_ID`` and ``Likes``, one row per queried ID. ``Likes``
        is missing when the payload has no ``response.venue.likes.count``
        entry; a count of 0 is kept as 0. An empty input yields an empty
        frame with the same two columns.
    """
    records = []
    for position, venue in enumerate(venue_ids):
        if max_requests is not None and position >= max_requests:
            break
        # Get the likes for the place.
        response = requests.get(
            f"https://api.foursquare.com/v2/venues/{venue}",
            params={"client_id": CLIENT_ID, "client_secret": CLIENT_SECRET, "v": VERSION},
        )
        payload = response.json()
        # Walk response -> venue -> likes, tolerating any missing/empty level.
        likes = ((payload.get("response") or {}).get("venue") or {}).get("likes") or {}
        records.append([venue, likes.get("count")])
    # Build the frame once, after the loop, so empty input is also handled.
    return pd.DataFrame(records, columns=["Venue_ID", "Likes"])

In [4]:
# Rows and columns of the likes table loaded so far.
partial_venues.shape

(925, 2)

In [5]:
# Keep only the venues whose Venue_ID is not yet present in partial_venues.
remaining_toronto_venues = toronto_venues.loc[
    ~toronto_venues["Venue_ID"].isin(partial_venues["Venue_ID"])
]

In [6]:
# Number of venues that still need a likes lookup.
len(remaining_toronto_venues)

1130

In [8]:
# When venues are still missing a likes lookup, fetch the next batch, append it
# to the likes already loaded and write the combined table to partial_venues.xlsx.
if len(remaining_toronto_venues) == 0:
    print("Looks like you've got all the venues")
else:
    new_partial_venues = get_likes(remaining_toronto_venues["Venue_ID"])
    new_partial_venues = pd.concat([partial_venues, new_partial_venues])
    print(new_partial_venues.shape)
    new_partial_venues.to_excel(r"partial_venues.xlsx")

(1850, 2)


In [9]:
# Reload the likes table from partial_venues.xlsx, report its shape and
# display the frame.
partial_venues = pd.read_excel("partial_venues.xlsx")
print(partial_venues.shape)
partial_venues

(1850, 2)


Unnamed: 0,Venue_ID,Likes
0,4bb6b9446edc76b0d771311c,1.0
1,5539e7d2498edaf4b02673ca,
2,587eee906d349d5759059742,
3,4c23d3aaf7ced13a5ed7216d,1.0
4,4b6074e3f964a5200fe729e3,9.0
5,4c62f34bde1b2d7fec89e370,1.0
6,522deb21abdf65cfbab70655,
7,5411f741498e9ebd5e35d8bd,2.0
8,5b8004ff9fca56002cffd9cc,
9,4c1c7f9bb306c9288f0464b7,
