In [1]:
#required libraries
from bs4 import BeautifulSoup
import requests
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
import json
from geopy.geocoders import Nominatim
import requests
from sklearn.cluster import KMeans
import folium
import matplotlib.cm as cm
import matplotlib.colors as colors

In [2]:
#gets wiki page and puts it into BeautifulSoup
html = requests.get(url="https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M").text
soup = BeautifulSoup(html)

In [None]:
#prettifies
print(soup.prettify())

In [4]:
#creates lists for each column and cleans "Neighbourhood"
#this code will work so long as the table formatting remains consistent
Postcode = []
Borough = []
Neighbourhood = []
for row in soup.find_all("table")[0].tbody.find_all("tr"):
  for row2 in row.find_all("td"):
    if row2.p.contents[2].contents[0].string == "Not assigned":
      pass #skips entries that are not assigned
    else:
      Postcode.append(row2.p.b.string)
      entry = ""
      for item in row2.p.span.contents:
        try:
          entry = entry + item.string
        except:
          pass #skips "None" items
      
      Borough.append(entry.split(sep="(")[0])
      newentry = (entry.split(sep="(")[1].split(sep=")")[0])
      newentry = newentry.replace("/",",",-1)
      Neighbourhood.append(newentry)
      
        

In [5]:
#put into dataframe
data = {'Postcode': Postcode, 'Borough': Borough,"Neighbourhood":Neighbourhood}
df_Toronto = pd.DataFrame(data=data)

In [None]:
#displays dataframe
df_Toronto

In [None]:
#displays shape
df_Toronto.shape

In [None]:
#this is to show there are no postcodes repeated
len(df_Toronto["Postcode"].unique())

In [9]:
#after a lot of playing around, I used the provided coords - I don't think getting coords through area code works
#creates a dataframe for the coords
df_coords = pd.read_csv("https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-DS0701EN-SkillsNetwork/labs_v1/Geospatial_Coordinates.csv")

In [None]:
#displays the head
df_coords.head()

In [11]:
#sorts both dataframes in order of postal code, and resets index
df_coords.sort_values("Postal Code",inplace=True)
df_coords.reset_index(inplace=True,drop=True)
df_Toronto.sort_values("Postcode",inplace=True)
df_Toronto.reset_index(inplace=True,drop=True)

In [None]:
#displays Toronto head - order is now the same as the dataframe above
df_Toronto.head()

In [13]:
#adds latitute and longitude from df_coords to df_Toronto
df_Toronto["Latitude"] = df_coords["Latitude"]
df_Toronto["Longitude"] = df_coords["Longitude"]

In [None]:
#displays full dataframe
df_Toronto

In [15]:
#shows a count of each borough
df_Toronto.groupby(by="Borough").count()

Unnamed: 0_level_0,Postcode,Neighbourhood,Latitude,Longitude
Borough,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Central Toronto,9,9,9,9
Downtown Toronto,17,17,17,17
Downtown TorontoStn A PO Boxes25 The Esplanade,1,1,1,1
East Toronto,4,4,4,4
East TorontoBusiness reply mail Processing Centre969 Eastern,1,1,1,1
East York,4,4,4,4
East YorkEast Toronto,1,1,1,1
Etobicoke,11,11,11,11
EtobicokeNorthwest,1,1,1,1
MississaugaCanada Post Gateway Processing Centre,1,1,1,1


In [16]:
#sets geolocator
geolocator = Nominatim(user_agent="Toronto")

In [17]:
#gets location of Toronto
location = geolocator.geocode("Toronto, CA")

In [18]:
#creates map of Toronto
map_Toronto = folium.Map(location=[location.latitude, location.longitude], zoom_start=10)

In [19]:
#adds circle markers to the map for the given latitudes and longitudes, and displays all the data in the popup
for post,bor,hood,lat,lon in zip(df_Toronto["Postcode"],df_Toronto["Borough"],df_Toronto["Neighbourhood"],df_Toronto["Latitude"],df_Toronto["Longitude"]):
  label = f"{post} BOROUGH: {bor} NEIGHBOURHOOD(S): {hood}"
  label = folium.Popup(label, parse_html=True)
  folium.CircleMarker(
      [lat,lon],
      radius = 5,
      popup=label,
      color = "blue",
      fill=True,
      fill_color='#3186cc',
      fill_opacity=0.7,
      parse_html=False).add_to(map_Toronto)

In [20]:
#displays map
map_Toronto

In [21]:
#creates a dataframe for Scarborough
df_Scarborough = df_Toronto[df_Toronto['Borough'] == 'Scarborough'].reset_index(drop=True)
df_Scarborough.drop(columns="Borough",inplace=True)
df_Scarborough.head()

Unnamed: 0,Postcode,Neighbourhood,Latitude,Longitude
0,M1B,"Malvern , Rouge",43.806686,-79.194353
1,M1C,"Rouge Hill , Port Union , Highland Creek",43.784535,-79.160497
2,M1E,"Guildwood , Morningside , West Hill",43.763573,-79.188711
3,M1G,Woburn,43.770992,-79.216917
4,M1H,Cedarbrae,43.773136,-79.239476


In [22]:
#gets location of Scarborough
location = geolocator.geocode("Scarborough,Toronto,CA")
latitude = location.latitude
longitude = location.longitude

In [23]:
#creates map of Scarborough
map_Scarborough = folium.Map(location=[latitude,longitude],zoom_start=11)

In [24]:
#adds circle markers for each postcode
for post,hood,lat,lon in zip(df_Scarborough["Postcode"],df_Scarborough["Neighbourhood"],df_Scarborough["Latitude"],df_Scarborough["Longitude"]):
  label = f"{post} Neighbourhoods: {hood}"
  label = folium.Popup(label, parse_html=True)
  folium.CircleMarker(
      [lat,lon],
      radius = 5,
      popup=label,
      color = "blue",
      fill=True,
      fill_color='#3186cc',
      fill_opacity=0.7,
      parse_html=False).add_to(map_Scarborough)

In [25]:
#displays map
map_Scarborough

In [26]:
#get location of Agincourt
location = geolocator.geocode("Agincourt,Toronto,CA")
latitude = location.latitude
longitude = location.longitude

In [27]:
#load in foursquare credentials
CLIENT_ID = 'GWR1FEF3SNLCDYO031MWFZYVGOVB2B5L1CBRUNTZJMEI5RTG' # your Foursquare ID
CLIENT_SECRET = '1QHUWYDASJYOTR33A5O1ZISJD5QA31TAT0K4HXTQMMYMI5QK' # your Foursquare Secret
ACCESS_TOKEN = "T3AWGIXSAJJ41QDNZMQ41TJ0ZCKKJKVXIJSEHC0WHQV52DSX"
VERSION = 20210517

In [28]:
#create URL for foursquare search of venues
radius = 500
limit = 100
url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&ll={},{}&oauth_token={}&v={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, latitude, longitude,ACCESS_TOKEN, VERSION, radius, limit)

In [29]:
#most of the code from this point is taken from DS0701EN-3-3-2-Neighborhoods-New-York-py-v2.0.ipynb by Lakshmi Holla
#this returns the closest 100 venues to Agincourt
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '60a3a6f1d66b3702e00df6eb'},
 'notifications': [{'item': {'unreadCount': 0}, 'type': 'notificationTray'}],
 'response': {'groups': [{'items': [{'reasons': {'count': 0,
       'items': [{'reasonName': 'globalInteractionReason',
         'summary': 'This spot is popular',
         'type': 'general'}]},
      'referralId': 'e-0-4bdda89c587b2d7fd88a5409-0',
      'venue': {'categories': [{'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/food/asian_',
          'suffix': '.png'},
         'id': '4bf58dd8d48988d142941735',
         'name': 'Asian Restaurant',
         'pluralName': 'Asian Restaurants',
         'primary': True,
         'shortName': 'Asian'}],
       'id': '4bdda89c587b2d7fd88a5409',
       'location': {'address': '8 Glen Watford Drive',
        'cc': 'CA',
        'city': 'Toronto',
        'country': 'Canada',
        'distance': 242,
        'formattedAddress': ['8 Glen Watford Drive',
         'Toronto ON M1S 2B7',
         '

In [30]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [31]:
#clean the json and structure it into a pandas dataframe
venues = results['response']['groups'][0]['items']
    
nearby_venues = pd.json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues

Unnamed: 0,name,categories,lat,lng
0,One2 Snacks,Asian Restaurant,43.787048,-79.276658
1,Tim Hortons,Coffee Shop,43.785637,-79.279215
2,In Cheon House Korean & Japanese Restaurant 인천관,Korean Restaurant,43.786468,-79.275693
3,Beef Noodle Restaurant 老李牛肉麵,Chinese Restaurant,43.785937,-79.276031
4,大泉港式快餐 Great Fountain Fast Food,Food Court,43.786835,-79.2774
5,Congee King,Chinese Restaurant,43.785908,-79.276042
6,Green Bamboo 青竹,Vietnamese Restaurant,43.785346,-79.27764
7,Lung Tung BBQ Food,Asian Restaurant,43.784836,-79.277837
8,Midland & Sheppard,Intersection,43.785249,-79.278745
9,Top Beauty & Spa,Spa,43.786664,-79.276979


In [32]:
#number of venues
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0])) 

38 venues were returned by Foursquare.


In [33]:
#function to explore a location and return the items in a dataframe
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            limit)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighbourhood', 
                  'Neighbourhood Latitude', 
                  'Neighbourhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [34]:
#runs the function
venues_Scarborough = getNearbyVenues(names=df_Scarborough['Postcode'],
                                   latitudes=df_Scarborough['Latitude'],
                                   longitudes=df_Scarborough['Longitude']
                                  )

M1B
M1C
M1E
M1G
M1H
M1J
M1K
M1L
M1M
M1N
M1P
M1R
M1S
M1T
M1V
M1W
M1X


In [35]:
#check size and display head
print(venues_Scarborough.shape)
venues_Scarborough.head()

(89, 7)


Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,M1B,43.806686,-79.194353,Wendy’s,43.807448,-79.199056,Fast Food Restaurant
1,M1B,43.806686,-79.194353,Interprovincial Group,43.80563,-79.200378,Print Shop
2,M1C,43.784535,-79.160497,Royal Canadian Legion,43.782533,-79.163085,Bar
3,M1E,43.763573,-79.188711,RBC Royal Bank,43.76679,-79.191151,Bank
4,M1E,43.763573,-79.188711,G & G Electronics,43.765309,-79.191537,Electronics Store


In [36]:
#number of venues per neighbourhood
venues_Scarborough.groupby("Neighbourhood").count()

Unnamed: 0_level_0,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
M1B,2,2,2,2,2,2
M1C,1,1,1,1,1,1
M1E,9,9,9,9,9,9
M1G,5,5,5,5,5,5
M1H,8,8,8,8,8,8
M1J,2,2,2,2,2,2
M1K,5,5,5,5,5,5
M1L,9,9,9,9,9,9
M1M,2,2,2,2,2,2
M1N,4,4,4,4,4,4


In [37]:
#determines number of unique categories
print('There are {} uniques categories.'.format(len(venues_Scarborough['Venue Category'].unique())))

There are 52 uniques categories.


In [38]:
#Analysing each neighbourhood

# one hot encoding
scarborough_onehot = pd.get_dummies(venues_Scarborough[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
scarborough_onehot['Neighbourhood'] = venues_Scarborough['Neighbourhood'] 

# move neighborhood column to the first column
fixed_columns = [scarborough_onehot.columns[-1]] + list(scarborough_onehot.columns[:-1])
scarborough_onehot = scarborough_onehot[fixed_columns]

scarborough_onehot.head()

Unnamed: 0,Neighbourhood,American Restaurant,Athletics & Sports,Auto Garage,Bakery,Bank,Bar,Breakfast Spot,Bus Line,Bus Station,Café,Caribbean Restaurant,Chinese Restaurant,Coffee Shop,College Stadium,Convenience Store,Department Store,Discount Store,Donut Shop,Electronics Store,Fast Food Restaurant,Fried Chicken Joint,Furniture / Home Store,Gas Station,General Entertainment,Hakka Restaurant,Ice Cream Shop,Indian Restaurant,Intersection,Italian Restaurant,Korean BBQ Restaurant,Latin American Restaurant,Light Rail Station,Lounge,Medical Center,Mexican Restaurant,Middle Eastern Restaurant,Motel,Noodle House,Park,Pet Store,Pharmacy,Pizza Place,Playground,Print Shop,Rental Car Location,Restaurant,Sandwich Place,Skating Rink,Smoke Shop,Soccer Field,Thai Restaurant,Vietnamese Restaurant
0,M1B,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,M1B,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
2,M1C,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,M1E,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,M1E,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [39]:
#size of new dataframe
scarborough_onehot.shape

(89, 53)

In [40]:
#group by neighbourhood, and find mean for each category
scarborough_grouped = scarborough_onehot.groupby('Neighbourhood').mean().reset_index()
scarborough_grouped

Unnamed: 0,Neighbourhood,American Restaurant,Athletics & Sports,Auto Garage,Bakery,Bank,Bar,Breakfast Spot,Bus Line,Bus Station,Café,Caribbean Restaurant,Chinese Restaurant,Coffee Shop,College Stadium,Convenience Store,Department Store,Discount Store,Donut Shop,Electronics Store,Fast Food Restaurant,Fried Chicken Joint,Furniture / Home Store,Gas Station,General Entertainment,Hakka Restaurant,Ice Cream Shop,Indian Restaurant,Intersection,Italian Restaurant,Korean BBQ Restaurant,Latin American Restaurant,Light Rail Station,Lounge,Medical Center,Mexican Restaurant,Middle Eastern Restaurant,Motel,Noodle House,Park,Pet Store,Pharmacy,Pizza Place,Playground,Print Shop,Rental Car Location,Restaurant,Sandwich Place,Skating Rink,Smoke Shop,Soccer Field,Thai Restaurant,Vietnamese Restaurant
0,M1B,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,M1C,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,M1E,0.0,0.0,0.0,0.0,0.111111,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.111111,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.111111,0.0,0.0,0.0,0.0,0.0,0.0
3,M1G,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0
4,M1H,0.0,0.125,0.0,0.125,0.125,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.125,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0
5,M1J,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0
6,M1K,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.2,0.0,0.2,0.2,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,M1L,0.0,0.0,0.0,0.222222,0.0,0.0,0.0,0.222222,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0
8,M1M,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,M1N,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0


In [41]:
#size of grouped dataframe
scarborough_grouped.shape

(16, 53)

In [42]:
#show top 5 venues in each neighbourhood
num_top_venues = 5

for hood in scarborough_grouped['Neighbourhood']:
    print("----"+hood+"----")
    temp = scarborough_grouped[scarborough_grouped['Neighbourhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----M1B----
                   venue  freq
0   Fast Food Restaurant   0.5
1             Print Shop   0.5
2    American Restaurant   0.0
3                   Park   0.0
4  Korean BBQ Restaurant   0.0


----M1C----
                       venue  freq
0                        Bar   1.0
1        American Restaurant   0.0
2                  Pet Store   0.0
3      Korean BBQ Restaurant   0.0
4  Latin American Restaurant   0.0


----M1E----
                venue  freq
0        Intersection  0.11
1                Bank  0.11
2      Breakfast Spot  0.11
3      Medical Center  0.11
4  Mexican Restaurant  0.11


----M1G----
                   venue  freq
0            Coffee Shop   0.4
1               Pharmacy   0.2
2  Korean BBQ Restaurant   0.2
3           Soccer Field   0.2
4              Pet Store   0.0


----M1H----
                venue  freq
0     Thai Restaurant  0.12
1              Bakery  0.12
2                Bank  0.12
3  Athletics & Sports  0.12
4    Hakka Restaurant  0.12


----M1J----


In [43]:
#function to sort venues in descending order
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [44]:
#find top 10 venues in each neighbourhood, and load into dataframe

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighbourhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighbourhoods_venues_sorted = pd.DataFrame(columns=columns)
neighbourhoods_venues_sorted['Neighbourhood'] = scarborough_grouped['Neighbourhood']

for ind in np.arange(scarborough_grouped.shape[0]):
    neighbourhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(scarborough_grouped.iloc[ind, :], num_top_venues)

neighbourhoods_venues_sorted.head()

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Fast Food Restaurant,Print Shop,Vietnamese Restaurant,Coffee Shop,Gas Station,Furniture / Home Store,Fried Chicken Joint,Electronics Store,Donut Shop,Discount Store
1,M1C,Bar,Vietnamese Restaurant,College Stadium,General Entertainment,Gas Station,Furniture / Home Store,Fried Chicken Joint,Fast Food Restaurant,Electronics Store,Donut Shop
2,M1E,Intersection,Bank,Restaurant,Rental Car Location,Electronics Store,Breakfast Spot,Donut Shop,Medical Center,Mexican Restaurant,Vietnamese Restaurant
3,M1G,Coffee Shop,Soccer Field,Korean BBQ Restaurant,Pharmacy,Vietnamese Restaurant,Furniture / Home Store,Fried Chicken Joint,Fast Food Restaurant,Electronics Store,Donut Shop
4,M1H,Hakka Restaurant,Athletics & Sports,Bakery,Bank,Gas Station,Fried Chicken Joint,Caribbean Restaurant,Thai Restaurant,Department Store,General Entertainment


In [45]:
#clusters neighbourhoods into 5 clusters

# set number of clusters
kclusters = 5

scarborough_grouped_clustering = scarborough_grouped.drop('Neighbourhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters).fit(scarborough_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([4, 0, 1, 1, 1, 3, 1, 1, 2, 1], dtype=int32)

In [46]:
#creates new dataframe to include cluster and top 10 venues

# add clustering labels
neighbourhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

scarborough_merged = df_Scarborough

# merge manhattan_grouped with manhattan_data to add latitude/longitude for each neighborhood
scarborough_merged = scarborough_merged.join(neighbourhoods_venues_sorted.set_index('Neighbourhood'), on='Postcode',)

scarborough_merged.drop(index=16,inplace=True)
scarborough_merged.head() # check the last columns!

Unnamed: 0,Postcode,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,"Malvern , Rouge",43.806686,-79.194353,4.0,Fast Food Restaurant,Print Shop,Vietnamese Restaurant,Coffee Shop,Gas Station,Furniture / Home Store,Fried Chicken Joint,Electronics Store,Donut Shop,Discount Store
1,M1C,"Rouge Hill , Port Union , Highland Creek",43.784535,-79.160497,0.0,Bar,Vietnamese Restaurant,College Stadium,General Entertainment,Gas Station,Furniture / Home Store,Fried Chicken Joint,Fast Food Restaurant,Electronics Store,Donut Shop
2,M1E,"Guildwood , Morningside , West Hill",43.763573,-79.188711,1.0,Intersection,Bank,Restaurant,Rental Car Location,Electronics Store,Breakfast Spot,Donut Shop,Medical Center,Mexican Restaurant,Vietnamese Restaurant
3,M1G,Woburn,43.770992,-79.216917,1.0,Coffee Shop,Soccer Field,Korean BBQ Restaurant,Pharmacy,Vietnamese Restaurant,Furniture / Home Store,Fried Chicken Joint,Fast Food Restaurant,Electronics Store,Donut Shop
4,M1H,Cedarbrae,43.773136,-79.239476,1.0,Hakka Restaurant,Athletics & Sports,Bakery,Bank,Gas Station,Fried Chicken Joint,Caribbean Restaurant,Thai Restaurant,Department Store,General Entertainment


In [47]:
#puts clusters on a map
location = geolocator.geocode("Scarborough,Toronto,CA")
latitude = location.latitude
longitude = location.longitude

# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

In [48]:
# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(scarborough_merged['Latitude'], scarborough_merged['Longitude'], scarborough_merged['Postcode'], scarborough_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster)-1],
        fill=True,
        fill_color=rainbow[int(cluster)-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [49]:
#Examines cluster 1
scarborough_merged.loc[scarborough_merged['Cluster Labels'] == 0, scarborough_merged.columns[[1] + list(range(5, scarborough_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,"Rouge Hill , Port Union , Highland Creek",Bar,Vietnamese Restaurant,College Stadium,General Entertainment,Gas Station,Furniture / Home Store,Fried Chicken Joint,Fast Food Restaurant,Electronics Store,Donut Shop


In [50]:
#Examines cluster 2
scarborough_merged.loc[scarborough_merged['Cluster Labels'] == 1, scarborough_merged.columns[[1] + list(range(5, scarborough_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,"Guildwood , Morningside , West Hill",Intersection,Bank,Restaurant,Rental Car Location,Electronics Store,Breakfast Spot,Donut Shop,Medical Center,Mexican Restaurant,Vietnamese Restaurant
3,Woburn,Coffee Shop,Soccer Field,Korean BBQ Restaurant,Pharmacy,Vietnamese Restaurant,Furniture / Home Store,Fried Chicken Joint,Fast Food Restaurant,Electronics Store,Donut Shop
4,Cedarbrae,Hakka Restaurant,Athletics & Sports,Bakery,Bank,Gas Station,Fried Chicken Joint,Caribbean Restaurant,Thai Restaurant,Department Store,General Entertainment
6,"Kennedy Park , Ionview , East Birchmount Park",Coffee Shop,Discount Store,Department Store,Convenience Store,Chinese Restaurant,College Stadium,General Entertainment,Gas Station,Furniture / Home Store,Fried Chicken Joint
7,"Golden Mile , Clairlea , Oakridge",Bakery,Bus Line,Ice Cream Shop,Intersection,Bus Station,Park,Soccer Field,Bank,Convenience Store,Furniture / Home Store
9,"Birch Cliff , Cliffside West",General Entertainment,Skating Rink,Café,College Stadium,Coffee Shop,Gas Station,Furniture / Home Store,Fried Chicken Joint,Fast Food Restaurant,Electronics Store
10,"Dorset Park , Wexford Heights , Scarborough To...",Indian Restaurant,Vietnamese Restaurant,Pet Store,Furniture / Home Store,Light Rail Station,Chinese Restaurant,Bar,Department Store,Athletics & Sports,Auto Garage
11,"Wexford , Maryvale",Middle Eastern Restaurant,Smoke Shop,Auto Garage,Sandwich Place,Bakery,Vietnamese Restaurant,Furniture / Home Store,Fried Chicken Joint,Fast Food Restaurant,Electronics Store
12,Agincourt,Skating Rink,Latin American Restaurant,Breakfast Spot,Lounge,Vietnamese Restaurant,College Stadium,Furniture / Home Store,Fried Chicken Joint,Fast Food Restaurant,Electronics Store
13,"Clarks Corners , Tam O'Shanter , Sullivan",Pizza Place,Fast Food Restaurant,Noodle House,Thai Restaurant,Fried Chicken Joint,Italian Restaurant,Convenience Store,Chinese Restaurant,Gas Station,Bank


In [51]:
#Examines cluster 3
scarborough_merged.loc[scarborough_merged['Cluster Labels'] == 2, scarborough_merged.columns[[1] + list(range(5, scarborough_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
8,"Cliffside , Cliffcrest , Scarborough Village West",American Restaurant,Motel,Hakka Restaurant,Gas Station,Furniture / Home Store,Fried Chicken Joint,Fast Food Restaurant,Electronics Store,Donut Shop,Discount Store


In [52]:
#Examines cluster 4
scarborough_merged.loc[scarborough_merged['Cluster Labels'] == 3, scarborough_merged.columns[[1] + list(range(5, scarborough_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
5,Scarborough Village,Smoke Shop,Playground,Vietnamese Restaurant,Coffee Shop,Gas Station,Furniture / Home Store,Fried Chicken Joint,Fast Food Restaurant,Electronics Store,Donut Shop


In [53]:
#Examines cluster 5
scarborough_merged.loc[scarborough_merged['Cluster Labels'] == 4, scarborough_merged.columns[[1] + list(range(5, scarborough_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Malvern , Rouge",Fast Food Restaurant,Print Shop,Vietnamese Restaurant,Coffee Shop,Gas Station,Furniture / Home Store,Fried Chicken Joint,Electronics Store,Donut Shop,Discount Store
