Importing the neccessary libraries 

In [1]:
import pandas as pd 
import numpy as np 

  ### Part 1

Scraping the  table data from the Wikipedia page and determine the size of it .

In [2]:
df=pd.read_html('https://en.wikipedia.org/w/index.php?title=List_of_postal_codes_of_Canada:_M&oldid=1012118802')[0]
number_of_rows=df.shape[0]
number_of_columns=df.shape[1]
print(f'The original DataFrame has {number_of_rows} rows and {number_of_columns} columns')
df

The original DataFrame has 180 rows and 3 columns


Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
...,...,...,...
175,M5Z,Not assigned,Not assigned
176,M6Z,Not assigned,Not assigned
177,M7Z,Not assigned,Not assigned
178,M8Z,Etobicoke,"Mimico NW, The Queensway West, South of Bloor,..."


Data Manipulation to make the required changes in the data 


In [3]:
# Exclude 'not assigned' Boroughs rows 
df = df[df['Borough'] != 'Not assigned']
# Rename a column 
df.rename(columns={'Neighbourhood':'Neighborhood'},inplace=True)
# if exists , Combine rows into one row with the neighborhoods separated with a comma
df = df.groupby('Postal Code', sort=False).agg(', '.join)

# Detecting if there are 'Not assigned' neighborhoods
Not_assigned_neighborhoods=df[df['Neighborhood']=="Not assigned"]
not_assigned_rows=Not_assigned_neighborhoods.shape[0]
print(f' The number of Not assigned neighborhoods is  {not_assigned_rows}')

# Reset the index
df.reset_index(inplace=True)

# Display the shape of final dataframe :
final_shape=df.shape
print(f' The size of final dataframe is {final_shape}')



 The number of Not assigned neighborhoods is  0
 The size of final dataframe is (103, 3)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


### Part 2 

Get the neighbhorhoods' latitude and longitude


In [4]:
Coordinates_data=pd.read_csv(r'C:\Users\403116\Desktop\Geospatial_Coordinates.csv')
Coordinates_data

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
...,...,...,...
98,M9N,43.706876,-79.518188
99,M9P,43.696319,-79.532242
100,M9R,43.688905,-79.554724
101,M9V,43.739416,-79.588437


Combine the two dataframes :

In [5]:
df_final=pd.merge(df,Coordinates_data,on="Postal Code",how='left')
df_final

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.654260,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
...,...,...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.653654,-79.506944
99,M4Y,Downtown Toronto,Church and Wellesley,43.665860,-79.383160
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C...",43.662744,-79.321558
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",43.636258,-79.498509


### Part 3

Install and Import the needed libraries


In [6]:
!pip install folium
import folium

!pip install geopy
from geopy.geocoders import Nominatim
print('done')

done


Use Geolocater to determine the latitude and longitude for the city of Toronto ,CA


In [7]:
address = 'Toronto, CA'

geolocator = Nominatim(user_agent="TR_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of New York City are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of New York City are 43.6534817, -79.3839347.


Visualization the neighborhoods in the map .

In [8]:
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=12)

for lat, lng, borough, neighborhood in zip(df_final['Latitude'], df_final['Longitude'], df_final['Borough'], df_final['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

Identify the  FourSquare credentials

In [9]:
# Personalized FOURSQUARE Credentials here
CLIENT_ID = 'ASW3CV1KARXMO54QIDQE3TYMFWMSWD4N4HWZAURXYDBGWB2C'
CLIENT_SECRET = 'AA1SH3ELZ1X0S40ZNSKZYFEH2ZRCMHMXYVQ5JLX1XFR4UA5V'
VERSION = '20210530'

Create a function to process all neighborhoods to get their nearby venues.

In [10]:
import requests
LIMIT = 50
radius = 500

def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [11]:
venues = getNearbyVenues(names=df_final['Neighborhood'], latitudes=df_final['Latitude'], longitudes=df_final['Longitude'])

Parkwoods
Victoria Village
Regent Park, Harbourfront
Lawrence Manor, Lawrence Heights
Queen's Park, Ontario Provincial Government
Islington Avenue, Humber Valley Village
Malvern, Rouge
Don Mills
Parkview Hill, Woodbine Gardens
Garden District, Ryerson
Glencairn
West Deane Park, Princess Gardens, Martin Grove, Islington, Cloverdale
Rouge Hill, Port Union, Highland Creek
Don Mills
Woodbine Heights
St. James Town
Humewood-Cedarvale
Eringate, Bloordale Gardens, Old Burnhamthorpe, Markland Wood
Guildwood, Morningside, West Hill
The Beaches
Berczy Park
Caledonia-Fairbanks
Woburn
Leaside
Central Bay Street
Christie
Cedarbrae
Hillcrest Village
Bathurst Manor, Wilson Heights, Downsview North
Thorncliffe Park
Richmond, Adelaide, King
Dufferin, Dovercourt Village
Scarborough Village
Fairview, Henry Farm, Oriole
Northwood Park, York University
East Toronto, Broadview North (Old East York)
Harbourfront East, Union Station, Toronto Islands
Little Portugal, Trinity
Kennedy Park, Ionview, East Birchmo

Run the functions for all neighborhoods in Toronto.

In [12]:
#Examine the resultant dataframe and its size .
print(venues.shape)
venues.head()

(1664, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.753259,-79.329656,Brookbanks Park,43.751976,-79.33214,Park
1,Parkwoods,43.753259,-79.329656,KFC,43.754387,-79.333021,Fast Food Restaurant
2,Parkwoods,43.753259,-79.329656,Variety Store,43.751974,-79.333114,Food & Drink Shop
3,Victoria Village,43.725882,-79.315572,Victoria Village Arena,43.723481,-79.315635,Hockey Arena
4,Victoria Village,43.725882,-79.315572,Tim Hortons,43.725517,-79.313103,Coffee Shop



Number of venues explored for each neighborhood

In [13]:
venues.groupby('Neighborhood',as_index=False).count()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Agincourt,4,4,4,4,4,4
1,"Alderwood, Long Branch",9,9,9,9,9,9
2,"Bathurst Manor, Wilson Heights, Downsview North",20,20,20,20,20,20
3,Bayview Village,4,4,4,4,4,4
4,"Bedford Park, Lawrence Manor East",27,27,27,27,27,27
...,...,...,...,...,...,...,...
91,"Willowdale, Willowdale East",33,33,33,33,33,33
92,"Willowdale, Willowdale West",4,4,4,4,4,4
93,Woburn,4,4,4,4,4,4
94,Woodbine Heights,6,6,6,6,6,6


In [14]:
venues.describe(include="all")

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
count,1664,1664.0,1664.0,1664,1664.0,1664.0,1664
unique,96,,,1192,,,259
top,Central Bay Street,,,Starbucks,,,Coffee Shop
freq,50,,,31,,,128
mean,,43.679356,-79.391118,,43.679356,-79.390961,
std,,0.044088,0.061268,,0.043987,0.061363,
min,,43.602414,-79.615819,,43.599152,-79.621765,
25%,,43.648429,-79.405678,,43.648835,-79.407665,
50%,,43.661608,-79.38228,,43.661331,-79.382058,
75%,,43.705369,-79.367675,,43.705739,-79.366841,


Checking how many unique Categories in Toronto 

In [15]:
print('There are {} uniques categories.'.format(len(venues['Venue Category'].unique())))

There are 259 uniques categories.


### Analysis for each neighborhood

In [16]:
# Dummies encoding
venues_onehot = pd.get_dummies(venues[['Venue Category']], prefix="", prefix_sep="")


Venues column to be added as the first column in the dummies DataFrame

In [17]:
venues_onehot['Neighborhood'] = venues['Neighborhood']

temp = list(venues_onehot.columns)

if 'Neighborhood' in temp:
    temp.remove('Neighborhood')
    
fixed_columns = ['Neighborhood'] + temp
venues_onehot = venues_onehot[fixed_columns]

venues_onehot.head(10)

Unnamed: 0,Neighborhood,Accessories Store,Adult Boutique,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Victoria Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Victoria Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,Victoria Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,Victoria Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7,Victoria Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


Group by the neighborhood and take the averages for the categories 

In [18]:
venues_grouped = venues_onehot.groupby('Neighborhood', sort=False).mean().reset_index()
print(venues_grouped.shape)
venues_grouped

(96, 259)


Unnamed: 0,Neighborhood,Accessories Store,Adult Boutique,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,Parkwoods,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0000,0.0,0.000000
1,Victoria Village,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0000,0.0,0.000000
2,"Regent Park, Harbourfront",0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.022727,0.0000,0.0,0.022727
3,"Lawrence Manor, Lawrence Heights",0.1,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.1,0.0,0.0,0.000000,0.0000,0.0,0.000000
4,"Queen's Park, Ontario Provincial Government",0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0000,0.0,0.029412
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
91,"The Kingsway, Montgomery Road, Old Mill North",0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0000,0.0,0.000000
92,Church and Wellesley,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0000,0.0,0.040000
93,"Business reply mail Processing Centre, South C...",0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0000,0.0,0.058824
94,"Old Mill South, King's Mill Park, Sunnylea, Hu...",0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0000,0.0,0.000000


Get the top 4 venues per neighborhood

In [19]:

num_top_venues = 4

for hood in venues_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = venues_grouped[venues_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Parkwoods----
                        venue  freq
0        Fast Food Restaurant  0.33
1           Food & Drink Shop  0.33
2                        Park  0.33
3  Modern European Restaurant  0.00


----Victoria Village----
                        venue  freq
0                Hockey Arena   0.2
1  Financial or Legal Service   0.2
2                 Coffee Shop   0.2
3       Portuguese Restaurant   0.2


----Regent Park, Harbourfront----
            venue  freq
0     Coffee Shop  0.18
1          Bakery  0.07
2            Park  0.07
3  Breakfast Spot  0.05


----Lawrence Manor, Lawrence Heights----
                   venue  freq
0         Clothing Store   0.2
1      Accessories Store   0.1
2  Vietnamese Restaurant   0.1
3               Boutique   0.1


----Queen's Park, Ontario Provincial Government----
               venue  freq
0        Coffee Shop  0.21
1   Sushi Restaurant  0.06
2        Yoga Studio  0.03
3  College Cafeteria  0.03


----Malvern, Rouge----
                  venue  fr

                    venue  freq
0  Furniture / Home Store  0.33
1              Restaurant  0.33
2             Pizza Place  0.33
3               Pet Store  0.00


----Cliffside, Cliffcrest, Scarborough Village West----
                 venue  freq
0  American Restaurant  0.33
1         Skating Rink  0.33
2                Motel  0.33
3   Mexican Restaurant  0.00


----Willowdale, Newtonbrook----
                 venue  freq
0            Piano Bar   1.0
1          Pizza Place   0.0
2               Market   0.0
3  Martial Arts School   0.0


----Studio District----
         venue  freq
0  Coffee Shop  0.08
1      Brewery  0.06
2    Gastropub  0.06
3         Café  0.06


----Bedford Park, Lawrence Manor East----
            venue  freq
0     Pizza Place  0.07
1     Coffee Shop  0.07
2      Restaurant  0.07
3  Sandwich Place  0.07


----Del Ray, Mount Dennis, Keelsdale and Silverthorn----
            venue  freq
0     Coffee Shop  0.25
1  Discount Store  0.25
2      Restaurant  0.25
3  Sandw

### Most common venues for each neighborhood

In [20]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [21]:
num_top_venues = 5

indicators = ['st', 'nd', 'rd']

columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = venues_grouped['Neighborhood']

for ind in np.arange(venues_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(venues_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Parkwoods,Fast Food Restaurant,Park,Food & Drink Shop,Yoga Studio,Doner Restaurant
1,Victoria Village,Intersection,Coffee Shop,Financial or Legal Service,Portuguese Restaurant,Hockey Arena
2,"Regent Park, Harbourfront",Coffee Shop,Bakery,Park,Café,Breakfast Spot
3,"Lawrence Manor, Lawrence Heights",Clothing Store,Vietnamese Restaurant,Miscellaneous Shop,Furniture / Home Store,Shoe Store
4,"Queen's Park, Ontario Provincial Government",Coffee Shop,Sushi Restaurant,Yoga Studio,Distribution Center,Salad Place
...,...,...,...,...,...,...
91,"The Kingsway, Montgomery Road, Old Mill North",River,Yoga Studio,Department Store,Escape Room,Electronics Store
92,Church and Wellesley,Sushi Restaurant,Coffee Shop,Yoga Studio,Japanese Restaurant,Restaurant
93,"Business reply mail Processing Centre, South C...",Light Rail Station,Yoga Studio,Garden Center,Fast Food Restaurant,Farmers Market
94,"Old Mill South, King's Mill Park, Sunnylea, Hu...",Baseball Field,Business Service,Yoga Studio,Diner,Event Space


### Cluster neighborhoods by using K means 

In [22]:
from sklearn.cluster import KMeans

Fit 4 clusters to the data of venues .

In [23]:
k = 4
venues_grouped_clustering = venues_grouped.drop('Neighborhood', axis=1)
kmeans = KMeans(n_clusters=k, random_state=None).fit(venues_grouped_clustering)
kmeans.labels_

array([2, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 2, 0, 0, 2, 0, 2, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0,
       0, 0, 0, 3, 0, 0, 1, 0])

Create and view  a new dataframe that includes cluster labels and the top 5 venues

In [24]:
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

In [32]:
venues_merged = df_final
venues_merged = venues_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')
venues_merged.dropna(inplace=True)
venues_merged['Cluster Labels'] = venues_merged['Cluster Labels'].astype(int)
#let the cluster labeles to start from 1 and ends at 4 . ( 4 clusters )
venues_merged['Cluster Labels'] = venues_merged['Cluster Labels']+1



Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,M3A,North York,Parkwoods,43.753259,-79.329656,3,Fast Food Restaurant,Park,Food & Drink Shop,Yoga Studio,Doner Restaurant
1,M4A,North York,Victoria Village,43.725882,-79.315572,1,Intersection,Coffee Shop,Financial or Legal Service,Portuguese Restaurant,Hockey Arena
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.654260,-79.360636,1,Coffee Shop,Bakery,Park,Café,Breakfast Spot
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763,1,Clothing Store,Vietnamese Restaurant,Miscellaneous Shop,Furniture / Home Store,Shoe Store
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,1,Coffee Shop,Sushi Restaurant,Yoga Studio,Distribution Center,Salad Place
...,...,...,...,...,...,...,...,...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.653654,-79.506944,4,River,Yoga Studio,Department Store,Escape Room,Electronics Store
99,M4Y,Downtown Toronto,Church and Wellesley,43.665860,-79.383160,1,Sushi Restaurant,Coffee Shop,Yoga Studio,Japanese Restaurant,Restaurant
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C...",43.662744,-79.321558,1,Light Rail Station,Yoga Studio,Garden Center,Fast Food Restaurant,Farmers Market
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",43.636258,-79.498509,2,Baseball Field,Business Service,Yoga Studio,Diner,Event Space


In [33]:
import matplotlib.cm as cm
import matplotlib.colors as colors

map_clusters = folium.Map(location=[latitude, longitude], zoom_start=12)

x = np.arange(k)
ys = [i + x + (i*x)**2 for i in range(k)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

markers_colors = []
for lat, lon, poi, cluster in zip(venues_merged['Latitude'], venues_merged['Longitude'], venues_merged['Neighborhood'], venues_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

viewing cluster #1 list 


In [34]:
venues_merged.loc[venues_merged['Cluster Labels'] == 1, venues_merged.columns[[1] + list(range(5, venues_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
1,North York,1,Intersection,Coffee Shop,Financial or Legal Service,Portuguese Restaurant,Hockey Arena
2,Downtown Toronto,1,Coffee Shop,Bakery,Park,Café,Breakfast Spot
3,North York,1,Clothing Store,Vietnamese Restaurant,Miscellaneous Shop,Furniture / Home Store,Shoe Store
4,Downtown Toronto,1,Coffee Shop,Sushi Restaurant,Yoga Studio,Distribution Center,Salad Place
7,North York,1,Gym,Restaurant,Coffee Shop,Chinese Restaurant,Sandwich Place
...,...,...,...,...,...,...,...
96,Downtown Toronto,1,Coffee Shop,Café,Pizza Place,Restaurant,Bakery
97,Downtown Toronto,1,Café,Coffee Shop,Restaurant,Hotel,Gym
99,Downtown Toronto,1,Sushi Restaurant,Coffee Shop,Yoga Studio,Japanese Restaurant,Restaurant
100,East Toronto,1,Light Rail Station,Yoga Studio,Garden Center,Fast Food Restaurant,Farmers Market


Viewing cluster #2 list 

In [35]:
venues_merged.loc[venues_merged['Cluster Labels'] == 2, venues_merged.columns[[1] + list(range(5, venues_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
57,North York,2,Baseball Field,Yoga Studio,Dim Sum Restaurant,Event Space,Ethiopian Restaurant
101,Etobicoke,2,Baseball Field,Business Service,Yoga Studio,Diner,Event Space


In [36]:
venues_merged.loc[venues_merged['Cluster Labels'] == 3, venues_merged.columns[[1] + list(range(5, venues_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,North York,3,Fast Food Restaurant,Park,Food & Drink Shop,Yoga Studio,Doner Restaurant
6,Scarborough,3,Fast Food Restaurant,Falafel Restaurant,Event Space,Ethiopian Restaurant,Escape Room
21,York,3,Park,Women's Store,Dessert Shop,Ethiopian Restaurant,Escape Room
61,Central Toronto,3,Bus Line,Park,Swim School,Yoga Studio,Doner Restaurant
64,York,3,Park,Yoga Studio,Dessert Shop,Ethiopian Restaurant,Escape Room
66,North York,3,Park,Convenience Store,Yoga Studio,Donut Shop,Diner
77,Etobicoke,3,Park,Sandwich Place,Mobile Phone Shop,Yoga Studio,Dessert Shop
91,Downtown Toronto,3,Park,Playground,Trail,Yoga Studio,Dog Run


In [37]:
venues_merged.loc[venues_merged['Cluster Labels'] == 4, venues_merged.columns[[1] + list(range(5, venues_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
98,Etobicoke,4,River,Yoga Studio,Department Store,Escape Room,Electronics Store


# Analysis is done , have a good day 