In [17]:
import numpy as np
import requests 
import pandas as pd
import matplotlib.cm as cm
import matplotlib.colors as colors

from sklearn.cluster import KMeans
import folium 

import geocoder

### Loading and storing coordinates

In [10]:
def get_latlng(postal_code, max_attempts=10):
    """Look up the coordinates of a Toronto postal code via the ArcGIS geocoder.

    The query sent to ``geocoder.arcgis`` is ``'<postal_code>, Toronto, Ontario'``.
    The lookup is repeated while the result's ``latlng`` attribute is ``None``,
    but only up to ``max_attempts`` times so that a postal code that never
    resolves cannot hang the notebook in an endless loop.

    Parameters
    ----------
    postal_code : str
        Postal code (or prefix such as ``'M4G'``) to geocode.
    max_attempts : int, optional
        Maximum number of geocoder calls before giving up. Defaults to 10.

    Returns
    -------
    The first non-``None`` ``latlng`` value reported by the geocoder
    (presumably a ``[latitude, longitude]`` pair -- confirm against the
    geocoder documentation).

    Raises
    ------
    RuntimeError
        If every one of the ``max_attempts`` lookups yields ``None``.
    """
    # Build the query once; it does not change between retries.
    query = '{}, Toronto, Ontario'.format(postal_code)

    for _ in range(max_attempts):
        lat_lng_coords = geocoder.arcgis(query).latlng
        if lat_lng_coords is not None:
            return lat_lng_coords

    raise RuntimeError(
        'Could not geocode {!r} after {} attempts'.format(query, max_attempts)
    )
    
# Example lookup: geocode the 'M4G' postal-code prefix and keep the result.
location = get_latlng(postal_code='M4G')

In [18]:
# Load the postal-code table (with its Latitude/Longitude columns) from the
# CSV in the working directory, then preview the first five rows.
Toronto_info = pd.read_csv(filepath_or_buffer='Toronto_Postalcodes2.csv')
Toronto_info.head(5)

Unnamed: 0.1,Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [21]:
# Report the number of distinct boroughs and the number of rows in the table
# (the message labels the row count as "neighborhoods").
print(
    'The dataframe has {} boroughs and {} neighborhoods.'.format(
        len(Toronto_info['Borough'].unique()),
        len(Toronto_info.index),
    )
)

The dataframe has 11 boroughs and 103 neighborhoods.


### Clustering and displaying neighbourhoods

In [20]:
# Group the rows of Toronto_info into geographic clusters with k-means and
# draw one colour-coded circle marker per row on a folium map.

# One fill colour per cluster; the length of this list also sets the number of
# clusters, so every label produced by k-means is guaranteed to have a colour.
# It is deliberately NOT called `colors`: that name is already bound to the
# `matplotlib.colors` module by the imports at the top of the notebook.
cluster_colors = ['red', 'green', 'blue', 'yellow']

# Map centred at latitude 43.65, longitude -79.4.
toronto_map = folium.Map(location=[43.65, -79.4], zoom_start=12)

# Feature matrix with one [latitude, longitude] row per dataframe row.
X = Toronto_info['Latitude']
Y = Toronto_info['Longitude']
Z = np.stack((X, Y), axis=1)

# random_state makes the clustering reproducible. n_init is pinned explicitly
# because its default differs between scikit-learn releases.
kmeans = KMeans(n_clusters=len(cluster_colors), n_init=10, random_state=0).fit(Z)

# Store each row's cluster label next to its coordinates.
clusters = kmeans.labels_
Toronto_info['Cluster'] = clusters

for latitude, longitude, borough, cluster in zip(
    Toronto_info['Latitude'],
    Toronto_info['Longitude'],
    Toronto_info['Borough'],
    Toronto_info['Cluster'],
):
    # The popup shows the borough name when a marker is clicked.
    label = folium.Popup(borough, parse_html=True)
    folium.CircleMarker(
        [latitude, longitude],
        radius=5,
        popup=label,
        color='black',
        fill=True,
        fill_color=cluster_colors[cluster],
        fill_opacity=0.7,
    ).add_to(toronto_map)

# Last expression of the cell: render the map inline.
toronto_map