In [23]:
import pandas as pd

In [24]:
# Load the postal-code / borough / neighborhood table from CSV.
canada_df = pd.read_csv(filepath_or_buffer="IBM_Dataset/Canada_data.csv")

In [30]:
# Remove the leftover index column that was saved into the CSV.
# `columns=` is used because passing `axis` positionally is rejected by
# pandas >= 2.0; errors="ignore" keeps the cell re-runnable once the column
# has already been dropped.
canada_df = canada_df.drop(columns="Unnamed: 0", errors="ignore")
canada_df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [31]:
# Load the postal-code -> latitude/longitude lookup table from CSV.
geo_df = pd.read_csv(filepath_or_buffer="IBM_Dataset/Geospatial_Coordinates.csv")

In [32]:
# Preview the first five coordinate rows.
geo_df.head(n=5)

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [33]:
# Sanity check: both tables should contain the same number of rows
len(geo_df) == len(canada_df)

True

In [34]:
# Attach coordinates to each neighborhood row. The join key is spelled out
# so that an unexpected shared column cannot silently change how rows match.
combine_df = pd.merge(canada_df, geo_df, on="Postal Code", how="inner")

In [35]:
# Inspect the column labels of the merged frame
combine_df.columns

Index(['Postal Code', 'Borough', 'Neighborhood', 'Latitude', 'Longitude'], dtype='object')

In [37]:
# Preview the first five merged rows.
combine_df.head(n=5)

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


In [38]:
# (rows, columns) of the merged frame
combine_df.shape

(103, 5)

In [39]:
import folium

In [62]:
# Pull the columns used for plotting out of the merged frame as Series.
latitude, longitude = combine_df["Latitude"], combine_df["Longitude"]
borough, Neighborhood = combine_df["Borough"], combine_df["Neighborhood"]

In [63]:
# Latitude stored under index label 0 (used below as the map centre).
latitude.loc[0]

43.7532586

# Let's Generate the Map

In [80]:

# Base map centred on the coordinates of the first row.
map_data = folium.Map(location=[latitude[0], longitude[0]], zoom_start=10)

# Add one circle marker per row; the popup text is "<neighborhood>, <borough>".
for lat, lon, bor, neigh in zip(latitude, longitude, borough, Neighborhood):
    label = folium.Popup("{}, {}".format(neigh, bor), parse_html=True)
    # CircleMarker has no `parse_html` parameter (that option belongs to
    # Popup), so it is not passed here.
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color="blue",
        fill=True,
        fill_color="red",
        fill_opacity=0.7,
    ).add_to(map_data)

# Last expression of the cell: render the map.
map_data

# Cluster Neighborhoods

In [86]:
from sklearn.cluster import KMeans

In [92]:

# Number of clusters to partition the rows into.
k_clusters = 5

# Estimator with a fixed random_state so the initialisation is repeatable.
kmeans = KMeans(random_state=0, n_clusters=k_clusters)

kmeans

KMeans(algorithm='auto', copy_x=True, init='k-means++', max_iter=300,
    n_clusters=5, n_init=10, n_jobs=None, precompute_distances='auto',
    random_state=0, tol=0.0001, verbose=0)

In [96]:
# Re-display the merged frame before deriving the clustering input from it.
combine_df.head(n=5)

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


In [98]:
# Start the clustering input from the merged frame without the Neighborhood
# text column. `columns=` is used because positional `axis` is rejected by
# pandas >= 2.0.
group_clustering = combine_df.drop(columns="Neighborhood")

In [108]:
# Remove the remaining non-numeric columns in one call so only the
# coordinates are left. errors="ignore" keeps this self-referential cell
# re-runnable after the columns are already gone.
group_clustering = group_clustering.drop(
    columns=["Postal Code", "Borough"], errors="ignore"
)

In [109]:
# Preview the clustering input (coordinates only).
group_clustering.head(n=5)

Unnamed: 0,Latitude,Longitude
0,43.753259,-79.329656
1,43.725882,-79.315572
2,43.65426,-79.360636
3,43.718518,-79.464763
4,43.662301,-79.389494


In [110]:
# Fit the estimator on the coordinate columns; the fitted estimator is echoed.
kmeans.fit(X=group_clustering)

KMeans(algorithm='auto', copy_x=True, init='k-means++', max_iter=300,
    n_clusters=5, n_init=10, n_jobs=None, precompute_distances='auto',
    random_state=0, tol=0.0001, verbose=0)

In [112]:
# Cluster label assigned to each fitted row, in input order
kmeans.labels_

array([4, 4, 2, 3, 2, 1, 0, 4, 4, 2, 3, 1, 0, 4, 4, 2, 2, 1, 0, 4, 2, 2,
       0, 4, 2, 2, 0, 3, 3, 4, 2, 2, 0, 3, 3, 4, 2, 2, 0, 3, 3, 4, 2, 2,
       4, 3, 1, 4, 2, 1, 1, 0, 3, 1, 4, 3, 1, 1, 4, 3, 1, 3, 3, 1, 1, 0,
       3, 3, 2, 1, 1, 0, 3, 3, 2, 2, 1, 1, 0, 2, 2, 1, 0, 2, 2, 0, 2, 2,
       1, 1, 0, 2, 2, 1, 1, 0, 2, 2, 1, 2, 4, 1, 1])

In [114]:
import matplotlib.cm as cm
import matplotlib.colors as colors

In [118]:
import numpy as np

In [140]:
# Fresh base map centred on the coordinates of the first row.
map_data = folium.Map(location=[latitude[0], longitude[0]], zoom_start=10)

# Colour scheme for the clusters: one hex colour per cluster, sampled evenly
# from matplotlib's rainbow colormap.
colors_array = cm.rainbow(np.linspace(0, 1, k_clusters))
rainbow = [colors.rgb2hex(rgba) for rgba in colors_array]

# kmeans.labels_ follows the row order of group_clustering, which was derived
# from combine_df without reordering, so it lines up with the Series below.
for lat, lon, bor, neigh, cluster in zip(
    latitude, longitude, borough, Neighborhood, kmeans.labels_
):
    label = folium.Popup("{}, {}".format(neigh, bor), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color="blue",
        fill=True,
        # Fill with the cluster's colour so the k-means result is visible.
        fill_color=rainbow[cluster],
        fill_opacity=0.5,
    ).add_to(map_data)

# Last expression of the cell: render the map.
map_data