# Coursera IBM 

## Segmenting and Clustering Neighborhoods in Toronto


In [1]:
# python version is 3.8.6, 64-bit
import geocoder
import numpy as np
import pandas as pd

## Task 1
> Copy the data table from Wikipedia to the clipboard, then use this pandas method to load it into a DataFrame.

In [26]:
# NOTE(review): this reads whatever is currently on the system clipboard, so the
# Wikipedia table has to be copied right before running this cell; the cell is
# not reproducible on an unattended "Restart & Run All".
df_raw = pd.read_clipboard()

In [27]:
# preview the first rows to confirm the clipboard table parsed into the expected columns
df_raw.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M1B,Scarborough,"Malvern, Rouge"
2,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
3,M1E,Scarborough,"Guildwood, Morningside, West Hill"
4,M1G,Scarborough,Woburn


Exclude the rows whose "Borough" value is "Not assigned".

In [42]:
# keep only the rows whose borough has actually been assigned
has_borough = df_raw['Borough'].ne('Not assigned')
df_clean = df_raw.loc[has_borough]

In [41]:
# (rows, columns) remaining after the "Not assigned" boroughs were filtered out
df_clean.shape

(103, 3)

## Task 2

> I picked "ArcGIS" provider for GeoCoder module.
https://geocoder.readthedocs.io/providers/ArcGIS.html

> Here we iterate over all the postal codes and store the coordinates returned for each one in a dictionary.




In [93]:
# how many times a single lookup is attempted before giving up
MAX_GEOCODE_ATTEMPTS = 3


def geocode_latlng(query, attempts=MAX_GEOCODE_ATTEMPTS):
    """Look up ``query`` with the ArcGIS provider and return ``(lat, lng)``.

    The lookup is repeated up to ``attempts`` times because a request can come
    back without a result; in that case ``.json`` is not a usable dict and
    indexing it directly would abort the whole loop with an opaque TypeError.

    Raises:
        RuntimeError: if no attempt produced coordinates for ``query``.
    """
    for _ in range(attempts):
        result = geocoder.arcgis(query).json
        if result:
            return result['lat'], result['lng']
    raise RuntimeError(
        f'ArcGIS returned no coordinates for {query!r} after {attempts} attempts'
    )


# get list of Postal Codes
postal_codes = df_clean['Postal Code'].to_numpy()
# map every postal code to its (latitude, longitude) pair
coordinates_dict = {code: geocode_latlng(f'{code} Canada') for code in postal_codes}

In [142]:
# turn the dictionary into a DataFrame: one row per postal code (the dict keys
# become the index), with the two tuple items as float columns 0 and 1
fd_coordinates = pd.DataFrame.from_dict(
    data=coordinates_dict,
    orient='index',
    dtype='float',
)

In [143]:
# give the positional columns 0 and 1 descriptive names; reassign instead of
# mutating in place so the cell has no hidden side effect on the earlier object
fd_coordinates = fd_coordinates.rename(columns={0: 'Latitude', 1: 'Longitude'})

In [155]:
# Attach the coordinates to the neighbourhood table by matching on the postal
# code itself rather than on row position: fd_coordinates is indexed by postal
# code and holds one entry per distinct code, so a positional concat would
# silently misalign (or pad with NaN) if the two frames ever differed in
# length or order.
d1 = df_clean.reset_index(drop=True)
# positional copy of the coordinates; not used for the join, retained only so
# that any cell referring to `d2` keeps working
d2 = fd_coordinates.reset_index(drop=True)
# left join: every row of d1 is kept, looked up in fd_coordinates' index
df_full = d1.join(fd_coordinates, on='Postal Code')

In [156]:
# preview the combined table: neighbourhood data plus the Latitude/Longitude columns
df_full.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.81139,-79.19662
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.78574,-79.15875
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.76575,-79.1747
3,M1G,Scarborough,Woburn,43.76812,-79.21761
4,M1H,Scarborough,Cedarbrae,43.76944,-79.23892


## Task 3

In [168]:
import folium

In [174]:
# coordinates of Toronto, used to centre the map
g = geocoder.arcgis('Toronto Canada')
toronto_location = g.json
# a lookup that returned nothing leaves no usable result dict; fail with a
# clear message instead of a TypeError on the subscript below
if not toronto_location:
    raise RuntimeError("ArcGIS returned no coordinates for 'Toronto Canada'")
toronto_lat = toronto_location['lat']
toronto_lng = toronto_location['lng']

In [202]:
# base map centred on the Toronto coordinates looked up above
toronto_center = [toronto_lat, toronto_lng]
toronto_map = folium.Map(location=toronto_center, zoom_start=10)

In [216]:
# Draw one blue circle marker per row of df_full, labelled with its neighbourhood.
for lat, lng, label in zip(df_full['Latitude'], df_full['Longitude'], df_full['Neighbourhood']):
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        # parse_html=True makes folium escape the label, so names containing
        # characters such as ', & or < are shown as literal text instead of
        # being injected into the page as raw HTML
        popup=folium.Popup(label, parse_html=True),
        fill=True,
        color='blue',
        fill_color='blue',
        fill_opacity=0.6
        ).add_to(toronto_map)

# last expression of the cell, so the notebook renders the map
toronto_map

> Here is a static picture of the map in case the interactive version doesn't render on GitHub.

![Map of Toronto neighbourhoods](https://jo5u7g.by.files.1drv.com/y4mWVym4oBRWElWVWGHk3IWYJmAYl7BxdGA61FlydQ9UJkhedMrvjKiEIUxk-BqMwbmdCFstcZkh08PCLOeB-Md5wXcYI9HXLRNW4HXl37ETAFMNpOxmQuCBp68Tc3zRavLrhpdGwHzxQlM9sR8eER1qLyuqGOjkIDcUQvUViMvkgqzG5O48c6jSigXGh2DI7ZtkjJbmjXMEQYL2Elxu4T-mw/map.PNG?psid=1)