# Segmenting and Clustering Neighborhoods in Toronto
### Assignment - Author: Jithin Prakash Kolamkolly
---
#### __PART I__
----

_Import from Wiki to Pandas_

In [1]:
# Install the libraries/modules needed by this notebook.
# The commands are left commented out; uncomment them to run once in a fresh environment.
# NOTE(review): lxml is presumably the HTML parser used by pd.read_html below - confirm.
# NOTE(review): folium is imported later in this notebook but is not installed here.
#!conda install -c conda-forge lxml --yes
#!conda install -c conda-forge geopy --yes

In [2]:
# pandas provides read_html, which parses every <table> on a page into a DataFrame
import pandas as pd

# Wikipedia page listing the postal codes that start with "M"
URL = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

# read_html returns a list of DataFrames (one per table); the first table is
# the one used here, with its first row taken as the column header
wiki_tables = pd.read_html(URL, header=0)
df_Canada = wiki_tables[0]

_1. Remove 'Not assigned' Boroughs_  
_2. Group Postal Code Neighbourhoods_  
_3. If a cell has a borough but a Not assigned neighborhood, then the neighborhood will be the same as the borough._

In [3]:
# Step 1: drop the rows whose borough is 'Not assigned'
df_Canada = df_Canada[df_Canada['Borough'] != 'Not assigned']

# Step 2: collapse to one row per postal code, joining the distinct values of
# every other column with ','.
# dict.fromkeys() de-duplicates while keeping first-seen order. A plain set()
# iterates strings in an order that changes between kernel sessions (string
# hash randomisation), which made the joined text non-reproducible.
df_Canada = (
    df_Canada
    .groupby("Postal Code")
    .agg(lambda values: ','.join(dict.fromkeys(values)))
    .reset_index()
)

# Step 3: a neighborhood that is 'Not assigned' takes the name of its borough.
# The mask is computed once so both sides of the assignment select the same rows.
neighborhood_missing = df_Canada['Neighborhood'] == "Not assigned"
df_Canada.loc[neighborhood_missing, 'Neighborhood'] = df_Canada.loc[neighborhood_missing, 'Borough']

# Shape of the cleaned dataframe, followed by a preview of the first rows
print("\n\nNumber of (Rows,Columns)  =  {} \n\n".format(df_Canada.shape))
df_Canada.head()



Number of (Rows,Columns)  =  (103, 3) 




Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


---
#### __PART II__
----

_Importing from the CSV file provided - to the dataFrame_

In [4]:
# Location of the CSV that maps each postal code to a latitude/longitude pair
geo_csv_path = 'https://cocl.us/Geospatial_data'

# Load the coordinates into their own frame and preview the first five rows
df_geo = pd.read_csv(filepath_or_buffer=geo_csv_path)
df_geo.head(n=5)

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


_Joining the dataFrames using Postal code as the Key_

In [5]:
# Left-join the coordinates onto the neighborhood table, keyed on 'Postal Code'.
# This cell reassigns df_Canada, so the columns contributed by df_geo are dropped
# first (when present): re-running the cell then yields the same frame instead of
# failing on overlapping 'Latitude'/'Longitude' column names.
geo_value_columns = [col for col in df_geo.columns if col != 'Postal Code']
df_Canada = (
    df_Canada
    .drop(columns=geo_value_columns, errors='ignore')
    .merge(df_geo, on='Postal Code', how='left')  # left join keeps every df_Canada row, in order
)
df_Canada.head(11)

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park",43.727929,-79.262029
7,M1L,Scarborough,"Golden Mile, Clairlea, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848


In [6]:
# Dimensions of the joined frame as (rows, columns)
df_Canada.shape

(103, 5)

----------
### __PART III__
_______

In [7]:
from geopy.geocoders import Nominatim 
import folium

*The Nominatim user agent `ny_explorer` is used to geocode the address 'Toronto'*

In [8]:
# Look up the coordinates that the next map will be centred on
address = 'Toronto'

geolocator = Nominatim(user_agent="ny_explorer")
# An explicit timeout is passed because geopy's default is short and the public
# Nominatim service is often slower than that.
location = geolocator.geocode(address, timeout=10)
# geocode() returns None when nothing matches; fail with a clear message rather
# than an AttributeError on the attribute access below.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


_Create a map of Toronto using the latitude and longitude values_

In [9]:
# Base map centred on the coordinates geocoded in the previous cell.
# The variable keeps its original name (map_newyork) even though it shows
# Toronto, because later cells may refer to it.
map_newyork = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
# Rows without coordinates are skipped: the earlier left join can leave NaN
# there, and a NaN location would abort the whole map.
plottable = df_Canada.dropna(subset=['Latitude', 'Longitude'])
marker_columns = ['Latitude', 'Longitude', 'Borough', 'Neighborhood']
for lat, lng, borough, neighborhood in plottable[marker_columns].itertuples(index=False, name=None):
    label = '{}, {}'.format(neighborhood, borough)
    # parse_html=True makes the popup render the label as text, not markup
    label = folium.Popup(label, parse_html=True)
    # parse_html is a Popup option, not a CircleMarker option, so it is no
    # longer passed to the marker itself.
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
    ).add_to(map_newyork)

map_newyork

_Filtering Scarborough Data_

In [10]:
# Keep only the rows whose borough is exactly 'Scarborough', renumbered from 0
is_scarborough = df_Canada['Borough'].eq('Scarborough')
Scarborough_data = df_Canada.loc[is_scarborough].reset_index(drop=True)
Scarborough_data.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


*The Nominatim user agent `ny_explorer` is used to geocode the address 'Scarborough, CA'*

In [11]:
# Look up the coordinates that the Scarborough map will be centred on
address = 'Scarborough, CA'

geolocator = Nominatim(user_agent="ny_explorer")
# An explicit timeout is passed because geopy's default is short and the public
# Nominatim service is often slower than that.
location = geolocator.geocode(address, timeout=10)
# geocode() returns None when nothing matches; fail with a clear message rather
# than an AttributeError on the attribute access below.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Scarborough are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Scarborough are 43.773077, -79.257774.


In [12]:
# Base map centred on the Scarborough coordinates geocoded in the previous cell.
# The variable keeps its original name (map_newyork) because later cells may
# refer to it.
map_newyork = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
# Rows without coordinates are skipped: the earlier left join can leave NaN
# there, and a NaN location would abort the whole map.
scarborough_points = Scarborough_data.dropna(subset=['Latitude', 'Longitude'])
marker_columns = ['Latitude', 'Longitude', 'Borough', 'Neighborhood']
for lat, lng, borough, neighborhood in scarborough_points[marker_columns].itertuples(index=False, name=None):
    label = '{}, {}'.format(neighborhood, borough)
    # parse_html=True makes the popup render the label as text, not markup
    label = folium.Popup(label, parse_html=True)
    # parse_html is a Popup option, not a CircleMarker option, so it is no
    # longer passed to the marker itself.
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
    ).add_to(map_newyork)

map_newyork

_Filtering Toronto Data - Where the Borough Name Contains "Toronto"_

In [13]:
# Select every borough whose name contains "Toronto" (e.g. East Toronto,
# Central Toronto) and renumber the resulting rows from 0
borough_mentions_toronto = df_Canada['Borough'].str.contains("Toronto")
Toronto_data = df_Canada.loc[borough_mentions_toronto].reset_index(drop=True)
Toronto_data.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M4E,East Toronto,The Beaches,43.676357,-79.293031
1,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188
2,M4L,East Toronto,"India Bazaar, The Beaches West",43.668999,-79.315572
3,M4M,East Toronto,Studio District,43.659526,-79.340923
4,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879


*The Nominatim user agent `ny_explorer` is used to geocode the address 'Toronto'*

In [14]:
# latitude/longitude were overwritten by the Scarborough lookup, so Toronto is
# geocoded again to centre the next map
address = 'Toronto'

geolocator = Nominatim(user_agent="ny_explorer")
# An explicit timeout is passed because geopy's default is short and the public
# Nominatim service is often slower than that.
location = geolocator.geocode(address, timeout=10)
# geocode() returns None when nothing matches; fail with a clear message rather
# than an AttributeError on the attribute access below.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


In [15]:
# Base map centred on the Toronto coordinates geocoded in the previous cell.
# The variable keeps its original name (map_newyork) because later cells may
# refer to it.
map_newyork = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
# Rows without coordinates are skipped: the earlier left join can leave NaN
# there, and a NaN location would abort the whole map.
toronto_points = Toronto_data.dropna(subset=['Latitude', 'Longitude'])
marker_columns = ['Latitude', 'Longitude', 'Borough', 'Neighborhood']
for lat, lng, borough, neighborhood in toronto_points[marker_columns].itertuples(index=False, name=None):
    label = '{}, {}'.format(neighborhood, borough)
    # parse_html=True makes the popup render the label as text, not markup
    label = folium.Popup(label, parse_html=True)
    # parse_html is a Popup option, not a CircleMarker option, so it is no
    # longer passed to the marker itself.
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
    ).add_to(map_newyork)

map_newyork