# Segmenting and Clustering Neighborhoods in Toronto

In [1]:
# importing the library
import pandas as pd

# reading the table from html page
borough = pd.read_html('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M')[0]

In [2]:
#reading the head of the dataframe
borough.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


#### Removing the Boroughs which are having value as Not assigned

In [3]:
indexnames = borough[borough['Borough'] == 'Not assigned'].index

borough.drop(indexnames,inplace = True)

In [4]:
# head of the dataframe
borough.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights


#### More than one neighborhood can exist in one postal code area. So,Those two rows will be combined into one row with the neighborhoods separated with a comma 

In [5]:
borough = borough.groupby('Postcode').agg({'Borough':'first', 
                             'Neighbourhood': ', '.join}).reset_index()

In [6]:
borough.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


#### For Borough's  having Neighbourhood as "Not assigned" , Assigning the Neighbourhood with the same name as Borough's name 

In [7]:
# replacing the value of neighbourhood with borough name
borough.Neighbourhood.replace('Not assigned',borough.Borough,inplace=True)

In [8]:
borough

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park"
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge"
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


In [9]:
# shape of the dataframe
borough.shape

(103, 3)

In [11]:
location = pd.read_csv('Desktop/Geospatial_Coordinates.csv')

In [12]:
location.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [13]:
location = location.drop('Postal Code',1)

In [14]:
location.head()

Unnamed: 0,Latitude,Longitude
0,43.806686,-79.194353
1,43.784535,-79.160497
2,43.763573,-79.188711
3,43.770992,-79.216917
4,43.773136,-79.239476


In [15]:
data = pd.concat([borough,location],axis = 1)

In [16]:
data

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848


### Segmentation and clustering

In [17]:
import folium

In [18]:
from geopy.geocoders import Nominatim

In [19]:
### getting the cordinates of Toronto
address = 'Toronto, ON,Canada'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of New York City are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of New York City are 43.653963, -79.387207.


#### Create a map of Toronto with neighborhoods superimposed on top

In [20]:
# create map of New York using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)


    
map_toronto

In [21]:
CLIENT_ID = '4GE5AJ2TZ1XOC3V1PJZL1JULZGGGLFS0T5GNOCMI0UJ5OMAB' # your Foursquare ID
CLIENT_SECRET = '2FVHGA44OEZDSIQALXVW20QHES13DHYZBSOQ5SRNI2L2FEO1' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: 4GE5AJ2TZ1XOC3V1PJZL1JULZGGGLFS0T5GNOCMI0UJ5OMAB
CLIENT_SECRET:2FVHGA44OEZDSIQALXVW20QHES13DHYZBSOQ5SRNI2L2FEO1


In [29]:
downtown_toronto_borough = data[data['Borough'].str.contains("Downtown Toronto")]

In [30]:
downtown_toronto_borough.reset_index()

Unnamed: 0,index,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,50,M4W,Downtown Toronto,Rosedale,43.679563,-79.377529
1,51,M4X,Downtown Toronto,"Cabbagetown, St. James Town",43.667967,-79.367675
2,52,M4Y,Downtown Toronto,Church and Wellesley,43.66586,-79.38316
3,53,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.65426,-79.360636
4,54,M5B,Downtown Toronto,"Ryerson, Garden District",43.657162,-79.378937
5,55,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
6,56,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306
7,57,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383
8,58,M5H,Downtown Toronto,"Adelaide, King, Richmond",43.650571,-79.384568
9,59,M5J,Downtown Toronto,"Harbourfront East, Toronto Islands, Union Station",43.640816,-79.381752
