# __Segmenting and Clustering Neighborhoods in Toronto__
## __PART 2: Getting the latitude and the longitude coordinates of each neighborhood__

__Before we get the data and start exploring it, let's import the libraries that we will need.__

In [1]:
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup
import io

# Remove pandas' display limits so frames are rendered without truncating
# columns or rows.
for option_name in ('display.max_columns', 'display.max_rows'):
    pd.set_option(option_name, None)

__First, we load the dataframe that we created in Part 1 from its .csv file__

In [2]:
# Read the table saved by Part 1 from the local CSV file and preview the
# first five rows.
df = pd.read_csv(filepath_or_buffer='Toronto_Part_1.csv')
df.head(n=5)

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


__Secondly, we download the geospatial data from the given link__

In [3]:
# Download the geospatial CSV and parse it into a dataframe.
url = "http://cocl.us/Geospatial_data"
# A timeout keeps the cell from hanging indefinitely on a stalled connection.
response = requests.get(url, timeout=30)
# Fail loudly on an HTTP error instead of parsing an error page as CSV.
response.raise_for_status()
s = response.content
# The payload is bytes; decode it as UTF-8 before handing it to pandas.
df_geo = pd.read_csv(io.StringIO(s.decode('utf-8')))
df_geo.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [4]:
# Report the (rows, columns) shape of both frames before merging them.
for description, frame in (
    ('The shape of the dataframe which includes boroughs and neighbourhoods is:', df),
    ('The shape of the dataframe which includes latitudes and longitudes is:', df_geo),
):
    print(description, frame.shape)

The shape of the dataframe which includes boroughs and neighbourhoods is: (103, 3)
The shape of the dataframe which includes latitudes and longitudes is: (103, 3)


__Since they have Postal Code in common, we merge these two dataframes__

In [5]:
# Join the two frames on their shared 'Postal Code' column.
# how='outer' keeps postal codes that appear in only one of the frames (their
# missing fields become NaN). validate='one_to_one' makes pandas raise a
# MergeError if a postal code is duplicated on either side, instead of
# silently multiplying rows in the result.
df_new = df.merge(df_geo, on='Postal Code', how='outer', validate='one_to_one')
# Preview the first rows only; the display options above would otherwise
# render every row of the merged frame.
df_new.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


__We save the merged dataframe to a .csv file so that it can be reused in other notebooks__

In [6]:
# Write the merged frame to disk; index=False leaves the row index out of the file.
df_new.to_csv(path_or_buf='Toronto_Part_2.csv', index=False)

In [7]:
# Summarise the merged frame: its row count, then its full (rows, columns) shape.
merged_shape = df_new.shape
row_count = merged_shape[0]
print('The number of rows of the dataframe is', row_count)
print(merged_shape)

The number of rows of the dataframe is 103
(103, 5)
