# 2. Geocoding Toronto Postal Codes

In [22]:
import geocoder    #geocoder did not work for me. I have used the provided csv instead
import pandas as pd

#### Read the csv file created earlier into a dataframe

In [23]:
# Load the postcode / borough / neighbourhood table that was saved earlier.
toronto_csv = "toronto.csv"
df = pd.read_csv(toronto_csv)

In [24]:
# Preview the first five rows (head() defaults to n=5).
df.head()

Unnamed: 0.1,Unnamed: 0,Postcode,Borough,Neighbourhood
0,0,M1B,Scarborough,"Rouge, Malvern"
1,1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,2,M1E,Scarborough,"Guildwood\r\n, Morningside, West Hill"
3,3,M1G,Scarborough,Woburn
4,4,M1H,Scarborough,Cedarbrae


### Clean the data: remove unwanted column and remove special characters

In [25]:
# Drop the unwanted "Unnamed: 0" column (presumably a row index that was written
# out with the CSV -- TODO confirm against the notebook that produced it).
# errors="ignore" keeps this cell safe to re-run: df is rebound below, so without
# it a second execution raises KeyError because the column is already gone.
df = df.drop(columns="Unnamed: 0", errors="ignore")

# Strip carriage returns / line feeds embedded in the text values
# (the raw data contains entries such as "Guildwood\r\n, Morningside, West Hill").
df = df.replace({"\n": "", "\r": ""}, regex=True)

# Report how many distinct neighbourhood strings and boroughs remain after cleaning.
print("There are {} unique neighbourhoods and {} unique boroughs in Toronto dataset.".format(
    len(df.Neighbourhood.unique()),
    len(df.Borough.unique()),
))

There are 103 unique neighbourhoods and 11 unique boroughs in Toronto dataset.


In [26]:
# Re-check the first five rows after cleaning.
df.head(n=5)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [27]:
# Disabled geocoding attempt, kept inside a bare triple-quoted string so it never
# executes; the coordinates are read from a CSV file instead. Because the string is
# the cell's only expression, the notebook echoes it back as the cell output.
# NOTE(review): if this is ever re-enabled, `df.Postcode` hands the whole column to
# format() rather than a single postcode, and the while-loop has no retry limit, so
# it would spin forever whenever the geocoder keeps returning None.
'''
# initialize your variable to None
lat_long_coords = None

# loop until you get the coordinates
while(lat_long_coords is None):
    g = geocoder.google('{}, Toronto, Ontario'.format(df.Postcode))
    lat_long_coords = g.latlng

latitude = lat_long_coords[0]
longitude = lat_long_coords[1]
'''

"\n# initialize your variable to None\nlat_long_coords = None\n\n# loop until you get the coordinates\nwhile(lat_long_coords is None):\n    g = geocoder.google('{}, Toronto, Ontario'.format(df.Postcode))\n    lat_long_coords = g.latlng\n\nlatitude = lat_long_coords[0]\nlongitude = lat_long_coords[1]\n"

### Read the csv containing the coordinates into a dataframe

In [28]:
# Load the per-postcode latitude/longitude table.
coordinates_csv = "Geospatial_Coordinates.csv"
df_coord = pd.read_csv(coordinates_csv)

In [29]:
# Inspect the column labels of the coordinates file before renaming them.
df_coord.columns

Index(['Postal Code', 'Latitude', 'Longitude'], dtype='object')

In [30]:
# Rename by label rather than by position: assigning a whole list to `.columns`
# relabels whatever sits in each slot, so a file with reordered columns would be
# silently mislabelled. Only "Postal Code" has to change so that it matches the
# "Postcode" key used in df; "Latitude" and "Longitude" already have the right names.
df_coord = df_coord.rename(columns={"Postal Code": "Postcode"})

In [31]:
# Preview the first five coordinate rows under the new column names.
df_coord.head(n=5)

Unnamed: 0,Postcode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


### Join the two dataframes on Postcode

In [32]:
# Index the coordinates by postcode, then look each row of df up through its own
# "Postcode" column so it picks up the matching Latitude / Longitude.
coords_by_postcode = df_coord.set_index("Postcode")
df = df.join(coords_by_postcode, on="Postcode")

In [33]:
# Preview the joined frame: the original columns plus Latitude and Longitude.
df.head(n=5)

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [34]:
# Dimensions of the joined frame as (rows, columns).
df.shape

(103, 5)

### Write the dataframe into a csv for further use

In [35]:
# Persist the joined frame for later steps.
# NOTE(review): the row index is written as well (index=True is the default, spelled
# out here); it comes back as an extra unnamed column when the file is re-read.
# Consider index=False if no downstream reader relies on that column.
df.to_csv("toronto_coords.csv", index=True)