## Geographical Coordinates for the Neighbourhoods of Toronto

### Importing the required libraries

In [4]:
import numpy as np # library for vectorized computation
import pandas as pd # library to process data as dataframes

from bs4 import BeautifulSoup
import requests
import wikipedia as wp
import geocoder # import geocoder

### Repeating the same procedure to get the Postcode dataframe 

In [5]:
# Fetch the rendered HTML of the Wikipedia article and encode it as UTF-8 bytes.
postcode_page = wp.page("List of postal codes of Canada: M")
html = postcode_page.html().encode("UTF-8")

# Parse the tables found in the HTML (row 0 is used as the header) and keep the first one.
parsed_tables = pd.read_html(html, header=0)
df = parsed_tables[0]
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


In [6]:
# Keep only the rows whose Borough has been assigned.
# A single boolean filter builds a new frame rather than mixing an in-place
# drop with a rebind of the same name; the index is then renumbered from 0 so
# the removed rows leave no gaps.
df = df[df["Borough"] != "Not assigned"].reset_index(drop=True)

In [7]:
# Where no neighbourhood is listed, fall back to the borough name.
# The write goes through one .loc call: the previous chained form
# (df["Neighbourhood"][mask] = ...) assigns into an intermediate object, which
# triggers SettingWithCopyWarning and is not guaranteed to modify `df` itself.
neighbourhood_missing = df["Neighbourhood"] == "Not assigned"
df.loc[neighbourhood_missing, "Neighbourhood"] = df.loc[neighbourhood_missing, "Borough"]

In [8]:
# Collapse rows that share a Postcode/Borough pair into a single row whose
# Neighbourhood is the comma-separated list of the original neighbourhood names.
df1 = df.groupby(['Postcode', 'Borough'])['Neighbourhood'].apply(', '.join).reset_index()

# sort_values returns a new frame, so its result has to be assigned back; the
# earlier bare call discarded the sorted frame. A stable sort keeps the existing
# order of rows within the same Postcode, and the index is renumbered afterwards.
df1 = df1.sort_values(by=["Postcode"], kind="stable").reset_index(drop=True)
df1.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park"
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge"
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


In [None]:
# The geocoder module doesn't seem to work for me, so I decided to use the CSV
# file given by Alex and work with that instead. The attempted code is kept
# below, commented out.

# latitude = list()
# longitude = list()
# for postal_code in df1.Postcode:
#     # initialize your variable to None
#     lat_lng_coords = None

#     # loop until you get the coordinates
#     while(lat_lng_coords is None):
#         g = geocoder.google('{}, Toronto, Ontario'.format(postal_code))
#         lat_lng_coords = g.latlng

#     latitude.append(lat_lng_coords[0])
#     longitude.append(lat_lng_coords[1])
     
# for lat, long in zip(latitude, longitude):    
#     print(lat, long)

### Loading the Geospatial Coordinates CSV into a dataframe

In [9]:
# Read the coordinates file from the working directory into a dataframe
# and preview its first five rows.
df2 = pd.read_csv(filepath_or_buffer="Geospatial_Coordinates.csv")
df2.head(n=5)

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


### Mapping the latitudes and longitudes to the existing dataframe

In [10]:
# Index the coordinate table by postal code, then look up each coordinate
# column for every Postcode in df1 and store it as a new column of the same name.
coords_by_code = df2.set_index("Postal Code")
for coord_col in ("Latitude", "Longitude"):
    df1[coord_col] = df1["Postcode"].map(coords_by_code[coord_col])
df1.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


### Shape of the resultant dataframe 

In [11]:
# Report the (rows, columns) dimensions of the dataframe.
n_rows, n_cols = df1.shape
print((n_rows, n_cols))

(103, 5)
