# PART A

## Importing required libraries/packages

In [37]:
from bs4 import BeautifulSoup
import pandas as pd
import requests

## Initializing required variables and web scraping

In [38]:
# Download the Wikipedia page listing Canadian postal codes that start with "M".
# A timeout keeps the cell from hanging on a stalled connection, and
# raise_for_status() stops here on an HTTP error instead of parsing an error page.
WIKI_URL = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
response = requests.get(WIKI_URL, timeout=30)
response.raise_for_status()
wiki_data = BeautifulSoup(response.text, 'lxml')

# Empty frame with the three target columns; the extraction cell fills it.
column_names = ['Postalcode','Borough','Neighborhood']
toronto = pd.DataFrame(columns = column_names)

# The table is looked up inside the main article container.
content = wiki_data.find('div', class_='mw-parser-output')
if content is None:
    raise ValueError("Could not find the 'mw-parser-output' div in the downloaded page")

# Placeholder values for the per-row fields; 0 means "nothing parsed yet".
postalcode = 0
borough = 0
neighborhood = 0

## Extracting required information and transforming it into a dataframe

In [39]:
# Collect one record per table row and build the frame once at the end.
# DataFrame.append was removed in pandas 2.0, and growing a frame row by row
# is quadratic anyway.
records = []
for tr in content.table.tbody.find_all('tr'):
    cells = [td.text.strip('\n') for td in tr.find_all('td')]
    if len(cells) < 3:
        # Rows without three <td> cells (e.g. the <th> header row) carry no
        # postal-code data; skip them instead of storing placeholder values.
        continue
    postalcode = cells[0]
    borough = cells[1]
    neighborhood = cells[2].replace(']','')
    records.append({'Postalcode': postalcode,'Borough': borough,'Neighborhood': neighborhood})
toronto = pd.DataFrame(records, columns=column_names)

## Cleaning 
### Rows whose borough is "Not assigned" are dropped. If a row has a borough but its neighborhood is "Not assigned", the neighborhood is set to the borough name.

In [40]:
# Keep only rows with a real borough: drop 'Not assigned' entries and any
# placeholder row whose borough is still the initial 0.
has_borough = (toronto.Borough != 'Not assigned') & (toronto.Borough != 0)
toronto = toronto[has_borough].reset_index(drop=True)

# A 'Not assigned' neighborhood falls back to the borough name.
# A boolean mask with .loc writes into the frame itself; the previous
# toronto.iloc[i][2] = ... form is chained indexing and may only modify a
# temporary row copy, leaving the frame unchanged.
missing_neighborhood = toronto['Neighborhood'] == 'Not assigned'
toronto.loc[missing_neighborhood, 'Neighborhood'] = toronto.loc[missing_neighborhood, 'Borough']

In [41]:
# Show the cleaned frame as the cell output.
toronto

Unnamed: 0,Postalcode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
99,M4Y,Downtown Toronto,Church and Wellesley
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C..."
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu..."


## More than one neighborhood can exist in one postal code area. These rows will be combined into one row with the neighborhoods separated with a comma

In [31]:
# Group by postal code and borough, then collapse each group's neighborhoods
# into a single comma-separated string.
neighborhoods_by_code = toronto.groupby(['Postalcode', 'Borough'])['Neighborhood']
toronto_df = neighborhoods_by_code.apply(', '.join).reset_index()
toronto_df

Unnamed: 0,Postalcode,Borough,Neighborhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
...,...,...,...
98,M9N,York,Weston
99,M9P,Etobicoke,Westmount
100,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ..."
101,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest..."


## Summary of dataframe

In [32]:
# Per-column summary of the grouped frame.
toronto_df.describe()

Unnamed: 0,Postalcode,Borough,Neighborhood
count,103,103,103
unique,103,10,99
top,M4W,North York,Downsview
freq,1,24,4


## Dropping NA

In [36]:
# Discard every row that contains at least one missing value, then show the result.
toronto_df = toronto_df.dropna(axis=0, how='any')
toronto_df

Unnamed: 0,Postalcode,Borough,Neighborhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
...,...,...,...
98,M9N,York,Weston
99,M9P,Etobicoke,Westmount
100,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ..."
101,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest..."


## Shape of the dataframe

In [44]:
# (rows, columns) of the final frame.
toronto_df.shape

(103, 3)

# PART B

In [46]:
!pip install geocoder
import geocoder

Collecting geocoder
  Downloading geocoder-1.38.1-py2.py3-none-any.whl (98 kB)
Collecting ratelim
  Downloading ratelim-0.1.6-py2.py3-none-any.whl (4.0 kB)
Installing collected packages: ratelim, geocoder
Successfully installed geocoder-1.38.1 ratelim-0.1.6


## Getting latitude and longitude of each postalcode and adding it to a list

In [55]:
def get_lat_long(postal_code, max_attempts=10, retry_delay=1.0):
    """Return ``[latitude, longitude]`` for a Toronto postal code via ArcGIS.

    The geocoder is queried with ``'<postal_code>, Toronto, Ontario'`` and the
    lookup is retried while it yields no coordinates.

    Parameters
    ----------
    postal_code : str
        Postal code to look up (e.g. ``'M1B'``).
    max_attempts : int, optional
        Maximum number of geocoder calls before giving up. The loop used to be
        unbounded, so a code the service cannot resolve hung the notebook.
    retry_delay : float, optional
        Seconds to wait between attempts, so the service is not called in a
        tight loop.

    Returns
    -------
    list
        The ``latlng`` value reported by the geocoder.

    Raises
    ------
    RuntimeError
        If no coordinates were obtained after ``max_attempts`` calls.
    """
    import time  # local import so this cell does not depend on the import cell

    query = '{}, Toronto, Ontario'.format(postal_code)
    for attempt in range(1, max_attempts + 1):
        lati_long_coords = geocoder.arcgis(query).latlng
        if lati_long_coords is not None:
            return lati_long_coords
        # Pause before retrying, but not after the final failed attempt.
        if attempt < max_attempts and retry_delay > 0:
            time.sleep(retry_delay)
    raise RuntimeError(
        'No coordinates returned for {!r} after {} attempts'.format(query, max_attempts)
    )
    
# Geocode every postal code in frame order, so coords[i] corresponds to the
# i-th postal code in toronto_df.
postal_codes = toronto_df['Postalcode'].tolist()
coords = list(map(get_lat_long, postal_codes))
coords

[[43.811390000000074, -79.19661999999994],
 [43.78574000000003, -79.15874999999994],
 [43.765750000000025, -79.17469999999997],
 [43.76812000000007, -79.21760999999998],
 [43.76944000000003, -79.23891999999995],
 [43.74446000000006, -79.23116999999996],
 [43.725820000000056, -79.26460999999995],
 [43.71289000000007, -79.28505999999999],
 [43.72360000000003, -79.23495999999994],
 [43.695100000000025, -79.26465999999994],
 [43.75998000000004, -79.26939999999996],
 [43.75075000000004, -79.30053999999996],
 [43.794520000000034, -79.26707999999996],
 [43.784910000000025, -79.29721999999998],
 [43.817810000000065, -79.28043999999994],
 [43.80079000000006, -79.32161999999994],
 [43.83412000000004, -79.21667999999994],
 [43.80225000000007, -79.35557999999997],
 [43.780970000000025, -79.34780999999998],
 [43.781120000000044, -79.38059999999996],
 [43.756980000000055, -79.38059999999996],
 [43.79182000000003, -79.41371999999996],
 [43.76774000000006, -79.40727999999996],
 [43.747780000000034, -7

## Adding latitude and longitude columns to the dataframe

In [59]:
# coords was built by iterating toronto_df row by row, so it matches the frame
# positionally. Column assignment aligns on index labels, though, and
# toronto_df comes out of dropna(), which can leave gaps in the index. Giving
# coords_df the same index keeps each coordinate pair on its own row; a length
# mismatch raises here instead of silently producing NaN.
coords_df = pd.DataFrame(coords, columns=['Latitude', 'Longitude'], index=toronto_df.index)
toronto_df['Latitude'] = coords_df['Latitude']
toronto_df['Longitude'] = coords_df['Longitude']
toronto_df.head()

Unnamed: 0,Postalcode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.81139,-79.19662
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.78574,-79.15875
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.76575,-79.1747
3,M1G,Scarborough,Woburn,43.76812,-79.21761
4,M1H,Scarborough,Cedarbrae,43.76944,-79.23892


# PART C

In [68]:
!pip install folium
!pip install geopy
import folium
from geopy.geocoders import Nominatim 

Collecting geopy
  Downloading geopy-2.0.0-py3-none-any.whl (111 kB)
Collecting geographiclib<2,>=1.49
  Downloading geographiclib-1.50-py3-none-any.whl (38 kB)
Installing collected packages: geographiclib, geopy
Successfully installed geographiclib-1.50 geopy-2.0.0


In [71]:
# Look up the city centre, used below as the initial map location.
address = 'Toronto, Ontario Canada'
geolocator = Nominatim(user_agent="myapplication")
location = geolocator.geocode(address)
# geocode() returns None when the service finds no match; report that
# explicitly rather than failing on the attribute access below.
if location is None:
    raise ValueError('Nominatim returned no result for address {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto Canada are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto Canada are 43.6534817, -79.3839347.


In [72]:
# Base map centred on the Toronto coordinates obtained from the geocoder.
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# One circle marker per row of toronto_df, with a "<neighborhood>, <borough>" popup.
marker_rows = zip(
    toronto_df['Latitude'],
    toronto_df['Longitude'],
    toronto_df['Borough'],
    toronto_df['Neighborhood'],
)
for lat, lng, borough, neighborhood in marker_rows:
    popup_text = f'{neighborhood}, {borough}'
    label = folium.Popup(popup_text, parse_html=True)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=4,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#87cefa',
        fill_opacity=0.5,
        parse_html=False,
    )
    marker.add_to(map_toronto)
map_toronto

In [66]:
# Keep only the rows whose borough name contains "Toronto" and renumber them.
is_toronto_borough = toronto_df['Borough'].str.contains("Toronto")
toronto_data = toronto_df[is_toronto_borough].reset_index(drop=True)
print(toronto_data.shape)
toronto_data.head()

(39, 5)


Unnamed: 0,Postalcode,Borough,Neighborhood,Latitude,Longitude
0,M4E,East Toronto,The Beaches,43.67709,-79.29547
1,M4K,East Toronto,"The Danforth West, Riverdale",43.68375,-79.35512
2,M4L,East Toronto,"India Bazaar, The Beaches West",43.66797,-79.31467
3,M4M,East Toronto,Studio District,43.66213,-79.33497
4,M4N,Central Toronto,Lawrence Park,43.72843,-79.38713
