### Part 1

In [13]:
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup
import folium

In [14]:
# Download the Wikipedia article listing Canadian postal codes that start with "M".
# The timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() surfaces HTTP errors (4xx/5xx) here instead of letting an
# error page be parsed as if it were the article.
page = requests.get(
    "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M",
    timeout=30,
)
page.raise_for_status()

soup = BeautifulSoup(page.content, 'html.parser')

# The postal-code data is read from the first <table class="wikitable"> on the page.
table = soup.find('table', class_='wikitable')
if table is None:
    # soup.find() returns None when nothing matches; fail with a clear message
    # rather than an AttributeError on the next line.
    raise ValueError("No 'wikitable' table found on the page; its layout may have changed.")
table_rows = table.find_all('tr')

In [15]:
# Collect [postcode, borough, neighbourhood] rows from the scraped table.
temp = []

for tr in table_rows:
    # Rows that contain no <td> cells produce an empty list here.
    cells = [cell.text.strip() for cell in tr.find_all('td')]

    # Skip rows that do not carry the three expected cells; indexing them
    # below would otherwise raise IndexError on short or empty rows.
    if len(cells) < 3:
        continue

    # excluding rows whose borough is not assigned
    if cells[1] == 'Not assigned':
        continue

    # a postcode without an assigned neighbourhood falls back to its borough name
    if cells[2] == 'Not assigned':
        cells[2] = cells[1]

    temp.append(cells)

In [29]:
# Raw scrape: one row per (postcode, borough, neighbourhood) entry.
df = pd.DataFrame(temp, columns=['Postcode', 'Borough', 'Neighbourhood'])

# For every postcode, merge all of its neighbourhood names into a single
# comma-separated string; the result is a one-column frame indexed by postcode.
tempNeighbourhoodDf = (
    df.groupby('Postcode')['Neighbourhood']
    .apply(lambda names: ','.join(names))
    .to_frame()
)

# Keep one (postcode, borough) row per pair, index it by postcode, and attach
# the merged neighbourhood strings through an index join.
df = (
    df[['Postcode', 'Borough']]
    .drop_duplicates()
    .set_index('Postcode')
    .join(tempNeighbourhoodDf)
)
print(df)

                   Borough                                      Neighbourhood
Postcode                                                                     
M3A             North York                                          Parkwoods
M4A             North York                                   Victoria Village
M5A       Downtown Toronto                                       Harbourfront
M6A             North York                    Lawrence Heights,Lawrence Manor
M7A       Downtown Toronto                                       Queen's Park
...                    ...                                                ...
M8X              Etobicoke        The Kingsway,Montgomery Road,Old Mill North
M4Y       Downtown Toronto                               Church and Wellesley
M7Y           East Toronto  Business Reply Mail Processing Centre 969 Eastern
M8Y              Etobicoke  Humber Bay,King's Mill Park,Kingsway Park Sout...
M8Z              Etobicoke  Kingsway Park South West,Mimico NW,T

In [30]:
# Sanity check: (rows, columns) of the postcode-indexed frame built above.
df.shape

(103, 2)

### Part 2

In [25]:
# Read the per-postal-code coordinates from the remote CSV and preview the
# first few rows.
url2 = "http://cocl.us/Geospatial_data"
geo_data = pd.read_csv(url2)
geo_data.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [27]:
# Print the column labels of both frames to see which column can serve as the
# join key between them.
print(df.columns.tolist())
print(geo_data.columns.tolist())

['Borough', 'Neighbourhood']
['Postal Code', 'Latitude', 'Longitude']


In [39]:
# Attach coordinates to each postcode. DataFrame.join aligns on the index, so
# geo_data is re-indexed by its 'Postal Code' column to match df's index.
full_table = df.join(geo_data.set_index('Postal Code'))

# Shuffle the rows. The fixed random_state makes the shuffle, and therefore the
# preview below, reproducible across kernel restarts.
# reset_index(drop=True) then replaces the postcode index with a fresh 0..n-1
# range, so the postcode itself is not kept as a column.
full_table = full_table.sample(frac=1, random_state=0).reset_index(drop=True)
full_table.head()

Unnamed: 0,Borough,Neighbourhood,Latitude,Longitude
0,Scarborough,Scarborough Village,43.744734,-79.239476
1,North York,"Downsview,North Park,Upwood Park",43.713756,-79.490074
2,Scarborough,"Birch Cliff,Cliffside West",43.692657,-79.264848
3,Scarborough,Woburn,43.770992,-79.216917
4,North York,"Emery,Humberlea",43.724766,-79.532242


### Part 3

In [32]:
from geopy.geocoders import Nominatim 
import matplotlib.cm as cm
import matplotlib.colors as colors

from sklearn.cluster import KMeans

import folium

In [36]:
# Look up the coordinates used to centre the map.
address = 'Toronto'

# Nominatim expects every client to identify itself. geopy emits a warning when
# no user_agent is supplied and, from geopy 2.0 on, raises ConfigurationError.
geolocator = Nominatim(user_agent="toronto_neighbourhood_explorer")
location = geolocator.geocode(address)

# geocode() returns None when the address cannot be resolved; stop here with a
# clear message instead of failing on the attribute access below.
if location is None:
    raise ValueError('Could not geocode address: {0!r}'.format(address))

latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {0}, {1}.'.format(latitude, longitude))


  This is separate from the ipykernel package so we can avoid doing imports until


The geograpical coordinate of Toronto are 43.653963, -79.387207.


In [38]:
# Base map centred on the geocoded Toronto coordinates.
map_geo = folium.Map(location=[latitude, longitude], zoom_start=11)

# Add one circle marker per row of full_table, using the neighbourhood name(s)
# as the popup text.
for lat, lng, neighbourhood in zip(
    full_table['Latitude'], full_table['Longitude'], full_table['Neighbourhood']
):
    # parse_html is a Popup option; it is set here so the label is handled as
    # text rather than raw HTML. It is not a CircleMarker parameter, so it is
    # no longer passed to CircleMarker below.
    popup = folium.Popup(neighbourhood, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
    ).add_to(map_geo)

# Last expression of the cell: renders the interactive map.
map_geo