## Installing dependencies

In [1]:
!conda install -c conda-forge geopy --yes
!conda install -c conda-forge folium=0.5.0 --yes

Collecting package metadata (current_repodata.json): ...working... done
Solving environment: ...working... done

# All requested packages already installed.

Collecting package metadata (current_repodata.json): ...working... done
Solving environment: ...working... failed with initial frozen solve. Retrying with flexible solve.
Collecting package metadata (repodata.json): ...working... done
Solving environment: ...working... done

## Package Plan ##

  environment location: C:\Users\Aditya\anaconda3

  added / updated specs:
    - folium=0.5.0


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    altair-4.1.0               |             py_1         614 KB  conda-forge
    branca-0.4.0               |             py_0          26 KB  conda-forge
    certifi-2019.11.28         |           py37_0         148 KB  conda-forge
    folium-0.5.0               |             py_0          45 KB  conda

## Importing libraries

In [2]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
import numpy as np
from geopy.geocoders import Nominatim
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
import folium

## Web Scraping

In [3]:
# Download the Wikipedia article listing Canadian postal codes starting with "M".
# The timeout keeps the cell from hanging indefinitely on a stalled connection,
# and raise_for_status() turns an HTTP error response (4xx/5xx) into an
# exception here instead of letting an error page be parsed as the article.
page = requests.get(
    "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M",
    timeout=30,
)
page.raise_for_status()

# Parse the raw response bytes with Python's built-in HTML parser.
soup = BeautifulSoup(page.content, 'html.parser')

## Extracting the table rows from the parsed page

In [6]:
# Locate the first <tbody> element in the parsed page ...
table = soup.find('tbody')

# ... gather every <tr> underneath it ...
rows = table.select('tr')

# ... and reduce each row to its plain text content (one string per table row).
row = [table_row.get_text() for table_row in rows]

## Creating a dataframe with pre-processing completed

In [15]:
# Build the postal-code table from the scraped row strings held in `row`.
df = pd.DataFrame(row)

# Split each row string on newlines so every fragment lands in its own column.
# NOTE(review): the displayed result contains several unnamed columns,
# presumably empty fragments produced by this split; they are deliberately left
# in place so the column layout seen by later cells does not change.
df1 = df[0].str.split('\n', expand=True)

# The first split row holds the header text: promote it to the column names,
# then drop that row from the data.
df2 = df1.rename(columns=df1.iloc[0])
df3 = df2.drop(df2.index[0])

# Discard postal codes that have no borough assigned.
df4 = df3[df3.Borough != 'Not assigned']

# Collapse to one row per (postal code, borough), keeping first-seen order and
# comma-joining the text of the remaining columns.
df5 = (
    df4.groupby(['Postal code', 'Borough'], sort=False)
    .agg(','.join)
    .reset_index()
)

# Use 'Postcode' as the key column name.
df6 = df5.rename(columns={'Postal code': 'Postcode'})

# A neighborhood still marked 'Not assigned' takes the name of its own borough.
# Previously such cells were overwritten with the literal "Queen's Park"
# regardless of which borough the row belonged to, via a replace() that also
# scanned every other column of the frame.
df6['Neighborhood'] = df6['Neighborhood'].where(
    df6['Neighborhood'] != 'Not assigned', df6['Borough']
)
df6.head()

Unnamed: 0,Postcode,Borough,Unnamed: 3,Unnamed: 4,Unnamed: 5,Neighborhood,Unnamed: 7
0,M3A,North York,,,,Parkwoods,
1,M4A,North York,,,,Victoria Village,
2,M5A,Downtown Toronto,,,,Regent Park / Harbourfront,
3,M6A,North York,,,,Lawrence Manor / Lawrence Heights,
4,M7A,Downtown Toronto,,,,Queen's Park / Ontario Provincial Government,


## Reading Geo-spatial data

In [8]:
# Location of the CSV holding one latitude/longitude pair per postal code.
url = "http://cocl.us/Geospatial_data"

# Load the coordinates and rename the key column from 'Postal Code' to
# 'Postcode' in the same expression, without mutating a frame in place.
df7 = pd.read_csv(url).rename(columns={'Postal Code': 'Postcode'})
df7.head()

Unnamed: 0,Postcode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


## Merging the coordinate data into the postal-code dataframe

In [18]:
# Attach Latitude/Longitude to the postal-code table by matching rows on
# 'Postcode'. The join type is the default inner join, written out explicitly:
# only postal codes present in both frames are kept.
df8 = df6.merge(df7, how='inner', on='Postcode')
df8.head()

Unnamed: 0,Postcode,Borough,Unnamed: 3,Unnamed: 4,Unnamed: 5,Neighborhood,Unnamed: 7,Latitude,Longitude
0,M3A,North York,,,,Parkwoods,,43.753259,-79.329656
1,M4A,North York,,,,Victoria Village,,43.725882,-79.315572
2,M5A,Downtown Toronto,,,,Regent Park / Harbourfront,,43.65426,-79.360636
3,M6A,North York,,,,Lawrence Manor / Lawrence Heights,,43.718518,-79.464763
4,M7A,Downtown Toronto,,,,Queen's Park / Ontario Provincial Government,,43.662301,-79.389494


## Keeping only the boroughs whose name contains 'Toronto'

In [19]:
# Boolean mask: True for rows whose borough name contains the text 'Toronto'
# (e.g. 'Downtown Toronto', 'East Toronto', 'West Toronto').
in_toronto = df8['Borough'].str.contains('Toronto')

# Keep just those rows; the original index labels from df8 are retained.
Toronto = df8.loc[in_toronto]
Toronto.head(10)

Unnamed: 0,Postcode,Borough,Unnamed: 3,Unnamed: 4,Unnamed: 5,Neighborhood,Unnamed: 7,Latitude,Longitude
2,M5A,Downtown Toronto,,,,Regent Park / Harbourfront,,43.65426,-79.360636
4,M7A,Downtown Toronto,,,,Queen's Park / Ontario Provincial Government,,43.662301,-79.389494
9,M5B,Downtown Toronto,,,,"Garden District, Ryerson",,43.657162,-79.378937
15,M5C,Downtown Toronto,,,,St. James Town,,43.651494,-79.375418
19,M4E,East Toronto,,,,The Beaches,,43.676357,-79.293031
20,M5E,Downtown Toronto,,,,Berczy Park,,43.644771,-79.373306
24,M5G,Downtown Toronto,,,,Central Bay Street,,43.657952,-79.387383
25,M6G,Downtown Toronto,,,,Christie,,43.669542,-79.422564
30,M5H,Downtown Toronto,,,,Richmond / Adelaide / King,,43.650571,-79.384568
31,M6H,West Toronto,,,,Dufferin / Dovercourt Village,,43.669005,-79.442259


## Map to visualize your neighborhoods and how they cluster together

In [21]:
# Look up coordinates for the city so the map can be centred on it.
address = 'Toronto'
geolocator = Nominatim(user_agent="Toronto_explorer")
location = geolocator.geocode(address)

# geocode() yields None when the service finds no match. Fail with a clear
# message rather than an AttributeError on the attribute accesses below.
if location is None:
    raise ValueError(
        "Nominatim returned no result for address {!r}".format(address)
    )
latitude = location.latitude
longitude = location.longitude

# Base map centred on the geocoded coordinates.
Toronto_map = folium.Map(location=[latitude, longitude], zoom_start=10)

# Add one circle marker per row of the Toronto frame, with a popup reading
# "<neighborhood>, <borough>".
for lat, lng, borough, neighborhood in zip(Toronto['Latitude'], Toronto['Longitude'],
                                           Toronto['Borough'], Toronto['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(Toronto_map)

# Bare expression as the last line so the notebook renders the map.
Toronto_map