# Toronto Neighbourhoods from Wikipedia

#### Importing the required libraries

In [1]:
from bs4 import BeautifulSoup
import requests
import time
import pandas as pd

#### Scraping the Wikipedia data 

In [2]:
# Download the Wikipedia list of Canadian postal codes beginning with "M" and
# load its first "wikitable sortable" table into the DataFrame `df`.
REQUEST_URL = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
with requests.Session() as session:
    # TLS certificate verification is left at its secure default (the request
    # previously passed verify=False); the timeout stops the cell from hanging
    # indefinitely on a stalled connection.
    res = session.get(REQUEST_URL, timeout=30)
# Fail loudly on an HTTP error rather than parsing an error page.
res.raise_for_status()

soup = BeautifulSoup(res.text, "lxml")
table = soup.find("table", attrs={"class": "wikitable sortable"})
if table is None:
    raise ValueError("No 'wikitable sortable' table found at {}".format(REQUEST_URL))

# Header text is intentionally NOT stripped: later cells address the columns by
# these exact labels, including the trailing newline in 'Neighbourhood\n'.
headings = [th.get_text() for th in table.find("tr").find_all("th")]

datasets = []
for row in table.find_all("tr")[1:]:
    cols = row.find_all("td")
    # Rows without three data cells (e.g. extra header rows) carry no record.
    if len(cols) < 3:
        continue
    datasets.append((cols[0].text.strip(), cols[1].text.strip(), cols[2].text.strip()))

# Passing the labels at construction also yields a correctly labelled (empty)
# frame when no data rows were found.
df = pd.DataFrame(datasets, columns=headings)



#### Removing rows whose Borough is 'Not assigned'

In [3]:
# Remove every row whose Borough is 'Not assigned', then order the remaining
# rows by postal code and renumber the index from 0.
index_1 = df[df['Borough'] == 'Not assigned'].index
# sort_values() returns a new frame, so calling reset_index(inplace=True) on it
# only modified a temporary and the sort was lost; assign the result back.
# A stable sort (mergesort) keeps rows sharing a Postcode in their scraped
# order, so the neighbourhood order joined per postcode later is unchanged.
df = (
    df.drop(index_1)
      .sort_values('Postcode', kind='mergesort')
      .reset_index(drop=True)
)

#### Replacing 'Not assigned' Neighbourhood values with the Borough name

In [4]:
# Where the neighbourhood is 'Not assigned', fall back to that row's Borough.
# The column label carries the trailing newline taken from the table header.
hood_col = 'Neighbourhood\n'
unassigned = df[hood_col] == 'Not assigned'
# The right-hand side is aligned on the index, so each selected row receives
# its own Borough value.
df.loc[unassigned, hood_col] = df['Borough']


#### Concatenating Toronto neighbourhoods that share the same postal code and borough

In [5]:
# Collapse the table to one row per (Postcode, Borough) pair, joining the
# neighbourhood names of each group into a single comma-separated string.
hoods_by_code = df.groupby(['Postcode', 'Borough'])['Neighbourhood\n']
df_cat = hoods_by_code.apply(', '.join).reset_index()
df_cat

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park"
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge"
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


In [6]:
# Report (rows, columns) of the grouped table as a quick sanity check.
print(df_cat.shape)

(103, 3)


#### Reading Geographical coordinates of postal codes

In [7]:
# Load the postal-code coordinates CSV and relabel its columns positionally so
# the key column is called 'Postcode', matching the scraped table.
geo_csv_url = "http://cocl.us/Geospatial_data"
df_pc = pd.read_csv(geo_csv_url)
df_pc.columns = ['Postcode', 'Latitude', 'Longitude']

In [8]:
# Attach coordinates to each postal-code row; the inner join keeps only
# postcodes present in both tables.
df_Geo = df_cat.merge(df_pc, how='inner', on='Postcode')

In [9]:
# Display the merged table (neighbourhoods with latitude/longitude).
df_Geo

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848


#### Importing libraries for geolocation and for creating maps

In [14]:
from geopy.geocoders import Nominatim
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
!conda install -c conda-forge folium=0.5.0 --yes 
import folium # map rendering library

Fetching package metadata .............
Solving package specifications: .

Package plan for installation in environment /opt/conda/envs/DSX-Python35:

The following NEW packages will be INSTALLED:

    altair:  2.2.2-py35_1 conda-forge
    branca:  0.3.1-py_0   conda-forge
    folium:  0.5.0-py_0   conda-forge
    vincent: 0.4.4-py_1   conda-forge

altair-2.2.2-p 100% |################################| Time: 0:00:00  56.94 MB/s
branca-0.3.1-p 100% |################################| Time: 0:00:00  35.98 MB/s
vincent-0.4.4- 100% |################################| Time: 0:00:00  39.86 MB/s
folium-0.5.0-p 100% |################################| Time: 0:00:00  48.54 MB/s


#### Getting the latitude and longitude of Toronto

In [15]:
# Look up the coordinates of Toronto; `latitude` and `longitude` are used to
# centre the map drawn below.
address = 'Toronto, Canada'

# Nominatim's usage policy requires an application-specific User-Agent, and
# geopy 2.x refuses to construct the geocoder with its default one.
geolocator = Nominatim(user_agent="toronto_neighbourhoods_notebook")
location = geolocator.geocode(address)
# geocode() returns None when nothing matches; fail with a clear message
# instead of an AttributeError on the next line.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


#### Creating Toronto Map using latitude and longitude values with all Neighbourhoods Plotted

In [22]:
# Base map centred on the `latitude` / `longitude` values computed earlier.
map_Toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# Appearance shared by every neighbourhood marker.
marker_style = dict(
    color='blue',
    fill=True,
    fill_color='#3186cc',
    fill_opacity=0.7,
    parse_html=False,
)

# One circle marker per row of df_Geo, with a "<neighbourhoods>, <borough>" popup.
marker_rows = zip(
    df_Geo['Latitude'],
    df_Geo['Longitude'],
    df_Geo['Borough'],
    df_Geo['Neighbourhood\n'],
)
for lat, lng, borough, neighborhood in marker_rows:
    popup = folium.Popup('{}, {}'.format(neighborhood, borough), parse_html=True)
    marker = folium.CircleMarker([lat, lng], radius=5, popup=popup, **marker_style)
    marker.add_to(map_Toronto)

map_Toronto