In [5]:
from bs4 import BeautifulSoup
import pandas as pd
import requests
import numpy as np
import urllib.request

### First I imported all the required Libaries and Now I will scrape the web url for the data using Beautiful Soup

In [6]:
source=urllib.request.urlopen('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M').read()
soup=BeautifulSoup(source,'lxml')

In [7]:
table=soup.find('table', attrs={'class':'wikitable sortable'})
table_rows=table.find_all('tr')

### Create my dataframe

In [8]:
res=[]
for tr in table_rows:
    td=tr.find_all('td')
    row=[tr.text.strip() for tr in td if tr.text.strip()]
    if row:
        res.append(row)
df=pd.DataFrame(res, columns=['Postcode','Borough','Neighbourhood'])

In [9]:
df.head(289)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Not assigned
9,M8A,Not assigned,Not assigned


### Cleaning up the data frame 

In [10]:
df['Borough'].replace('Not assigned', np.nan, inplace=True)

In [11]:
df.dropna(subset=['Borough'], axis=0, inplace=True)

In [12]:
df.head(212)

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Not assigned
10,M9A,Etobicoke,Islington Avenue
11,M1B,Scarborough,Rouge
12,M1B,Scarborough,Malvern


In [13]:
df.columns

Index(['Postcode', 'Borough', 'Neighbourhood'], dtype='object')

### Following instructions - ignore the cells with a boroguh that is Not Assigned, Combining rows with the neighbourhoods that has the same post code and neighbourhood is same as borough if it is Not Assigned.

In [14]:
df['Neighbourhood'].replace('Not assigned',df['Borough'], inplace=True)

In [15]:
df.head(212)

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Queen's Park
10,M9A,Etobicoke,Islington Avenue
11,M1B,Scarborough,Rouge
12,M1B,Scarborough,Malvern


In [16]:
df_new=df.groupby(['Postcode', 'Borough']).sum()

In [17]:
df_new.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Neighbourhood
Postcode,Borough,Unnamed: 2_level_1
M1B,Scarborough,RougeMalvern
M1C,Scarborough,Highland CreekRouge HillPort Union
M1E,Scarborough,GuildwoodMorningsideWest Hill
M1G,Scarborough,Woburn
M1H,Scarborough,Cedarbrae


In [18]:
df_new.reset_index(inplace=True)

In [19]:
df_new.head(200)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,RougeMalvern
1,M1C,Scarborough,Highland CreekRouge HillPort Union
2,M1E,Scarborough,GuildwoodMorningsideWest Hill
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,East Birchmount ParkIonviewKennedy Park
7,M1L,Scarborough,ClairleaGolden MileOakridge
8,M1M,Scarborough,CliffcrestCliffsideScarborough Village West
9,M1N,Scarborough,Birch CliffCliffside West


### Getting the shape as requested.

In [20]:
df_new.shape

(103, 3)

### Getting the data for Latitude and Longtitude, creating the dataframe and combining with the earlier dataframe so we have one dataframe to create our neighbourhood visualization.

In [21]:
df_geo=pd.read_csv('http://cocl.us/Geospatial_data')

In [22]:
df_geo.head(103)

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
5,M1J,43.744734,-79.239476
6,M1K,43.727929,-79.262029
7,M1L,43.711112,-79.284577
8,M1M,43.716316,-79.239476
9,M1N,43.692657,-79.264848


In [23]:
df_geo.rename(columns={'Postal Code' : 'Postcode'}, inplace=True)

In [24]:
df_geo.head()

Unnamed: 0,Postcode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [25]:
df_geo.columns

Index(['Postcode', 'Latitude', 'Longitude'], dtype='object')

In [26]:
df_new.columns

Index(['Postcode', 'Borough', 'Neighbourhood'], dtype='object')

In [27]:
df_combined=pd.concat([df_new, df_geo], axis=1, join='inner')

In [28]:
df_combined.head(103)

Unnamed: 0,Postcode,Borough,Neighbourhood,Postcode.1,Latitude,Longitude
0,M1B,Scarborough,RougeMalvern,M1B,43.806686,-79.194353
1,M1C,Scarborough,Highland CreekRouge HillPort Union,M1C,43.784535,-79.160497
2,M1E,Scarborough,GuildwoodMorningsideWest Hill,M1E,43.763573,-79.188711
3,M1G,Scarborough,Woburn,M1G,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,M1H,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,M1J,43.744734,-79.239476
6,M1K,Scarborough,East Birchmount ParkIonviewKennedy Park,M1K,43.727929,-79.262029
7,M1L,Scarborough,ClairleaGolden MileOakridge,M1L,43.711112,-79.284577
8,M1M,Scarborough,CliffcrestCliffsideScarborough Village West,M1M,43.716316,-79.239476
9,M1N,Scarborough,Birch CliffCliffside West,M1N,43.692657,-79.264848


In [2]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Solving environment: done

# All requested packages already installed.

Solving environment: done

# All requested packages already installed.

Libraries imported.


### Create a Map of Canada with Neighbourhoods Superimposed on top

In [3]:
address = 'Canada'

geolocator = Nominatim()
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Canada  are {}, {}.'.format(latitude, longitude))

  This is separate from the ipykernel package so we can avoid doing imports until


The geograpical coordinate of Canada  are 61.0666922, -107.9917071.


In [30]:

map_canada = folium.Map(location=[latitude, longitude], zoom_start=10)


for lat, lng, borough, neighbourhood in zip(df_combined['Latitude'], df_combined['Longitude'], df_combined['Borough'], df_combined['Neighbourhood']):
    label = '{}, {}'.format(neighbourhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_canada)  
    
map_canada

In [31]:
address = 'Scarborough, CA'

geolocator = Nominatim()
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Scarborough are {}, {}.'.format(latitude, longitude))

  This is separate from the ipykernel package so we can avoid doing imports until


The geograpical coordinate of Scarborough are 54.2847601, -0.4090339.


In [32]:
map_scarborough = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(df_combined['Latitude'], df_combined['Longitude'], df_combined['Neighbourhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_scarborough)  
    
map_scarborough