#### Import required libraries

In [1]:
import pandas as pd
import numpy as np

In [None]:
pip install lxml

#### Scrape the given Wikipedia page

In [2]:
# Wikipedia page listing the Canadian postal codes that start with "M".
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

# read_html returns one DataFrame per HTML table found on the page.
# header=0 takes the column names from the first row, and the literal text
# 'Not assigned' is parsed as NaN so it can be handled with dropna/fillna.
page_tables = pd.read_html(url, header=0, na_values=['Not assigned'])

# The first table on the page is the one used in the rest of the notebook.
totalData = page_tables[0]

#### Ignore cells with a borough that is Not assigned.

In [3]:
# Keep only the rows that have a borough. 'Not assigned' was read in as NaN,
# so dropna on the 'Borough' column removes exactly those rows.
df = totalData.dropna(subset = ['Borough'])

In [4]:
# Preview the first rows of the filtered table.
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M6A,North York,Lawrence Heights
6,M6A,North York,Lawrence Manor


In [5]:
# List the column names that the following cells refer to.
df.columns

Index(['Postcode', 'Borough', 'Neighbourhood'], dtype='object')

#### If neighborhood is Not assigned, then the neighborhood will be the same as the borough

In [7]:
# Where the neighbourhood is missing, fall back to the borough name.
# The result is assigned back to df instead of calling fillna(..., inplace=True)
# on the df['Neighbourhood'] selection: that form is chained assignment, which
# can emit a warning and, with pandas copy-on-write enabled, only modifies a
# temporary object and leaves df unchanged.
df = df.assign(Neighbourhood=df['Neighbourhood'].fillna(df['Borough']))

#### One row per postal code, with its neighbourhoods combined into a comma-separated list

In [9]:
def _join_unique(names):
    """Return the distinct values of `names`, sorted, as one comma-separated string."""
    return ','.join(str(name) for name in np.unique(names))


# Collapse the table to one entry per (Postcode, Borough) pair, merging the
# neighbourhood names of each pair into a single string.
grouped_df = df.groupby(['Postcode', 'Borough'])['Neighbourhood'].agg(_join_unique)

# Turn the grouped Series back into a flat table with the group keys as columns.
postcode_df = grouped_df.to_frame().reset_index()
postcode_df.columns = ['Postcode', 'Borough', 'Neighbourhood']
postcode_df

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Malvern,Rouge"
1,M1C,Scarborough,"Highland Creek,Port Union,Rouge Hill"
2,M1E,Scarborough,"Guildwood,Morningside,West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
...,...,...,...
98,M9N,York,Weston
99,M9P,Etobicoke,Westmount
100,M9R,Etobicoke,"Kingsview Village,Martin Grove Gardens,Richvie..."
101,M9V,Etobicoke,"Albion Gardens,Beaumond Heights,Humbergate,Jam..."


## Answer for the 1st Query

#### Number of rows in the data 

In [10]:
# Number of rows, i.e. of distinct (Postcode, Borough) groups, in the table.
len(postcode_df)

103

#### Read the location data and load it into data frame

In [11]:
# Download the CSV of postal-code coordinates and load it into a DataFrame.
# As the output below shows, its columns are 'Postal Code', 'Latitude' and 'Longitude'.
# NOTE(review): 'lacation' is presumably a typo for 'location'; the names are
# kept as they are because a later cell references lacationData.
lacationData_url = 'http://cocl.us/Geospatial_data'
lacationData = pd.read_csv(lacationData_url)
lacationData

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
...,...,...,...
98,M9N,43.706876,-79.518188
99,M9P,43.696319,-79.532242
100,M9R,43.688905,-79.554724
101,M9V,43.739416,-79.588437


## Answer for the 2nd Query

#### Join the location info with our post code data

In [12]:
# Index the coordinates by postal code so that join() can match them against
# the 'Postcode' column of the neighbourhood table.
coords_by_postcode = lacationData.set_index('Postal Code')

# Every row of postcode_df is kept; Latitude and Longitude are added from the lookup.
postcode_loc_df = postcode_df.join(coords_by_postcode, on='Postcode')
postcode_loc_df.head(n=10)

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern,Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek,Port Union,Rouge Hill",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park,Ionview,Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea,Golden Mile,Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest,Cliffside,Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff,Cliffside West",43.692657,-79.264848


In [14]:
pip install geopy

Collecting geopy
[?25l  Downloading https://files.pythonhosted.org/packages/80/93/d384479da0ead712bdaf697a8399c13a9a89bd856ada5a27d462fb45e47b/geopy-1.20.0-py2.py3-none-any.whl (100kB)
[K     |████████████████████████████████| 102kB 3.3MB/s ta 0:00:011
[?25hCollecting geographiclib<2,>=1.49 (from geopy)
  Downloading https://files.pythonhosted.org/packages/8b/62/26ec95a98ba64299163199e95ad1b0e34ad3f4e176e221c40245f211e425/geographiclib-1.50-py3-none-any.whl
Installing collected packages: geographiclib, geopy
Successfully installed geographiclib-1.50 geopy-1.20.0
Note: you may need to restart the kernel to use updated packages.


In [15]:
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

# Look up the coordinates of Toronto; they are used as the centre of the maps below.
address = 'Toronto'
geolocator = Nominatim(user_agent="JAC")
location = geolocator.geocode(address)

# geocode() returns None when the service finds no match. Fail here with a clear
# message instead of an AttributeError on the attribute access below.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))

lat = location.latitude
lon = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(lat, lon))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


In [16]:
import folium

#### Add markers to Toronto map

In [20]:
# Centre the map on the geocoded Toronto coordinates. They are read from `location`
# so that the centre is the same no matter which cells ran before this one.
map_toronto = folium.Map(location=[location.latitude, location.longitude], zoom_start=10)

# Draw one red circle per postcode; its popup reads "<neighbourhoods>, <borough>".
# The loop variables are deliberately not called lat/lon: reusing those names
# replaced the Toronto coordinates with those of the last row in the table.
marker_rows = zip(
    postcode_loc_df['Latitude'],
    postcode_loc_df['Longitude'],
    postcode_loc_df['Borough'],
    postcode_loc_df['Neighbourhood'],
)
for marker_lat, marker_lon, borough, neighbourhood in marker_rows:
    label = '{}, {}'.format(neighbourhood, borough)
    # parse_html=True makes folium treat the label as plain text rather than HTML markup.
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [marker_lat, marker_lon],
        radius=5,
        popup=label,
        color='red',
        fill=True,
        fill_opacity=0.7).add_to(map_toronto)

map_toronto

#### Cluster the postcode markers (each cluster shows how many postcodes it contains)

In [19]:
from folium import plugins

# Centre the map on the geocoded Toronto coordinates. They are read from `location`
# so that the centre is the same no matter which cells ran before this one.
map_toronto = folium.Map(location = [location.latitude, location.longitude], zoom_start = 10)

# instantiate a marker cluster object for the postcodes in the dataframe
postcodes = plugins.MarkerCluster().add_to(map_toronto)

# loop through the dataframe and add each data point to the marker cluster
# The loop variables are deliberately not called lat/lon, so the Toronto
# coordinates stored under those names are left untouched.
for marker_lat, marker_lon, postcode in zip(postcode_loc_df['Latitude'], postcode_loc_df['Longitude'], postcode_loc_df['Postcode']):
    # Show the marker's own coordinates. The previous location[1] was the
    # (latitude, longitude) of Toronto, so every popup displayed the same pair.
    label = 'lat-long: {}<br>Postcode: {}'.format((marker_lat, marker_lon), postcode)
    # parse_html=False keeps the <br> in the label as an HTML line break.
    label = folium.Popup(label, parse_html=False)
    folium.Marker(
        location=[marker_lat, marker_lon],
        icon=folium.Icon(color='green', icon='ok-sign'),
        popup=label
    ).add_to(postcodes)

# display map
map_toronto