## Import Libraries

In [1]:
import pandas as pd
import numpy as np

## Read data from Wikipedia

In [2]:
# Parse the HTML tables on the Wikipedia page into a list of DataFrames,
# taking the first row of each table as its column header.
data1 = pd.read_html(
    io='https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M',
    header=0,
)

## Keep only the first table

In [3]:
# The first parsed table is the postal-code listing; its columns are
# 'Postal Code', 'Borough' and 'Neighbourhood'.
df = data1[0]
# Preview ten randomly chosen rows.
df.sample(n=10)

Unnamed: 0,Postal Code,Borough,Neighbourhood
161,M9X,Not assigned,Not assigned
138,M4V,Central Toronto,"Summerhill West, Rathnelly, South Hill, Forest..."
93,M4N,Central Toronto,Lawrence Park
43,M8G,Not assigned,Not assigned
113,M6R,West Toronto,"Parkdale, Roncesvalles"
14,M6B,North York,Glencairn
65,M3K,North York,Downsview
99,M1P,Scarborough,"Dorset Park, Wexford Heights, Scarborough Town..."
52,M8H,Not assigned,Not assigned
58,M5J,Downtown Toronto,"Harbourfront East, Union Station, Toronto Islands"


## Drop rows whose borough is "Not assigned"

In [4]:
# Keep only the rows whose borough is something other than the
# placeholder 'Not assigned', then preview ten random rows of the result.
df = df.loc[df['Borough'].ne('Not assigned')]
df.sample(n=10)

Unnamed: 0,Postal Code,Borough,Neighbourhood
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
81,M1M,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West"
8,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
111,M4R,Central Toronto,"North Toronto West, Lawrence Park"
103,M5P,Central Toronto,"Forest Hill North & West, Forest Hill Road Park"
27,M1E,Scarborough,"Guildwood, Morningside, West Hill"
107,M9P,Etobicoke,Westmount
47,M3H,North York,"Bathurst Manor, Wilson Heights, Downsview North"
59,M6J,West Toronto,"Little Portugal, Trinity"
108,M1R,Scarborough,"Wexford, Maryvale"


In [5]:
# Grouping step left disabled: the scraped table already has one row per
# postal code (the describe() summary in the next cell reports 103 rows and
# 103 unique postal codes), so neighbourhoods do not need to be combined.
#df['Neighbourhood'] = df.groupby(['Postal Code'])['Neighbourhood'].transform(lambda x: ','.join(x))
#df.drop_duplicates()

In [6]:
# Summarise each column of the filtered table: row count, number of unique
# values, most frequent value and how often it occurs.
df.describe()

Unnamed: 0,Postal Code,Borough,Neighbourhood
count,103,103,103
unique,103,10,99
top,M2H,North York,Downsview
freq,1,24,4


In [7]:
# Show the dimensions of the filtered table as (rows, columns).
df.shape

(103, 3)

## Read the coordinates CSV file

In [8]:
# Load the latitude/longitude of every postal code from the hosted CSV file
# and preview its first five rows.
df2 = pd.read_csv(filepath_or_buffer='http://cocl.us/Geospatial_data')
df2.head(5)

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


## Merge two datasets

In [9]:
# Attach the coordinates to each postal code by joining the two tables on
# their shared 'Postal Code' column, then preview the first five rows.
df_merge = df.merge(df2, on='Postal Code')
df_merge.head(5)

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


In [10]:
# The geocoder-based coordinate lookup was not working, so it is kept below
# only as a triple-quoted string and is never executed; the coordinates are
# read from the CSV file instead. Because the string is the last expression
# in the cell, it is echoed back as the cell output.

'''
import geocoder # import geocoder

# initialize your variable to None
lat_lng_coords = None

# loop until you get the coordinates
while(lat_lng_coords is None):
  g = geocoder.google('{}, Toronto, Ontario'.format(postal_code))
  lat_lng_coords = g.latlng

latitude = lat_lng_coords[0]
longitude = lat_lng_coords[1]
'''

"\nimport geocoder # import geocoder\n\n# initialize your variable to None\nlat_lng_coords = None\n\n# loop until you get the coordinates\nwhile(lat_lng_coords is None):\n  g = geocoder.google('{}, Toronto, Ontario'.format(postal_code))\n  lat_lng_coords = g.latlng\n\nlatitude = lat_lng_coords[0]\nlongitude = lat_lng_coords[1]\n"

In [11]:
# Install the pinned folium release (0.5.0) from the conda-forge channel via
# a shell command; --yes skips conda's confirmation prompt.
!conda install -c conda-forge folium=0.5.0 --yes
# Import the mapping library used by the cells below.
import folium

# Simple confirmation that the install and import steps completed.
print('Folium installed and imported!')

Collecting package metadata (repodata.json): done
Solving environment: done

# All requested packages already installed.

Folium installed and imported!


# Show postal code distribution on the Toronto map

In [12]:
# Map centre used for Toronto, given as latitude and longitude.
latitude, longitude = 43.70, -79.42

In [13]:
# Build a folium map centred on the Toronto coordinates defined earlier and
# show it as the cell output.
toronto_map = folium.Map(
    zoom_start=11,
    location=[latitude, longitude],
)
toronto_map

In [14]:
# Plot every postal code as a circle marker on the Toronto map.

# Feature group that collects all postal-code markers as one layer.
postal_code = folium.FeatureGroup()

for lat, lng in zip(df_merge['Latitude'], df_merge['Longitude']):
    marker = folium.CircleMarker(
        location=[lat, lng],
        radius=5,  # size of each circle marker
        color='yellow',
        fill=True,
        fill_color='blue',
        fill_opacity=0.6,
    )
    postal_code.add_child(marker)

# Attach the postal-code marker layer to the map.
toronto_map.add_child(postal_code)

# Group postal codes into clusters

In [15]:
from folium import plugins

# Start again from a fresh map so the circle markers added earlier are not
# carried over.
toronto_map = folium.Map(
    zoom_start=11,
    location=[latitude, longitude],
)

# Marker-cluster layer attached to the map; it receives one marker per row
# of the merged table.
borough = plugins.MarkerCluster().add_to(toronto_map)

# Create one marker per postal code, with the borough name as its popup.
for lat, lng, bor in zip(df_merge['Latitude'], df_merge['Longitude'], df_merge['Borough']):
    marker = folium.Marker(location=[lat, lng], icon=None, popup=bor)
    marker.add_to(borough)

# Show the clustered map as the cell output.
toronto_map

In [16]:
#END