# installing/importing necessary libraries for assignment

In [None]:
pip install geopy

In [None]:
!pip install lxml

In [1]:
import lxml
import pandas as pd
import numpy as np
from pandas.io.json import json_normalize
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans

import json
from geopy.geocoders import Nominatim 

# Download & Explore Data Set into a Pandas Dataframe

In [2]:
read= 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
ds = pd.read_html(read, index_col=[0])
Ctable = ds[0]
Ctable

Unnamed: 0_level_0,Borough,Neighbourhood
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1
M1A,Not assigned,Not assigned
M2A,Not assigned,Not assigned
M3A,North York,Parkwoods
M4A,North York,Victoria Village
M5A,Downtown Toronto,"Regent Park, Harbourfront"
...,...,...
M5Z,Not assigned,Not assigned
M6Z,Not assigned,Not assigned
M7Z,Not assigned,Not assigned
M8Z,Etobicoke,"Mimico NW, The Queensway West, South of Bloor,..."


# processes the cells that have an assigned borough. Ignore cells with a borough that is Not assigned. This shows how many rows have a not assigned value.

In [3]:
Ctable.Borough.value_counts()

Not assigned        77
North York          24
Downtown Toronto    19
Scarborough         17
Etobicoke           12
Central Toronto      9
West Toronto         6
East York            5
East Toronto         5
York                 5
Mississauga          1
Name: Borough, dtype: int64

# replaces 'Not assigned' with another value

In [4]:
Ctable.Borough.replace("Not assigned", np.nan, inplace = True) 
Ctable.head()

Unnamed: 0_level_0,Borough,Neighbourhood
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1
M1A,,Not assigned
M2A,,Not assigned
M3A,North York,Parkwoods
M4A,North York,Victoria Village
M5A,Downtown Toronto,"Regent Park, Harbourfront"


In [5]:
Ctable.Borough.value_counts #shows not assigned replaced with NaN

<bound method IndexOpsMixin.value_counts of Postal Code
M1A                 NaN
M2A                 NaN
M3A          North York
M4A          North York
M5A    Downtown Toronto
             ...       
M5Z                 NaN
M6Z                 NaN
M7Z                 NaN
M8Z           Etobicoke
M9Z                 NaN
Name: Borough, Length: 180, dtype: object>

# drops nan from the table

In [11]:
Ctable.dropna(axis=0, inplace=True)
Ctable = Ctable.reset_index()
Ctable = Ctable.drop(['index'], axis=1)
Ctable.head(20)

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
6,M1B,Scarborough,"Malvern, Rouge"
7,M3B,North York,Don Mills
8,M4B,East York,"Parkview Hill, Woodbine Gardens"
9,M5B,Downtown Toronto,"Garden District, Ryerson"


# Deletes the first column called level_0

In [12]:
del Ctable['level_0']

KeyError: 'level_0'

In [13]:
Ctable

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
99,M4Y,Downtown Toronto,Church and Wellesley
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C..."
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu..."


# look at the first 12 rows

In [14]:
Ctable.head(12)

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
6,M1B,Scarborough,"Malvern, Rouge"
7,M3B,North York,Don Mills
8,M4B,East York,"Parkview Hill, Woodbine Gardens"
9,M5B,Downtown Toronto,"Garden District, Ryerson"


# This is where the selections are grouped by postal code, borough and neighborhood

In [15]:
Ctable = Ctable.groupby(['Postal Code', 'Borough'])['Neighbourhood'].apply(lambda x: "%s" % ', '.join(x))
Ctable = Ctable.reset_index()
Ctable.head(50)

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park"
7,M1L,Scarborough,"Golden Mile, Clairlea, Oakridge"
8,M1M,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


# Obtaining the shape of the table: columns and rows

In [16]:
Ctable.shape

(103, 3)

# Read in the second file

In [37]:
read2 = 'http://cocl.us/Geospatial_data'
Geof = pd.read_csv(read2, index_col=[0])
Geof

Unnamed: 0_level_0,Latitude,Longitude
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1
M1B,43.806686,-79.194353
M1C,43.784535,-79.160497
M1E,43.763573,-79.188711
M1G,43.770992,-79.216917
M1H,43.773136,-79.239476
...,...,...
M9N,43.706876,-79.518188
M9P,43.696319,-79.532242
M9R,43.688905,-79.554724
M9V,43.739416,-79.588437


# Merge first dataframe with newly read dataframe

In [45]:
GeoTable=pd.merge(Ctable,Geof, on='Postal Code')
GeoTable

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
...,...,...,...,...,...
98,M9N,York,Weston,43.706876,-79.518188
99,M9P,Etobicoke,Westmount,43.696319,-79.532242
100,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ...",43.688905,-79.554724
101,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest...",43.739416,-79.588437


# Install and import folium for map visualization

In [50]:
pip install folium

Collecting folium
  Downloading folium-0.11.0-py2.py3-none-any.whl (93 kB)
Collecting branca>=0.3.0
  Downloading branca-0.4.1-py3-none-any.whl (24 kB)
Installing collected packages: branca, folium
Successfully installed branca-0.4.1 folium-0.11.0
Note: you may need to restart the kernel to use updated packages.


In [51]:
import folium

# determine Toronto, Canada's coordinates

In [55]:
Add = 'Toronto, Canada'
geoL = Nominatim(user_agent="to_explorer")
location = geoL.geocode(Add)
Lat = location.latitude
Long = location.longitude
print('The geograpical coordinates of Toronto, Canada are the following: {}, {}.'.format(Lat, Long))

The geograpical coordinates of Toronto, Canada are the following: 43.6534817, -79.3839347.


# Visualize Toronto with folium maps

In [92]:
Tmap = folium.Map(location=[43.6534817, -79.3839347], tiles='stamenterrain', zoom_start=12)
print("This is a map of Toronto and surrounding areas ")
Tmap

This is a map of Toronto and surrounding areas 


# Adding labels and markers to Toronto Map

In [98]:
for lat, lng, borough, neighborhood in zip(GeoTable['Latitude'], GeoTable['Longitude'], GeoTable['Borough'], GeoTable['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=7,
        popup=label,
        color='purple',
        fill=True,
        fill_color='#232c43',
        fill_opacity=0.7,
        parse_html=False).add_to(Tmap) 
print(Tmap)