### Python Notebook for the Capstone Project in Coursera - Irlan Grangel Gonzalez

In [1]:
import pandas as pd
import numpy as np

#### Reading the HTML table from the URL

In [2]:
# Wikipedia page that hosts the postal-code table
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

# read_html yields one DataFrame per HTML table found on the page;
# only the first one is used from here on.
page_tables = pd.read_html(url)
df = page_tables[0]

#### Processing the resulting DataFrame: removing the rows whose Borough is "Not assigned" (neighborhoods that share a Postal Code already appear joined, comma-separated, in the source table)

In [3]:
# Keep only the rows that have a real borough; the source table marks
# unassigned postal codes with the literal string "Not assigned".
# Row labels from the source table are kept, so the index may have gaps.
df = df[df['Borough'] != "Not assigned"]

# Last expression of the cell: display the filtered table.
df

Unnamed: 0,Postal Code,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
...,...,...,...
160,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
165,M4Y,Downtown Toronto,Church and Wellesley
168,M7Y,East Toronto,"Business reply mail Processing Centre, South C..."
169,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu..."


In [4]:
# (rows, columns) of df after the "Not assigned" boroughs were filtered out
df.shape

(103, 3)

#### Loading the geospatial coordinates and merging the two DataFrames on the Postal Code

In [5]:
# Coordinates table (one Latitude/Longitude pair per postal code), read from CSV
df_codes_lat_lon = pd.read_csv("https://cocl.us/Geospatial_data/Geospatial_Coordinates.csv")

# Join on the shared 'Postal Code' column. merge() defaults to an inner join,
# so only postal codes present in both tables end up in the result.
final_df = df.merge(df_codes_lat_lon, on='Postal Code')

final_df

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.654260,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
...,...,...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.653654,-79.506944
99,M4Y,Downtown Toronto,Church and Wellesley,43.665860,-79.383160
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C...",43.662744,-79.321558
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",43.636258,-79.498509


#### Creating the map of the Toronto neighborhoods without clustering

In [33]:
import folium

# Base map; the centre coordinates and the zoom level are hard-coded.
canada_map = folium.Map(location=[43.63, -79.40], zoom_start=12)

# Feature group that collects one circle marker per row of final_df.
neighborhood = folium.map.FeatureGroup()

# Replace apostrophes in the neighborhood names with spaces before they are
# used as pop-up text (presumably to avoid quoting problems in the rendered
# pop-ups -- confirm against the installed folium version).
# NOTE: this rewrites final_df['Neighborhood'] in place; the clustered-map
# cell reads the same column afterwards and therefore sees the rewritten names.
final_df['Neighborhood'] = final_df['Neighborhood'].str.replace("'", " ")

# Single pass over the rows: each location gets a circle marker (collected in
# the feature group) and a standard marker with a pop-up (added to the map).
for lat, lng, label in zip(final_df['Latitude'], final_df['Longitude'], final_df['Neighborhood']):
    neighborhood.add_child(
        folium.features.CircleMarker(
            [lat, lng],
            radius=5,  # circle size
            color='yellow',
            fill=True,
            fill_color='blue',
            fill_opacity=0.6
        )
    )
    folium.Marker([lat, lng], popup=label).add_to(canada_map)

# Attach the circle-marker layer; being the last expression of the cell, the
# call's return value is what the notebook displays.
canada_map.add_child(neighborhood)

#### Creating the map of the Toronto neighborhoods with clustering

In [39]:
from folium import plugins

# Fresh base map with the same centre and zoom as the unclustered one
canada_clustered_map = folium.Map(location=[43.63, -79.40], zoom_start=12)

# Marker cluster layer; every marker created below is attached to it
# rather than to the map directly.
neighborhood_ = plugins.MarkerCluster()
neighborhood_.add_to(canada_clustered_map)

# One marker per row of final_df, with the neighborhood name as pop-up text
rows = zip(final_df['Latitude'], final_df['Longitude'], final_df['Neighborhood'])
for row_lat, row_lng, row_name in rows:
    marker = folium.Marker(
        location=[row_lat, row_lng],
        icon=None,
        popup=row_name,
    )
    neighborhood_.add_child(marker)

# Last expression of the cell: render the clustered map
canada_clustered_map