# Toronto Neighborhood Analysis

## Part I: Prepare and Organize Data

#### First Step: Import all relevant libraries

In [54]:
import pandas as pd
import numpy as np
import bs4 as soup
import html5lib, lxml

#### Second Step: Scrape all necessary data

In [55]:
# Remote sources: the Wikipedia page of Canadian postal codes starting with "M",
# and a CSV that is merged with that table in the next step.
POSTAL_CODES_URL = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
GEOSPATIAL_CSV_URL = 'https://cocl.us/Geospatial_data'

# read_html returns a list holding one DataFrame per HTML table found on the page
post_codes = pd.read_html(POSTAL_CODES_URL)

# Geospatial table, read straight from the remote CSV
geo_data = pd.read_csv(GEOSPATIAL_CSV_URL)

#### Third Step: Clean and organize data and then merge tables

In [56]:
# Clean the first table scraped from the page. The calls are chained and
# non-mutating, so re-running this cell always starts from the raw scrape.
df_codes = (
    post_codes[0]
    .replace('Not assigned', np.nan)  # turn the 'Not assigned' placeholder into a real NaN
    .dropna(subset=['Borough'])       # discard rows that have no borough
)

# Attach the geospatial columns via the shared 'Postal Code' column.
# An inner join keeps only postal codes present in both tables, so no row with a
# missing borough or missing coordinates reaches the map cells, and the row
# order of df_codes is preserved (an outer join adds NaN-filled rows for
# unmatched codes and may re-sort the keys).
merged_df = df_codes.merge(geo_data, on='Postal Code', how='inner')

## Part II: Visualize Data

#### First Step: Import all relevant libraries

In [57]:
# Imports and display settings for Part II (numpy and pandas are re-imported here).
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analysis
# Remove pandas' display limits so frames are shown with every column and row
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle HTTP requests
from pandas import json_normalize # transform a JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Libraries imported.


#### Second Step: Create a map of Toronto with neighborhoods indicated

In [58]:
# Create blank map centered on Toronto
# Blank map centred on Toronto
TORONTO_CENTER = [43.6532, -79.3832]
map_Toronto = folium.Map(location=TORONTO_CENTER, zoom_start=10)

# Add one circle marker per row of merged_df, with a "Neighborhood, Borough" popup
for lat, lng, borough, neighborhood in zip(
    merged_df['Latitude'],
    merged_df['Longitude'],
    merged_df['Borough'],
    merged_df['Neighborhood'],
):
    # parse_html is a Popup option, not a CircleMarker one, so it is set only here
    popup = folium.Popup('{}, {}'.format(neighborhood, borough), parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
    ).add_to(map_Toronto)

# Last expression of the cell: displays the map inline
map_Toronto

#### Third Step: Create a subset that includes only Downtown Toronto neighborhoods

In [59]:
# Boolean mask marking the rows whose borough is exactly 'Downtown Toronto'
is_downtown = merged_df['Borough'] == 'Downtown Toronto'

# Keep the matching rows and renumber them from 0
Toronto_data = merged_df.loc[is_downtown].reset_index(drop=True)

# Preview the first rows of the subset
Toronto_data.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
1,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
4,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306


#### Fourth Step: Create a second map, this time focused on Downtown Toronto

In [60]:
# create blank map of Downtown Toronto
# Blank map for the Downtown Toronto subset, using a closer zoom (13)
map_Toronto_2 = folium.Map(location=[43.6532, -79.3832], zoom_start=13)

# Add one circle marker per row of Toronto_data, with the neighborhood name as popup
for lat, lng, neighborhood in zip(
    Toronto_data['Latitude'],
    Toronto_data['Longitude'],
    Toronto_data['Neighborhood'],
):
    # A separate name for the Popup avoids rebinding the loop variable.
    # parse_html is a Popup option, not a CircleMarker one, so it is set only here.
    popup = folium.Popup(neighborhood, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
    ).add_to(map_Toronto_2)

# Last expression of the cell: displays the map inline
map_Toronto_2