# **Exploring Neighborhood Segmentation and Clustering in the City of Toronto**
##### *By: Alexandra Butke*

## Exploring the Data

In [2]:
#scrape the Wikipedia page

import requests
import urllib.request
import time
from bs4 import BeautifulSoup
import numpy as np
import pandas as pd
from urllib.request import urlopen
# Wikipedia page listing the Canadian postal codes that start with "M".
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

# Open the response in a `with` block so the underlying HTTP connection is
# always closed, even if reading or decoding the body raises.
with urlopen(url) as response:
    html = response.read().decode('utf-8')

# Parse the downloaded markup; later cells query this object for the table.
tables = BeautifulSoup(html, 'html.parser')

In [3]:
# Narrow the parsed page down to its first <table> element and collect
# every <td> cell inside it.
first_table = tables.find('table')
if first_table is None:
    # find() returns None when nothing matches (for example if the page
    # layout changed, or if `tables` no longer refers to the whole page).
    # Fail here with a clear message instead of an AttributeError on the
    # next line.
    raise ValueError("No <table> element found in the parsed page")
tables = first_table
data = tables.find_all('td')

In [4]:
# Turn the flat list of scraped cells into a three-column DataFrame.
# The cells are read in consecutive groups of three, in the order
# postal code, borough, neighborhood; surrounding whitespace is stripped.

postal_code, borough, neighborhood = [], [], []

for row_start in range(0, len(data), 3):
    postal_code.append(data[row_start].text.strip())
    borough.append(data[row_start + 1].text.strip())
    neighborhood.append(data[row_start + 2].text.strip())

# One entry per column; the key order fixes the column order.
df1 = pd.DataFrame({
    'Postal Code': postal_code,
    'Borough': borough,
    'Neighborhood': neighborhood,
})
df1.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


In [5]:
# Treat the 'Not assigned' placeholder as missing data.
# The cleaned columns are assigned back explicitly: calling
# replace(..., inplace=True) on a column selection acts on an intermediate
# Series, which under pandas Copy-on-Write never updates df1 (and recent
# pandas versions warn about it).
df1['Borough'] = df1['Borough'].replace('Not assigned', np.nan)
df1['Neighborhood'] = df1['Neighborhood'].replace('Not assigned', np.nan)

# Drop rows that have no borough; the original row labels are kept.
df1 = df1.dropna(subset=['Borough'])
df1

Unnamed: 0,Postal Code,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
...,...,...,...
160,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
165,M4Y,Downtown Toronto,Church and Wellesley
168,M7Y,East Toronto,"Business reply mail Processing Centre, South C..."
169,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu..."


In [6]:
# Show (rows, columns) of df1 as a quick sanity check on the cleaned frame.
df1.shape

(103, 3)

## Retrieve neighborhood latitude and longitude coordinates

In [7]:
# Get geospatial data for Toronto.
# The hosted CSV is read straight into a DataFrame; the preview shows its
# Postal Code, Latitude and Longitude columns.

toronto_geo_url = "https://cocl.us/Geospatial_data"
toronto_geo_data = pd.read_csv(filepath_or_buffer=toronto_geo_url)
toronto_geo_data.head(n=5)

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [8]:
# Attach coordinates to the scraped Wikipedia rows by joining them with the
# geospatial table on the shared 'Postal Code' column. This is an inner join
# (the pandas default), so only postal codes present in both frames are kept.

toronto_df = df1.merge(toronto_geo_data, how='inner', on='Postal Code')
toronto_df

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.654260,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
...,...,...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.653654,-79.506944
99,M4Y,Downtown Toronto,Church and Wellesley,43.665860,-79.383160
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C...",43.662744,-79.321558
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",43.636258,-79.498509


## Generate maps to visualize Toronto neighborhoods

In [9]:
# create map of Toronto using latitude and longitude values
import folium # map rendering library

# Fixed centre point for the initial map view (hard-coded, not derived from
# toronto_df).
latitude = 43.728020
longitude = -79.388790

map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)


# Add one circle marker per row of toronto_df.
# The loop variables carry a `marker_` prefix so they do not overwrite the
# `borough` / `neighborhood` lists built while parsing the Wikipedia table.
for marker_lat, marker_lng, marker_borough, marker_neighborhood in zip(
        toronto_df['Latitude'],
        toronto_df['Longitude'],
        toronto_df['Borough'],
        toronto_df['Neighborhood']):
    popup_text = '{}, {}'.format(marker_neighborhood, marker_borough)
    # parse_html is an option of folium.Popup. It is deliberately not passed
    # to CircleMarker below, where it is not a marker styling option.
    # NOTE(review): presumably parse_html=True is used so that names with
    # characters such as apostrophes render safely; confirm in folium docs.
    popup = folium.Popup(popup_text, parse_html=True)
    folium.CircleMarker(
        [marker_lat, marker_lng],
        radius=5,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_toronto)

# Last expression of the cell: renders the interactive map inline.
map_toronto