## Part 1

In [33]:
# Third-party dependencies for geocoding, map rendering and clustering
import geocoder
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
import folium # map rendering library
# NOTE(review): `pandas.io.json.json_normalize` is the deprecated import path
# (pandas >= 1.0 exposes it as `pandas.json_normalize`) -- confirm this import
# still works with the installed pandas version.
from pandas.io.json import json_normalize  # transform JSON file into a pandas dataframe
import numpy as np
from sklearn.cluster import KMeans

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

import pandas as pd # library for data analysis
# Lift pandas' display limits so DataFrames are shown with every column and
# every row (None = no truncation).
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)


In [4]:
# Download the Wikipedia page listing the "M" postal codes of Canada.
# `src` holds the raw response bytes, which are parsed with Beautiful Soup later.
import requests
from bs4 import BeautifulSoup
result = requests.get(
    'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M',
    timeout=30,  # requests never times out by default; avoid hanging the kernel on a stalled connection
)
# Stop here on an HTTP error status instead of silently parsing an error page.
result.raise_for_status()
src = result.content

In [5]:
# Parse the downloaded HTML and keep the whitespace-stripped text of every
# <td> element found anywhere in the page.
soup = BeautifulSoup(src, 'lxml')
data = [cell.text.strip() for cell in soup.find_all("td")]

# Split every scraped cell into three parallel lists: postal code, borough
# and neighbourhood(s).
# Presumably each cell reads like "M5A" followed by
# "Borough(Neighbourhood / Neighbourhood)" -- inferred from the slicing below.
zip_codes = []
boroughs = []
neighberhoods = []

for cell_text in data:
    # The first three characters are taken as the postal code.
    zip_codes.append(cell_text[0:3])

    # Drop closing parentheses, then split on "(":
    #   parts[0] -> borough, parts[1] -> neighbourhood list (when present).
    parts = cell_text[3:].replace(")", '').split('(')
    boroughs.append(parts[0])

    # An explicit length check replaces the former bare `except:`, which hid
    # every error (even KeyboardInterrupt) just to detect a missing "(".
    if len(parts) > 1:
        # " /" separates neighbourhoods in the page text; show them comma-separated.
        neighberhoods.append(parts[1].replace(' /', ','))
    else:
        neighberhoods.append('Not assigned')


# Assemble the three parallel lists into a raw DataFrame.
d = {'Postal Code': zip_codes, 'Borough': boroughs, 'Neighborhood': neighberhoods}
df_t = pd.DataFrame(d)

# Keep only rows that
#   * have an assigned borough,
#   * carry a postal code longer than two characters, and
#   * whose postal code is purely alphanumeric.
postal_code = df_t['Postal Code']
has_borough = df_t['Borough'] != 'Not assigned'
has_full_code = postal_code.map(len) > 2
is_alnum_code = postal_code.str.isalnum()

# reset_index(drop=True) renumbers the surviving rows from 0 and discards the
# old index in one step, without mutating a filtered slice in place.
df = df_t[has_borough & has_full_code & is_alnum_code].reset_index(drop=True)
df.head()


Unnamed: 0,Postal Code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Queen's Park,Ontario Provincial Government


In [6]:
# (rows, columns) of the cleaned postal-code table; the output recorded for
# this cell is (103, 3).
df.shape

(103, 3)

## Part 2

In [7]:
# Load the coordinate table from the local CSV file.
cords = pd.read_csv('Geospatial_Coordinates.csv')

# Attach Latitude/Longitude to each postal code. pd.merge performs an inner
# join by default, so postal codes absent from either frame are dropped.
df_all = pd.merge(df, cords, on=['Postal Code'])
df_all.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Queen's Park,Ontario Provincial Government,43.662301,-79.389494


## Part 3
Analyze Each Neighborhood

In [8]:
# Look up the latitude/longitude of the city of Toronto; these coordinates
# are used to centre the maps drawn below.
city = 'Toronto, ON'

geolocator = Nominatim(user_agent='toronto_explorer')
location = geolocator.geocode(city)
# geocode() returns None when the service finds no match; fail with a clear
# message instead of an AttributeError on the attribute access below.
if location is None:
    raise ValueError('Could not geocode {!r}'.format(city))

lat = location.latitude
lng = location.longitude
print('Geo Location is : {} Latitiude, {} Longitude'.format(lat, lng))

Geo Location is : 43.6534817 Latitiude, -79.3839347 Longitude


In [9]:
# Create the base map centred on (lat, lng) -- the coordinates assigned in the
# geocoding cell above -- at zoom level 12. The bare expression on the last
# line makes the map the cell's displayed output.
map_toronto = folium.Map(location=[lat,lng], zoom_start=12)
map_toronto

In [10]:
# Add one circle marker per row of df_all to the city-wide map.
# The loop variables are deliberately not called lat/lng: those names hold the
# city-centre coordinates passed to folium.Map in other cells, and rebinding
# them here would silently re-centre every map created after this cell runs.
for row_lat, row_lng, bor, neigh in zip(df_all['Latitude'], df_all['Longitude'], df_all['Borough'], df_all['Neighborhood']):

    # Popup text shown on click: "<neighbourhood>, <borough>"
    label = '{}, {}'.format(neigh, bor)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [row_lat, row_lng],
        popup=label,
        radius=5,
        color='red',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)

map_toronto

## Boroughs whose names contain "Toronto"

In [11]:
# Keep only the rows whose borough name contains "Toronto", renumbered from 0.
in_toronto = df_all['Borough'].str.contains("Toronto")
df_tor_bor = df_all.loc[in_toronto].reset_index(drop=True)
df_tor_bor.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
1,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
2,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
3,M4E,East Toronto,The Beaches,43.676357,-79.293031
4,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306


In [12]:

# Map restricted to the boroughs whose name contains "Toronto".
# Centre on the geocoded city location itself rather than on lat/lng: those
# names are reused as loop variables elsewhere in the notebook, so by the time
# this cell runs they may hold the coordinates of the last plotted row.
map_toronto_center = folium.Map(
    location=[location.latitude, location.longitude],
    zoom_start=12,
)

# Plot one circle marker per row of df_tor_bor on the new map. Loop variables
# use their own names so the city-centre lat/lng are left untouched.
for row_lat, row_lng, bor, neigh in zip(df_tor_bor['Latitude'], df_tor_bor['Longitude'], df_tor_bor['Borough'], df_tor_bor['Neighborhood']):

    # Popup text shown on click: "<neighbourhood>, <borough>"
    label = '{}, {}'.format(neigh, bor)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [row_lat, row_lng],
        popup=label,
        radius=5,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto_center)

map_toronto_center