# IBM Data Science Capstone Project

### Import Libraries

In [96]:
import pandas as pd
import numpy as np
import requests
import geocoder
from bs4 import BeautifulSoup
import folium

### Download and create pandas Dataframe for Canada's Postal Codes

In [4]:
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

In [6]:
req = requests.get(url)
if req.status_code == 200:
    print('Requisition OK')
    content = req.content

Requisition OK


In [10]:
soup = BeautifulSoup(content, 'html.parser')

In [12]:
table = soup.find(name='table')

In [51]:
table_str = str(table)
df = pd.read_html(table_str)[0]

In [54]:
print(df.shape)
print(df.columns)

(103, 3)
Index(['Postal code', 'Borough', 'Neighborhood'], dtype='object')


### Clean up and organize the Dataframe

In [81]:
df = df[df['Borough']!='Not assigned'].reset_index().drop('index',axis=1)

In [91]:
df['Neighborhood'] = df['Neighborhood'].str.replace(' / ',',')

In [92]:
df

Unnamed: 0,Postal code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park,Harbourfront"
3,M6A,North York,"Lawrence Manor,Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park,Ontario Provincial Government"
...,...,...,...
98,M8X,Etobicoke,"The Kingsway,Montgomery Road,Old Mill North"
99,M4Y,Downtown Toronto,Church and Wellesley
100,M7Y,East Toronto,Business reply mail Processing CentrE
101,M8Y,Etobicoke,"Old Mill South,King's Mill Park,Sunnylea,Humbe..."


### Add Coordinates to Dataframe

In [93]:
coordinates = pd.read_csv('Geospatial_Coordinates.csv')

In [101]:
coordinates.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [102]:
df = df.join(coordinates,how='left')

In [108]:
df

Unnamed: 0,Postal code,Borough,Neighborhood,Postal Code,Latitude,Longitude
0,M3A,North York,Parkwoods,M1B,43.806686,-79.194353
1,M4A,North York,Victoria Village,M1C,43.784535,-79.160497
2,M5A,Downtown Toronto,"Regent Park,Harbourfront",M1E,43.763573,-79.188711
3,M6A,North York,"Lawrence Manor,Lawrence Heights",M1G,43.770992,-79.216917
4,M7A,Downtown Toronto,"Queen's Park,Ontario Provincial Government",M1H,43.773136,-79.239476
...,...,...,...,...,...,...
98,M8X,Etobicoke,"The Kingsway,Montgomery Road,Old Mill North",M9N,43.706876,-79.518188
99,M4Y,Downtown Toronto,Church and Wellesley,M9P,43.696319,-79.532242
100,M7Y,East Toronto,Business reply mail Processing CentrE,M9R,43.688905,-79.554724
101,M8Y,Etobicoke,"Old Mill South,King's Mill Park,Sunnylea,Humbe...",M9V,43.739416,-79.588437


### Visualize the Data

In [104]:
latitude = df['Latitude'].mean()
longitude = df['Longitude'].mean()

In [114]:
_map = folium.Map(location=[latitude, longitude], zoom_start=10)

for lat, lng, borough, neighborhood in zip(df['Latitude'], df['Longitude'], df['Borough'], df['Neighborhood']):
    label = '{} - {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(_map)  

In [115]:
_map