# Notebook dedicated to the Coursera Capstone project

In [4]:
import pandas as pd
import numpy as np

# Course-required greeting; confirms the kernel is running.
print("Hello Capstone Project Course!")

Hello Capstone Project Course!


# Part 1: Scraping Wikipedia

In [5]:
import requests
import lxml.html as lh

# Scrape the Toronto ("M") postal-code table from Wikipedia into the
# `neighborhoods` DataFrame (columns: PostalCode, Borough, Neighborhood).
url='https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

# Fail fast on a hung connection or an HTTP error status instead of
# silently parsing whatever error page came back.
page = requests.get(url, timeout=30)
page.raise_for_status()
doc = lh.fromstring(page.content)

# Every <tr> in the document, in document order (not only the main table).
tr_elements = doc.xpath('//tr')

column_names = ['PostalCode', 'Borough', 'Neighborhood']

# Rows are collected in a plain list and the DataFrame is built once at the
# end: DataFrame.append was removed in pandas 2.0 and copied the whole frame
# on every call.
records = []

# The first <tr> is skipped (presumably the table header - confirm against
# the live page).
for row in tr_elements[1:]:
    # The first row containing this text ends the scan; everything after it
    # is ignored.
    if "Canadian postal codes" in row.text_content():
        break

    # Text of the first three child cells; missing cells default to "".
    cells = [cell.text_content() for cell in row.iterchildren()][:3]
    cells += [""] * (3 - len(cells))
    postcode, borough, neighborhood = cells

    # Rows whose borough is "Not assigned" are dropped entirely.
    if "Not assigned" in borough:
        continue

    # An unassigned neighborhood falls back to the borough name.
    if "Not assigned" in neighborhood:
        neighborhood = borough
    neighborhood = neighborhood.replace("\n", "")

    records.append({'PostalCode': postcode, 'Borough': borough, 'Neighborhood': neighborhood})

neighborhoods = pd.DataFrame(records, columns=column_names)
neighborhoods.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M5A,Downtown Toronto,Regent Park
4,M6A,North York,Lawrence Heights


# Part 2: Shape of the scraped table

In [8]:
# (rows, columns) of `neighborhoods` at this point in the notebook.
neighborhoods.shape

(104, 3)

# Part 3: Grouping neighborhoods by postal code

In [10]:
# Collapse the table to one row per (PostalCode, Borough) pair, joining the
# neighborhood names of each pair into a single comma-separated string.
per_postcode = neighborhoods.groupby(['PostalCode', 'Borough'])['Neighborhood']
neighborhoods = per_postcode.agg(lambda names: ', '.join(names)).reset_index()

# Discard the first row of the grouped frame (groupby sorts by key, so this
# is the smallest PostalCode/Borough pair).
# NOTE(review): the drop is positional and this cell reassigns
# `neighborhoods`, so re-running the cell removes one more row each time -
# run it exactly once after the scraping cell.
first_label = neighborhoods.index[0]
neighborhoods = neighborhoods.drop(first_label)
neighborhoods.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
1,M1B,Scarborough,"Rouge, Malvern"
2,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
3,M1E,Scarborough,"Guildwood, Morningside, West Hill"
4,M1G,Scarborough,Woburn
5,M1H,Scarborough,Cedarbrae


# Part 4: Shape after grouping

In [12]:
# (rows, columns) of `neighborhoods` at this point in the notebook.
neighborhoods.shape

(103, 3)

# Part 5: Geospatial data (latitude/longitude per postal code)

In [13]:
# Load the coordinates CSV and rename its 'Postal Code' column so it shares
# the 'PostalCode' key used by `neighborhoods`.
csv = 'http://cocl.us/Geospatial_data'
geo = (
    pd.read_csv(csv, sep=',')
    .rename(columns={'Postal Code': 'PostalCode'})
)
geo.head()

Unnamed: 0,PostalCode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


# Part 6: Merge

In [14]:
# Attach the coordinate columns from `geo` to each row of `neighborhoods`
# (default inner join on the shared 'PostalCode' column).
bigdata = neighborhoods.merge(geo, on='PostalCode')
bigdata.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


# Part 7: Toronto boroughs only

In [15]:
# Keep only the rows whose borough name contains the text "Toronto".
is_toronto_borough = bigdata['Borough'].str.contains("Toronto")
toronto = bigdata.loc[is_toronto_borough]
toronto

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
37,M4E,East Toronto,The Beaches,43.676357,-79.293031
41,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188
42,M4L,East Toronto,"The Beaches West, India Bazaar",43.668999,-79.315572
43,M4M,East Toronto,Studio District,43.659526,-79.340923
44,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879
45,M4P,Central Toronto,Davisville North,43.712751,-79.390197
46,M4R,Central Toronto,North Toronto West,43.715383,-79.405678
47,M4S,Central Toronto,Davisville,43.704324,-79.38879
48,M4T,Central Toronto,"Moore Park, Summerhill East",43.689574,-79.38316
49,M4V,Central Toronto,"Deer Park, Forest Hill SE, Rathnelly, South Hi...",43.686412,-79.400049


# Part 8: Map

In [16]:
# Install the pinned folium release (0.5.0) from conda-forge into the current
# environment via a shell command; --yes skips the confirmation prompt.
!conda install -c conda-forge folium=0.5.0 --yes
import folium # plotting library

Solving environment: done

# All requested packages already installed.



In [18]:
# Base map centred on the fixed coordinates below, at zoom level 13.
map_center = [43.662744, -79.321558]
toronto_map = folium.Map(location=map_center, zoom_start=13)

# One blue circle marker per row of `toronto`; the popup shows the row's
# Borough value.
marker_rows = zip(toronto['Latitude'], toronto['Longitude'], toronto['Borough'])
for latitude, longitude, borough_name in marker_rows:
    marker = folium.features.CircleMarker(
        [latitude, longitude],
        radius=5,
        color='blue',
        popup=borough_name,
        fill=True,
        fill_color='blue',
        fill_opacity=0.6,
    )
    marker.add_to(toronto_map)

toronto_map

### Link to the Jupyter notebook:

https://eu-de.dataplatform.cloud.ibm.com/analytics/notebooks/v2/cc99ccad-6a66-4554-8941-29967e52351f/view?access_token=afad9698e152bfd78dc1263a87a576284d899050f0cb1eeed5f8fa98cb364c1a