In [None]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

#!conda install -c conda-forge beautifulsoup4 --yes
from bs4 import BeautifulSoup
from html.parser import HTMLParser

import requests # library to handle requests
import json # library to handle JSON files

# json_normalize transforms a JSON file into a pandas dataframe.
# Prefer the top-level pandas export and fall back to the legacy
# pandas.io.json location for older pandas releases that lack it.
try:
    from pandas import json_normalize
except ImportError:
    from pandas.io.json import json_normalize

print('Libraries imported.')

# 1.Scraping Data from Wikipedia

In [None]:
url="https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"

# Make a GET request to fetch the raw HTML content.
# The timeout keeps the cell from hanging forever on a stalled connection,
# and raise_for_status() stops here on an HTTP error instead of silently
# parsing an error page in the cells below.
response = requests.get(url, timeout=30)
response.raise_for_status()
html_content = response.text
soup = BeautifulSoup(html_content, "html.parser")

# 2.Prepare Dataframe

In [None]:
# Locate the first table carrying the "wikitable" class in the parsed page.
gdp_table = soup.find("table", attrs={"class": "wikitable"})
if gdp_table is None:
    raise ValueError('No table with class "wikitable" was found in the page.')

# Collect every row once, searching from the table itself so this works
# whether or not the markup contains an explicit <tbody>.
gdp_table_data = gdp_table.find_all("tr")
if not gdp_table_data:
    raise ValueError('The "wikitable" table contains no rows.')

# Column headings come from the <th> cells of the first row;
# newlines are replaced by spaces and surrounding whitespace is trimmed.
headings = [
    th.text.replace('\n', ' ').strip()
    for th in gdp_table_data[0].find_all("th")
]
print(headings)

# Build one dict per data row, pairing each <td> with its heading.
# Rows without any <td> (such as the heading row) are skipped so they do
# not end up as empty, all-NaN records in the dataframe.
table_data = []
for tr in gdp_table_data[1:]:
    cells = tr.find_all("td")
    if not cells:
        continue
    table_data.append(
        {th: td.text.replace('\n', '').strip() for td, th in zip(cells, headings)}
    )

# Put the table data, keyed by heading, into a dataframe.
testdata = pd.DataFrame(table_data)

## 2.a. Remove rows whose Borough is "Not assigned"

In [None]:
# Keep rows whose Borough differs from the "Not assigned" placeholder,
# then discard any row that still holds a missing value.
has_borough = testdata['Borough'].ne("Not assigned")
onlyassigned = testdata.loc[has_borough].dropna()

## 2.b. Replace "/" with "," in Neighborhood

In [None]:
# Swap every "/" for "," in the Neighborhood column.
# The result is assigned back as a new frame instead of calling
# replace(..., inplace=True) on a selected column: that in-place form acts
# on an intermediate object and does not reliably update the frame
# (it has no effect at all under pandas Copy-on-Write).
onlyassigned = onlyassigned.assign(
    Neighborhood=onlyassigned["Neighborhood"].replace("/", ",", regex=True)
)

## 2.c. display the shape of the dataframe

In [None]:
# Display the (rows, columns) tuple of the filtered dataframe.
onlyassigned.shape

## 2.d. Using Geospatial_data csv file to get lat/lon 

In [None]:
# Read the CSV of geospatial data straight from its URL into a dataframe.
geospatial_csv_url = "http://cocl.us/Geospatial_data"
geodata = pd.read_csv(geospatial_csv_url)

In [None]:
# Join the scraped rows with the geospatial rows on the postal code.
# The two sources spell the key differently ('Postal code' vs 'Postal Code'),
# so the left-hand copy is dropped after the join.
merged = onlyassigned.merge(
    geodata,
    left_on='Postal code',
    right_on='Postal Code',
)
neighborhoods = merged.drop(columns=['Postal code'])

In [None]:
# Rename the key column to 'PostalCode'. Reassigning the result (rather than
# mutating with inplace=True) keeps the cell a plain "inputs -> new value"
# step that is safe to re-run.
neighborhoods = neighborhoods.rename(columns={'Postal Code': 'PostalCode'})

In [None]:
# Keep only the columns of interest, in presentation order.
column_order = ['PostalCode', 'Borough', 'Neighborhood', 'Latitude', 'Longitude']
neighborhoods = neighborhoods[column_order]

In [None]:
# Display the neighborhoods dataframe as the cell output.
neighborhoods

In [None]:
# Report the number of distinct Borough values and the number of rows.
borough_count = len(neighborhoods['Borough'].unique())
row_count = neighborhoods.shape[0]
summary_template = 'The dataframe has {} boroughs and {} neighborhoods.'
print(summary_template.format(borough_count, row_count))

In [None]:
!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library
!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

Solving environment: \ 

In [None]:
# Create the map of Toronto. No cell visible in this notebook defines
# `latitude` / `longitude`, so the map is centered on the mean coordinate of
# the rows being plotted instead of relying on names from another cell.
map_center = [
    float(neighborhoods['Latitude'].mean()),
    float(neighborhoods['Longitude'].mean()),
]
map_toronto = folium.Map(location=map_center, zoom_start=10)

# Add one circle marker per row, labelled "<neighborhood>, <borough>".
for lat, lng, borough, neighborhood in zip(neighborhoods['Latitude'], neighborhoods['Longitude'], neighborhoods['Borough'], neighborhoods['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    # parse_html=True makes the popup treat the label as text, not markup
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        # NOTE(review): parse_html is forwarded as an extra keyword here;
        # confirm CircleMarker actually needs it.
        parse_html=False).add_to(map_toronto)

# Render the map as the cell output.
map_toronto