## 1. Scraping wikipedia webpage to create a dataframe 

In [1]:
# import libraries 
import pandas as pd 
import numpy as np 
from IPython.display import Image 
from IPython.core.display import HTML
from googlemaps import Client as GoogleMaps 

In [2]:
# reading url using pandas
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
web = pd.read_html(url)

# converting list to dataframe 
df = web[0]
df

Unnamed: 0,Postal code,Borough,Neighborhood
0,M1A,Not assigned,
1,M2A,Not assigned,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Regent Park / Harbourfront
...,...,...,...
175,M5Z,Not assigned,
176,M6Z,Not assigned,
177,M7Z,Not assigned,
178,M8Z,Etobicoke,Mimico NW / The Queensway West / South of Bloo...


> Only process the cells that have an assigned borough. Ignore cells with a borough that is Not assigned.

In [3]:
# filtering out rows with not assigned boroughs 
fil = (df['Borough'] == 'Not assigned')
df_fil1 = df[~fil]
df_fil1

Unnamed: 0,Postal code,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Regent Park / Harbourfront
5,M6A,North York,Lawrence Manor / Lawrence Heights
6,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government
...,...,...,...
160,M8X,Etobicoke,The Kingsway / Montgomery Road / Old Mill North
165,M4Y,Downtown Toronto,Church and Wellesley
168,M7Y,East Toronto,Business reply mail Processing CentrE
169,M8Y,Etobicoke,Old Mill South / King's Mill Park / Sunnylea /...


> More than one neighborhood can exist in one postal code area. For example, in the table on the Wikipedia page, you will notice that M5A is listed twice and has two neighborhoods: Harbourfront and Regent Park. These two rows will be combined into one row with the neighborhoods separated with a comma as shown in row 11 in the above table.

In [4]:
# using group by and join 
df_fil2 = df_fil1.groupby(['Postal code','Borough'])['Neighborhood'].apply(', '.join).reset_index()
df_fil2['Neighborhood'] = [x.replace('/',',') for x in df_fil2['Neighborhood']]
df_fil2

Unnamed: 0,Postal code,Borough,Neighborhood
0,M1B,Scarborough,"Malvern , Rouge"
1,M1C,Scarborough,"Rouge Hill , Port Union , Highland Creek"
2,M1E,Scarborough,"Guildwood , Morningside , West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
...,...,...,...
98,M9N,York,Weston
99,M9P,Etobicoke,Westmount
100,M9R,Etobicoke,"Kingsview Village , St. Phillips , Martin Grov..."
101,M9V,Etobicoke,"South Steeles , Silverstone , Humbergate , Jam..."


> If a cell has a borough but a Not assigned neighborhood, then the neighborhood will be the same as the borough.

In [5]:
df_fil2['Neighborhood'].replace('Not assigned',df_fil2['Borough'],inplace=True)
df_fil2

Unnamed: 0,Postal code,Borough,Neighborhood
0,M1B,Scarborough,"Malvern , Rouge"
1,M1C,Scarborough,"Rouge Hill , Port Union , Highland Creek"
2,M1E,Scarborough,"Guildwood , Morningside , West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
...,...,...,...
98,M9N,York,Weston
99,M9P,Etobicoke,Westmount
100,M9R,Etobicoke,"Kingsview Village , St. Phillips , Martin Grov..."
101,M9V,Etobicoke,"South Steeles , Silverstone , Humbergate , Jam..."


> In the last cell of your notebook, use the .shape method to print the number of rows of your dataframe.

In [6]:
df_fil2.shape

(103, 3)

## 2. Appending latitude and longitude data to the table 

In [7]:
len(df_fil2)

103

In [26]:
# getting longitude and latitude data from google API 
import os
API = os.getenv('GMAP_API')
gmaps = GoogleMaps(API)

#create empty columns for latitude and longtitude 
df_fil2['Latitude'] = ''
df_fil2['Longitude'] = ''

# fetching latitude and longtitude data 
for x in range(len(df_fil2)):
    result = gmaps.geocode('{}, Toronto, Ontario'.format(df_fil2['Postal code'][x]))
    df_fil2['Latitude'][x] = result[0]['geometry']['location'] ['lat']
    df_fil2['Longitude'][x] = result[0]['geometry']['location']['lng']
    
df_fil2.head()

Unnamed: 0,Postal code,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern , Rouge",43.8067,-79.1944
1,M1C,Scarborough,"Rouge Hill , Port Union , Highland Creek",43.7845,-79.1605
2,M1E,Scarborough,"Guildwood , Morningside , West Hill",43.7636,-79.1887
3,M1G,Scarborough,Woburn,43.771,-79.2169
4,M1H,Scarborough,Cedarbrae,43.7731,-79.2395


## 3. Visualizing data using folium 

In [27]:
import folium 

# setting central latitude and longitude 
latitude = 43.6532
longitude = -79.3832

# generate map centred around Toronto city
venues_map = folium.Map(location=[latitude, longitude], zoom_start=10) 

# add the Italian restaurants as blue circle markers
for lat, lng, label in zip(df_fil2['Latitude'], df_fil2['Longitude'], df_fil2['Neighborhood']):
    folium.vector_layers.CircleMarker(
        [lat, lng],
        radius=5,
        color='blue',
        popup=label,
        fill = True,
        fill_color='blue',
        fill_opacity=0.6
    ).add_to(venues_map)

venues_map