## 1. Start by creating a new Notebook for this assignment.

## 2. Use the Notebook to build the code to scrape the following Wikipedia page, https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M, in order to obtain the data that is in the table of postal codes and to transform the data into a pandas dataframe like the one shown below:

In [101]:
import csv
from io import StringIO

import folium
import matplotlib.cm as cm
import matplotlib.colors as colors
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup

print('Libraries have been imported.')

Libraries have been imported.


We download the page with `requests` and parse it with BeautifulSoup (bs4) to locate the postal-code table.

In [6]:
# Download the Wikipedia page that lists the "M" postal codes
URL = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
req = requests.get(URL, timeout=30)  # the timeout keeps the cell from hanging forever
req.raise_for_status()  # stop on an HTTP error instead of parsing an error page

# Name the parser explicitly: the generic 'html' feature lets bs4 choose among
# whichever parsers are installed, so the parse could differ between machines.
soup = BeautifulSoup(req.content, 'html.parser')
table = soup.find('table', {'class': 'wikitable sortable'})  # Locate the table we're pulling data from
if table is None:
    # find() returns None when nothing matches; fail with a clear message here
    raise ValueError("No 'wikitable sortable' table found at {}".format(URL))
link = table.find_all('td')  # All table-data (<td>) cells of that table

print('Page has been scrapped.')

Page has been scrapped.


## 3. "To create the above dataframe:"

In [7]:
# Parse the scraped <table> markup into DataFrames. The markup is wrapped in
# StringIO because passing a literal HTML string to read_html is deprecated
# in recent pandas releases.
df = pd.read_html(StringIO(str(table)))  # read_html returns a list of DataFrames

# Use the first parsed table directly. The previous to_json -> read_json
# round-trip added nothing for this table and itself relied on passing a
# literal string, which is deprecated as well.
data = df[0].copy()

# And we show the first 5 rows
data.head()

Unnamed: 0,Postal code,Borough,Neighborhood
0,M1A,Not assigned,
1,M2A,Not assigned,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Regent Park / Harbourfront


In [8]:
# Keep only the rows whose Borough is something other than 'Not assigned'
filt_data_boro = data.loc[data['Borough'].ne('Not assigned')]

In [26]:
# Collapse rows that share a (Borough, Postal code) pair into one row,
# concatenating the remaining text values with ","
filtered = (
    filt_data_boro
    .groupby(['Borough', 'Postal code'], as_index=False)
    .agg(lambda values: ','.join(values))
)


In [32]:
# Rewrite the " / " separator of this one neighbourhood entry with a comma.
# The result is assigned back to the column: calling replace(..., inplace=True)
# on filtered["Neighborhood"] modifies a temporary Series, which leaves
# `filtered` unchanged once pandas Copy-on-Write is active.
filtered["Neighborhood"] = filtered["Neighborhood"].replace(
    {"Moore Park / Summerhill East": "Moore Park , Summerhill East"}
)

filtered.head()

Unnamed: 0,Borough,Postal code,Neighborhood
0,Central Toronto,M4N,Lawrence Park
1,Central Toronto,M4P,Davisville North
2,Central Toronto,M4R,North Toronto West
3,Central Toronto,M4S,Davisville
4,Central Toronto,M4T,"Moore Park , Summerhill East"


In [52]:
# Where the neighbourhood is 'Not assigned', fall back to the borough name
filtered['Neighborhood'] = filtered['Neighborhood'].mask(
    filtered['Neighborhood'] == 'Not assigned',
    filtered['Borough'],
)

SyntaxError: invalid syntax (<ipython-input-52-d8c0cd44ebed>, line 1)

In [51]:
# Display the (rows, columns) dimensions of the grouped frame
filtered.shape

(103, 3)

In [70]:
# Relabel the three columns, by position, with the names used from here on
filtered.columns = pd.Index(['Borough', 'Postcode', 'Neighborhood'])
filtered.columns

Index(['Borough', 'Postcode', 'Neighborhood'], dtype='object')

## 4.  Submit a link to your Notebook on your Github repository

In [54]:
# Read the geospatial CSV file straight from its URL into a DataFrame
GeoSpa_URL = 'http://cocl.us/Geospatial_data'
GeoSpa_Data = pd.read_csv(filepath_or_buffer=GeoSpa_URL)

In [55]:
# Preview the first five rows of the geospatial table
GeoSpa_Data.head(n=5)

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [71]:
# Relabel the columns, by position, to match the assignment's names
GeoSpa_Data.columns = pd.Index(['Postcode', 'Latitude', 'Longitude'])
GeoSpa_Data.columns

Index(['Postcode', 'Latitude', 'Longitude'], dtype='object')

In [73]:
# Inner-join the borough/neighbourhood frame with the coordinates frame on
# their shared 'Postcode' column
MergeData = filtered.merge(GeoSpa_Data, how='inner', on='Postcode')
MergeData.head()

Unnamed: 0,Borough,Postcode,Neighborhood,Latitude,Longitude
0,Central Toronto,M4N,Lawrence Park,43.72802,-79.38879
1,Central Toronto,M4P,Davisville North,43.712751,-79.390197
2,Central Toronto,M4R,North Toronto West,43.715383,-79.405678
3,Central Toronto,M4S,Davisville,43.704324,-79.38879
4,Central Toronto,M4T,"Moore Park , Summerhill East",43.689574,-79.38316


In [96]:
# To plot the coordinates on a map, pair each row's Latitude and Longitude
# into a single (latitude, longitude) tuple
MergeData['Location'] = list(
    MergeData[['Latitude', 'Longitude']].itertuples(index=False, name=None)
)
MergeData.head()

Unnamed: 0,Borough,Postcode,Neighborhood,Latitude,Longitude,Location
0,Central Toronto,M4N,Lawrence Park,43.72802,-79.38879,"(43.7280205, -79.3887901)"
1,Central Toronto,M4P,Davisville North,43.712751,-79.390197,"(43.7127511, -79.3901975)"
2,Central Toronto,M4R,North Toronto West,43.715383,-79.405678,"(43.7153834, -79.40567840000001)"
3,Central Toronto,M4S,Davisville,43.704324,-79.38879,"(43.7043244, -79.3887901)"
4,Central Toronto,M4T,"Moore Park , Summerhill East",43.689574,-79.38316,"(43.6895743, -79.38315990000001)"


In [104]:
# Install geopy from the conda-forge channel through a shell escape
# (--yes answers the confirmation prompt), then import its Nominatim geocoder.
# The install has to run before the import on a machine without geopy.
!conda install -c conda-forge geopy --yes
from geopy.geocoders import Nominatim

print('Done')

Collecting package metadata (current_repodata.json): ...working... done
Solving environment: ...working... 
  - anaconda/win-64::openssl-1.1.1d-he774522_2
  - defaults/win-64::openssl-1.1.1d-he774522_2done

# All requested packages already installed.

Done


In [116]:
address = 'Toronto'

# Look up the coordinates of the address with the Nominatim geocoder
geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
if location is None:
    # geocode() returns None when nothing matches; raise a clear error rather
    # than failing with an AttributeError on the attribute access below
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

# Empty DataFrame that has the same columns as MergeData
column_names = MergeData.columns
neighborhoods = pd.DataFrame(columns=column_names)

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


In [118]:
# Create a map of Toronto centred on the geocoded latitude and longitude
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# Add one circle marker per row of MergeData, with a "neighborhood, borough" popup
for lat, lng, borough, neighborhood in zip(MergeData['Latitude'], MergeData['Longitude'], MergeData['Borough'], MergeData['Neighborhood']):
    # parse_html is a Popup option, so it is set here only; the extra
    # parse_html=False that used to be passed to CircleMarker is not one of
    # that class's parameters and has been dropped.
    label = folium.Popup('{}, {}'.format(neighborhood, borough), parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='red',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
    ).add_to(map_toronto)

map_toronto