## Install libraries relevant to the task

In [72]:
!pip install BeautifulSoup4
!pip install lxml
!pip install requests
!pip install geopy
import requests
import io
import json

import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.colors as colors

from bs4 import BeautifulSoup
import pandas as pd
import numpy as np



### Scraping the Data
1. Get the Wikipedia page and store the response in the variable `source`.
2. Create a `soup` object that parses the `source` variable.
3. Create the empty list variables `data` and `columns` to simplify data frame creation later.
4. Create a nested for loop that goes through:<br>
    A. Each individual row ('tr').<br>
    B. Each header ('th') and standard cell ('td') of that row, appending its text to the list variable `section`.<br>
        a. If the row index is 0, `section` holds the headers and is assigned to the list variable `columns`.
        b. Otherwise `section` is appended to the list variable `data`.
5. Create the Pandas data frame `canada_df` from the list variables `data` and `columns`.
6. Use the `head` function to display the first 15 rows of the new data frame.

In [44]:
# Download the Wikipedia page that lists the postal codes starting with "M".
source = requests.get("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M")
# Fail loudly on an HTTP error instead of trying to parse an error page.
source.raise_for_status()
soup = BeautifulSoup(source.text, 'lxml')

# The first element carrying the 'wikitable' class is used as the data table.
table = soup.find(class_='wikitable')
if table is None:
    raise ValueError("No element with class 'wikitable' was found on the page")

data = []
columns = []
for index, tr in enumerate(table.find_all('tr')):
    # Collect header ('th') and data ('td') cells in document order,
    # dropping trailing whitespace from each cell's text.
    section = [cell.text.rstrip() for cell in tr.find_all(['th', 'td'])]
    if index == 0:
        # The first row of the table supplies the column names.
        columns = section
    else:
        data.append(section)

canada_df = pd.DataFrame(data=data, columns=columns)
canada_df.head(15)

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1A,Not assigned,
1,M2A,Not assigned,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
7,M8A,Not assigned,
8,M9A,Etobicoke,Islington Avenue
9,M1B,Scarborough,"Malvern, Rouge"


#### Only process the cells that have an assigned borough. Ignore cells with a borough that is Not assigned.

In [51]:
# Keep only the rows whose borough is something other than 'Not assigned'.
has_borough = canada_df['Borough'].ne('Not assigned')
canada_df = canada_df[has_borough]
canada_df.head(20)

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park"
7,M1L,Scarborough,"Golden Mile, Clairlea, Oakridge"
8,M1M,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


#### More than one neighborhood can exist in one postal code area. For example, in the table on the Wikipedia page, you will notice that M5A is listed twice and has two neighborhoods: Harbourfront and Regent Park. These two rows will be combined into one row with the neighborhoods separated with a comma as shown in row 11 in the above table.
#### If a cell has a borough but a Not assigned neighborhood, then the neighborhood will be the same as the borough

In [63]:
# Merge the rows that share a postal code and borough into a single row whose
# 'Neighborhood' value is the comma-separated list of the group's neighborhoods.
canada_df = (
    canada_df
    .groupby(['Postal Code', 'Borough'])['Neighborhood']
    .agg(', '.join)
    .to_frame()
    .reset_index()
)

# Where the neighborhood is the placeholder 'Not assigned', fall back to the borough.
# The assignment goes through .loc on the frame itself: rows yielded by
# iterrows() may be copies, so writing to them is not guaranteed to update
# the data frame.
unassigned = canada_df['Neighborhood'] == 'Not assigned'
canada_df.loc[unassigned, 'Neighborhood'] = canada_df.loc[unassigned, 'Borough']
canada_df.head(20)

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park"
7,M1L,Scarborough,"Golden Mile, Clairlea, Oakridge"
8,M1M,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


#### Prove code worked

In [66]:
# Display the row(s) whose postal code is M5A.
canada_df[canada_df['Postal Code'].eq('M5A')]

Unnamed: 0,Postal Code,Borough,Neighborhood
53,M5A,Downtown Toronto,"Regent Park, Harbourfront"


#### In the last cell of your notebook, use the `.shape` attribute to print the number of rows of your data frame.

In [70]:
# Tuple of (number of rows, number of columns) of canada_df.
canada_df.shape

(103, 3)

1. Use the Geocoder package or the csv file to create the data frame.
2. Rename the columns so that the first one is `Postal Code`, which allows merging the data frames on that column.
3. Reorder column names and show the data frame.

In [80]:
# Download the CSV file with the coordinates of each postal code.
geo_response = requests.get("http://cocl.us/Geospatial_data")
# Fail loudly on an HTTP error instead of parsing an error page as CSV.
geo_response.raise_for_status()
geo_source = geo_response.content
geo_df = pd.read_csv(io.StringIO(geo_source.decode('utf-8')))

# Name the columns positionally so the key column matches canada_df's 'Postal Code'.
# NOTE(review): assumes the CSV has exactly three columns in this order - confirm.
geo_df.columns = ['Postal Code', 'Latitude', 'Longitude']
# pd.merge defaults to an inner join: only postal codes present in both frames are kept.
df = pd.merge(geo_df, canada_df, on='Postal Code')

# Reorder the columns for display.
df = df[['Postal Code', 'Borough', 'Neighborhood', 'Latitude', 'Longitude']]
df

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
...,...,...,...,...,...
98,M9N,York,Weston,43.706876,-79.518188
99,M9P,Etobicoke,Westmount,43.696319,-79.532242
100,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ...",43.688905,-79.554724
101,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest...",43.739416,-79.588437


<h4>1. Create the Toronto data frame from the Canada data, keeping the rows where the borough name contains Toronto, and reset the index.
<br>2. Check the shape to ensure it worked.</h4>

In [84]:
# Select the rows whose borough name contains 'Toronto'. reset_index() is called
# without drop=True, so the previous index is kept as an extra column.
in_toronto = df['Borough'].str.contains('Toronto')
toronto_df = df[in_toronto].reset_index()
toronto_df.shape

(39, 6)

<h4>1. Import geopy and folium.
   <br>2. Gather the coordinates of Toronto with geopy.
   <br>3. Create a map of Toronto.
   <br>4. Plot the neighborhoods of Toronto as markers on the Toronto map.</h4>

In [91]:
from geopy.geocoders import Nominatim
import folium
address = "Toronto, ON"

# Look up the coordinates of the address through the Nominatim geocoder.
geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
# geocode() returns None when nothing matches; stop here with a clear message
# instead of failing on the attribute access below.
if location is None:
    raise ValueError("Geocoder returned no result for address: {}".format(address))
latitude = location.latitude
longitude = location.longitude

In [94]:

# Base map centred on the geocoded coordinates.
toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(toronto_df['Latitude'], toronto_df['Longitude'], toronto_df['Borough'], toronto_df['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    # parse_html is an option of folium.Popup, so it is set here and is not
    # passed to the marker.
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(toronto)

# Last expression of the cell: renders the map.
toronto