### Import pandas for scraping and DataFrame operations

In [1]:
import pandas as pd

### Use the Notebook to build the code to scrape the following Wikipedia page

In [2]:
# Parse the HTML tables found on the Wikipedia "M" postal-code page.
POSTAL_CODES_URL = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
tables = pd.read_html(POSTAL_CODES_URL)

### The dataframe will consist of three columns: Postcode, Borough, and Neighbourhood

In [3]:
# Use the first table parsed from the page as the working frame.
first_table = tables[0]
df = pd.DataFrame(first_table)
df

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
...,...,...,...
282,M8Z,Etobicoke,Mimico NW
283,M8Z,Etobicoke,The Queensway West
284,M8Z,Etobicoke,Royal York South West
285,M8Z,Etobicoke,South of Bloor


### Only process the cells that have an assigned borough. Ignore cells with a borough that is Not assigned

In [4]:
# Keep only the rows whose borough is something other than "Not assigned".
has_borough = df['Borough'].ne("Not assigned")
df = df.loc[has_borough]
df

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M6A,North York,Lawrence Heights
6,M6A,North York,Lawrence Manor
...,...,...,...
281,M8Z,Etobicoke,Kingsway Park South West
282,M8Z,Etobicoke,Mimico NW
283,M8Z,Etobicoke,The Queensway West
284,M8Z,Etobicoke,Royal York South West


### Combine neighborhoods that share a postal code into a single comma-separated row

In [5]:
# Join the neighbourhood names of each (Postcode, Borough) pair with ", ",
# then turn the group keys back into columns and order the rows by postcode.
joined_neighbourhoods = df.groupby(['Postcode', 'Borough'])['Neighbourhood'].apply(', '.join)
df = joined_neighbourhoods.reset_index().sort_values(by='Postcode', ascending=True)
df

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
...,...,...,...
98,M9N,York,Weston
99,M9P,Etobicoke,Westmount
100,M9R,Etobicoke,"Kingsview Village, Martin Grove Gardens, Richv..."
101,M9V,Etobicoke,"Albion Gardens, Beaumond Heights, Humbergate, ..."


### If a cell has a borough but a Not assigned neighborhood, then the neighborhood will be the same as the borough

In [6]:
# Where the neighbourhood is the "Not assigned" placeholder, fall back to the
# borough name. Series.mask does the replacement column-wise instead of
# calling a Python lambda once per row through DataFrame.apply(axis=1).
df['Neighbourhood'] = df['Neighbourhood'].mask(df['Neighbourhood'] == "Not assigned", df['Borough'])

### Use the `.shape` attribute to show the number of rows and columns of the dataframe

In [7]:
# Dimensions of the frame as a (rows, columns) tuple.
df.shape

(103, 3)

In [8]:
# Disabled geocoder lookup, kept for reference; coordinates are loaded from the downloaded CSV below instead.
#import geocoder
#g = geocoder.google('Mountain View, CA')
#g.json

In [9]:
import wget, sys

In [10]:
# Fetch the file holding the geographical coordinates of each postal code.
GEOSPATIAL_DATA_URL = "http://cocl.us/Geospatial_data"
filename = wget.download(GEOSPATIAL_DATA_URL)

  0% [                                                                                ]    0 / 2891100% [................................................................................] 2891 / 2891

In [11]:
# Bail out early when the download step produced no file name; otherwise
# load the coordinates CSV, treating the literal 'NA' as a missing value.
if not filename:
    print("File cannot be downloaded. Please try again!")
    sys.exit(1)

df_csv = pd.read_csv(filename, na_values='NA', index_col=False)

In [12]:
# Left join: every row of df is kept and matched to the coordinates row
# whose "Postal Code" equals its "Postcode".
df_new = df.merge(df_csv, how='left', left_on="Postcode", right_on="Postal Code")

In [17]:
# Keep only the columns of interest, in display order.
output_columns = ['Postcode', 'Borough', 'Neighbourhood', 'Latitude', 'Longitude']
df_new = df_new[output_columns]
df_new

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
...,...,...,...,...,...
98,M9N,York,Weston,43.706876,-79.518188
99,M9P,Etobicoke,Westmount,43.696319,-79.532242
100,M9R,Etobicoke,"Kingsview Village, Martin Grove Gardens, Richv...",43.688905,-79.554724
101,M9V,Etobicoke,"Albion Gardens, Beaumond Heights, Humbergate, ...",43.739416,-79.588437
