### Build a dataframe of the postal code of each neighborhood along with the borough name and neighborhood name in Toronto.
### Get the geographical coordinates of the neighborhoods in Toronto.

**Import libraries**

In [1]:
import pandas as pd 
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)
import numpy as np
import json 
from pandas.io.json import json_normalize 
from geopy.geocoders import Nominatim
import requests
from bs4 import BeautifulSoup
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
import folium 
import re

**Scrape data from the Wikipedia page into a DataFrame**

In [2]:
# Download the Wikipedia list of Canadian postal codes beginning with "M".
# The timeout and explicit status check make a network problem or an HTTP
# error page fail here, loudly, instead of as a confusing parse error below.
response = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M', timeout=30)
response.raise_for_status()
data = response.text
soup = BeautifulSoup(data, 'html.parser')

# The first <table> on the page is the one that gets parsed.
postal_code_table = soup.find('table')
if postal_code_table is None:
    raise ValueError("No <table> element found on the postal code page")

postal_Code_List = []
borough_List = []
neighborhood_List = []
for row in postal_code_table.find_all('tr'):
    for cell in row.find_all('td'):
        # The parser reads the postal code from the cell's first <b> tag and
        # the location from its first <span>, split as "Borough(Neighborhoods)".
        postalcode = cell.find('b')
        location_values = cell.find_all(['span'])
        if postalcode is None or not location_values:
            # Cell does not have the expected markup; skip it rather than crash.
            continue

        location_value_text = location_values[0].text.split('(')
        borough = location_value_text[0]
        if borough == "Not assigned" or len(location_value_text) < 2:
            # No parenthesised neighborhood part available for this cell.
            neighborhood = ""
        else:
            neighborhood = location_value_text[1].split(')')[0]

        # Append to all three lists together so they always stay the same length.
        postal_Code_List.append(postalcode.text)
        borough_List.append(borough)
        neighborhood_List.append(neighborhood)

toronto_df = pd.DataFrame({"PostalCode": postal_Code_List,
                           "Borough": borough_List,
                           "Neighborhood": neighborhood_List})

toronto_df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A,Not assigned,
1,M2A,Not assigned,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Regent Park / Harbourfront


**Drop rows whose borough is "Not assigned"**

In [3]:
# Keep only the rows that have a borough assigned, then renumber them from 0.
has_borough = toronto_df["Borough"].ne("Not assigned")
toronto_df_dropna = toronto_df.loc[has_borough].reset_index(drop=True)
toronto_df_dropna.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Regent Park / Harbourfront
3,M6A,North York,Lawrence Manor / Lawrence Heights
4,M7A,Queen's Park,Ontario Provincial Government


**Group neighborhoods that share the same postal code and borough**

In [4]:
def _join_with_commas(values):
    """Combine the values of one group into a single comma-separated string."""
    return ", ".join(values)


# One row per (PostalCode, Borough) pair; the remaining column is collapsed
# into a single joined string per group.
toronto_df_grouped = (
    toronto_df_dropna
    .groupby(["PostalCode", "Borough"], as_index=False)
    .agg(_join_with_commas)
)
toronto_df_grouped.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,Malvern / Rouge
1,M1C,Scarborough,Rouge Hill / Port Union / Highland Creek
2,M1E,Scarborough,Guildwood / Morningside / West Hill
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


**For Neighborhood="Not assigned", make the value the same as Borough**

In [5]:
# Rows whose neighborhood is missing take the borough name instead.
# The scraping step stores a missing neighborhood as an empty string, so both
# the literal "Not assigned" and "" are treated as unassigned here.
# The write goes through .loc on the DataFrame itself: assigning into the row
# yielded by iterrows() only changes a temporary copy and is silently lost.
unassigned = toronto_df_grouped["Neighborhood"].isin(["Not assigned", ""])
toronto_df_grouped.loc[unassigned, "Neighborhood"] = toronto_df_grouped.loc[unassigned, "Borough"]

toronto_df_grouped.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,Malvern / Rouge
1,M1C,Scarborough,Rouge Hill / Port Union / Highland Creek
2,M1E,Scarborough,Guildwood / Morningside / West Hill
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


 **Check whether it is the same as required by the question**

In [6]:
# Spot-check a fixed list of postal codes against the cleaned frame.
column_names = ["PostalCode", "Borough", "Neighborhood"]

test_list = ["M6A", "M7A", "M9A", "M3B", "M6B", "M1E", "M4G", "M6H", "M3J", "M6J", "M5K", "M6L"]

# Collect one filtered slice per postcode (in test_list order) and concatenate
# once. DataFrame.append was removed in pandas 2.0, and growing a frame inside
# a loop copies all accumulated rows on every iteration.
matching_rows = [
    toronto_df_grouped[toronto_df_grouped["PostalCode"] == postcode]
    for postcode in test_list
]
# reindex pins the column set and order to column_names.
test_df = pd.concat(matching_rows, ignore_index=True).reindex(columns=column_names)

test_df

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M6A,North York,Lawrence Manor / Lawrence Heights
1,M7A,Queen's Park,Ontario Provincial Government
2,M9A,Etobicoke,Islington Avenue
3,M3B,North York,Don Mills
4,M6B,North York,Glencairn
5,M1E,Scarborough,Guildwood / Morningside / West Hill
6,M4G,East York,Leaside
7,M6H,West Toronto,Dufferin / Dovercourt Village
8,M3J,North York,Northwood Park / York University
9,M6J,West Toronto,Little Portugal / Trinity


**Finally, print the number of rows of the cleaned dataframe**

In [7]:
# Display the (rows, columns) tuple of the cleaned, grouped frame.
toronto_df_grouped.shape

(103, 3)

**Load the coordinates from the csv file on Coursera**

In [11]:
# Location of the geospatial coordinates CSV.
# NOTE: this is a machine-specific absolute path; point it at your own copy of
# the file when running the notebook elsewhere.
# A raw string is used so the backslashes are taken literally. The previous
# non-raw literal relied on invalid escape sequences (\d, \D, \G), which emit
# a SyntaxWarning on recent Python versions. The resulting path is unchanged.
COORDINATES_CSV_PATH = r"C:\Users\dhanashree.nangre\Downloads\Geospatial_Coordinates.csv"

coordinates = pd.read_csv(COORDINATES_CSV_PATH)
coordinates.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [15]:
# Rename the key column to "PostalCode" (no space), the column name later used
# as the merge key.
postal_code_alias = {"Postal Code": "PostalCode"}
coordinates.rename(postal_code_alias, axis="columns", inplace=True)
coordinates.head()

Unnamed: 0,PostalCode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


**Merge two tables to get the coordinates**

In [13]:
# Left join: keep every row of the grouped frame and attach the columns of
# `coordinates` wherever the postal code matches.
toronto_df_new = pd.merge(
    left=toronto_df_grouped,
    right=coordinates,
    how="left",
    on="PostalCode",
)
toronto_df_new.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,Malvern / Rouge,43.806686,-79.194353
1,M1C,Scarborough,Rouge Hill / Port Union / Highland Creek,43.784535,-79.160497
2,M1E,Scarborough,Guildwood / Morningside / West Hill,43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


**Finally, check to make sure the coordinates are added as required by the question**

In [17]:
# Spot-check a fixed list of postal codes against the merged frame.
column_names = ["PostalCode", "Borough", "Neighborhood", "Latitude", "Longitude"]

test_list = ["M6A", "M7A", "M9A", "M3B", "M6B", "M1E", "M4G", "M6H", "M3J", "M6J", "M5K", "M6L"]

# Collect one filtered slice per postcode (in test_list order) and concatenate
# once. DataFrame.append was removed in pandas 2.0, and growing a frame inside
# a loop copies all accumulated rows on every iteration. Concatenating only the
# real slices also keeps the dtypes of the source columns.
matching_rows = [
    toronto_df_new[toronto_df_new["PostalCode"] == postcode]
    for postcode in test_list
]
# reindex pins the column set and order to column_names.
test_df = pd.concat(matching_rows, ignore_index=True).reindex(columns=column_names)

test_df

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M6A,North York,Lawrence Manor / Lawrence Heights,43.718518,-79.464763
1,M7A,Queen's Park,Ontario Provincial Government,43.662301,-79.389494
2,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242
3,M3B,North York,Don Mills,43.745906,-79.352188
4,M6B,North York,Glencairn,43.709577,-79.445073
5,M1E,Scarborough,Guildwood / Morningside / West Hill,43.763573,-79.188711
6,M4G,East York,Leaside,43.70906,-79.363452
7,M6H,West Toronto,Dufferin / Dovercourt Village,43.669005,-79.442259
8,M3J,North York,Northwood Park / York University,43.76798,-79.487262
9,M6J,West Toronto,Little Portugal / Trinity,43.647927,-79.41975
