In [1]:
import pandas as pd
import requests
from bs4 import BeautifulSoup

In [2]:
List_url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
source = requests.get(List_url).text

In [3]:
soup = BeautifulSoup(source, 'xml')

In [4]:
table=soup.find('table')

In [5]:
column_names=['PostalCode','Borough','Neighbourhood']
df = pd.DataFrame(columns=column_names)

In [6]:
for tr_cell in table.find_all('tr'):
    row_data=[]
    for td_cell in tr_cell.find_all('td'):
        row_data.append(td_cell.text.strip())
    if len(row_data)==3:
        df.loc[len(df)] = row_data

In [7]:
df.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood
0,M1A,Not assigned,
1,M2A,Not assigned,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Regent Park / Harbourfront


In [8]:
df=df[df['Borough']!='Not assigned']

In [9]:
df[df['Neighbourhood']=='Not assigned']=df['Borough']
df.head()

ValueError: cannot set using a list-like indexer with a different length than the value

In [10]:
df.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Regent Park / Harbourfront
5,M6A,North York,Lawrence Manor / Lawrence Heights
6,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government


In [11]:
temp_df=df.groupby('PostalCode')['Neighbourhood'].apply(lambda x: "%s" % ', '.join(x))
temp_df=temp_df.reset_index(drop=False)
temp_df.rename(columns={'Neighbourhood':'Neighbourhood_joined'},inplace=True)

In [12]:
df1_grouped = pd.merge(df, temp_df, on='PostalCode')

In [13]:
df1_grouped.drop(['Neighbourhood'],axis=1,inplace=True)

In [14]:
df1_grouped.drop_duplicates(inplace=True)

In [15]:
df1_grouped.rename(columns={'Neighbourhood_joined':'Neighbourhood'},inplace=True)
df1_grouped.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Regent Park / Harbourfront
3,M6A,North York,Lawrence Manor / Lawrence Heights
4,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government


In [16]:
df1_grouped.shape

(103, 3)

In [17]:
def get_geocode(postal_code):
    # initialize your variable to None
    lat_lng_coords = None
    while(lat_lng_coords is None):
        g = geocoder.google('{}, Toronto, Ontario'.format(postal_code))
        lat_lng_coords = g.latlng
    latitude = lat_lng_coords[0]
    longitude = lat_lng_coords[1]
    return latitude,longitude

In [18]:
ll=pd.read_csv('http://cocl.us/Geospatial_data')

In [19]:
ll.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [21]:
ll.rename(columns={'Postal Code':'PostalCode'},inplace=True)
geo_merged = pd.merge(ll, df1_grouped, on='PostalCode')

In [22]:
geo_data=geo_merged[['PostalCode','Borough','Neighbourhood','Latitude','Longitude']]

In [23]:
geo_data.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,Malvern / Rouge,43.806686,-79.194353
1,M1C,Scarborough,Rouge Hill / Port Union / Highland Creek,43.784535,-79.160497
2,M1E,Scarborough,Guildwood / Morningside / West Hill,43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [24]:
column_names = ["PostalCode", "Borough", "Neighborhood", "Latitude", "Longtitude"]
test_df = pd.DataFrame(columns=column_names)

test_list = ["M5G", "M2H", "M4B", "M1J", "M4G", "M4M", "M1R", "M9V", "M9L", "M5V", "M1B", "M5A"]

for postcode in test_list:
    test_df = test_df.append(geo_merged[geo_merged['PostalCode'] == postcode], ignore_index=True)

cols = [2,4]
test_df.drop(test_df.columns[cols],axis=1,inplace=True)
    
test_df

Unnamed: 0,PostalCode,Borough,Latitude,Longitude,Neighbourhood
0,M5G,Downtown Toronto,43.657952,-79.387383,Central Bay Street
1,M2H,North York,43.803762,-79.363452,Hillcrest Village
2,M4B,East York,43.706397,-79.309937,Parkview Hill / Woodbine Gardens
3,M1J,Scarborough,43.744734,-79.239476,Scarborough Village
4,M4G,East York,43.70906,-79.363452,Leaside
5,M4M,East Toronto,43.659526,-79.340923,Studio District
6,M1R,Scarborough,43.750072,-79.295849,Wexford / Maryvale
7,M9V,Etobicoke,43.739416,-79.588437,South Steeles / Silverstone / Humbergate / Jam...
8,M9L,North York,43.756303,-79.565963,Humber Summit
9,M5V,Downtown Toronto,43.628947,-79.39442,CN Tower / King and Spadina / Railway Lands / ...


In [26]:
test_df['Neighbourhood'].replace("/", ",",regex=True, inplace=True)
test_df

Unnamed: 0,PostalCode,Borough,Latitude,Longitude,Neighbourhood
0,M5G,Downtown Toronto,43.657952,-79.387383,Central Bay Street
1,M2H,North York,43.803762,-79.363452,Hillcrest Village
2,M4B,East York,43.706397,-79.309937,"Parkview Hill , Woodbine Gardens"
3,M1J,Scarborough,43.744734,-79.239476,Scarborough Village
4,M4G,East York,43.70906,-79.363452,Leaside
5,M4M,East Toronto,43.659526,-79.340923,Studio District
6,M1R,Scarborough,43.750072,-79.295849,"Wexford , Maryvale"
7,M9V,Etobicoke,43.739416,-79.588437,"South Steeles , Silverstone , Humbergate , Jam..."
8,M9L,North York,43.756303,-79.565963,Humber Summit
9,M5V,Downtown Toronto,43.628947,-79.39442,"CN Tower , King and Spadina , Railway Lands , ..."
