# Creating DataFrame with Geospatial Data

In this notebook, a dataframe similar to the one last created will be rendered with geospatial data.

First, I read the csv file provided into a dataframe called "df_geo".

In [2]:
import pandas as pd

df_geo = pd.read_csv('Geospatial_Coordinates.csv') 

In [3]:
print(df_geo.shape)
df_geo.head()

(103, 3)


Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [4]:
df_geo['Postal Code'].value_counts

<bound method IndexOpsMixin.value_counts of 0      M1B
1      M1C
2      M1E
3      M1G
4      M1H
5      M1J
6      M1K
7      M1L
8      M1M
9      M1N
10     M1P
11     M1R
12     M1S
13     M1T
14     M1V
15     M1W
16     M1X
17     M2H
18     M2J
19     M2K
20     M2L
21     M2M
22     M2N
23     M2P
24     M2R
25     M3A
26     M3B
27     M3C
28     M3H
29     M3J
      ... 
73     M6C
74     M6E
75     M6G
76     M6H
77     M6J
78     M6K
79     M6L
80     M6M
81     M6N
82     M6P
83     M6R
84     M6S
85     M7A
86     M7R
87     M7Y
88     M8V
89     M8W
90     M8X
91     M8Y
92     M8Z
93     M9A
94     M9B
95     M9C
96     M9L
97     M9M
98     M9N
99     M9P
100    M9R
101    M9V
102    M9W
Name: Postal Code, Length: 103, dtype: object>

I then recreate the dataframe of postal codes of Canada.

In [5]:
import requests
from bs4 import BeautifulSoup

url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
response = requests.get(url)
soup = BeautifulSoup(response.text, 'html.parser')

table = soup.find('table', {'class':'wikitable sortable'}).tbody

rows = table.find_all('tr')

columns = [v.text.replace('\n','') for v in rows[0].find_all('th')]

df = pd.DataFrame(columns=columns)

for i in range(1, len(rows)):
    tds = rows[i].find_all('td')
    
    if len(tds) == 3:
        values = [tds[0].text.replace('\n',''), 
                  tds[1].text.replace('\n',''), 
                  tds[2].text.replace('\n','')]
    else:
        values = [td.text.replace('\n','') for td in tds]
    
    df = df.append(pd.Series(values, index=columns), ignore_index=True)

df = df[df.Borough != 'Not assigned']
df

Unnamed: 0,Postal Code,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
8,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
9,M1B,Scarborough,"Malvern, Rouge"
11,M3B,North York,Don Mills
12,M4B,East York,"Parkview Hill, Woodbine Gardens"
13,M5B,Downtown Toronto,"Garden District, Ryerson"


Now I filter through the original df to gather corresponding borough names.

In [6]:
listOfBoroughs = []

for postalCode in df_geo['Postal Code']:
    boroughName = df.loc[df['Postal Code']==postalCode, 'Borough'].values[0]
    listOfBoroughs.append(boroughName)
    
print(listOfBoroughs)

['Scarborough', 'Scarborough', 'Scarborough', 'Scarborough', 'Scarborough', 'Scarborough', 'Scarborough', 'Scarborough', 'Scarborough', 'Scarborough', 'Scarborough', 'Scarborough', 'Scarborough', 'Scarborough', 'Scarborough', 'Scarborough', 'Scarborough', 'North York', 'North York', 'North York', 'North York', 'North York', 'North York', 'North York', 'North York', 'North York', 'North York', 'North York', 'North York', 'North York', 'North York', 'North York', 'North York', 'North York', 'North York', 'East York', 'East York', 'East Toronto', 'East York', 'East York', 'East York', 'East Toronto', 'East Toronto', 'East Toronto', 'Central Toronto', 'Central Toronto', 'Central Toronto', 'Central Toronto', 'Central Toronto', 'Central Toronto', 'Downtown Toronto', 'Downtown Toronto', 'Downtown Toronto', 'Downtown Toronto', 'Downtown Toronto', 'Downtown Toronto', 'Downtown Toronto', 'Downtown Toronto', 'Downtown Toronto', 'Downtown Toronto', 'Downtown Toronto', 'Downtown Toronto', 'North Yo

In [7]:
len(listOfBoroughs)

103

In [8]:
df_geo['Borough'] = listOfBoroughs
df_geo

Unnamed: 0,Postal Code,Latitude,Longitude,Borough
0,M1B,43.806686,-79.194353,Scarborough
1,M1C,43.784535,-79.160497,Scarborough
2,M1E,43.763573,-79.188711,Scarborough
3,M1G,43.770992,-79.216917,Scarborough
4,M1H,43.773136,-79.239476,Scarborough
5,M1J,43.744734,-79.239476,Scarborough
6,M1K,43.727929,-79.262029,Scarborough
7,M1L,43.711112,-79.284577,Scarborough
8,M1M,43.716316,-79.239476,Scarborough
9,M1N,43.692657,-79.264848,Scarborough


I redo the same with neighborhoods this time. 

In [9]:
listOfNeighborhoods = []

for postalCode in df_geo['Postal Code']:
    neighborhoodName = df.loc[df['Postal Code']==postalCode, 'Neighborhood'].values[0]
    listOfNeighborhoods.append(neighborhoodName)
    
print(listOfNeighborhoods)

['Malvern, Rouge', 'Rouge Hill, Port Union, Highland Creek', 'Guildwood, Morningside, West Hill', 'Woburn', 'Cedarbrae', 'Scarborough Village', 'Kennedy Park, Ionview, East Birchmount Park', 'Golden Mile, Clairlea, Oakridge', 'Cliffside, Cliffcrest, Scarborough Village West', 'Birch Cliff, Cliffside West', 'Dorset Park, Wexford Heights, Scarborough Town Centre', 'Wexford, Maryvale', 'Agincourt', "Clarks Corners, Tam O'Shanter, Sullivan", "Milliken, Agincourt North, Steeles East, L'Amoreaux East", "Steeles West, L'Amoreaux West", 'Upper Rouge', 'Hillcrest Village', 'Fairview, Henry Farm, Oriole', 'Bayview Village', 'York Mills, Silver Hills', 'Willowdale, Newtonbrook', 'Willowdale, Willowdale East', 'York Mills West', 'Willowdale, Willowdale West', 'Parkwoods', 'Don Mills', 'Don Mills', 'Bathurst Manor, Wilson Heights, Downsview North', 'Northwood Park, York University', 'Downsview', 'Downsview', 'Downsview', 'Downsview', 'Victoria Village', 'Parkview Hill, Woodbine Gardens', 'Woodbine 

In [10]:
df_geo['Neighborhood'] = listOfNeighborhoods
df_geo

Unnamed: 0,Postal Code,Latitude,Longitude,Borough,Neighborhood
0,M1B,43.806686,-79.194353,Scarborough,"Malvern, Rouge"
1,M1C,43.784535,-79.160497,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,43.763573,-79.188711,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,43.770992,-79.216917,Scarborough,Woburn
4,M1H,43.773136,-79.239476,Scarborough,Cedarbrae
5,M1J,43.744734,-79.239476,Scarborough,Scarborough Village
6,M1K,43.727929,-79.262029,Scarborough,"Kennedy Park, Ionview, East Birchmount Park"
7,M1L,43.711112,-79.284577,Scarborough,"Golden Mile, Clairlea, Oakridge"
8,M1M,43.716316,-79.239476,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West"
9,M1N,43.692657,-79.264848,Scarborough,"Birch Cliff, Cliffside West"


In [11]:
df_geo.to_csv(r'dataframe_geo.csv', index=False)