# **Neighborhoods in Toronto**

In [2]:
from bs4 import BeautifulSoup
import requests
import xml
import pandas as pd

**Get table of PostalCodes from Wikipedia**

In [17]:
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

# Scrape the Wikipedia page. The timeout stops the cell from hanging forever on
# a stalled connection, and raise_for_status() surfaces HTTP errors here instead
# of letting an error page be parsed as if it were the article.
response = requests.get(url, timeout=30)
response.raise_for_status()

soup = BeautifulSoup(response.text, 'lxml')

table = soup.find('table') #first table on the page; expected to hold the postal codes and neighborhoods
if table is None:
    raise ValueError('No <table> element found at ' + url)

# Use the table itself as the row container when the markup has no <tbody>
table_body = table.tbody if table.tbody is not None else table

# Populate the list used to create the dataframe: one list of stripped cell
# texts per <tr>. Rows without any <td> cell yield an empty list and are kept,
# so the position of every row in `data` is unchanged.
data = [
    [td.get_text().strip() for td in tr.find_all('td')]
    for tr in table_body.find_all('tr')
]


**Create DataFrame of data parsed from table**

In [18]:
neighborhoods = pd.DataFrame(data, columns=['PostalCode','Borough','Neighborhood']) #create new dataframe
na = 'Not assigned'

# A row that has a borough but no assigned neighborhood takes the borough name
# as its neighborhood. The rows are selected by value rather than by a
# hard-coded row position, so the rule still hits the right rows if the order
# or number of rows in the scraped table changes.
borough_only = (neighborhoods.Borough != na) & (neighborhoods.Neighborhood == na)
neighborhoods.loc[borough_only, 'Neighborhood'] = neighborhoods.loc[borough_only, 'Borough']

neighborhoods = neighborhoods.dropna() #drop empty rows

# After the fill-in above every row with an assigned borough also has a
# neighborhood, so filtering on Borough alone selects the useful rows.
neighborhoods = neighborhoods[neighborhoods.Borough != na]

**Define a function to join neighborhoods located in the same PostalCode and Borough**

In [19]:
def neighborhood_list(grouped):
    """Return a group's neighborhood names as one comma-separated string.

    Parameters
    ----------
    grouped : pandas.DataFrame
        Rows belonging to one group; must provide a 'Neighborhood' column
        whose values are strings.

    Returns
    -------
    str
        The neighborhood names in ascending sorted order, joined with ', '.
    """
    names = grouped['Neighborhood'].tolist()
    names.sort()
    return ', '.join(names)

neighborhoods_group = neighborhoods.groupby(['PostalCode', 'Borough'])

# Pass only the 'Neighborhood' column to apply(): it is the only column
# neighborhood_list reads, and it keeps the grouping columns out of the applied
# frames (applying over grouping columns is deprecated in recent pandas).
# Each group collapses to a single string; reset_index() turns the
# (PostalCode, Borough) index back into ordinary columns.
neighborhoods_grouped = (
    neighborhoods_group[['Neighborhood']]
    .apply(neighborhood_list)
    .reset_index(name='Neighborhood')
)
neighborhoods_grouped.head(12)

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Highland Creek, Port Union, Rouge Hill"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park"
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge"
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


**Number of rows and columns in dataframe**

In [20]:
# (number of rows, number of columns) of the grouped dataframe
neighborhoods_grouped.shape

(103, 3)

**Read in the CSV of coordinates and merge it into the original dataframe**

In [22]:
# Load the coordinates table and rename its 'Postal Code' column to
# 'PostalCode' so that it matches the key column of the other dataframe.
data2 = (
    pd.read_csv('https://cocl.us/Geospatial_data')
    .rename(columns={'Postal Code' : 'PostalCode'})
)

In [23]:
left, right = neighborhoods_grouped, data2

# Merge the coordinates into the grouped dataframe by PostalCode. No `how` is
# given, so pandas' default join type applies.
neighborhoods_location = left.merge(right, on='PostalCode')
neighborhoods_location.head(12)

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Port Union, Rouge Hill",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848
