# **Neighborhoods in Toronto**

In [76]:
from bs4 import BeautifulSoup
import requests
import xml
import pandas as pd

**Get the table of postal codes from Wikipedia**

In [97]:
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

# Fetch the page. The timeout keeps the cell from hanging forever on a stalled
# connection, and raise_for_status() turns an HTTP error (404, 503, ...) into an
# exception here instead of letting an error page be parsed as if it were data.
response = requests.get(url, timeout=30)
response.raise_for_status()

soup = BeautifulSoup(response.text, 'lxml')

# First <table> on the page; this is expected to be the postal-code table.
table = soup.find('table')
if table is None:
    raise ValueError('No <table> element found at ' + url)

# Use the explicit <tbody> when the markup has one, otherwise the table itself.
x = table.tbody if table.tbody is not None else table

# One list of stripped cell texts per <tr>. A row with no <td> cells (for example a
# header row built from <th>) yields an empty list; it is kept so that the position
# of every other row stays the same as in the page.
data = [
    [td.get_text().strip() for td in tr.find_all('td')]
    for tr in x.find_all('tr')
]


**Create DataFrame of data parsed from table**

In [98]:
na = 'Not assigned'  # placeholder text the table uses for a missing borough/neighborhood

# One row per scraped table row
neighborhoods = pd.DataFrame(data, columns=['PostalCode', 'Borough', 'Neighborhood'])

# A row that has a borough but no neighborhood takes the borough name as its
# neighborhood. A boolean mask is used instead of a hard-coded row label and value,
# so the rule applies to every such row and keeps working if the row order changes.
borough_only = (neighborhoods['Neighborhood'] == na) & (neighborhoods['Borough'] != na)
neighborhoods.loc[borough_only, 'Neighborhood'] = neighborhoods.loc[borough_only, 'Borough']

# Drop rows with missing cells (e.g. rows built from an empty list)
neighborhoods = neighborhoods.dropna()

# Keep only rows with an assigned borough. After the fill above, any row whose
# neighborhood is still 'Not assigned' also has an unassigned borough, so this
# single condition removes both kinds of unusable rows.
neighborhoods = neighborhoods[neighborhoods['Borough'] != na]

**Define a function that joins the neighborhoods sharing the same PostalCode and Borough**

In [99]:
def neighborhood_list(grouped):
    """Combine one group's neighborhood names into a single string.

    Parameters
    ----------
    grouped : object supporting ``grouped['Neighborhood'].tolist()``
        In this notebook, the sub-frame that ``groupby(...).apply`` passes in
        for each group.

    Returns
    -------
    str
        The names in ascending sorted order, joined with ``', '``.
    """
    names = grouped['Neighborhood'].tolist()
    names.sort()
    return ', '.join(names)

# Group the rows that share a postal code and borough
neighborhoods_group = neighborhoods.groupby(['PostalCode', 'Borough'])

# Pass only the 'Neighborhood' column to apply(): neighborhood_list reads nothing
# else, and selecting it explicitly avoids pandas' deprecated behaviour of handing
# the grouping columns to the applied function (a warning in pandas 2.2+).
# apply() yields one joined string per group; reset_index(name=...) turns the group
# keys back into ordinary columns and names the string column 'Neighborhood'.
neighborhoods_grouped = (
    neighborhoods_group[['Neighborhood']]
    .apply(neighborhood_list)
    .reset_index(name='Neighborhood')
)
neighborhoods_grouped.head(12)

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Highland Creek, Port Union, Rouge Hill"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park"
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge"
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


**Number of rows and columns in dataframe**

In [96]:
# (rows, columns) of the grouped frame; each row is one (PostalCode, Borough) group
neighborhoods_grouped.shape

(103, 3)