## Explore the neighborhoods in the city of Toronto

<a id='item1'></a>

# Import libraries

In [102]:
import numpy as np
import pandas as pd
import requests

!conda install -c conda-forge beautifulsoup4 --yes
import bs4
from bs4 import BeautifulSoup

Collecting package metadata (current_repodata.json): done
Solving environment: done


  current version: 4.8.3
  latest version: 4.8.4

Please update conda by running

    $ conda update -n base -c defaults conda



# All requested packages already installed.



# Get Data from URL 

In [129]:
# Download the Wikipedia page that lists the Toronto ("M") postal codes.
WIKI_URL = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
# A timeout keeps the cell from hanging indefinitely on a stalled connection.
response = requests.get(WIKI_URL, timeout=30)
# Fail loudly on a 4xx/5xx response instead of parsing an error page as if it were data.
response.raise_for_status()
request_data = response.text
soup = BeautifulSoup(request_data, "html.parser")

# Find the data in the table cells and construct a dataframe

In [130]:
# Take the first <table> on the page.
# NOTE(review): assumes the first table is the postal-code table - confirm if the page layout changes.
table = soup.find('table')
if table is None:
    raise ValueError('No <table> element found in the downloaded page')

# dataframe will consist of three columns: PostalCode, Borough, and Neighborhood
columns = ['PostalCode', 'Borough', 'Neighborhoods']

# Collect the stripped text of every table row that has exactly three <td> cells.
# Header rows (which contain no <td>) and rows with any other cell count are skipped.
rows = []
for trcelldata in table.find_all('tr'):
    trdata = [tdcelldata.text.strip() for tdcelldata in trcelldata.find_all('td')]
    if len(trdata) == 3:
        rows.append(trdata)

# Build the frame once from the collected rows instead of appending row by row,
# which re-allocates the frame on every insertion.
df = pd.DataFrame(rows, columns=columns)

# Perform the data cleanup and grooming required in the lab

In [131]:
# Drop rows with missing values. dropna() returns a new frame, so the result
# has to be assigned back for the call to have any effect.
df = df.dropna()
# Drop rows whose Borough is 'Not assigned'. The .copy() makes df an independent
# frame so the assignment below modifies it directly rather than a slice.
df = df[df['Borough'] != 'Not assigned'].copy()

# Assign Borough to Neighborhoods where the latter is 'Not assigned'.
# A single .loc assignment replaces the chained indexing (df.col[mask] = ...),
# which pandas warns about and which may not write through to df.
not_assigned = df['Neighborhoods'] == 'Not assigned'
df.loc[not_assigned, 'Neighborhoods'] = df.loc[not_assigned, 'Borough']

# Create a second object grouping by postal code, with neighborhoods joined by ', ' concatenation.
# In the dataset this is already done so no need to do this, but doing it because it is part of assignment
df1 = df.groupby('PostalCode')['Neighborhoods'].apply(', '.join)
# Merge, drop the per-row (not comma separated) column and rename the joined one.
# De-duplicate only AFTER the per-row column is gone: while it is still present,
# rows of the same postal code differ in that column and would not be removed.
df2 = (
    pd.merge(df, df1, on='PostalCode')
    .drop(columns=['Neighborhoods_x'])
    .rename(columns={'Neighborhoods_y': 'Neighborhoods'})
    .drop_duplicates()
    .reset_index(drop=True)
)

df2.shape


(103, 3)

# Copy lab geocoder here 

In [134]:
!conda install -c conda-forge geocoder --yes
import geocoder # import geocoder

def get_geocode_from_postal_code(postal_code, max_attempts=5, retry_delay=1.0):
    """Look up the latitude and longitude of a Toronto postal code.

    The query sent to the geocoder is '<postal_code>, Toronto, Ontario'.
    The lookup is retried when no coordinates come back, but only up to
    ``max_attempts`` times, so a provider that never answers (for example
    because of a missing API key or an exhausted quota) cannot hang the
    notebook in an endless loop.

    Parameters
    ----------
    postal_code : str
        Postal code to geocode.
    max_attempts : int, optional
        Maximum number of lookups to perform before giving up.
    retry_delay : float, optional
        Seconds to sleep between failed attempts.

    Returns
    -------
    tuple
        ``(latitude, longitude)`` taken from the first two entries of the
        geocoder result's ``latlng``.

    Raises
    ------
    RuntimeError
        If no coordinates were obtained within ``max_attempts`` lookups.
    """
    import time  # local import so the function does not depend on another cell

    query = '{}, Toronto, Ontario'.format(postal_code)
    for attempt in range(1, max_attempts + 1):
        g = geocoder.google(query)
        lat_lng_coords = g.latlng
        # Truthiness check treats both None and an empty result as "no coordinates yet".
        if lat_lng_coords:
            latitude = lat_lng_coords[0]
            longitude = lat_lng_coords[1]
            return latitude, longitude
        # Pause before retrying, but not after the final failed attempt.
        if attempt < max_attempts:
            time.sleep(retry_delay)

    raise RuntimeError(
        'Could not geocode {!r} after {} attempts'.format(query, max_attempts)
    )

# Create a dataframe for geospatial data

In [135]:
# Load the coordinates CSV and rename its 'Postal Code' column to 'PostalCode'
# so it matches the key column name used in df2.
geospatial_df = (
    pd.read_csv('http://cocl.us/Geospatial_data')
    .rename(columns={'Postal Code': 'PostalCode'})
)
# Preview the first rows (the original `geospatial_df.he` raised AttributeError).
geospatial_df.head()

# Create the dataframe required with neighborhoods and geo data

In [128]:
# Join the neighborhood table with the coordinate table on their shared
# 'PostalCode' column (default inner join) to get the required dataframe.
df_neighborhood_geo = df2.merge(geospatial_df, on='PostalCode')
df_neighborhood_geo.head()

Unnamed: 0,PostalCode,Borough,Neighborhoods,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
