In [2]:
# Explore and cluster the neighborhoods in Toronto
!conda install -c conda-forge folium=0.5.0 
from geopy.geocoders import Nominatim
import folium

Solving environment: done

# All requested packages already installed.



In [3]:
import requests
import pandas as pd
from bs4 import BeautifulSoup
print('Libraries installed!')

# Download the Wikipedia page that lists the Canadian postal codes starting with "M".
# The timeout stops the cell from blocking indefinitely on a stalled connection, and
# raise_for_status() reports an HTTP error here instead of as a parsing failure below.
response = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M', timeout=30)
response.raise_for_status()
data = response.text
can_data = BeautifulSoup(data, 'html.parser')

# The postal-code data is read from the first <table> element of the page.
table = can_data.find('table')
if table is None:
    raise ValueError('No <table> element found on the postal code page')

postalCodeList = []
boroughList = []
neighborhoodList = []

for row in table.find_all('tr'):
    cells = row.find_all('td')
    # Three <td> cells are read per row; rows with fewer (including rows with none)
    # are skipped so the indexing below cannot raise IndexError.
    if len(cells) >= 3:
        # Strip surrounding whitespace/newlines from every cell so that later
        # comparisons such as == 'Not assigned' and the postal-code join match exactly.
        postalCodeList.append(cells[0].text.strip())
        boroughList.append(cells[1].text.strip())
        neighborhoodList.append(cells[2].text.strip())

tor_neighorhood = [('PostalCode', postalCodeList),
                   ('Borough', boroughList),
                   ('Neighborhood', neighborhoodList)]
# Pass the column order explicitly so it does not depend on dict ordering.
df = pd.DataFrame(dict(tor_neighorhood), columns=['PostalCode', 'Borough', 'Neighborhood'])
# df

# Keep only the rows whose borough is something other than 'Not assigned'.
has_borough = df['Borough'] != 'Not assigned'
df_drop = df.loc[has_borough].reset_index(drop=True)

# Collapse to one row per (PostalCode, Borough) pair; the remaining string
# column values of each group are concatenated with commas.
df_grp = df_drop.groupby(['PostalCode', 'Borough'], as_index=False).agg(','.join)

# Rows whose neighborhood is 'Not assigned' take their borough name instead.
neigh_row = df_grp['Neighborhood'].eq('Not assigned')
df_grp.loc[neigh_row, 'Neighborhood'] = df_grp.loc[neigh_row, 'Borough']

# df_result is a second name for the same object as df_grp (no copy is made).
df_result = df_grp



# Part 2: Getting coordinates and add to the Toronto DataFrame

!wget -q -O "toronto_coordinates.csv" http://cocl.us/Geospatial_data
print('Coordinates downloaded!')
coors = pd.read_csv('toronto_coordinates.csv')


# print(coors.shape)
# coors.head()

toronto_df_temp = df_result.set_index('PostalCode')
coors_temp = coors.set_index('Postal Code')
toronto_df_coors = pd.concat([toronto_df_temp, coors_temp], axis=1, join='inner')

toronto_df_coors.index.name = 'PostalCode'
toronto_df_coors.reset_index(inplace=True)

print(toronto_df_coors.shape)
toronto_df_coors.head()



Libraries installed!
Coordinates downloaded!
(103, 5)


Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [4]:

address = 'Toronto, Ontario'

# Look up the city's coordinates through the Nominatim geocoding service.
geolocator = Nominatim(user_agent="tl-toronto-neigh")
# An explicit timeout gives the remote service more time to answer than the library default.
location = geolocator.geocode(address, timeout=10)
# geocode() returns None when nothing matches; fail with a clear message instead of
# an AttributeError on the attribute access below.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinates of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinates of Toronto are 43.653963, -79.387207.


In [5]:
# Base map centred on the latitude/longitude obtained for Toronto.
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# Columns consumed per marker, in the order they are unpacked below.
marker_columns = ['Latitude', 'Longitude', 'PostalCode', 'Borough', 'Neighborhood']

for lat, lng, post, borough, neigh in zip(*(toronto_df_coors[col] for col in marker_columns)):
    # Popup text has the form "<borough> (<postal code>): <neighborhoods>".
    label = "{} ({}): {}".format(borough, post, neigh)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=folium.Popup(label, parse_html=True),
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        # NOTE(review): parse_html looks like a Popup option rather than a
        # CircleMarker one - confirm it has no effect here before removing it.
        parse_html=False)
    marker.add_to(map_toronto)

# Last expression of the cell: renders the map inline.
map_toronto