# Commercial Line- Analysis on Business Interruption due to COVID-19

## Create Toronto Borough Map

### Scrape Toronto Geographic Information from Wikipedia Page

In [1]:
# import the library we use to open URLs
import urllib.request
import pandas as pd
import numpy as np

In [181]:
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
# open the url using urllib.request and put the HTML into the page variable
page = urllib.request.urlopen(url)

In [182]:
# import the BeautifulSoup library so we can parse HTML and XML documents
from bs4 import BeautifulSoup
# parse the HTML from our URL into the BeautifulSoup parse tree format
soup=BeautifulSoup(page,"lxml")

In [183]:
table=soup.find("table", class_='wikitable sortable')
A=[]
B=[]
C=[]
for row in table.findAll('tr'):
    cells=row.findAll('td')
    if len(cells)==3:
        A.append(cells[0].find(text=True))
        B.append(cells[1].find(text=True))
        C.append(cells[2].find(text=True))

### Data Cleaning

In [184]:
df=pd.DataFrame(A,columns=['PostalCode'])
df['Borough']=B
df['Neighborhood']=C
# Removing extra \n at the end of each word
df.replace('\n','',regex=True, inplace=True)
#Remove rows with "Not assigned" for Borough
df=df[df.Borough!='Not assigned']
df.replace("/",",",regex=True,inplace=True)

In [185]:
df_group=df.groupby('PostalCode')['Neighborhood'].apply(','.join).reset_index()
df_group
df_final=pd.merge(df_group,df[['PostalCode','Borough']])
column_name=['PostalCode','Borough','Neighborhood']
df=df_final.reindex(columns=column_name)
df.head(103)

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Malvern , Rouge"
1,M1C,Scarborough,"Rouge Hill , Port Union , Highland Creek"
2,M1E,Scarborough,"Guildwood , Morningside , West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
...,...,...,...
98,M9N,York,Weston
99,M9P,Etobicoke,Westmount
100,M9R,Etobicoke,"Kingsview Village , St. Phillips , Martin Grov..."
101,M9V,Etobicoke,"South Steeles , Silverstone , Humbergate , Jam..."


### Add Latitude and Longitude into the Dataset

In [186]:
df_geo=pd.read_csv("http://cocl.us/Geospatial_data")
df_geo.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [187]:
df_tot=pd.merge(df,df_geo,left_on='PostalCode', right_on='Postal Code')
del df_tot['Postal Code']
df_tot.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern , Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill , Port Union , Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood , Morningside , West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


### Visualize Toronto Borough Map

In [159]:
!pip install folium
import folium

! pip install geopy
import geopy.geocoders
from geopy.geocoders import Nominatim

from sklearn.cluster import KMeans 
from sklearn.datasets.samples_generator import make_blobs
import requests



In [188]:
df_group=df_tot.groupby('Borough', as_index=False).mean()
df_group

Unnamed: 0,Borough,Latitude,Longitude
0,Central Toronto,43.70198,-79.398954
1,Downtown Toronto,43.654597,-79.383972
2,East Toronto,43.669436,-79.324654
3,East York,43.700303,-79.335851
4,Etobicoke,43.660043,-79.542074
5,Mississauga,43.636966,-79.615819
6,North York,43.750727,-79.429338
7,Scarborough,43.766229,-79.249085
8,West Toronto,43.652653,-79.44929
9,York,43.690797,-79.472633


In [189]:
address = 'Toronto'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


In [190]:
# create map of Toronto using latitude and longitude values

map_toronto1 = folium.Map(location=[latitude, longitude], zoom_start=10)
# add markers to map
for lat, lng, borough in zip(df_group['Latitude'], df_group['Longitude'], df_group['Borough']):
    label = '{}'.format(borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto1)  
    
map_toronto1

## Collect Venues Information using Foursquare API

In [191]:
CLIENT_ID = '13EQVOFZOMNIADMU20H0UWKSKA0AAD5PS3IZ5EOGSQHQMZZ2' # your Foursquare ID
CLIENT_SECRET = '5DKAAAAX1R0LOJKDCZDKYWZAFEF1UBHGRCBWRCTLIWCY5KQG' # your Foursquare Secret
VERSION = '20180604'
LIMIT = 30
print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: 13EQVOFZOMNIADMU20H0UWKSKA0AAD5PS3IZ5EOGSQHQMZZ2
CLIENT_SECRET:5DKAAAAX1R0LOJKDCZDKYWZAFEF1UBHGRCBWRCTLIWCY5KQG


### Explore Borough

In [192]:
def getNearbyVenues(names, latitudes, longitudes, radius=5000):
    LIMIT = 100 # limit of number of venues returned by Foursquare API
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Borough', 
                  'Borough Latitude', 
                  'Borough Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [193]:
toronto_venues = getNearbyVenues(names=df_group['Borough'],
                                   latitudes=df_group['Latitude'],
                                   longitudes=df_group['Longitude']
                                  )
toronto_venues

Central Toronto
Downtown Toronto
East Toronto
East York
Etobicoke
Mississauga
North York
Scarborough
West Toronto
York


Unnamed: 0,Borough,Borough Latitude,Borough Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Central Toronto,43.701980,-79.398954,Balsamico,43.701505,-79.397162,Italian Restaurant
1,Central Toronto,43.701980,-79.398954,"Mysteriously Yours, Mystery Dinner Theatre",43.701222,-79.397251,General Entertainment
2,Central Toronto,43.701980,-79.398954,Jules Cafe Patisserie,43.704138,-79.388413,Dessert Shop
3,Central Toronto,43.701980,-79.398954,Little Sister,43.701552,-79.397163,Indonesian Restaurant
4,Central Toronto,43.701980,-79.398954,Istanbul Cafe & Espresso Bar,43.707891,-79.393049,Café
...,...,...,...,...,...,...,...
995,York,43.690797,-79.472633,Famiglia Baldassarre,43.670388,-79.434999,Italian Restaurant
996,York,43.690797,-79.472633,Buddha Pie,43.661597,-79.479499,Pizza Place
997,York,43.690797,-79.472633,Dutch Dreams,43.682182,-79.418893,Ice Cream Shop
998,York,43.690797,-79.472633,La Bella Managua Restaurant,43.662162,-79.424929,South American Restaurant


In [194]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))
toronto_venues['Venue Category'].unique()

There are 181 uniques categories.


array(['Italian Restaurant', 'General Entertainment', 'Dessert Shop',
       'Indonesian Restaurant', 'Café', 'Supermarket', 'Park', 'Tea Room',
       'Plaza', 'Gym', 'Gastropub', 'Deli / Bodega', 'Bakery',
       'Coffee Shop', 'Yoga Studio', 'Trail', 'Indian Restaurant',
       'Burger Joint', 'Bagel Shop', 'Tapas Restaurant', 'Flower Shop',
       'Pizza Place', 'Liquor Store', 'Spa', 'Field', 'Juice Bar',
       'Grocery Store', 'French Restaurant', 'Garden', 'BBQ Joint',
       'Sushi Restaurant', 'Fish & Chips Shop', 'Wine Bar',
       'Mexican Restaurant', 'Historic Site', 'Ice Cream Shop',
       'Athletics & Sports', 'Farmers Market',
       'Middle Eastern Restaurant', 'Brewery', 'Sandwich Place',
       'Sporting Goods Shop', 'Hotel', 'Concert Hall', 'Gourmet Shop',
       'Bubble Tea Shop', 'Sports Bar', 'Vegetarian / Vegan Restaurant',
       'Dog Run', 'Dance Studio', 'Shopping Mall', 'Bike Shop',
       'Other Great Outdoors', 'Breakfast Spot',
       'South American Re

### Select Business Which are Significantly Affected by COVID-19: Restaurants, Bars, Clubs, Hotels, Gyms

In [195]:
df=toronto_venues[['Borough','Venue','Venue Category']]
restaurant =df['Venue Category'].str.contains('Restaurant')
bar=df['Venue Category'].str.contains('Bar')
club=df['Venue Category'].str.contains('club')
gym=df['Venue Category'].str.contains('Gym')
hotel=df['Venue Category'].str.contains('Hotel')

In [196]:
df1=df[restaurant]
df2=df1.append(df[bar])
df3=df2.append(df[gym])
df4=df3.append(df[hotel])
df=df4.append(df[club])

print(df.shape)
print(df1.shape)
print(df2.shape)
print(df3.shape)
print(df4.shape)
df.reset_index()

(318, 3)
(251, 3)
(286, 3)
(304, 3)
(317, 3)


Unnamed: 0,index,Borough,Venue,Venue Category
0,0,Central Toronto,Balsamico,Italian Restaurant
1,3,Central Toronto,Little Sister,Indonesian Restaurant
2,7,Central Toronto,Bar Buca,Italian Restaurant
3,21,Central Toronto,Grazie Ristorante,Italian Restaurant
4,22,Central Toronto,La Vecchia Ristorante,Italian Restaurant
...,...,...,...,...
313,545,Mississauga,Homewood Suites by Hilton,Hotel
314,551,Mississauga,Best Western Plus Travel Hotel Toronto Airport,Hotel
315,557,Mississauga,TownePlace Suites Mississauga-Airport Corporat...,Hotel
316,607,North York,Maryam Hotel,Hotel


### Classify Restaurants, Bars and Clubs as 'Dining In', Gyms as 'Working Out', Hotel as 'Checking In'

In [197]:
df.loc[df['Venue Category'].str.contains('Gym'),'Type']='working Out'
df.loc[df['Venue Category'].str.contains('club'),'Type']='Dining In'
df.loc[df['Venue Category'].str.contains('Bar'),'Type']='Dining In'
df.loc[df['Venue Category'].str.contains('Restaurant'),'Type']='Dining In'
df.loc[df['Venue Category'].str.contains('Restaurant'),'Type']='Checking In'

In [198]:
df.drop(['Venue Category'], axis=1, inplace= True)
df

Unnamed: 0,Borough,Venue,Type
0,Central Toronto,Balsamico,Checking In
3,Central Toronto,Little Sister,Checking In
7,Central Toronto,Bar Buca,Checking In
21,Central Toronto,Grazie Ristorante,Checking In
22,Central Toronto,La Vecchia Ristorante,Checking In
...,...,...,...
545,Mississauga,Homewood Suites by Hilton,
551,Mississauga,Best Western Plus Travel Hotel Toronto Airport,
557,Mississauga,TownePlace Suites Mississauga-Airport Corporat...,
607,North York,Maryam Hotel,


### Analyze and Prepare Boroughs for Cluster Analysis

In [199]:
business_interupted = pd.get_dummies(df[['Type']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
business_interupted['Borough'] = df['Borough'] 
# move neighborhood column to the first column
fixed_columns = [business_interupted.columns[-1]] + list(business_interupted.columns[:-1])
business_interupted = business_interupted[fixed_columns]
business_interupted.shape

(318, 4)

In [211]:
toronto_grouped = business_interupted.groupby('Borough').mean().reset_index()
toronto_grouped

Unnamed: 0,Borough,Checking In,Dining In,working Out
0,Central Toronto,0.785714,0.107143,0.071429
1,Downtown Toronto,0.681818,0.181818,0.045455
2,East Toronto,0.833333,0.083333,0.055556
3,East York,0.727273,0.136364,0.090909
4,Etobicoke,0.8,0.066667,0.1
5,Mississauga,0.704545,0.113636,0.045455
6,North York,0.846154,0.038462,0.076923
7,Scarborough,0.878049,0.02439,0.097561
8,West Toronto,0.756757,0.243243,0.0
9,York,0.84375,0.15625,0.0


## Borough Cluster Analysis - Commercial business

In [212]:
# import k-means from clustering stage
# set number of clusters
kclusters = 3

toronto_grouped_clustering = toronto_grouped.drop('Borough', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_

array([1, 0, 1, 0, 1, 0, 1, 1, 2, 2])

In [213]:
# create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood
toronto_merged = df_group

# add clustering labels
toronto_merged['Cluster Labels'] = kmeans.labels_

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
#toronto_merged = toronto_merged.join(borough_venues_sorted.set_index('Borough'), on='Borough)

print(toronto_merged.shape)
toronto_merged # check the last columns!

(10, 4)


Unnamed: 0,Borough,Latitude,Longitude,Cluster Labels
0,Central Toronto,43.70198,-79.398954,1
1,Downtown Toronto,43.654597,-79.383972,0
2,East Toronto,43.669436,-79.324654,1
3,East York,43.700303,-79.335851,0
4,Etobicoke,43.660043,-79.542074,1
5,Mississauga,43.636966,-79.615819,0
6,North York,43.750727,-79.429338,1
7,Scarborough,43.766229,-79.249085,1
8,West Toronto,43.652653,-79.44929,2
9,York,43.690797,-79.472633,2


## Visualize the Final Clusters

In [214]:
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

In [215]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=10)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Borough'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=10).add_to(map_clusters)
       
map_clusters

# Analysis on Residential Property due to COVID-19

## Collect Residential Information using Foursquare API

In [40]:
def getNearbyResidence(names, latitudes, longitudes, radius=50000):
    LIMIT = 100 # limit of number of venues returned by Foursquare API
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}&categoryId={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT,
            '4e67e38e036454776db1fb3a')
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Borough', 
                  'Borough Latitude', 
                  'Borough Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [41]:
toronto_residence = getNearbyResidence(names=df_group['Borough'],
                                   latitudes=df_group['Latitude'],
                                   longitudes=df_group['Longitude']
                                  )
toronto_residence

Central Toronto
Downtown Toronto
East Toronto
East York
Etobicoke
Mississauga
North York
Scarborough
West Toronto
York


Unnamed: 0,Borough,Borough Latitude,Borough Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Central Toronto,43.701980,-79.398954,One King West Hotel & Residence,43.649139,-79.377876,Hotel
1,Central Toronto,43.701980,-79.398954,The Ritz-Carlton,43.645330,-79.387089,Hotel
2,Central Toronto,43.701980,-79.398954,Four Seasons Hotel Toronto,43.671796,-79.389457,Hotel
3,Central Toronto,43.701980,-79.398954,Bayview Village,43.768527,-79.385494,Shopping Mall
4,Central Toronto,43.701980,-79.398954,Windsor Arms Hotel,43.668781,-79.390850,Hotel
...,...,...,...,...,...,...,...
995,York,43.690797,-79.472633,Applewood Landmark Condominium,43.618677,-79.600328,Residential Building (Apartment / Condo)
996,York,43.690797,-79.472633,La Viva Towns,43.781140,-79.584920,Residential Building (Apartment / Condo)
997,York,43.690797,-79.472633,CAPREIT Apartments,43.611039,-79.601413,Residential Building (Apartment / Condo)
998,York,43.690797,-79.472633,165 Barrington Ave,43.694220,-79.302417,Residential Building (Apartment / Condo)


In [42]:
toronto_residence['Venue Category'].unique()

array(['Hotel', 'Shopping Mall', 'Hospital', 'College Residence Hall',
       'Residential Building (Apartment / Condo)', 'Other Great Outdoors',
       'Housing Development', 'Building', 'Breakfast Spot',
       'College Quad', 'Office'], dtype=object)

### Test Assisted Living Venue Category - No Sufficient Information from Foursquare

In [130]:
# Interesting findings that Assisted Living was removed from the Foursquare after I attempted to refresht the databased 2 days after I finished analysis, also limited info. from Foursquare does make me doubt about the data credibility
df=toronto_residence
df_test=df[df['Venue Category']=='Assisted Living']
df_test

Unnamed: 0,Borough,Borough Latitude,Borough Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category


### Select Residential Building (Apartment / Condo)

In [131]:
df=toronto_residence
resi=df['Venue Category']=='Residential Building (Apartment / Condo)'
df=df[resi]
df_condo=df[['Borough','Venue','Venue Category']]
df_condo

Unnamed: 0,Borough,Venue,Venue Category
9,Central Toronto,Summerhill Properties - The Summerhill,Residential Building (Apartment / Condo)
13,Central Toronto,Madison Condos,Residential Building (Apartment / Condo)
14,Central Toronto,The Park Villa,Residential Building (Apartment / Condo)
15,Central Toronto,118 Balliol Street,Residential Building (Apartment / Condo)
16,Central Toronto,Minto Roehampton,Residential Building (Apartment / Condo)
...,...,...,...
995,York,Applewood Landmark Condominium,Residential Building (Apartment / Condo)
996,York,La Viva Towns,Residential Building (Apartment / Condo)
997,York,CAPREIT Apartments,Residential Building (Apartment / Condo)
998,York,165 Barrington Ave,Residential Building (Apartment / Condo)


## Import Nursing and Residential Care Facilities Data from Statisticas Canada 

In [132]:
# Read the downloaded  file by its local path , and assign it to variable "df_rf"
df_rf = pd.read_csv(r"C:\Users\e78202\Desktop\projects\Coursera_Capstone\odhf_v1.csv", header= 0,engine ='python')
df_rf.head()

Unnamed: 0,index,facility_name,source_facility_type,odhf_facility_type,provider,unit,street_no,street_name,postal_code,city,province,source_format_str_address,CSDname,CSDuid,Pruid,latitude,longitude
0,1,advanced facial & nasal surgery centre,active acute hospital,Hospitals,Canadian Institute for Health Information,,,,T5M4G5,edmonton,ab,,Edmonton,,48,,
1,2,agecare — beverly centre glenmore,long term care,Nursing and residential care facilities,Canadian Institute for Health Information,,,,T2V4S1,calgary,ab,,Calgary,,48,,
2,3,agecare — beverly centre lake midnapore,long term care,Nursing and residential care facilities,Canadian Institute for Health Information,,,,T2X3S3,calgary,ab,,Calgary,,48,,
3,4,agecare — sagewood seniors community inc,long term care,Nursing and residential care facilities,Canadian Institute for Health Information,,,,T1P0E2,strathmore,ab,,Strathmore,,48,,
4,5,agecare — seton,long term care,Nursing and residential care facilities,Canadian Institute for Health Information,,,,T3M2M3,calgary,ab,,Calgary,,48,,


### Select Nursing and Residental Care Faciliites in Toronto

In [133]:
toronto=df_rf['city']=='toronto'
facility=df_rf['odhf_facility_type']=='Nursing and residential care facilities'
df_temp=df_rf[toronto&facility]
df_temp

Unnamed: 0,index,facility_name,source_facility_type,odhf_facility_type,provider,unit,street_no,street_name,postal_code,city,province,source_format_str_address,CSDname,CSDuid,Pruid,latitude,longitude
2401,2402,147 elder street inc.,retirement home,Nursing and residential care facilities,Province of Ontario,,147,elder street,M3H5G9,toronto,on,,Toronto,3520005.0,35,43.762894,-79.454109
2405,2406,a better way retirement home corp.,retirement home,Nursing and residential care facilities,Province of Ontario,,22,purpledusk trail,M1E4C7,toronto,on,,Toronto,3520005.0,35,43.783661,-79.207601
2406,2407,a-way express courier service,mental health and addiction organization,Nursing and residential care facilities,Province of Ontario,,2168,danforth avenue,M4C1K3,toronto,on,,Toronto,3520005.0,35,43.686643,-79.310667
2421,2422,"accommodation, information and support inc.",mental health and addiction organization,Nursing and residential care facilities,Province of Ontario,,720,spadina avenue,M5S2T9,toronto,on,,Toronto,3520005.0,35,43.665394,-79.403480
2422,2423,across boundaries: an ethnocultural mental hea...,mental health and addiction organization,Nursing and residential care facilities,Province of Ontario,,1,york gate boulevard,M3N3A1,toronto,on,,Toronto,3520005.0,35,43.758995,-79.518807
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6091,6092,woodgreen community services,mental health and addiction organization,Nursing and residential care facilities,Province of Ontario,,1070,queen street e,M4M3M4,toronto,on,,Toronto,3520005.0,35,43.661902,-79.337052
6104,6105,working for change,mental health and addiction organization,Nursing and residential care facilities,Province of Ontario,,250,college street,M5T1R8,toronto,on,,Toronto,3520005.0,35,43.658413,-79.398971
6110,6111,yee hong centre - scarborough finch,long-term care home,Nursing and residential care facilities,Province of Ontario,,60,scottfield drive,M1S5T7,toronto,on,,Toronto,3520005.0,35,43.808717,-79.254665
6111,6112,yee hong centre - scarborough mcnicoll,long-term care home,Nursing and residential care facilities,Province of Ontario,,2311,mcnicoll avenue,M1V5L3,toronto,on,,Toronto,3520005.0,35,43.814232,-79.290217


### Data Preparation for Merging with Toronto Geospatical Data

In [134]:
df=df_temp
array=['retirement home','long term care','long-term care home',]
df=df.loc[df['source_facility_type'].isin(array)]
df_care =df[['facility_name','odhf_facility_type','postal_code']]
df_care.rename(columns = {'facility_name':'Venue','odhf_facility_type':'Venue Category'},inplace=True)
df_care['PostalCode']=df.postal_code.str.slice(0,3)
df_care

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Unnamed: 0,Venue,Venue Category,postal_code,PostalCode
2401,147 elder street inc.,Nursing and residential care facilities,M3H5G9,M3H
2405,a better way retirement home corp.,Nursing and residential care facilities,M1E4C7,M1E
2431,adeline's lodge,Nursing and residential care facilities,M8W1C1,M8W
2433,advent forestview retirement residence,Nursing and residential care facilities,M2R0A8,M2R
2445,alexis lodge retirement residence,Nursing and residential care facilities,M1P2W1,M1P
...,...,...,...,...
6036,weston gardens retirement residence,Nursing and residential care facilities,M6L3C1,M6L
6037,weston terrace care community,Nursing and residential care facilities,M9N3V4,M9N
6039,westside,Nursing and residential care facilities,M9V4J7,M9V
6110,yee hong centre - scarborough finch,Nursing and residential care facilities,M1S5T7,M1S


In [135]:
df_tot

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern , Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill , Port Union , Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood , Morningside , West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
...,...,...,...,...,...
98,M9N,York,Weston,43.706876,-79.518188
99,M9P,Etobicoke,Westmount,43.696319,-79.532242
100,M9R,Etobicoke,"Kingsview Village , St. Phillips , Martin Grov...",43.688905,-79.554724
101,M9V,Etobicoke,"South Steeles , Silverstone , Humbergate , Jam...",43.739416,-79.588437


In [136]:
df_senior=pd.merge(df_care,df_tot,left_on='PostalCode',right_on='PostalCode')
df_senior.drop(['postal_code','Neighborhood','Latitude', 'Longitude','PostalCode'],axis=1,inplace=True)
df_senior

Unnamed: 0,Venue,Venue Category,Borough
0,147 elder street inc.,Nursing and residential care facilities,North York
1,kensington place retirement residence,Nursing and residential care facilities,North York
2,l'chaim retirement homes inc.,Nursing and residential care facilities,North York
3,a better way retirement home corp.,Nursing and residential care facilities,Scarborough
4,ehatare nursing home,Nursing and residential care facilities,Scarborough
...,...,...,...
171,wellesley central place,Nursing and residential care facilities,Downtown Toronto
172,the teddington,Nursing and residential care facilities,Central Toronto
173,the village of humber heights,Nursing and residential care facilities,Etobicoke
174,the wexford,Nursing and residential care facilities,Scarborough


In [205]:
df=pd.concat([df_condo,df_senior],axis=0,join='outer',ignore_index=False)
df

Unnamed: 0,Borough,Venue,Venue Category
9,Central Toronto,Summerhill Properties - The Summerhill,Residential Building (Apartment / Condo)
13,Central Toronto,Madison Condos,Residential Building (Apartment / Condo)
14,Central Toronto,The Park Villa,Residential Building (Apartment / Condo)
15,Central Toronto,118 Balliol Street,Residential Building (Apartment / Condo)
16,Central Toronto,Minto Roehampton,Residential Building (Apartment / Condo)
...,...,...,...
171,Downtown Toronto,wellesley central place,Nursing and residential care facilities
172,Central Toronto,the teddington,Nursing and residential care facilities
173,Etobicoke,the village of humber heights,Nursing and residential care facilities
174,Scarborough,the wexford,Nursing and residential care facilities


### Analyze And Prepare Residentical Information For Cluster Analysis

In [216]:
residence = pd.get_dummies(df[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
residence['Borough'] = df['Borough'] 
# move neighborhood column to the first column
fixed_columns = [residence.columns[-1]] + list(residence.columns[:-1])
residence = residence[fixed_columns]
print(residence.shape)

residence_grouped = residence.groupby('Borough').mean().reset_index()
residence_grouped

(1036, 3)


Unnamed: 0,Borough,Nursing and residential care facilities,Residential Building (Apartment / Condo)
0,Central Toronto,0.179245,0.820755
1,Downtown Toronto,0.173077,0.826923
2,East Toronto,0.075269,0.924731
3,East York,0.064516,0.935484
4,Etobicoke,0.218182,0.781818
5,Mississauga,0.0,1.0
6,North York,0.312,0.688
7,Scarborough,0.322835,0.677165
8,West Toronto,0.158416,0.841584
9,York,0.064516,0.935484


## Borough Cluster Analysis - Residence

In [217]:
# import k-means from clustering stage
# set number of clusters
kclusters = 3

residence_grouped_clustering = residence_grouped.drop('Borough', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(residence_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_

array([1, 1, 0, 0, 1, 0, 2, 2, 1, 0])

In [220]:
# create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood
residence_merged = df_group

# add clustering labels
residence_merged['Cluster Labels'] = kmeans.labels_

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood

print(residence_merged.shape)
residence_merged # check the last columns!

(10, 4)


Unnamed: 0,Borough,Latitude,Longitude,Cluster Labels
0,Central Toronto,43.70198,-79.398954,1
1,Downtown Toronto,43.654597,-79.383972,1
2,East Toronto,43.669436,-79.324654,0
3,East York,43.700303,-79.335851,0
4,Etobicoke,43.660043,-79.542074,1
5,Mississauga,43.636966,-79.615819,0
6,North York,43.750727,-79.429338,2
7,Scarborough,43.766229,-79.249085,2
8,West Toronto,43.652653,-79.44929,1
9,York,43.690797,-79.472633,0


### Visualize the Final Clusters- Residence

In [209]:
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

In [221]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=10)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(residence_merged['Latitude'], residence_merged['Longitude'], residence_merged['Borough'], residence_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=10).add_to(map_clusters)
       
map_clusters