# IBM Data Science Coursera Capstone Project
# Using Machine Learning to find the best housing option in Toronto

## Objective: Identify the neighborhood in Toronto, ON, Canada that fits best to a user's preferences 

# Section I: Data Acquisition

Goal: Obtain a distance matrix for each potential venue of interest to the center of each neighborhood

### Data

Public transport, schools, health centers, neighborhood metrics, and places of interests were obtained from:
https://open.toronto.ca/catalogue/

The datasets used are stored in the Datasets folder

Additionally, commercial venues were requested to the FourSquare API


### 1. Packages required

In [37]:
import numpy as np
import pandas as pd
from pandas import json_normalize
from geopy import distance

import json # library to handle JSON files
import requests # library to handle requests

### 2. Parameters

#### 2.2.1 Foursquare credentials

In [38]:
with open('FS_ID.json') as json_data:
    FS_ID = json.load(json_data)

In [39]:
CLIENT_ID = FS_ID["ID"] # your Foursquare ID
CLIENT_SECRET = FS_ID["Secret"] # your Foursquare Secret

# other tor coordinates 43.741667, -79.373333


### 3. Dataset directories

In [3]:
neighborhood_dir = "Datasets/Neighbourhoods.csv"
neigh_soc_dir = "Datasets/neighbourhood-profiles-2016-csv.csv"
neigh_crime_dir = "Datasets/Neighbourhood Crime Rates.csv"


transport_dir = "Datasets/stops.txt"
health_dir = "Datasets/Health Services.csv"
schools_dir = "Datasets/School locations-all types data.csv"
poi_dir = "Datasets/Places of Interest and Attractions.csv"
comm_ctr_dir = "Datasets/Recreation.csv"

### 4. Functions

In [4]:
# function that calculates the distance in meters between two points
def calc_distance_df(TO_df,TO_lat,TO_lon,FROM_df,FROM_lat,FROM_lon,FROM_name):
    ''' This function calculates the distance (in meters) between two coordinates and applies it over two datasets.
        More specifically, it will match the distance of each entry from the "TO" dataset to the first entry of the "FROM" dataset.
        Next, it will append the calculated distance as a new column of "TO" the dataset with the same name as the "FROM" column.
        ***Therefore it is fundamental that the "TO" dataset does not contain columns named as in the "FROM" dataset, otherwise, there
        is risk of overwritting****
        This process will be iterated over the remaining entries of the "FROM" dataset.
        
        
        Requirements:
        geopy package
        
        TO_df, FROM_df = dataset containing the distance to be compared
        TO_lat, FROM_lat = strings indicating the column names corresponding to the column names where latitude coordinates (in decimal format) are located
        TO_lon, FROM_lon = strings similiar to TO_lat but for longitude coordinates
        FROM_name = string indicating the column where the place to be compared is located

        The distance is calculated using the distance.distance method from the geopy package.
    '''
    from geopy import distance
    
    for i in range (FROM_df.shape[0]):
        coord_from = [FROM_df[FROM_lat][i],FROM_df[FROM_lon][i]]
        dist = []
        
        for j in range (TO_df.shape[0]):
            coord_to = [TO_df[TO_lat][j],TO_df[TO_lon][j]]
            temp_dis = distance.distance(coord_from,coord_to).meters
            dist.append(temp_dis)
            
        TO_df[FROM_df[FROM_name][i]] = dist
        
        print(i+1,"set(s) calculated! Remaining: ", FROM_df.shape[0]-i-1, "set(s)")
    
    

### 5. Extract data 

#### 5.1 Neighborhood coordenates

In [5]:
neighborhood_df = pd.read_csv(neighborhood_dir)
neighborhood_df.head()

Unnamed: 0,_id,AREA_ID,AREA_ATTR_ID,PARENT_AREA_ID,AREA_SHORT_CODE,AREA_LONG_CODE,AREA_NAME,AREA_DESC,X,Y,LONGITUDE,LATITUDE,OBJECTID,Shape__Area,Shape__Length,geometry
0,8401,25886861,25926662,49885,94,94,Wychwood (94),Wychwood (94),,,-79.425515,43.676919,16491505,3217960.0,7515.779658,"{u'type': u'Polygon', u'coordinates': (((-79.4..."
1,8402,25886820,25926663,49885,100,100,Yonge-Eglinton (100),Yonge-Eglinton (100),,,-79.40359,43.704689,16491521,3160334.0,7872.021074,"{u'type': u'Polygon', u'coordinates': (((-79.4..."
2,8403,25886834,25926664,49885,97,97,Yonge-St.Clair (97),Yonge-St.Clair (97),,,-79.397871,43.687859,16491537,2222464.0,8130.411276,"{u'type': u'Polygon', u'coordinates': (((-79.3..."
3,8404,25886593,25926665,49885,27,27,York University Heights (27),York University Heights (27),,,-79.488883,43.765736,16491553,25418210.0,25632.335242,"{u'type': u'Polygon', u'coordinates': (((-79.5..."
4,8405,25886688,25926666,49885,31,31,Yorkdale-Glen Park (31),Yorkdale-Glen Park (31),,,-79.457108,43.714672,16491569,11566690.0,13953.408098,"{u'type': u'Polygon', u'coordinates': (((-79.4..."


##### 5.1.1 Clean the dataset (remove unwanted columns)

In [6]:
neighborhood_df = neighborhood_df[["AREA_NAME","_id","AREA_SHORT_CODE","LATITUDE","LONGITUDE"]]
neighborhood_df.head()

Unnamed: 0,AREA_NAME,_id,AREA_SHORT_CODE,LATITUDE,LONGITUDE
0,Wychwood (94),8401,94,43.676919,-79.425515
1,Yonge-Eglinton (100),8402,100,43.704689,-79.40359
2,Yonge-St.Clair (97),8403,97,43.687859,-79.397871
3,York University Heights (27),8404,27,43.765736,-79.488883
4,Yorkdale-Glen Park (31),8405,31,43.714672,-79.457108


#### 5.1.2 Add neighborhood social metrics

In [7]:
neigh_soc_df = pd.read_csv(neigh_soc_dir)
print("The dataset contains {} rows and {} columns".format(neigh_soc_df.shape[0],neigh_soc_df.shape[1]))
neigh_soc_df.head()

The dataset contains 2383 rows and 146 columns


Unnamed: 0,_id,Category,Topic,Data Source,Characteristic,City of Toronto,Agincourt North,Agincourt South-Malvern West,Alderwood,Annex,...,Willowdale West,Willowridge-Martingrove-Richview,Woburn,Woodbine Corridor,Woodbine-Lumsden,Wychwood,Yonge-Eglinton,Yonge-St.Clair,York University Heights,Yorkdale-Glen Park
0,1,Neighbourhood Information,Neighbourhood Information,City of Toronto,Neighbourhood Number,,129,128,20,95,...,37,7,137,64,60,94,100,97,27,31
1,2,Neighbourhood Information,Neighbourhood Information,City of Toronto,TSNS2020 Designation,,No Designation,No Designation,No Designation,No Designation,...,No Designation,No Designation,NIA,No Designation,No Designation,No Designation,No Designation,No Designation,NIA,Emerging Neighbourhood
2,3,Population,Population and dwellings,Census Profile 98-316-X2016001,"Population, 2016",2731571,29113,23757,12054,30526,...,16936,22156,53485,12541,7865,14349,11817,12528,27593,14804
3,4,Population,Population and dwellings,Census Profile 98-316-X2016001,"Population, 2011",2615060,30279,21988,11904,29177,...,15004,21343,53350,11703,7826,13986,10578,11652,27713,14687
4,5,Population,Population and dwellings,Census Profile 98-316-X2016001,Population Change 2011-2016,4.50%,-3.90%,8.00%,1.30%,4.60%,...,12.90%,3.80%,0.30%,7.20%,0.50%,2.60%,11.70%,7.50%,-0.40%,0.80%


In [8]:
neigh_soc_df.drop(["_id",'Category', 'Topic', 'Data Source','City of Toronto'],axis=1,inplace=True)
neigh_soc_df.head()

Unnamed: 0,Characteristic,Agincourt North,Agincourt South-Malvern West,Alderwood,Annex,Banbury-Don Mills,Bathurst Manor,Bay Street Corridor,Bayview Village,Bayview Woods-Steeles,...,Willowdale West,Willowridge-Martingrove-Richview,Woburn,Woodbine Corridor,Woodbine-Lumsden,Wychwood,Yonge-Eglinton,Yonge-St.Clair,York University Heights,Yorkdale-Glen Park
0,Neighbourhood Number,129,128,20,95,42,34,76,52,49,...,37,7,137,64,60,94,100,97,27,31
1,TSNS2020 Designation,No Designation,No Designation,No Designation,No Designation,No Designation,No Designation,No Designation,No Designation,No Designation,...,No Designation,No Designation,NIA,No Designation,No Designation,No Designation,No Designation,No Designation,NIA,Emerging Neighbourhood
2,"Population, 2016",29113,23757,12054,30526,27695,15873,25797,21396,13154,...,16936,22156,53485,12541,7865,14349,11817,12528,27593,14804
3,"Population, 2011",30279,21988,11904,29177,26918,15434,19348,17671,13530,...,15004,21343,53350,11703,7826,13986,10578,11652,27713,14687
4,Population Change 2011-2016,-3.90%,8.00%,1.30%,4.60%,2.90%,2.80%,33.30%,21.10%,-2.80%,...,12.90%,3.80%,0.30%,7.20%,0.50%,2.60%,11.70%,7.50%,-0.40%,0.80%


In [9]:
neigh_soc_df = neigh_soc_df.set_index("Characteristic").T[["Neighbourhood Number","  After-tax income: Average amount ($)","Population density per square kilometre"]]
print("The dataset contains {} rows and {} columns".format(neigh_soc_df.shape[0],neigh_soc_df.shape[1]))
neigh_soc_df.head()

The dataset contains 140 rows and 3 columns


Characteristic,Neighbourhood Number,After-tax income: Average amount ($),Population density per square kilometre
Agincourt North,129,26955,3929
Agincourt South-Malvern West,128,27928,3034
Alderwood,20,39159,2435
Annex,95,80138,10863
Banbury-Don Mills,42,51874,2775


#### 5.1.3 Add neighborhood crime metrics

In [10]:
neigh_crime_df = pd.read_csv(neigh_crime_dir)
print("The dataset contains {} rows and {} columns".format(neigh_crime_df.shape[0],neigh_crime_df.shape[1]))
neigh_crime_df.head()

The dataset contains 140 rows and 62 columns


Unnamed: 0,_id,OBJECTID,Neighbourhood,Hood_ID,Population,Assault_2014,Assault_2015,Assault_2016,Assault_2017,Assault_2018,...,TheftOver_2016,TheftOver_2017,TheftOver_2018,TheftOver_2019,TheftOver_AVG,TheftOver_CHG,TheftOver_Rate_2019,Shape__Area,Shape__Length,geometry
0,1,16,South Parkdale,85,21849,202,226,231,229,220,...,9,10,9,22,10.0,1.44,100.7,2286974.0,10802.83216,"{u'type': u'Polygon', u'coordinates': (((-79.4..."
1,2,17,South Riverdale,70,27876,215,207,236,243,304,...,22,27,24,21,21.3,-0.13,75.3,10964570.0,43080.724701,"{u'type': u'Polygon', u'coordinates': (((-79.3..."
2,3,18,St.Andrew-Windfields,40,17812,53,41,48,45,55,...,8,7,6,6,8.5,0.0,33.7,7299580.0,13025.997456,"{u'type': u'Polygon', u'coordinates': (((-79.3..."
3,4,19,Taylor-Massey,61,15683,127,92,97,107,123,...,5,2,4,3,3.5,-0.25,19.1,1062970.0,5940.70005,"{u'type': u'Polygon', u'coordinates': (((-79.2..."
4,5,20,Humber Summit,21,12416,76,89,118,116,109,...,18,18,15,22,17.3,0.47,177.2,7966905.0,12608.573118,"{u'type': u'Polygon', u'coordinates': (((-79.5..."


In [11]:
neigh_crime_df = neigh_crime_df[['Neighbourhood', 'Hood_ID','Assault_Rate_2019','AutoTheft_Rate_2019',
                                 'BreakandEnter_Rate_2019','Homicide_Rate_2019','Homicide_Rate_2019','TheftOver_Rate_2019']]
print("The dataset contains {} rows and {} columns".format(neigh_crime_df.shape[0],neigh_crime_df.shape[1]))
neigh_crime_df.head()

The dataset contains 140 rows and 8 columns


Unnamed: 0,Neighbourhood,Hood_ID,Assault_Rate_2019,AutoTheft_Rate_2019,BreakandEnter_Rate_2019,Homicide_Rate_2019,Homicide_Rate_2019.1,TheftOver_Rate_2019
0,South Parkdale,85,1148.8,91.5,407.3,4.6,4.6,100.7
1,South Riverdale,70,936.3,143.5,477.1,0.0,0.0,75.3
2,St.Andrew-Windfields,40,325.6,196.5,466.0,0.0,0.0,33.7
3,Taylor-Massey,61,777.9,76.5,401.7,6.4,6.4,19.1
4,Humber Summit,21,950.4,1087.3,459.1,24.2,24.2,177.2


#### 5.2 Public transport

In [12]:
transport_df = pd.read_csv(transport_dir)
transport_df.head()

Unnamed: 0,stop_id,stop_code,stop_name,stop_desc,stop_lat,stop_lon,zone_id,stop_url,location_type,parent_station,stop_timezone,wheelchair_boarding
0,262,662,DANFORTH RD AT KENNEDY RD,,43.714379,-79.260939,,,,,,2
1,263,929,DAVENPORT RD AT BEDFORD RD,,43.674448,-79.399659,,,,,,1
2,264,940,DAVENPORT RD AT DUPONT ST,,43.675511,-79.401938,,,,,,2
3,265,1871,DAVISVILLE AVE AT CLEVELAND ST,,43.702088,-79.378112,,,,,,1
4,266,11700,DISCO RD AT ATTWELL DR,,43.701362,-79.594843,,,,,,1


##### 5.2.1 Clean the dataset (remove unwanted columns)

In [13]:
transport_df = transport_df[["stop_name","stop_id","stop_code","stop_lat","stop_lon"]]
print("The dataset contains {} rows and {} columns".format(transport_df.shape[0],transport_df.shape[1]))
transport_df.head()

The dataset contains 9472 rows and 5 columns


Unnamed: 0,stop_name,stop_id,stop_code,stop_lat,stop_lon
0,DANFORTH RD AT KENNEDY RD,262,662,43.714379,-79.260939
1,DAVENPORT RD AT BEDFORD RD,263,929,43.674448,-79.399659
2,DAVENPORT RD AT DUPONT ST,264,940,43.675511,-79.401938
3,DAVISVILLE AVE AT CLEVELAND ST,265,1871,43.702088,-79.378112
4,DISCO RD AT ATTWELL DR,266,11700,43.701362,-79.594843


#### 5.2.2 Calculate distance of each stop to the center of the neighborhood

In [14]:
calc_distance_df(TO_df = transport_df,TO_lat = "stop_lat" ,TO_lon ="stop_lon",
                 FROM_df = neighborhood_df,FROM_lat = "LATITUDE",FROM_lon = "LONGITUDE",FROM_name = "AREA_NAME")

1 set(s) calculated! Remaining:  139 set(s)
2 set(s) calculated! Remaining:  138 set(s)
3 set(s) calculated! Remaining:  137 set(s)
4 set(s) calculated! Remaining:  136 set(s)
5 set(s) calculated! Remaining:  135 set(s)
6 set(s) calculated! Remaining:  134 set(s)
7 set(s) calculated! Remaining:  133 set(s)
8 set(s) calculated! Remaining:  132 set(s)
9 set(s) calculated! Remaining:  131 set(s)
10 set(s) calculated! Remaining:  130 set(s)
11 set(s) calculated! Remaining:  129 set(s)
12 set(s) calculated! Remaining:  128 set(s)
13 set(s) calculated! Remaining:  127 set(s)
14 set(s) calculated! Remaining:  126 set(s)
15 set(s) calculated! Remaining:  125 set(s)
16 set(s) calculated! Remaining:  124 set(s)
17 set(s) calculated! Remaining:  123 set(s)
18 set(s) calculated! Remaining:  122 set(s)
19 set(s) calculated! Remaining:  121 set(s)
20 set(s) calculated! Remaining:  120 set(s)
21 set(s) calculated! Remaining:  119 set(s)
22 set(s) calculated! Remaining:  118 set(s)
23 set(s) calculate

In [15]:
transport_df.head()

Unnamed: 0,stop_name,stop_id,stop_code,stop_lat,stop_lon,Wychwood (94),Yonge-Eglinton (100),Yonge-St.Clair (97),York University Heights (27),Yorkdale-Glen Park (31),...,Humewood-Cedarvale (106),Ionview (125),Islington-City Centre West (14),Junction Area (90),Keelesdale-Eglinton West (110),Kennedy Park (124),Kensington-Chinatown (78),Kingsview Village-The Westway (6),Kingsway South (15),L'Amoreaux (117)
0,DANFORTH RD AT KENNEDY RD,262,662,43.714379,-79.260939,13904.800715,11547.471205,11424.284204,19228.329565,15809.248752,...,13681.45672,2509.941224,24482.471103,17739.447255,17264.101326,1242.619971,12901.609953,23189.263877,21233.852452,9999.456135
1,DAVENPORT RD AT BEDFORD RD,263,929,43.674448,-79.399659,2103.070814,3374.918172,1496.98926,12432.616972,6435.998695,...,2939.542744,12284.403257,12451.500698,5834.650155,5922.013535,12581.315367,2329.61321,12256.501866,9243.568359,15135.05774
2,DAVENPORT RD AT DUPONT ST,264,940,43.675511,-79.401938,1907.69798,3244.63267,1410.577189,12230.296127,6221.896689,...,2722.861378,12374.164828,12324.759554,5668.725638,5717.876679,12693.249333,2468.787893,12051.208111,9096.711805,15115.079353
3,DAVISVILLE AVE AT CLEVELAND ST,265,1871,43.702088,-79.378112,4735.596475,2073.898644,2244.243836,11386.406514,6518.623581,...,4169.791624,9281.227445,15351.449123,8429.844494,7740.102342,9839.683901,5608.66763,13687.717366,11967.443023,11610.873704
4,DISCO RD AT ATTWELL DR,266,11700,43.701362,-79.594843,13919.361183,15420.396066,15949.913848,11136.924568,11199.284397,...,13521.076454,26251.511566,8612.850717,10622.270897,10099.233499,27088.279614,16796.603856,3796.124921,8627.213242,24925.303401


In [737]:
transport_df["Group"]="Public transport"
transport_df.head()

Unnamed: 0,stop_name,stop_id,stop_code,stop_lat,stop_lon,Wychwood (94),Yonge-Eglinton (100),Yonge-St.Clair (97),York University Heights (27),Yorkdale-Glen Park (31),...,Ionview (125),Islington-City Centre West (14),Junction Area (90),Keelesdale-Eglinton West (110),Kennedy Park (124),Kensington-Chinatown (78),Kingsview Village-The Westway (6),Kingsway South (15),L'Amoreaux (117),Group
0,DANFORTH RD AT KENNEDY RD,262,662,43.714379,-79.260939,13904.800715,11547.471205,11424.284204,19228.329565,15809.248752,...,2509.941224,24482.471103,17739.447255,17264.101326,1242.619971,12901.609953,23189.263877,21233.852452,9999.456135,Public transport
1,DAVENPORT RD AT BEDFORD RD,263,929,43.674448,-79.399659,2103.070814,3374.918172,1496.98926,12432.616972,6435.998695,...,12284.403257,12451.500698,5834.650155,5922.013535,12581.315367,2329.61321,12256.501866,9243.568359,15135.05774,Public transport
2,DAVENPORT RD AT DUPONT ST,264,940,43.675511,-79.401938,1907.69798,3244.63267,1410.577189,12230.296127,6221.896689,...,12374.164828,12324.759554,5668.725638,5717.876679,12693.249333,2468.787893,12051.208111,9096.711805,15115.079353,Public transport
3,DAVISVILLE AVE AT CLEVELAND ST,265,1871,43.702088,-79.378112,4735.596475,2073.898644,2244.243836,11386.406514,6518.623581,...,9281.227445,15351.449123,8429.844494,7740.102342,9839.683901,5608.66763,13687.717366,11967.443023,11610.873704,Public transport
4,DISCO RD AT ATTWELL DR,266,11700,43.701362,-79.594843,13919.361183,15420.396066,15949.913848,11136.924568,11199.284397,...,26251.511566,8612.850717,10622.270897,10099.233499,27088.279614,16796.603856,3796.124921,8627.213242,24925.303401,Public transport


#### 5.3 Health services

In [16]:
health_df = pd.read_csv(health_dir)
health_df.head()

Unnamed: 0,_id,OBJECTID,AGENCY_NAME,ORGANIZATION_ADDRESS,NEIGHBOURHOOD,OFFICE_PHONE,EMAIL,WEBSITE,ELIGIBILITY,DESCRIPTION_SERVICE,...,DATE_UPDATED,ADDRESS_POINT_ID,X,Y,LONGITUDE,LATITUDE,ADDRESS_FULL,MUNICIPALITY,POSTAL_CODE,geometry
0,1,800,Rouge Valley Health System,"2867 Ellesmere Rd, Toronto, ON M1E 4B9","Morningside, 135",416-284-8131,patientrelations@rougevalley.ca,"<a href=""http://www.rougevalley.ca"" target=""_b...",,Acute care hospital * complex continuing care ...,...,2015-12-30T05:00:00,6362828,,,,,2867 Ellesmere Rd,Scarborough,M1E 4B9,"{u'type': u'Point', u'coordinates': (-79.20496..."
1,2,801,"University Health Network, Toronto Rehabilitat...","130 Dunn Ave, Toronto, ON M6K 2R7","South Parkdale, 85",416-597-3422 ext 2000 ; 416-59-REHAB ext 2000,,"<a href=""http://www.uhn.ca/torontorehab"" targe...",,Complex continuing care facility * outpatient ...,...,2014-01-16T05:00:00,8168605,,,,,130 Dunn Ave,former Toronto,M6K 2R7,"{u'type': u'Point', u'coordinates': (-79.43308..."
2,3,1003,Cliffcrest Health Centre,"2890 Kingston Rd, Toronto, ON M1M 1N5",,416-267-2238,,"<a href=""http://"" target=""_blank""></a>",No restrictions,"Offers dental and denture work, audiology serv...",...,2016-02-16T05:00:00,352165,,,,,2890 Kingston Rd,Scarborough,M1M 1N5,"{u'type': u'Point', u'coordinates': (-79.23515..."
3,4,1004,Golden Care Dental Services,"1571 Sandhurst Circle, Scarborough, ON M1V...",,416-484-6228,info@GoldenCareDentalServices.com,"<a href=""http://www.goldencaredentalservices.c...",seniors who are not mobile,Mobile dental services bringing services to pa...,...,2015-06-24T04:00:00,7531002,,,,,1571 Sandhurst Crcl,Scarborough,M1V 1V2,"{u'type': u'Point', u'coordinates': (-79.26931..."
4,5,1005,"Toronto, City of","160 Burough Dr, Toronto, ON M1P 4N8",,416-338-7442,,"<a href=""http://www.toronto.ca/health/dental"" ...","Cannot afford to go to private dentist, 0-13 y...",The City of Toronto's Public Health Dental Cli...,...,2014-11-04T05:00:00,12697721,,,,,160 Borough Dr,Scarborough,M1P 4N8,"{u'type': u'Point', u'coordinates': (-79.25648..."


#### 5.3.1 Clean the dataset (remove unwanted columns, extract coordinates from geometry column)

In [17]:
health_df = health_df[["AGENCY_NAME","geometry"]]
health_df.head()

Unnamed: 0,AGENCY_NAME,geometry
0,Rouge Valley Health System,"{u'type': u'Point', u'coordinates': (-79.20496..."
1,"University Health Network, Toronto Rehabilitat...","{u'type': u'Point', u'coordinates': (-79.43308..."
2,Cliffcrest Health Centre,"{u'type': u'Point', u'coordinates': (-79.23515..."
3,Golden Care Dental Services,"{u'type': u'Point', u'coordinates': (-79.26931..."
4,"Toronto, City of","{u'type': u'Point', u'coordinates': (-79.25648..."


In [18]:
health_lat = []
health_lon = []
for i in range(health_df.geometry.shape[0]):
    coords = health_df.geometry[i].split(r"'coordinates': (")[1].split(r")")[0].split(",")
    health_lat.append(float(coords[1]))
    health_lon.append(float(coords[0]))

health_df["health_lat"] = health_lat
health_df["health_lon"] = health_lon

health_df.drop(["geometry"],axis=1,inplace=True)
health_df.head()

Unnamed: 0,AGENCY_NAME,health_lat,health_lon
0,Rouge Valley Health System,43.780081,-79.204961
1,"University Health Network, Toronto Rehabilitat...",43.635,-79.433083
2,Cliffcrest Health Centre,43.72323,-79.235151
3,Golden Care Dental Services,43.809221,-79.269312
4,"Toronto, City of",43.773051,-79.25648


In [19]:
health_df.shape

(68, 3)

#### 5.3.2 Calculate distance of each health center to the center of the neighborhood

In [20]:
calc_distance_df(TO_df = health_df,TO_lat = "health_lat" ,TO_lon ="health_lon",
                 FROM_df = neighborhood_df,FROM_lat = "LATITUDE",FROM_lon = "LONGITUDE",FROM_name = "AREA_NAME")

1 set(s) calculated! Remaining:  139 set(s)
2 set(s) calculated! Remaining:  138 set(s)
3 set(s) calculated! Remaining:  137 set(s)
4 set(s) calculated! Remaining:  136 set(s)
5 set(s) calculated! Remaining:  135 set(s)
6 set(s) calculated! Remaining:  134 set(s)
7 set(s) calculated! Remaining:  133 set(s)
8 set(s) calculated! Remaining:  132 set(s)
9 set(s) calculated! Remaining:  131 set(s)
10 set(s) calculated! Remaining:  130 set(s)
11 set(s) calculated! Remaining:  129 set(s)
12 set(s) calculated! Remaining:  128 set(s)
13 set(s) calculated! Remaining:  127 set(s)
14 set(s) calculated! Remaining:  126 set(s)
15 set(s) calculated! Remaining:  125 set(s)
16 set(s) calculated! Remaining:  124 set(s)
17 set(s) calculated! Remaining:  123 set(s)
18 set(s) calculated! Remaining:  122 set(s)
19 set(s) calculated! Remaining:  121 set(s)
20 set(s) calculated! Remaining:  120 set(s)
21 set(s) calculated! Remaining:  119 set(s)
22 set(s) calculated! Remaining:  118 set(s)
23 set(s) calculate

In [21]:
health_df.head()

Unnamed: 0,AGENCY_NAME,health_lat,health_lon,Wychwood (94),Yonge-Eglinton (100),Yonge-St.Clair (97),York University Heights (27),Yorkdale-Glen Park (31),Lambton Baby Point (114),Lansing-Westgate (38),...,Humewood-Cedarvale (106),Ionview (125),Islington-City Centre West (14),Junction Area (90),Keelesdale-Eglinton West (110),Kennedy Park (124),Kensington-Chinatown (78),Kingsview Village-The Westway (6),Kingsway South (15),L'Amoreaux (117)
0,Rouge Valley Health System,43.780081,-79.204961,21146.148665,18060.148511,18615.338947,22914.459377,21570.504427,27128.474092,17927.881202,...,20471.79273,7364.930942,31766.392653,24828.054443,23891.520331,7524.957236,20921.909028,29055.05051,28360.153803,8953.60904
1,"University Health Network, Toronto Rehabilitat...",43.635,-79.433083,4697.354489,8100.053113,6523.711592,15206.176354,9061.570482,5657.768819,13268.850164,...,6278.180709,17089.515595,8897.176107,4788.435257,6429.415408,17180.204707,3551.522523,11672.691234,6582.405023,20268.888988
2,Cliffcrest Health Centre,43.72323,-79.235151,16184.748457,13730.007366,13691.63169,20976.570898,17911.402281,22268.501484,15657.955123,...,15916.617085,3295.265344,26776.665577,20016.226283,19495.885229,2049.400746,15189.191322,25346.283931,23518.067192,10260.003464
3,Golden Care Dental Services,43.809221,-79.269312,19349.302843,15869.096045,17001.238046,18322.164805,18413.240286,24862.336777,13922.570623,...,18279.998366,8209.951937,29476.038202,22621.080175,21291.147578,9323.592516,20133.774888,25558.919127,26021.231019,3902.861241
4,"Toronto, City of",43.773051,-79.25648,17308.655427,14075.945045,14810.950921,18729.821604,17413.806935,23190.237285,13709.283397,...,16511.030724,4380.868345,27840.347277,20894.151225,19851.538743,5286.473663,17463.215426,24874.489451,24407.471495,5276.577986


In [738]:
health_df["Group"]="Health Centers"
health_df.head()

Unnamed: 0,AGENCY_NAME,health_lat,health_lon,Wychwood (94),Yonge-Eglinton (100),Yonge-St.Clair (97),York University Heights (27),Yorkdale-Glen Park (31),Lambton Baby Point (114),Lansing-Westgate (38),...,Ionview (125),Islington-City Centre West (14),Junction Area (90),Keelesdale-Eglinton West (110),Kennedy Park (124),Kensington-Chinatown (78),Kingsview Village-The Westway (6),Kingsway South (15),L'Amoreaux (117),Group
0,Rouge Valley Health System,43.780081,-79.204961,21146.148665,18060.148511,18615.338947,22914.459377,21570.504427,27128.474092,17927.881202,...,7364.930942,31766.392653,24828.054443,23891.520331,7524.957236,20921.909028,29055.05051,28360.153803,8953.60904,Health Centers
1,"University Health Network, Toronto Rehabilitat...",43.635,-79.433083,4697.354489,8100.053113,6523.711592,15206.176354,9061.570482,5657.768819,13268.850164,...,17089.515595,8897.176107,4788.435257,6429.415408,17180.204707,3551.522523,11672.691234,6582.405023,20268.888988,Health Centers
2,Cliffcrest Health Centre,43.72323,-79.235151,16184.748457,13730.007366,13691.63169,20976.570898,17911.402281,22268.501484,15657.955123,...,3295.265344,26776.665577,20016.226283,19495.885229,2049.400746,15189.191322,25346.283931,23518.067192,10260.003464,Health Centers
3,Golden Care Dental Services,43.809221,-79.269312,19349.302843,15869.096045,17001.238046,18322.164805,18413.240286,24862.336777,13922.570623,...,8209.951937,29476.038202,22621.080175,21291.147578,9323.592516,20133.774888,25558.919127,26021.231019,3902.861241,Health Centers
4,"Toronto, City of",43.773051,-79.25648,17308.655427,14075.945045,14810.950921,18729.821604,17413.806935,23190.237285,13709.283397,...,4380.868345,27840.347277,20894.151225,19851.538743,5286.473663,17463.215426,24874.489451,24407.471495,5276.577986,Health Centers


#### 5.4 Schools

In [22]:
schools_df = pd.read_csv(schools_dir)
schools_df.head()

Unnamed: 0,_id,OBJECTID,GEO_ID,NAME,SCHOOL_LEVEL,SCHOOL_TYPE,BOARD_NAME,SOURCE_ADDRESS,SCHOOL_TYPE_DESC,ADDRESS_POINT_ID,...,LO_NUM,LO_NUM_SUF,HI_NUM,HI_NUM_SUF,LINEAR_NAME_ID,X,Y,LATITUDE,LONGITUDE,geometry
0,21813,1,330677,A PLUS ACADEMY OF ADVANCEMENT,,PR,,2425 EGLINTON AVE E,Private,330677,...,2425,,,,125,323561.684,4843596.049,43.732091,-79.267107,"{u'type': u'Point', u'coordinates': (-79.26710..."
1,21814,2,524780,A R S ARMENIAN PRIVATE SCHOOL,,PR,,50 HALLCROWN PL,Private,524780,...,50,,,,5778,319037.215,4847803.338,43.770062,-79.323159,"{u'type': u'Point', u'coordinates': (-79.32315..."
2,21815,3,20258267,A Y J GLOBAL ACADEMY,,PR,,4 LANSING SQ,Private,20258267,...,4,,,,6007,318870.404,4848250.651,43.774091,-79.32522,"{u'type': u'Point', u'coordinates': (-79.32521..."
3,21816,4,517961,A Y JACKSON SECONDARY SCHOOL,,EP,Toronto District School Board,50 FRANCINE DR,English Public,517961,...,50,,,,5639,315536.964,4851708.323,43.80527,-79.366559,"{u'type': u'Point', u'coordinates': (-79.36655..."
4,21817,5,13967401,ABACUS MONTESSORI LEARNING CENTRE,,PR,,4 CREDIT UNION DR,Private,13967401,...,4,,,,5294,319381.84,4842846.853,43.725441,-79.319013,"{u'type': u'Point', u'coordinates': (-79.31900..."


##### 5.4.1 Clean the dataset (remove unwanted columns)

In [23]:
schools_df = schools_df[["NAME","SCHOOL_TYPE_DESC","LATITUDE","LONGITUDE"]]
print("The dataset contains {} rows and {} columns".format(schools_df.shape[0],schools_df.shape[1]))
schools_df.head()

The dataset contains 1194 rows and 4 columns


Unnamed: 0,NAME,SCHOOL_TYPE_DESC,LATITUDE,LONGITUDE
0,A PLUS ACADEMY OF ADVANCEMENT,Private,43.732091,-79.267107
1,A R S ARMENIAN PRIVATE SCHOOL,Private,43.770062,-79.323159
2,A Y J GLOBAL ACADEMY,Private,43.774091,-79.32522
3,A Y JACKSON SECONDARY SCHOOL,English Public,43.80527,-79.366559
4,ABACUS MONTESSORI LEARNING CENTRE,Private,43.725441,-79.319013


#### 5.4.2 Calculate distance of each school to the center of the neighborhood

In [24]:
calc_distance_df(TO_df = schools_df,TO_lat = "LATITUDE" ,TO_lon ="LONGITUDE",
                 FROM_df = neighborhood_df,FROM_lat = "LATITUDE",FROM_lon = "LONGITUDE",FROM_name = "AREA_NAME")

1 set(s) calculated! Remaining:  139 set(s)
2 set(s) calculated! Remaining:  138 set(s)
3 set(s) calculated! Remaining:  137 set(s)
4 set(s) calculated! Remaining:  136 set(s)
5 set(s) calculated! Remaining:  135 set(s)
6 set(s) calculated! Remaining:  134 set(s)
7 set(s) calculated! Remaining:  133 set(s)
8 set(s) calculated! Remaining:  132 set(s)
9 set(s) calculated! Remaining:  131 set(s)
10 set(s) calculated! Remaining:  130 set(s)
11 set(s) calculated! Remaining:  129 set(s)
12 set(s) calculated! Remaining:  128 set(s)
13 set(s) calculated! Remaining:  127 set(s)
14 set(s) calculated! Remaining:  126 set(s)
15 set(s) calculated! Remaining:  125 set(s)
16 set(s) calculated! Remaining:  124 set(s)
17 set(s) calculated! Remaining:  123 set(s)
18 set(s) calculated! Remaining:  122 set(s)
19 set(s) calculated! Remaining:  121 set(s)
20 set(s) calculated! Remaining:  120 set(s)
21 set(s) calculated! Remaining:  119 set(s)
22 set(s) calculated! Remaining:  118 set(s)
23 set(s) calculate

In [25]:
print("The dataset contains {} rows and {} columns".format(schools_df.shape[0],schools_df.shape[1]))
schools_df.head()

The dataset contains 1194 rows and 144 columns


Unnamed: 0,NAME,SCHOOL_TYPE_DESC,LATITUDE,LONGITUDE,Wychwood (94),Yonge-Eglinton (100),Yonge-St.Clair (97),York University Heights (27),Yorkdale-Glen Park (31),Lambton Baby Point (114),...,Humewood-Cedarvale (106),Ionview (125),Islington-City Centre West (14),Junction Area (90),Keelesdale-Eglinton West (110),Kennedy Park (124),Kensington-Chinatown (78),Kingsview Village-The Westway (6),Kingsway South (15),L'Amoreaux (117)
0,A PLUS ACADEMY OF ADVANCEMENT,Private,43.732091,-79.267107,14163.392786,11412.015401,11628.510545,18249.615542,15431.71522,20235.0663,...,13709.465573,564.816698,24821.23546,17949.330095,17255.279217,905.953381,13645.704398,22922.579364,21481.94719,8017.727928
1,A R S ARMENIAN PRIVATE SCHOOL,Private,43.770062,-79.323159,13233.26601,9733.395499,10938.550926,13352.411487,12421.614133,18728.572091,...,12139.27321,5614.98278,23350.929775,16481.693047,15183.212854,7072.447827,14255.258101,19750.004476,19895.041064,2942.530288
2,A Y J GLOBAL ACADEMY,Private,43.774091,-79.32522,13485.957481,9965.801881,11227.524088,13210.08453,12507.844386,18909.105311,...,12353.885997,6046.614686,23519.538664,16675.19278,15335.332862,7506.772149,14596.110437,19782.065604,20064.994388,2564.446011
3,A Y JACKSON SECONDARY SCHOOL,English Public,43.80527,-79.366559,15030.690846,11566.358578,13286.791126,10781.764916,12429.749827,19459.843187,...,13579.06347,10849.656861,23817.397476,17446.398113,15740.946282,12310.056397,17036.969844,18779.152265,20467.422896,4354.216505
4,ABACUS MONTESSORI LEARNING CENTRE,Private,43.725441,-79.319013,10137.218378,7195.376268,7604.844848,14396.549387,11192.152301,16149.884946,...,9541.647586,3908.611548,20774.394358,13851.852524,13053.967905,4724.212912,10176.931871,18676.307247,17389.221896,7818.188136


In [739]:
schools_df["Group"]="Schools"
schools_df.head()

Unnamed: 0,NAME,SCHOOL_TYPE_DESC,LATITUDE,LONGITUDE,Wychwood (94),Yonge-Eglinton (100),Yonge-St.Clair (97),York University Heights (27),Yorkdale-Glen Park (31),Lambton Baby Point (114),...,Ionview (125),Islington-City Centre West (14),Junction Area (90),Keelesdale-Eglinton West (110),Kennedy Park (124),Kensington-Chinatown (78),Kingsview Village-The Westway (6),Kingsway South (15),L'Amoreaux (117),Group
0,A PLUS ACADEMY OF ADVANCEMENT,Private,43.732091,-79.267107,14163.392786,11412.015401,11628.510545,18249.615542,15431.71522,20235.0663,...,564.816698,24821.23546,17949.330095,17255.279217,905.953381,13645.704398,22922.579364,21481.94719,8017.727928,Schools
1,A R S ARMENIAN PRIVATE SCHOOL,Private,43.770062,-79.323159,13233.26601,9733.395499,10938.550926,13352.411487,12421.614133,18728.572091,...,5614.98278,23350.929775,16481.693047,15183.212854,7072.447827,14255.258101,19750.004476,19895.041064,2942.530288,Schools
2,A Y J GLOBAL ACADEMY,Private,43.774091,-79.32522,13485.957481,9965.801881,11227.524088,13210.08453,12507.844386,18909.105311,...,6046.614686,23519.538664,16675.19278,15335.332862,7506.772149,14596.110437,19782.065604,20064.994388,2564.446011,Schools
3,A Y JACKSON SECONDARY SCHOOL,English Public,43.80527,-79.366559,15030.690846,11566.358578,13286.791126,10781.764916,12429.749827,19459.843187,...,10849.656861,23817.397476,17446.398113,15740.946282,12310.056397,17036.969844,18779.152265,20467.422896,4354.216505,Schools
4,ABACUS MONTESSORI LEARNING CENTRE,Private,43.725441,-79.319013,10137.218378,7195.376268,7604.844848,14396.549387,11192.152301,16149.884946,...,3908.611548,20774.394358,13851.852524,13053.967905,4724.212912,10176.931871,18676.307247,17389.221896,7818.188136,Schools


#### 5.5 Places of interest and Atractions

In [26]:
poi_df = pd.read_csv(poi_dir)
poi_df.head()

Unnamed: 0,_id,ID,ADDRESS_INFO,NAME,CATEGORY,PHONE,EMAIL,WEBSITE,GEOID,RECEIVED_DATA_DATE,...,Y,LONGITUDE,LATITUDE,OBJECTID,MI_PRINX,ATTRACTION_LEVEL,ATTRACTION_DESC,IMAGE_NAME,MAP_ACCESS,geometry
0,1,16,,BMO Field,Sports / Entertainment Venue,416-815-5982,,www.bmofield.com,20229243.0,,...,,,,16,4163950.0,2,BMO Field is home to the Toronto FC (Major Lea...,BMOField.jpg,Y,"{u'type': u'Point', u'coordinates': (-79.41861..."
1,2,1,,Aga Khan Museum,Museum,416-646-4677,,www.agakhanmuseum.org,10142948.0,,...,,,,1,4094277.0,1,"Dedicated to sharing the artistic, intellectua...",AgaKhan.jpg,Y,"{u'type': u'Point', u'coordinates': (-79.33233..."
2,3,2,,Air Canada Centre,Sports / Entertainment Venue,416-815-5500,,www.theaircanadacentre.com/,7929257.0,,...,,,,2,3176821.0,1,The Air Canada Centre is a multi-purpose indoo...,AirCanadaCentre.jpg,Y,"{u'type': u'Point', u'coordinates': (-79.37900..."
3,4,3,,Al Green Theatre (at the Miles Nadal Jewish Co...,Performing Arts,416-924-6211,,www.algreentheatre.ca/,8418224.0,,...,,,,3,1951177.0,4,The Al Green Theatre is used for social and c...,,N,"{u'type': u'Point', u'coordinates': (-79.40406..."
4,5,4,,Alexander Muir Memorial Gardens,Nature/ Park,416-338-4386,,http://www1.toronto.ca/parks/prd/facilities/co...,10154587.0,,...,,,,4,4217141.0,4,Named after the famous composer Alexander Muir...,Imageplaceholder.jpg,Y,"{u'type': u'Point', u'coordinates': (-79.40116..."


##### 5.5.1 Clean the dataset (remove unwanted columns, extract lat long from geometry column)

In [27]:
poi_df = poi_df[["NAME","CATEGORY","geometry"]]
print("The dataset contains {} rows and {} columns".format(poi_df.shape[0],poi_df.shape[1]))
poi_df.head()

The dataset contains 175 rows and 3 columns


Unnamed: 0,NAME,CATEGORY,geometry
0,BMO Field,Sports / Entertainment Venue,"{u'type': u'Point', u'coordinates': (-79.41861..."
1,Aga Khan Museum,Museum,"{u'type': u'Point', u'coordinates': (-79.33233..."
2,Air Canada Centre,Sports / Entertainment Venue,"{u'type': u'Point', u'coordinates': (-79.37900..."
3,Al Green Theatre (at the Miles Nadal Jewish Co...,Performing Arts,"{u'type': u'Point', u'coordinates': (-79.40406..."
4,Alexander Muir Memorial Gardens,Nature/ Park,"{u'type': u'Point', u'coordinates': (-79.40116..."


In [28]:
poi_lat = []
poi_lon = []
for i in range(poi_df.geometry.shape[0]):
    coords = poi_df.geometry[i].split(r"'coordinates': (")[1].split(r")")[0].split(",")
    poi_lat.append(float(coords[1]))
    poi_lon.append(float(coords[0]))

poi_df["poi_lat"] = poi_lat
poi_df["poi_lon"] = poi_lon

poi_df.drop(["geometry"],axis=1,inplace=True)
print("The dataset contains {} rows and {} columns".format(poi_df.shape[0],poi_df.shape[1]))
poi_df.head()

The dataset contains 175 rows and 4 columns


Unnamed: 0,NAME,CATEGORY,poi_lat,poi_lon
0,BMO Field,Sports / Entertainment Venue,43.632664,-79.418614
1,Aga Khan Museum,Museum,43.725386,-79.332331
2,Air Canada Centre,Sports / Entertainment Venue,43.643438,-79.379001
3,Al Green Theatre (at the Miles Nadal Jewish Co...,Performing Arts,43.666207,-79.404062
4,Alexander Muir Memorial Gardens,Nature/ Park,43.721534,-79.401164


#### 5.5.2 Calculate distance of each POI to the center of the neighborhood

In [29]:
calc_distance_df(TO_df = poi_df,TO_lat = "poi_lat" ,TO_lon ="poi_lon",
                 FROM_df = neighborhood_df,FROM_lat = "LATITUDE",FROM_lon = "LONGITUDE",FROM_name = "AREA_NAME")

1 set(s) calculated! Remaining:  139 set(s)
2 set(s) calculated! Remaining:  138 set(s)
3 set(s) calculated! Remaining:  137 set(s)
4 set(s) calculated! Remaining:  136 set(s)
5 set(s) calculated! Remaining:  135 set(s)
6 set(s) calculated! Remaining:  134 set(s)
7 set(s) calculated! Remaining:  133 set(s)
8 set(s) calculated! Remaining:  132 set(s)
9 set(s) calculated! Remaining:  131 set(s)
10 set(s) calculated! Remaining:  130 set(s)
11 set(s) calculated! Remaining:  129 set(s)
12 set(s) calculated! Remaining:  128 set(s)
13 set(s) calculated! Remaining:  127 set(s)
14 set(s) calculated! Remaining:  126 set(s)
15 set(s) calculated! Remaining:  125 set(s)
16 set(s) calculated! Remaining:  124 set(s)
17 set(s) calculated! Remaining:  123 set(s)
18 set(s) calculated! Remaining:  122 set(s)
19 set(s) calculated! Remaining:  121 set(s)
20 set(s) calculated! Remaining:  120 set(s)
21 set(s) calculated! Remaining:  119 set(s)
22 set(s) calculated! Remaining:  118 set(s)
23 set(s) calculate

In [30]:
print("The dataset contains {} rows and {} columns".format(poi_df.shape[0],poi_df.shape[1]))
poi_df.head()

The dataset contains 175 rows and 144 columns


Unnamed: 0,NAME,CATEGORY,poi_lat,poi_lon,Wychwood (94),Yonge-Eglinton (100),Yonge-St.Clair (97),York University Heights (27),Yorkdale-Glen Park (31),Lambton Baby Point (114),...,Humewood-Cedarvale (106),Ionview (125),Islington-City Centre West (14),Junction Area (90),Keelesdale-Eglinton West (110),Kennedy Park (124),Kensington-Chinatown (78),Kingsview Village-The Westway (6),Kingsway South (15),L'Amoreaux (117)
0,BMO Field,Sports / Entertainment Venue,43.632664,-79.418614,4948.413884,8093.650887,6356.628623,15833.10567,9625.823552,6826.038187,...,6563.437563,16402.966878,10063.717532,5786.198142,7274.190756,16411.013755,2891.59329,12766.402033,7773.453214,19979.006886
1,Aga Khan Museum,Museum,43.725386,-79.332331,9242.162144,6185.975906,6729.74824,13383.222857,10125.045362,15206.267598,...,8564.229571,4948.679364,19843.284675,12905.967647,12046.928746,5797.322502,9543.647569,17616.143715,16440.003325,7951.182505
2,Air Canada Centre,Sports / Entertainment Venue,43.643438,-79.379001,5283.518492,7088.444001,5164.789063,16219.820506,10114.784542,9569.587715,...,6616.490558,13344.885422,13305.150506,7936.423134,8812.684262,13218.087242,1851.674416,14951.926791,10674.277388,17709.395319
3,Al Green Theatre (at the Miles Nadal Jewish Co...,Performing Arts,43.666207,-79.404062,2099.982618,4275.840866,2456.961601,13000.510209,6876.439586,7483.331473,...,3382.969228,13098.022885,11808.869978,5437.448132,5850.107931,13328.211472,1509.636648,12153.781084,8706.628679,16112.34184
4,Alexander Muir Memorial Gardens,Nature/ Park,43.721534,-79.401164,5331.452244,1881.686648,3750.872623,8605.053916,4572.299847,10452.763811,...,3975.012586,10482.194401,15071.552084,8223.163554,6921.82332,11352.631703,7559.549111,12085.672645,11615.631606,10821.752287


#### 5.5.3 Simplify categories

In [743]:
poi_df["CATEGORY"].unique()

array(['Sports / Entertainment Venue', 'Museum', 'Performing Arts',
       'Nature/ Park', 'Gallery', 'Landmark',
       'Convention & Trade Centres', 'Transportation', 'Attraction',
       'Garden / Conservatory', 'Visitor Information', 'Featured Park'],
      dtype=object)

In [744]:
poi_cat_dict = {"Museum":['Museum','Gallery'], "Sports":['Sports / Entertainment Venue'], 
                "Large Oudoor Attr":['Convention & Trade Centres'],
                "Outdoor Attr":['Nature/ Park',  'Landmark','Featured Park','Garden / Conservatory', 'Attraction'], 
                "Cinema":['Performing Arts'],"Non usable":['Visitor Information','Transportation']}



In [745]:
poi_df["Group"]="TBD"
poi_df['Group'].unique()

array(['TBD'], dtype=object)

In [747]:
for i in list(poi_cat_dict):
        poi_df.loc[poi_df['CATEGORY'].isin(poi_cat_dict[i]),'Group']=i
    
poi_df['Group'].unique()    

array(['Sports', 'Museum', 'Cinema', 'Outdoor Attr', 'Large Oudoor Attr',
       'Non usable'], dtype=object)

In [748]:
poi_df.head()

Unnamed: 0,NAME,CATEGORY,poi_lat,poi_lon,Wychwood (94),Yonge-Eglinton (100),Yonge-St.Clair (97),York University Heights (27),Yorkdale-Glen Park (31),Lambton Baby Point (114),...,Ionview (125),Islington-City Centre West (14),Junction Area (90),Keelesdale-Eglinton West (110),Kennedy Park (124),Kensington-Chinatown (78),Kingsview Village-The Westway (6),Kingsway South (15),L'Amoreaux (117),Group
0,BMO Field,Sports / Entertainment Venue,43.632664,-79.418614,4948.413884,8093.650887,6356.628623,15833.10567,9625.823552,6826.038187,...,16402.966878,10063.717532,5786.198142,7274.190756,16411.013755,2891.59329,12766.402033,7773.453214,19979.006886,Sports
1,Aga Khan Museum,Museum,43.725386,-79.332331,9242.162144,6185.975906,6729.74824,13383.222857,10125.045362,15206.267598,...,4948.679364,19843.284675,12905.967647,12046.928746,5797.322502,9543.647569,17616.143715,16440.003325,7951.182505,Museum
2,Air Canada Centre,Sports / Entertainment Venue,43.643438,-79.379001,5283.518492,7088.444001,5164.789063,16219.820506,10114.784542,9569.587715,...,13344.885422,13305.150506,7936.423134,8812.684262,13218.087242,1851.674416,14951.926791,10674.277388,17709.395319,Sports
3,Al Green Theatre (at the Miles Nadal Jewish Co...,Performing Arts,43.666207,-79.404062,2099.982618,4275.840866,2456.961601,13000.510209,6876.439586,7483.331473,...,13098.022885,11808.869978,5437.448132,5850.107931,13328.211472,1509.636648,12153.781084,8706.628679,16112.34184,Cinema
4,Alexander Muir Memorial Gardens,Nature/ Park,43.721534,-79.401164,5331.452244,1881.686648,3750.872623,8605.053916,4572.299847,10452.763811,...,10482.194401,15071.552084,8223.163554,6921.82332,11352.631703,7559.549111,12085.672645,11615.631606,10821.752287,Outdoor Attr


#### 5.6 Community Centers

In [31]:
comm_ctr_df = pd.read_csv(comm_ctr_dir)
comm_ctr_df.head()

Unnamed: 0,_id,OBJECTID,AGENCY_NAME,ORGANIZATION_ADDRESS,NEIGHBOURHOOD,OFFICE_PHONE,EMAIL,WEBSITE,ELIGIBILITY,DESCRIPTION_SERVICE,...,DATE_UPDATED,ADDRESS_POINT_ID,X,Y,LONGITUDE,LATITUDE,ADDRESS_FULL,MUNICIPALITY,POSTAL_CODE,geometry
0,1,505,Community Centre 55,"97 Main St, Toronto, ON M4E 2V6","East End-Danforth, 62",416-691-1113,,"<a href=""http://www.centre55.com"" target=""_bla...",Open to all,Community centre * community development * bef...,...,2015-11-12T05:00:00,836457,,,,,97 Main St,former Toronto,M4E 2V6,"{u'type': u'Point', u'coordinates': (-79.29913..."
1,2,745,519 Community Centre,"519 Church St, Toronto, ON M4Y 2C9","Church-Yonge Corridor, 75",416-392-6874,info@The519.org,"<a href=""http://www.The519.org"" target=""_blank...",Open to all,Multiservice centre offering social and recrea...,...,2016-02-22T05:00:00,7731584,,,,,519 Church St,former Toronto,M4Y 2C9,"{u'type': u'Point', u'coordinates': (-79.38094..."
2,3,747,Applegrove Community Complex,"Inside Duke of Connaught Public School, 60 Woo...","Greenwood-Coxwell, 65",416-461-8143,applegrove@applegrovecc.ca,"<a href=""http://www.ApplegroveCC.ca"" target=""_...",Open to all,Community centre * <b>children's programs</b> ...,...,2015-09-16T04:00:00,6362824,,,,,60 Woodfield Rd,former Toronto,M4L 2W6,"{u'type': u'Point', u'coordinates': (-79.32199..."
3,4,748,Arab Community Centre of Toronto,"555 Burnhamthorpe Rd, Ste 209, Toronto, ON ...","Etobicoke West Mall, 13",416-231-7746,Info@arabnewcomers.org,"<a href=""http://www.arabcommunitycentre.com"" t...",Open to everyone,<b>Settlement Services</b> (formerly Immigrant...,...,2015-09-11T04:00:00,991045,,,,,555 Burnhamthorpe Rd,Etobicoke,M9C 2Y3,"{u'type': u'Point', u'coordinates': (-79.56802..."
4,5,749,BBYO Ontario,"Prosserman Jewish Community Centre, 4588 Bathu...","Westminster-Branson, 35",416-398-2004,info@bbyo.ca,"<a href=""http://www.bbyo.ca"" target=""_blank"">w...",Jewish students in grades 6-12,"Social, athletic, cultural and religious activ...",...,2015-04-28T04:00:00,487320,,,,,4588 Bathurst St,North York,M2R 1W6,"{u'type': u'Point', u'coordinates': (-79.44166..."


##### 5.6.1 Clean the dataset (remove unwanted columns, extract lat long from geometry column)

In [32]:
comm_ctr_df = comm_ctr_df[["AGENCY_NAME","geometry"]]
print("The dataset contains {} rows and {} columns".format(comm_ctr_df.shape[0],comm_ctr_df.shape[1]))
comm_ctr_df.head()

The dataset contains 152 rows and 2 columns


Unnamed: 0,AGENCY_NAME,geometry
0,Community Centre 55,"{u'type': u'Point', u'coordinates': (-79.29913..."
1,519 Community Centre,"{u'type': u'Point', u'coordinates': (-79.38094..."
2,Applegrove Community Complex,"{u'type': u'Point', u'coordinates': (-79.32199..."
3,Arab Community Centre of Toronto,"{u'type': u'Point', u'coordinates': (-79.56802..."
4,BBYO Ontario,"{u'type': u'Point', u'coordinates': (-79.44166..."


In [33]:
comm_lat = []
comm_lon = []
for i in range(comm_ctr_df.geometry.shape[0]):
    coords = comm_ctr_df.geometry[i].split(r"'coordinates': (")[1].split(r")")[0].split(",")
    comm_lat.append(float(coords[1]))
    comm_lon.append(float(coords[0]))

comm_ctr_df["comm_lat"] = comm_lat
comm_ctr_df["comm_lon"] = comm_lon

comm_ctr_df.drop(["geometry"],axis=1,inplace=True)
print("The dataset contains {} rows and {} columns".format(comm_ctr_df.shape[0],comm_ctr_df.shape[1]))
comm_ctr_df.head()

The dataset contains 152 rows and 3 columns


Unnamed: 0,AGENCY_NAME,comm_lat,comm_lon
0,Community Centre 55,43.682093,-79.299131
1,519 Community Centre,43.666552,-79.380943
2,Applegrove Community Complex,43.666287,-79.321999
3,Arab Community Centre of Toronto,43.644644,-79.568023
4,BBYO Ontario,43.762823,-79.441666


#### 5.6.2 Calculate distance of each community center to the center of the neighborhood

In [34]:
calc_distance_df(TO_df = comm_ctr_df,TO_lat = "comm_lat" ,TO_lon ="comm_lon",
                 FROM_df = neighborhood_df,FROM_lat = "LATITUDE",FROM_lon = "LONGITUDE",FROM_name = "AREA_NAME")

1 set(s) calculated! Remaining:  139 set(s)
2 set(s) calculated! Remaining:  138 set(s)
3 set(s) calculated! Remaining:  137 set(s)
4 set(s) calculated! Remaining:  136 set(s)
5 set(s) calculated! Remaining:  135 set(s)
6 set(s) calculated! Remaining:  134 set(s)
7 set(s) calculated! Remaining:  133 set(s)
8 set(s) calculated! Remaining:  132 set(s)
9 set(s) calculated! Remaining:  131 set(s)
10 set(s) calculated! Remaining:  130 set(s)
11 set(s) calculated! Remaining:  129 set(s)
12 set(s) calculated! Remaining:  128 set(s)
13 set(s) calculated! Remaining:  127 set(s)
14 set(s) calculated! Remaining:  126 set(s)
15 set(s) calculated! Remaining:  125 set(s)
16 set(s) calculated! Remaining:  124 set(s)
17 set(s) calculated! Remaining:  123 set(s)
18 set(s) calculated! Remaining:  122 set(s)
19 set(s) calculated! Remaining:  121 set(s)
20 set(s) calculated! Remaining:  120 set(s)
21 set(s) calculated! Remaining:  119 set(s)
22 set(s) calculated! Remaining:  118 set(s)
23 set(s) calculate

In [35]:
print("The dataset contains {} rows and {} columns".format(comm_ctr_df.shape[0],comm_ctr_df.shape[1]))
comm_ctr_df.head()

The dataset contains 152 rows and 143 columns


Unnamed: 0,AGENCY_NAME,comm_lat,comm_lon,Wychwood (94),Yonge-Eglinton (100),Yonge-St.Clair (97),York University Heights (27),Yorkdale-Glen Park (31),Lambton Baby Point (114),Lansing-Westgate (38),...,Humewood-Cedarvale (106),Ionview (125),Islington-City Centre West (14),Junction Area (90),Keelesdale-Eglinton West (110),Kennedy Park (124),Kensington-Chinatown (78),Kingsview Village-The Westway (6),Kingsway South (15),L'Amoreaux (117)
0,Community Centre 55,43.682093,-79.299131,10207.392595,8787.574786,7987.067086,17892.47421,13239.189576,16115.949599,12914.456607,...,10415.922874,6296.761262,20425.011353,13984.788932,13899.052172,5750.998471,8524.401472,20140.985879,17346.571189,12681.655507
1,519 Community Centre,43.666552,-79.380943,3774.672674,4614.012986,2732.704677,14039.525918,8141.911408,9339.467924,10365.922479,...,4670.018122,11614.913932,13605.792134,7300.399726,7602.222962,11723.242178,1952.854316,13933.459628,10556.412088,15328.654412
2,Applegrove Community Complex,43.666287,-79.321999,8431.44112,7841.079383,6570.985137,17405.768558,12147.068404,14073.156075,12811.640979,...,8966.218522,8651.198301,18223.730344,12054.219146,12242.92828,8248.424672,6231.861983,18570.970587,15277.234394,14394.554758
3,Arab Community Centre of Toronto,43.644644,-79.568023,12041.342234,14844.072648,14539.258701,14889.792576,11854.46516,5977.812819,16785.399116,...,12452.526135,25872.445354,2348.940357,8208.115747,9029.062913,26383.64654,13813.884299,6253.535711,4738.417377,26466.465759
4,BBYO Ontario,43.762823,-79.441666,9632.836331,7150.479971,9045.743952,3815.835533,5492.725205,12504.224391,1661.037314,...,8018.506758,13964.884999,16544.495958,10817.223783,8895.435091,15178.098233,12657.527668,11113.235107,13354.072009,10901.524391


In [740]:
comm_ctr_df["Group"]="Museum"
comm_ctr_df.head()

Unnamed: 0,AGENCY_NAME,comm_lat,comm_lon,Wychwood (94),Yonge-Eglinton (100),Yonge-St.Clair (97),York University Heights (27),Yorkdale-Glen Park (31),Lambton Baby Point (114),Lansing-Westgate (38),...,Ionview (125),Islington-City Centre West (14),Junction Area (90),Keelesdale-Eglinton West (110),Kennedy Park (124),Kensington-Chinatown (78),Kingsview Village-The Westway (6),Kingsway South (15),L'Amoreaux (117),Group
0,Community Centre 55,43.682093,-79.299131,10207.392595,8787.574786,7987.067086,17892.47421,13239.189576,16115.949599,12914.456607,...,6296.761262,20425.011353,13984.788932,13899.052172,5750.998471,8524.401472,20140.985879,17346.571189,12681.655507,Museum
1,519 Community Centre,43.666552,-79.380943,3774.672674,4614.012986,2732.704677,14039.525918,8141.911408,9339.467924,10365.922479,...,11614.913932,13605.792134,7300.399726,7602.222962,11723.242178,1952.854316,13933.459628,10556.412088,15328.654412,Museum
2,Applegrove Community Complex,43.666287,-79.321999,8431.44112,7841.079383,6570.985137,17405.768558,12147.068404,14073.156075,12811.640979,...,8651.198301,18223.730344,12054.219146,12242.92828,8248.424672,6231.861983,18570.970587,15277.234394,14394.554758,Museum
3,Arab Community Centre of Toronto,43.644644,-79.568023,12041.342234,14844.072648,14539.258701,14889.792576,11854.46516,5977.812819,16785.399116,...,25872.445354,2348.940357,8208.115747,9029.062913,26383.64654,13813.884299,6253.535711,4738.417377,26466.465759,Museum
4,BBYO Ontario,43.762823,-79.441666,9632.836331,7150.479971,9045.743952,3815.835533,5492.725205,12504.224391,1661.037314,...,13964.884999,16544.495958,10817.223783,8895.435091,15178.098233,12657.527668,11113.235107,13354.072009,10901.524391,Museum


#### 5.7 Request venues form Foursquare [requested: 29.10.2020]

In [40]:
VERSION = '20180605' # Foursquare API version
LIMIT = 100000 # A default Foursquare API limit value

In [48]:
def getNearbyVenues(names, latitudes, longitudes, radius=2500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [49]:
toronto_venues = getNearbyVenues(neighborhood_df['AREA_NAME'], neighborhood_df['LATITUDE'], neighborhood_df['LONGITUDE'], radius=2500)
toronto_venues.head()

Wychwood (94)
Yonge-Eglinton (100)
Yonge-St.Clair (97)
York University Heights (27)
Yorkdale-Glen Park (31)
Lambton Baby Point (114)
Lansing-Westgate (38)
Lawrence Park North (105)
Lawrence Park South (103)
Leaside-Bennington (56)
Little Portugal (84)
Long Branch (19)
Malvern (132)
Maple Leaf (29)
Markland Wood (12)
Milliken (130)
Mimico (includes Humber Bay Shores) (17)
Morningside (135)
Moss Park (73)
Mount Dennis (115)
Mount Olive-Silverstone-Jamestown (2)
Mount Pleasant East (99)
Mount Pleasant West (104)
New Toronto (18)
Newtonbrook East (50)
Newtonbrook West (36)
Niagara (82)
North Riverdale (68)
North St.James Town (74)
O'Connor-Parkview (54)
Oakridge (121)
Oakwood Village (107)
Old East York (58)
Palmerston-Little Italy (80)
Parkwoods-Donalda (45)
Pelmo Park-Humberlea (23)
Playter Estates-Danforth (67)
Pleasant View (46)
Princess-Rosethorn (10)
Regent Park (72)
Rexdale-Kipling (4)
Rockcliffe-Smythe (111)
Roncesvalles (86)
Rosedale-Moore Park (98)
Rouge (131)
Runnymede-Bloor Wes

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Wychwood (94),43.676919,-79.425515,Wychwood Barns Farmers' Market,43.68001,-79.423849,Farmers Market
1,Wychwood (94),43.676919,-79.425515,Wychwood Barns,43.680028,-79.42381,Event Space
2,Wychwood (94),43.676919,-79.425515,Pukka Restaurant,43.681055,-79.429187,Indian Restaurant
3,Wychwood (94),43.676919,-79.425515,The Stockyards,43.68157,-79.42621,BBQ Joint
4,Wychwood (94),43.676919,-79.425515,CocoaLatte,43.681768,-79.425158,Café


In [50]:
print("There are {} venues in Toronto".format(toronto_venues.shape[0]))

There are 12170 venues in Toronto


#### 5.7.1 Remove duplicates in the dataset

In [56]:
print("There are {} venues duplicated".format(toronto_venues.duplicated(subset = ['Venue','Venue Latitude','Venue Longitude']).sum()))

7869


In [58]:
toronto_venues = toronto_venues.drop_duplicates(subset=['Venue','Venue Latitude','Venue Longitude'])
toronto_venues.drop(["Neighborhood","Neighborhood Latitude","Neighborhood Longitude"],axis=1,inplace=True)
print("There are {} venues in Toronto".format(toronto_venues.shape[0]))
toronto_venues.head()

There are 4301 venues in Toronto


Unnamed: 0,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Wychwood Barns Farmers' Market,43.68001,-79.423849,Farmers Market
1,Wychwood Barns,43.680028,-79.42381,Event Space
2,Pukka Restaurant,43.681055,-79.429187,Indian Restaurant
3,The Stockyards,43.68157,-79.42621,BBQ Joint
4,CocoaLatte,43.681768,-79.425158,Café


In [79]:
toronto_venues.rename(columns={"Venue Latitude": "Latitude", "Venue Longitude": "Longitude","Venue Category":"Category"},inplace=True)
toronto_venues.reset_index(drop=True,inplace=True)
toronto_venues.head()

Unnamed: 0,Venue,Latitude,Longitude,Category
0,Wychwood Barns Farmers' Market,43.68001,-79.423849,Farmers Market
1,Wychwood Barns,43.680028,-79.42381,Event Space
2,Pukka Restaurant,43.681055,-79.429187,Indian Restaurant
3,The Stockyards,43.68157,-79.42621,BBQ Joint
4,CocoaLatte,43.681768,-79.425158,Café


#### 5.7.2 Calculate the distance from each venue to the center of the neighborhood

In [82]:
calc_distance_df(TO_df=toronto_venues, TO_lat="Latitude", TO_lon="Longitude",
                 FROM_df = neighborhood_df, FROM_lat = "LATITUDE", FROM_lon = "LONGITUDE", FROM_name = "AREA_NAME")

1 set(s) calculated! Remaining:  139 set(s)
2 set(s) calculated! Remaining:  138 set(s)
3 set(s) calculated! Remaining:  137 set(s)
4 set(s) calculated! Remaining:  136 set(s)
5 set(s) calculated! Remaining:  135 set(s)
6 set(s) calculated! Remaining:  134 set(s)
7 set(s) calculated! Remaining:  133 set(s)
8 set(s) calculated! Remaining:  132 set(s)
9 set(s) calculated! Remaining:  131 set(s)
10 set(s) calculated! Remaining:  130 set(s)
11 set(s) calculated! Remaining:  129 set(s)
12 set(s) calculated! Remaining:  128 set(s)
13 set(s) calculated! Remaining:  127 set(s)
14 set(s) calculated! Remaining:  126 set(s)
15 set(s) calculated! Remaining:  125 set(s)
16 set(s) calculated! Remaining:  124 set(s)
17 set(s) calculated! Remaining:  123 set(s)
18 set(s) calculated! Remaining:  122 set(s)
19 set(s) calculated! Remaining:  121 set(s)
20 set(s) calculated! Remaining:  120 set(s)
21 set(s) calculated! Remaining:  119 set(s)
22 set(s) calculated! Remaining:  118 set(s)
23 set(s) calculate

In [83]:
print("The dataset contains {} rows and {} columns".format(toronto_venues.shape[0],toronto_venues.shape[1]))
toronto_venues.head()

The dataset contains 4301 rows and 144 columns


Unnamed: 0,Venue,Latitude,Longitude,Category,Wychwood (94),Yonge-Eglinton (100),Yonge-St.Clair (97),York University Heights (27),Yorkdale-Glen Park (31),Lambton Baby Point (114),...,Humewood-Cedarvale (106),Ionview (125),Islington-City Centre West (14),Junction Area (90),Keelesdale-Eglinton West (110),Kennedy Park (124),Kensington-Chinatown (78),Kingsview Village-The Westway (6),Kingsway South (15),L'Amoreaux (117)
0,Wychwood Barns Farmers' Market,43.68001,-79.423849,Farmers Market,368.794665,3191.536781,2268.875527,10871.175669,4692.46875,6340.661378,...,1299.421379,13663.344828,10937.105455,4067.364508,3889.386717,14114.598661,3639.50963,10218.489484,7588.923034,15603.150195
1,Wychwood Barns,43.680028,-79.42381,Event Space,371.776434,3188.240868,2265.219105,10870.967738,4692.652052,6344.324861,...,1298.26494,13659.65381,10940.803504,4070.980244,3892.166059,14110.959854,3639.245211,10221.157027,7592.581423,15599.751149
2,Pukka Restaurant,43.681055,-79.429187,Indian Restaurant,546.646894,3339.736841,2635.756678,10566.987177,4360.766585,5997.584387,...,1152.430441,13998.37515,10616.876984,3708.11397,3446.019762,14476.955589,3996.890973,9773.353638,7242.20624,15756.791034
3,The Stockyards,43.68157,-79.42621,BBQ Joint,519.790579,3150.136931,2389.388809,10627.907583,4441.827253,6238.746856,...,1095.244545,13757.214632,10853.848663,3951.542755,3675.877431,14231.817084,3892.102904,9997.185665,7484.185515,15570.156688
4,CocoaLatte,43.681768,-79.425158,Café,539.513265,3083.635083,2301.847951,10649.181119,4471.940719,6324.807821,...,1086.062642,13671.290535,10938.538717,4038.318142,3757.399654,14144.613497,3859.596992,10076.193162,7570.482327,15503.185185


### 5.7.3 Simplify categories

In [94]:
print("There are {} unique venue categories".format(len(toronto_venues["Category"].unique())))

There are 324 unique venue categories


In [270]:
def simplify_categories(full_list, cat_list,out_list):
    for i in cat_list:
        temp_posit = [i in a for a in full_list] #Look for matching entries
        temp_pos_vals = (np.array(full_list)[np.array(temp_posit)]).tolist()
        out_list.extend(temp_pos_vals) #Add the entries to the specific collection

        
        for j in temp_pos_vals:
            full_list.remove(j) #Remove the entries from the initial collection 



### 5.7.3.1 All Categories

In [671]:
temp_cats = toronto_venues["Category"].unique().tolist()


### 5.7.3.2 Food

Latin American

In [672]:
lat_am=[]
lat_am_cat = ['South American','Mexican','Latin American','Caribbean', 'Brazilian','Cuban','Peruvian','Argentinian','Taco Place','Burrito','Creole']
simplify_categories(full_list=temp_cats, cat_list=lat_am_cat, out_list=lat_am)
lat_am

['South American Restaurant',
 'Mexican Restaurant',
 'Latin American Restaurant',
 'Caribbean Restaurant',
 'Brazilian Restaurant',
 'Cuban Restaurant',
 'Peruvian Restaurant',
 'Argentinian Restaurant',
 'Taco Place',
 'Burrito Place',
 'Cajun / Creole Restaurant']

Asian

In [673]:
asian=[]
asian_cat = ['Thai','Sushi','Japanese','Asian','Xinjiang', 'Noodle', 'Hakka','Dumpling', 'Cantonese', 'Malay','Ramen',
       'Hong Kong','Indonesian','Korean', 'Chinese','Filipino','Vietnamese','Taiwanese']
simplify_categories(full_list=temp_cats, cat_list=asian_cat, out_list=asian)
asian


['Thai Restaurant',
 'Sushi Restaurant',
 'Japanese Restaurant',
 'Asian Restaurant',
 'Xinjiang Restaurant',
 'Noodle House',
 'Hakka Restaurant',
 'Dumpling Restaurant',
 'Cantonese Restaurant',
 'Malay Restaurant',
 'Ramen Restaurant',
 'Hong Kong Restaurant',
 'Indonesian Restaurant',
 'Korean Restaurant',
 'Chinese Restaurant',
 'Filipino Restaurant',
 'Vietnamese Restaurant',
 'Taiwanese Restaurant']

Indian Pensinsula

In [674]:
indo=[]
indo_cat = ['Indian','Pakistan','Sri Lankan','Tibetan']
simplify_categories(full_list=temp_cats, cat_list=indo_cat, out_list=indo)
indo


['Indian Restaurant',
 'Pakistani Restaurant',
 'Sri Lankan Restaurant',
 'Tibetan Restaurant']

Africa and middle east

In [675]:
mid_east=[]
mid_east_cat = ['Afghan','Syrian','Falafel','Doner', 'Jewish', 'Ethiopian', 'Egyptian',
       'Persian','Middle Eastern','Turkish','African']
simplify_categories(full_list=temp_cats, cat_list=mid_east_cat, out_list=mid_east)
mid_east

['Afghan Restaurant',
 'Syrian Restaurant',
 'Falafel Restaurant',
 'Doner Restaurant',
 'Jewish Restaurant',
 'Ethiopian Restaurant',
 'Egyptian Restaurant',
 'Persian Restaurant',
 'Middle Eastern Restaurant',
 'Turkish Restaurant',
 'African Restaurant']

Iberic and Mediteranean

In [676]:
medit = []
medit_cat = ['Spanish','Portuguese','Greek','Mediterranean','Italian','French','Pizza','Tapas']
simplify_categories(full_list=temp_cats, cat_list=medit_cat, out_list=medit)
medit


['Spanish Restaurant',
 'Portuguese Restaurant',
 'Greek Restaurant',
 'Mediterranean Restaurant',
 'Italian Restaurant',
 'French Restaurant',
 'Pizza Place',
 'Tapas Restaurant']

Other european

In [677]:
ctr_eur = []
ctr_eur_cats = ['German','Swiss','Switzerland','Caucasian','European','Hungarian']
simplify_categories(full_list=temp_cats, cat_list=ctr_eur_cats, out_list=ctr_eur)
ctr_eur


['German Restaurant',
 'Swiss Restaurant',
 'Caucasian Restaurant',
 'Eastern European Restaurant',
 'Modern European Restaurant',
 'Hungarian Restaurant']

Other food

In [678]:
gral_food = []
gral_food_cats = ["BBQ",'Vegan','Food','Diner','Restaurant','Burger','Steakhouse','Soup','Salad','Fried','Snack','Bistro','Poke','Pide','Wings Joint']
simplify_categories(full_list=temp_cats, cat_list=gral_food_cats, out_list=gral_food)
gral_food


['BBQ Joint',
 'Vegetarian / Vegan Restaurant',
 'Health Food Store',
 'Food & Drink Shop',
 'Fast Food Restaurant',
 'Food Court',
 'Comfort Food Restaurant',
 'Food Truck',
 'Food Stand',
 'Diner',
 'American Restaurant',
 'Restaurant',
 'Seafood Restaurant',
 'New American Restaurant',
 'Hawaiian Restaurant',
 'Theme Restaurant',
 'Dim Sum Restaurant',
 'Hotpot Restaurant',
 'Halal Restaurant',
 'Szechuan Restaurant',
 'Burger Joint',
 'Steakhouse',
 'Soup Place',
 'Salad Place',
 'Fried Chicken Joint',
 'Snack Place',
 'Bistro',
 'Poke Place',
 'Pide Place',
 'Wings Joint']

### 5.7.3.3 Indoor Attractions

Cinema and theaters

In [679]:
cinema = []
cinema_cats = ["Theater",'Circus','Amphitheater']
simplify_categories(full_list=temp_cats, cat_list=cinema_cats, out_list=cinema)
cinema


['Indie Movie Theater', 'Movie Theater', 'Theater', 'Circus', 'Amphitheater']

Museums

In [680]:
museum=[]
museum_cat=["Museum",'Historic','Castle','Recreation Center','Community Center',"Gallery"]
simplify_categories(full_list=temp_cats, cat_list=museum_cat, out_list=museum)
museum


['Museum',
 'Science Museum',
 'History Museum',
 'Historic Site',
 'Castle',
 'Recreation Center',
 'Community Center',
 'Art Gallery']

Spas and Saloons

In [681]:
beauty=[]
beauty_cat=["Beauty",'Parlor','Spa','Salon','Massage','Bath']
simplify_categories(full_list=temp_cats, cat_list=beauty_cat, out_list=beauty)
beauty


['Health & Beauty Service',
 'Tattoo Parlor',
 'Event Space',
 'Spa',
 'Salon / Barbershop',
 'Nail Salon',
 'Massage Studio',
 'Bath House']

Other indoor attractions

In [682]:
attr_in = []
attr_in_cat = ['Escape Room','Laser','Indoor','General Entertainment','Bowling','Arcade']
simplify_categories(full_list=temp_cats, cat_list=attr_in_cat, out_list=attr_in)
attr_in


['Escape Room',
 'Laser Tag',
 'Indoor Play Area',
 'General Entertainment',
 'Bowling Alley',
 'Arcade']

### 5.7.3.4 Cafe and Bar

Coffee

In [683]:
cafe = []
cafe_cats = ["Cafe",'Bakery', 'Sandwich','Breakfast','Tea','Café','Creperie']
simplify_categories(full_list=temp_cats, cat_list=cafe_cats, out_list=cafe)
cafe


['Gaming Cafe',
 'Bakery',
 'Sandwich Place',
 'Breakfast Spot',
 'Tea Room',
 'Bubble Tea Shop',
 'Café',
 'Creperie']

Pubs and Nightlife

In [684]:
pubs = []
pubs_cats = ["Bar","Beer",'Pub','Nightclub','pub','Brewery','Club','Lounge','Speakeasy','Music Venue','Performing Arts Venue']
simplify_categories(full_list=temp_cats, cat_list=pubs_cats, out_list=pubs)
pubs


['Bar',
 'Cocktail Bar',
 'Wine Bar',
 'Beer Bar',
 'Juice Bar',
 'Sports Bar',
 'Whisky Bar',
 'Hotel Bar',
 'Hookah Bar',
 'Beach Bar',
 'Dive Bar',
 'Karaoke Bar',
 'Beer Store',
 'Pub',
 'Nightclub',
 'Gastropub',
 'Brewery',
 'Comedy Club',
 'Sports Club',
 'Rock Club',
 'Lounge',
 'Speakeasy',
 'Music Venue',
 'Performing Arts Venue']

### 5.7.3.5 Sports

In [685]:
sports = []
sports_cats = ["Gym",'gym', 'sports','Sports','Pool','Yoga','Ski','Rock Climbing','Dance','Martial Arts','Court','Curling','Rink','Racetrack','Racecourse']
simplify_categories(full_list=temp_cats, cat_list=sports_cats, out_list=sports)
sports


['Gym',
 'Gym / Fitness Center',
 'Climbing Gym',
 'Gym Pool',
 'Gymnastics Gym',
 'Athletics & Sports',
 'Pool Hall',
 'Pool',
 'Yoga Studio',
 'Ski Chalet',
 'Ski Area',
 'Rock Climbing Spot',
 'Dance Studio',
 'Martial Arts School',
 'Basketball Court',
 'Tennis Court',
 'Badminton Court',
 'Curling Ice',
 'Skating Rink',
 'Racetrack',
 'Racecourse']

### 5.7.3.6 Outdoor Attractions

General outdoor attractions

In [686]:
attr_out = []
attr_out_cat=['Track','Dog Run','Lake','Trail','Golf','Garden','Park','Zoo','Monument',"Neighborhood",'Harbor','Fountain','Aquarium','Outdoors','Scenic','Field','Beach','Campground','Playground','Farm','Stables']
simplify_categories(full_list=temp_cats, cat_list=attr_out_cat, out_list=attr_out)
attr_out


['Track',
 'Dog Run',
 'Lake',
 'Trail',
 'Golf Course',
 'Garden',
 'Botanical Garden',
 'Garden Center',
 'Park',
 'Theme Park',
 'National Park',
 'Skate Park',
 'Zoo',
 'Zoo Exhibit',
 'Monument / Landmark',
 'Neighborhood',
 'Harbor / Marina',
 'Fountain',
 'Aquarium',
 'Other Great Outdoors',
 'Scenic Lookout',
 'Field',
 'Paintball Field',
 'Baseball Field',
 'Soccer Field',
 'Hockey Field',
 'Beach',
 'Campground',
 'Playground',
 'Farmers Market',
 'Farm',
 'Stables']

Attractions/Venues with lots of people

In [687]:
attr_xl = []
attr_xl_cat=['Stadium','Arena','Hall','Airport','Church']
simplify_categories(full_list=temp_cats, cat_list=attr_xl_cat, out_list=attr_xl)
attr_xl


['Tennis Stadium',
 'Soccer Stadium',
 'Basketball Stadium',
 'Baseball Stadium',
 'Hockey Arena',
 'Concert Hall',
 'Airport',
 'Church']

### 5.7.3.7 Commerce

Markets

In [688]:
markets=[]
market_cat = ['market','Market','Grocery','Gourmet','Butcher','Bodega']
simplify_categories(full_list=temp_cats, cat_list=market_cat, out_list=markets)
markets


['Supermarket',
 'Fish Market',
 'Flea Market',
 'Market',
 'Grocery Store',
 'Organic Grocery',
 'Gourmet Shop',
 'Butcher',
 'Deli / Bodega']

Banks

In [689]:
bank=[]
bank_cat=["Bank","Financial"]
simplify_categories(full_list=temp_cats, cat_list=bank_cat, out_list=bank)
bank


['Bank', 'Financial or Legal Service']

Hotels

In [690]:
hotel=[]
hotel_cat=['Hotel', 'Hostel']
simplify_categories(full_list=temp_cats, cat_list=hotel_cat, out_list=hotel)
hotel


['Hotel', 'Hostel']

Medical

In [691]:
pharm=[]
pharm_cat=['Pharmacy',"Doctor's Office"]
simplify_categories(full_list=temp_cats, cat_list=pharm_cat, out_list=pharm)
pharm


['Pharmacy', "Doctor's Office"]

General Shops

In [692]:
shops=[]
shops_cat = ['Shop','shop','Store','store','Plaza','Studio','Boutique','Service','Laundromat','Auto Dealership', 'Rental Car Location']
simplify_categories(full_list=temp_cats, cat_list=shops_cat, out_list=shops)
shops


['Ice Cream Shop',
 'Coffee Shop',
 'Cupcake Shop',
 'Dessert Shop',
 'Gift Shop',
 'Frozen Yogurt Shop',
 'Pastry Shop',
 'Bagel Shop',
 'Flower Shop',
 'Sporting Goods Shop',
 'Smoke Shop',
 'Comic Shop',
 'Shopping Mall',
 'Cosmetics Shop',
 'Chocolate Shop',
 'Bike Shop',
 'Fish & Chips Shop',
 'Antique Shop',
 'Bridal Shop',
 'Miscellaneous Shop',
 'Donut Shop',
 'Hobby Shop',
 'Smoothie Shop',
 'Print Shop',
 'Board Shop',
 'Record Shop',
 'Shopping Plaza',
 'Optical Shop',
 'Supplement Shop',
 'Cheese Shop',
 'Souvlaki Shop',
 'Automotive Shop',
 'Mobile Phone Shop',
 'Music Store',
 'Arts & Crafts Store',
 'Liquor Store',
 'Baby Store',
 'Jewelry Store',
 "Men's Store",
 'Furniture / Home Store',
 'Discount Store',
 'Hardware Store',
 'Clothing Store',
 'Toy / Game Store',
 'Electronics Store',
 'Department Store',
 'Kitchen Supply Store',
 'Warehouse Store',
 "Women's Store",
 'Pet Store',
 'Lingerie Store',
 'Thrift / Vintage Store',
 'Shoe Store',
 'Convenience Store',
 'Pap

### 5.7.3.8 Unused categories
Categories that do not provide useful information of the hood regarding their commodities (e.g., Building).
Additionally, categories for which we have state annotated data, which should be better curated (e.g., Schools, public transport)

In [693]:
unused = []
unused_cat=['Station','Office','Housing','Residential','Construction','Bus','Startup',
            'Cemetery','Street Art','Building','Moving Target','Distribution Center','Intersection','Storage Facility','Bridge',"School","school","College"]
simplify_categories(full_list=temp_cats, cat_list=unused_cat, out_list=unused)
unused

['Gas Station',
 'Train Station',
 'Bus Station',
 'Light Rail Station',
 'Tram Station',
 'Metro Station',
 'Office',
 'Housing Development',
 'Residential Building (Apartment / Condo)',
 'Construction & Landscaping',
 'Bus Stop',
 'Bus Line',
 'Tech Startup',
 'Cemetery',
 'Street Art',
 'Government Building',
 'Building',
 'Moving Target',
 'Distribution Center',
 'Intersection',
 'Storage Facility',
 'Bridge',
 'School',
 'Music School',
 'College Quad',
 'College Rec Center']

### 5.7.4 Collect all categories

Number of categories

In [696]:
len([lat_am,asian,indo,mid_east,medit,ctr_eur,gral_food,cinema,museum,beauty,attr_in,cafe,pubs,sports,attr_out,attr_xl,markets,bank,hotel,pharm,shops,unused])

22

Assign each category to a dictionary

In [697]:
food_cats = {"Food Lat Am":lat_am, "Food Asian":asian, "Food Indo":indo,  
            "Food Afr ME":mid_east, "Food Mediterrean":medit, "Food Other Europe":ctr_eur, "Food Gral":gral_food,}

indoor_cats = {"Cinema":cinema, "Museum":museum, "Salon":beauty, "Indoor Attr":attr_in}

cafes_cats = {"Cafe":cafe, "Pub":pubs}

sports_cats = {"Sports":sports}

outdoor_cats = {"Outdoor Attr":attr_out, "Large Oudoor Attr":attr_xl}

commerce_cats = {"Markets":markets, "Banks":bank, "Hotel":hotel, "Medical":pharm, "Shops":shops}

unused_cats = {"Non usable":unused}

### 5.7.5 Add new category to the venues dataframe

Define new column containing the simplified categories

In [731]:
toronto_venues["Group"]="TBD"
toronto_venues.head()

Unnamed: 0,Venue,Latitude,Longitude,Category,Wychwood (94),Yonge-Eglinton (100),Yonge-St.Clair (97),York University Heights (27),Yorkdale-Glen Park (31),Lambton Baby Point (114),...,Ionview (125),Islington-City Centre West (14),Junction Area (90),Keelesdale-Eglinton West (110),Kennedy Park (124),Kensington-Chinatown (78),Kingsview Village-The Westway (6),Kingsway South (15),L'Amoreaux (117),Group
0,Wychwood Barns Farmers' Market,43.68001,-79.423849,Farmers Market,368.794665,3191.536781,2268.875527,10871.175669,4692.46875,6340.661378,...,13663.344828,10937.105455,4067.364508,3889.386717,14114.598661,3639.50963,10218.489484,7588.923034,15603.150195,TBD
1,Wychwood Barns,43.680028,-79.42381,Event Space,371.776434,3188.240868,2265.219105,10870.967738,4692.652052,6344.324861,...,13659.65381,10940.803504,4070.980244,3892.166059,14110.959854,3639.245211,10221.157027,7592.581423,15599.751149,TBD
2,Pukka Restaurant,43.681055,-79.429187,Indian Restaurant,546.646894,3339.736841,2635.756678,10566.987177,4360.766585,5997.584387,...,13998.37515,10616.876984,3708.11397,3446.019762,14476.955589,3996.890973,9773.353638,7242.20624,15756.791034,TBD
3,The Stockyards,43.68157,-79.42621,BBQ Joint,519.790579,3150.136931,2389.388809,10627.907583,4441.827253,6238.746856,...,13757.214632,10853.848663,3951.542755,3675.877431,14231.817084,3892.102904,9997.185665,7484.185515,15570.156688,TBD
4,CocoaLatte,43.681768,-79.425158,Café,539.513265,3083.635083,2301.847951,10649.181119,4471.940719,6324.807821,...,13671.290535,10938.538717,4038.318142,3757.399654,14144.613497,3859.596992,10076.193162,7570.482327,15503.185185,TBD


In [732]:
toronto_venues['Group'].unique()

array(['TBD'], dtype=object)

Convert TBD in Group Column to actual Group (defined in the dictionaries)

In [733]:
simp_cats = [food_cats,indoor_cats,cafes_cats,sports_cats,outdoor_cats,commerce_cats,unused_cats]
for j in simp_cats:
    for i in list(j):
        toronto_venues.loc[toronto_venues['Category'].isin(j[i]),'Group']=i
    
toronto_venues['Group'].unique()    

array(['Outdoor Attr', 'Salon', 'Food Indo', 'Food Gral', 'Cafe', 'Shops',
       'Food Mediterrean', 'Markets', 'Pub', 'Food Lat Am', 'Museum',
       'Food Asian', 'Food Afr ME', 'Cinema', 'Sports', 'Non usable',
       'Banks', 'Medical', 'Hotel', 'Large Oudoor Attr', 'Indoor Attr',
       'Food Other Europe'], dtype=object)

In [736]:
toronto_venues[["Venue",'Category','Group']].head(10)

Unnamed: 0,Venue,Category,Group
0,Wychwood Barns Farmers' Market,Farmers Market,Outdoor Attr
1,Wychwood Barns,Event Space,Salon
2,Pukka Restaurant,Indian Restaurant,Food Indo
3,The Stockyards,BBQ Joint,Food Gral
4,CocoaLatte,Café,Cafe
5,Bar Ape,Ice Cream Shop,Shops
6,Contra Cafe,Café,Cafe
7,Ferro Bar Cafe,Italian Restaurant,Food Mediterrean
8,Fiesta Farms,Grocery Store,Markets
9,Baker and Scone,Café,Cafe


## 6. Collect all commodity datasets into a single dataset

### 6.1 Uniform columns and column names in each dataset: Include only Venue:"Name of the venue", Latitude, Longitude, Group, Neighborhood Distance

6.1.1 Column Order

In [754]:
column_order = ['Venue', 
 'Latitude',
 'Longitude',
 'Group',
 'Wychwood (94)',
 'Yonge-Eglinton (100)',
 'Yonge-St.Clair (97)',
 'York University Heights (27)',
 'Yorkdale-Glen Park (31)',
 'Lambton Baby Point (114)',
 'Lansing-Westgate (38)',
 'Lawrence Park North (105)',
 'Lawrence Park South (103)',
 'Leaside-Bennington (56)',
 'Little Portugal (84)',
 'Long Branch (19)',
 'Malvern (132)',
 'Maple Leaf (29)',
 'Markland Wood (12)',
 'Milliken (130)',
 'Mimico (includes Humber Bay Shores) (17)',
 'Morningside (135)',
 'Moss Park (73)',
 'Mount Dennis (115)',
 'Mount Olive-Silverstone-Jamestown (2)',
 'Mount Pleasant East (99)',
 'Mount Pleasant West (104)',
 'New Toronto (18)',
 'Newtonbrook East (50)',
 'Newtonbrook West (36)',
 'Niagara (82)',
 'North Riverdale (68)',
 'North St.James Town (74)',
 "O'Connor-Parkview (54)",
 'Oakridge (121)',
 'Oakwood Village (107)',
 'Old East York (58)',
 'Palmerston-Little Italy (80)',
 'Parkwoods-Donalda (45)',
 'Pelmo Park-Humberlea (23)',
 'Playter Estates-Danforth (67)',
 'Pleasant View (46)',
 'Princess-Rosethorn (10)',
 'Regent Park (72)',
 'Rexdale-Kipling (4)',
 'Rockcliffe-Smythe (111)',
 'Roncesvalles (86)',
 'Rosedale-Moore Park (98)',
 'Rouge (131)',
 'Runnymede-Bloor West Village (89)',
 'Rustic (28)',
 'Scarborough Village (139)',
 'South Parkdale (85)',
 'South Riverdale (70)',
 'St.Andrew-Windfields (40)',
 'Steeles (116)',
 'Stonegate-Queensway (16)',
 "Tam O'Shanter-Sullivan (118)",
 'Taylor-Massey (61)',
 'The Beaches (63)',
 'Thistletown-Beaumond Heights (3)',
 'Thorncliffe Park (55)',
 'Trinity-Bellwoods (81)',
 'University (79)',
 'Victoria Village (43)',
 'Waterfront Communities-The Island (77)',
 'West Hill (136)',
 'West Humber-Clairville (1)',
 'Westminster-Branson (35)',
 'Weston (113)',
 'Weston-Pellam Park (91)',
 'Wexford/Maryvale (119)',
 'Willowdale East (51)',
 'Willowdale West (37)',
 'Willowridge-Martingrove-Richview (7)',
 'Woburn (137)',
 'Woodbine Corridor (64)',
 'Woodbine-Lumsden (60)',
 'Agincourt North (129)',
 'Agincourt South-Malvern West (128)',
 'Alderwood (20)',
 'Annex (95)',
 'Banbury-Don Mills (42)',
 'Bathurst Manor (34)',
 'Bay Street Corridor (76)',
 'Bayview Village (52)',
 'Bayview Woods-Steeles (49)',
 'Bedford Park-Nortown (39)',
 'Beechborough-Greenbrook (112)',
 'Bendale (127)',
 'Birchcliffe-Cliffside (122)',
 'Black Creek (24)',
 'Blake-Jones (69)',
 'Briar Hill-Belgravia (108)',
 'Bridle Path-Sunnybrook-York Mills (41)',
 'Broadview North (57)',
 'Brookhaven-Amesbury (30)',
 'Cabbagetown-South St.James Town (71)',
 'Caledonia-Fairbank (109)',
 'Casa Loma (96)',
 'Centennial Scarborough (133)',
 'Church-Yonge Corridor (75)',
 'Clairlea-Birchmount (120)',
 'Clanton Park (33)',
 'Cliffcrest (123)',
 'Corso Italia-Davenport (92)',
 'Danforth (66)',
 'Danforth East York (59)',
 'Don Valley Village (47)',
 'Dorset Park (126)',
 'Dovercourt-Wallace Emerson-Junction (93)',
 'Downsview-Roding-CFB (26)',
 'Dufferin Grove (83)',
 'East End-Danforth (62)',
 'Edenbridge-Humber Valley (9)',
 'Eglinton East (138)',
 'Elms-Old Rexdale (5)',
 'Englemount-Lawrence (32)',
 'Eringate-Centennial-West Deane (11)',
 'Etobicoke West Mall (13)',
 'Flemingdon Park (44)',
 'Forest Hill North (102)',
 'Forest Hill South (101)',
 'Glenfield-Jane Heights (25)',
 'Greenwood-Coxwell (65)',
 'Guildwood (140)',
 'Henry Farm (53)',
 'High Park North (88)',
 'High Park-Swansea (87)',
 'Highland Creek (134)',
 'Hillcrest Village (48)',
 'Humber Heights-Westmount (8)',
 'Humber Summit (21)',
 'Humbermede (22)',
 'Humewood-Cedarvale (106)',
 'Ionview (125)',
 'Islington-City Centre West (14)',
 'Junction Area (90)',
 'Keelesdale-Eglinton West (110)',
 'Kennedy Park (124)',
 'Kensington-Chinatown (78)',
 'Kingsview Village-The Westway (6)',
 'Kingsway South (15)',
 "L'Amoreaux (117)"]

Toronto venues -> Remove category column

In [756]:
toronto_venues.head()

Unnamed: 0,Venue,Latitude,Longitude,Category,Wychwood (94),Yonge-Eglinton (100),Yonge-St.Clair (97),York University Heights (27),Yorkdale-Glen Park (31),Lambton Baby Point (114),...,Ionview (125),Islington-City Centre West (14),Junction Area (90),Keelesdale-Eglinton West (110),Kennedy Park (124),Kensington-Chinatown (78),Kingsview Village-The Westway (6),Kingsway South (15),L'Amoreaux (117),Group
0,Wychwood Barns Farmers' Market,43.68001,-79.423849,Farmers Market,368.794665,3191.536781,2268.875527,10871.175669,4692.46875,6340.661378,...,13663.344828,10937.105455,4067.364508,3889.386717,14114.598661,3639.50963,10218.489484,7588.923034,15603.150195,Outdoor Attr
1,Wychwood Barns,43.680028,-79.42381,Event Space,371.776434,3188.240868,2265.219105,10870.967738,4692.652052,6344.324861,...,13659.65381,10940.803504,4070.980244,3892.166059,14110.959854,3639.245211,10221.157027,7592.581423,15599.751149,Salon
2,Pukka Restaurant,43.681055,-79.429187,Indian Restaurant,546.646894,3339.736841,2635.756678,10566.987177,4360.766585,5997.584387,...,13998.37515,10616.876984,3708.11397,3446.019762,14476.955589,3996.890973,9773.353638,7242.20624,15756.791034,Food Indo
3,The Stockyards,43.68157,-79.42621,BBQ Joint,519.790579,3150.136931,2389.388809,10627.907583,4441.827253,6238.746856,...,13757.214632,10853.848663,3951.542755,3675.877431,14231.817084,3892.102904,9997.185665,7484.185515,15570.156688,Food Gral
4,CocoaLatte,43.681768,-79.425158,Café,539.513265,3083.635083,2301.847951,10649.181119,4471.940719,6324.807821,...,13671.290535,10938.538717,4038.318142,3757.399654,14144.613497,3859.596992,10076.193162,7570.482327,15503.185185,Cafe


In [757]:
temp_toronto_venues = toronto_venues.drop(["Category"],axis=1)
temp_toronto_venues = temp_toronto_venues[column_order]
temp_toronto_venues.head()

Unnamed: 0,Venue,Latitude,Longitude,Group,Wychwood (94),Yonge-Eglinton (100),Yonge-St.Clair (97),York University Heights (27),Yorkdale-Glen Park (31),Lambton Baby Point (114),...,Humewood-Cedarvale (106),Ionview (125),Islington-City Centre West (14),Junction Area (90),Keelesdale-Eglinton West (110),Kennedy Park (124),Kensington-Chinatown (78),Kingsview Village-The Westway (6),Kingsway South (15),L'Amoreaux (117)
0,Wychwood Barns Farmers' Market,43.68001,-79.423849,Outdoor Attr,368.794665,3191.536781,2268.875527,10871.175669,4692.46875,6340.661378,...,1299.421379,13663.344828,10937.105455,4067.364508,3889.386717,14114.598661,3639.50963,10218.489484,7588.923034,15603.150195
1,Wychwood Barns,43.680028,-79.42381,Salon,371.776434,3188.240868,2265.219105,10870.967738,4692.652052,6344.324861,...,1298.26494,13659.65381,10940.803504,4070.980244,3892.166059,14110.959854,3639.245211,10221.157027,7592.581423,15599.751149
2,Pukka Restaurant,43.681055,-79.429187,Food Indo,546.646894,3339.736841,2635.756678,10566.987177,4360.766585,5997.584387,...,1152.430441,13998.37515,10616.876984,3708.11397,3446.019762,14476.955589,3996.890973,9773.353638,7242.20624,15756.791034
3,The Stockyards,43.68157,-79.42621,Food Gral,519.790579,3150.136931,2389.388809,10627.907583,4441.827253,6238.746856,...,1095.244545,13757.214632,10853.848663,3951.542755,3675.877431,14231.817084,3892.102904,9997.185665,7484.185515,15570.156688
4,CocoaLatte,43.681768,-79.425158,Cafe,539.513265,3083.635083,2301.847951,10649.181119,4471.940719,6324.807821,...,1086.062642,13671.290535,10938.538717,4038.318142,3757.399654,14144.613497,3859.596992,10076.193162,7570.482327,15503.185185


Community Centers -> Rename columns

In [759]:
comm_ctr_df.head()

Unnamed: 0,AGENCY_NAME,comm_lat,comm_lon,Wychwood (94),Yonge-Eglinton (100),Yonge-St.Clair (97),York University Heights (27),Yorkdale-Glen Park (31),Lambton Baby Point (114),Lansing-Westgate (38),...,Ionview (125),Islington-City Centre West (14),Junction Area (90),Keelesdale-Eglinton West (110),Kennedy Park (124),Kensington-Chinatown (78),Kingsview Village-The Westway (6),Kingsway South (15),L'Amoreaux (117),Group
0,Community Centre 55,43.682093,-79.299131,10207.392595,8787.574786,7987.067086,17892.47421,13239.189576,16115.949599,12914.456607,...,6296.761262,20425.011353,13984.788932,13899.052172,5750.998471,8524.401472,20140.985879,17346.571189,12681.655507,Museum
1,519 Community Centre,43.666552,-79.380943,3774.672674,4614.012986,2732.704677,14039.525918,8141.911408,9339.467924,10365.922479,...,11614.913932,13605.792134,7300.399726,7602.222962,11723.242178,1952.854316,13933.459628,10556.412088,15328.654412,Museum
2,Applegrove Community Complex,43.666287,-79.321999,8431.44112,7841.079383,6570.985137,17405.768558,12147.068404,14073.156075,12811.640979,...,8651.198301,18223.730344,12054.219146,12242.92828,8248.424672,6231.861983,18570.970587,15277.234394,14394.554758,Museum
3,Arab Community Centre of Toronto,43.644644,-79.568023,12041.342234,14844.072648,14539.258701,14889.792576,11854.46516,5977.812819,16785.399116,...,25872.445354,2348.940357,8208.115747,9029.062913,26383.64654,13813.884299,6253.535711,4738.417377,26466.465759,Museum
4,BBYO Ontario,43.762823,-79.441666,9632.836331,7150.479971,9045.743952,3815.835533,5492.725205,12504.224391,1661.037314,...,13964.884999,16544.495958,10817.223783,8895.435091,15178.098233,12657.527668,11113.235107,13354.072009,10901.524391,Museum


In [760]:
temp_comm_ctr_df = comm_ctr_df.rename(columns={"AGENCY_NAME": "Venue","comm_lat": "Latitude" ,"comm_lon": "Longitude"})
temp_comm_ctr_df = temp_comm_ctr_df[column_order]
temp_comm_ctr_df.head()

Unnamed: 0,Venue,Latitude,Longitude,Group,Wychwood (94),Yonge-Eglinton (100),Yonge-St.Clair (97),York University Heights (27),Yorkdale-Glen Park (31),Lambton Baby Point (114),...,Humewood-Cedarvale (106),Ionview (125),Islington-City Centre West (14),Junction Area (90),Keelesdale-Eglinton West (110),Kennedy Park (124),Kensington-Chinatown (78),Kingsview Village-The Westway (6),Kingsway South (15),L'Amoreaux (117)
0,Community Centre 55,43.682093,-79.299131,Museum,10207.392595,8787.574786,7987.067086,17892.47421,13239.189576,16115.949599,...,10415.922874,6296.761262,20425.011353,13984.788932,13899.052172,5750.998471,8524.401472,20140.985879,17346.571189,12681.655507
1,519 Community Centre,43.666552,-79.380943,Museum,3774.672674,4614.012986,2732.704677,14039.525918,8141.911408,9339.467924,...,4670.018122,11614.913932,13605.792134,7300.399726,7602.222962,11723.242178,1952.854316,13933.459628,10556.412088,15328.654412
2,Applegrove Community Complex,43.666287,-79.321999,Museum,8431.44112,7841.079383,6570.985137,17405.768558,12147.068404,14073.156075,...,8966.218522,8651.198301,18223.730344,12054.219146,12242.92828,8248.424672,6231.861983,18570.970587,15277.234394,14394.554758
3,Arab Community Centre of Toronto,43.644644,-79.568023,Museum,12041.342234,14844.072648,14539.258701,14889.792576,11854.46516,5977.812819,...,12452.526135,25872.445354,2348.940357,8208.115747,9029.062913,26383.64654,13813.884299,6253.535711,4738.417377,26466.465759
4,BBYO Ontario,43.762823,-79.441666,Museum,9632.836331,7150.479971,9045.743952,3815.835533,5492.725205,12504.224391,...,8018.506758,13964.884999,16544.495958,10817.223783,8895.435091,15178.098233,12657.527668,11113.235107,13354.072009,10901.524391


POI -> Drop category column, rename columns

In [761]:
poi_df.head()

Unnamed: 0,NAME,CATEGORY,poi_lat,poi_lon,Wychwood (94),Yonge-Eglinton (100),Yonge-St.Clair (97),York University Heights (27),Yorkdale-Glen Park (31),Lambton Baby Point (114),...,Ionview (125),Islington-City Centre West (14),Junction Area (90),Keelesdale-Eglinton West (110),Kennedy Park (124),Kensington-Chinatown (78),Kingsview Village-The Westway (6),Kingsway South (15),L'Amoreaux (117),Group
0,BMO Field,Sports / Entertainment Venue,43.632664,-79.418614,4948.413884,8093.650887,6356.628623,15833.10567,9625.823552,6826.038187,...,16402.966878,10063.717532,5786.198142,7274.190756,16411.013755,2891.59329,12766.402033,7773.453214,19979.006886,Sports
1,Aga Khan Museum,Museum,43.725386,-79.332331,9242.162144,6185.975906,6729.74824,13383.222857,10125.045362,15206.267598,...,4948.679364,19843.284675,12905.967647,12046.928746,5797.322502,9543.647569,17616.143715,16440.003325,7951.182505,Museum
2,Air Canada Centre,Sports / Entertainment Venue,43.643438,-79.379001,5283.518492,7088.444001,5164.789063,16219.820506,10114.784542,9569.587715,...,13344.885422,13305.150506,7936.423134,8812.684262,13218.087242,1851.674416,14951.926791,10674.277388,17709.395319,Sports
3,Al Green Theatre (at the Miles Nadal Jewish Co...,Performing Arts,43.666207,-79.404062,2099.982618,4275.840866,2456.961601,13000.510209,6876.439586,7483.331473,...,13098.022885,11808.869978,5437.448132,5850.107931,13328.211472,1509.636648,12153.781084,8706.628679,16112.34184,Cinema
4,Alexander Muir Memorial Gardens,Nature/ Park,43.721534,-79.401164,5331.452244,1881.686648,3750.872623,8605.053916,4572.299847,10452.763811,...,10482.194401,15071.552084,8223.163554,6921.82332,11352.631703,7559.549111,12085.672645,11615.631606,10821.752287,Outdoor Attr


In [764]:
temp_poi_df = poi_df.drop(["CATEGORY"],axis=1)
temp_poi_df = temp_poi_df.rename(columns={"NAME": "Venue","poi_lat": "Latitude" ,"poi_lon": "Longitude"})
temp_poi_df = temp_poi_df[column_order]
temp_poi_df.head()

Unnamed: 0,Venue,Latitude,Longitude,Group,Wychwood (94),Yonge-Eglinton (100),Yonge-St.Clair (97),York University Heights (27),Yorkdale-Glen Park (31),Lambton Baby Point (114),...,Humewood-Cedarvale (106),Ionview (125),Islington-City Centre West (14),Junction Area (90),Keelesdale-Eglinton West (110),Kennedy Park (124),Kensington-Chinatown (78),Kingsview Village-The Westway (6),Kingsway South (15),L'Amoreaux (117)
0,BMO Field,43.632664,-79.418614,Sports,4948.413884,8093.650887,6356.628623,15833.10567,9625.823552,6826.038187,...,6563.437563,16402.966878,10063.717532,5786.198142,7274.190756,16411.013755,2891.59329,12766.402033,7773.453214,19979.006886
1,Aga Khan Museum,43.725386,-79.332331,Museum,9242.162144,6185.975906,6729.74824,13383.222857,10125.045362,15206.267598,...,8564.229571,4948.679364,19843.284675,12905.967647,12046.928746,5797.322502,9543.647569,17616.143715,16440.003325,7951.182505
2,Air Canada Centre,43.643438,-79.379001,Sports,5283.518492,7088.444001,5164.789063,16219.820506,10114.784542,9569.587715,...,6616.490558,13344.885422,13305.150506,7936.423134,8812.684262,13218.087242,1851.674416,14951.926791,10674.277388,17709.395319
3,Al Green Theatre (at the Miles Nadal Jewish Co...,43.666207,-79.404062,Cinema,2099.982618,4275.840866,2456.961601,13000.510209,6876.439586,7483.331473,...,3382.969228,13098.022885,11808.869978,5437.448132,5850.107931,13328.211472,1509.636648,12153.781084,8706.628679,16112.34184
4,Alexander Muir Memorial Gardens,43.721534,-79.401164,Outdoor Attr,5331.452244,1881.686648,3750.872623,8605.053916,4572.299847,10452.763811,...,3975.012586,10482.194401,15071.552084,8223.163554,6921.82332,11352.631703,7559.549111,12085.672645,11615.631606,10821.752287


Schools -> Remove School_type_dec, Rename columns

In [763]:
schools_df.head()

Unnamed: 0,NAME,SCHOOL_TYPE_DESC,LATITUDE,LONGITUDE,Wychwood (94),Yonge-Eglinton (100),Yonge-St.Clair (97),York University Heights (27),Yorkdale-Glen Park (31),Lambton Baby Point (114),...,Ionview (125),Islington-City Centre West (14),Junction Area (90),Keelesdale-Eglinton West (110),Kennedy Park (124),Kensington-Chinatown (78),Kingsview Village-The Westway (6),Kingsway South (15),L'Amoreaux (117),Group
0,A PLUS ACADEMY OF ADVANCEMENT,Private,43.732091,-79.267107,14163.392786,11412.015401,11628.510545,18249.615542,15431.71522,20235.0663,...,564.816698,24821.23546,17949.330095,17255.279217,905.953381,13645.704398,22922.579364,21481.94719,8017.727928,Schools
1,A R S ARMENIAN PRIVATE SCHOOL,Private,43.770062,-79.323159,13233.26601,9733.395499,10938.550926,13352.411487,12421.614133,18728.572091,...,5614.98278,23350.929775,16481.693047,15183.212854,7072.447827,14255.258101,19750.004476,19895.041064,2942.530288,Schools
2,A Y J GLOBAL ACADEMY,Private,43.774091,-79.32522,13485.957481,9965.801881,11227.524088,13210.08453,12507.844386,18909.105311,...,6046.614686,23519.538664,16675.19278,15335.332862,7506.772149,14596.110437,19782.065604,20064.994388,2564.446011,Schools
3,A Y JACKSON SECONDARY SCHOOL,English Public,43.80527,-79.366559,15030.690846,11566.358578,13286.791126,10781.764916,12429.749827,19459.843187,...,10849.656861,23817.397476,17446.398113,15740.946282,12310.056397,17036.969844,18779.152265,20467.422896,4354.216505,Schools
4,ABACUS MONTESSORI LEARNING CENTRE,Private,43.725441,-79.319013,10137.218378,7195.376268,7604.844848,14396.549387,11192.152301,16149.884946,...,3908.611548,20774.394358,13851.852524,13053.967905,4724.212912,10176.931871,18676.307247,17389.221896,7818.188136,Schools


In [765]:
temp_schools_df = schools_df.drop(["SCHOOL_TYPE_DESC"],axis=1)
temp_schools_df = temp_schools_df.rename(columns={"NAME": "Venue","LATITUDE": "Latitude" ,"LONGITUDE": "Longitude"})
temp_schools_df = temp_schools_df[column_order]
temp_schools_df.head()

Unnamed: 0,Venue,Latitude,Longitude,Group,Wychwood (94),Yonge-Eglinton (100),Yonge-St.Clair (97),York University Heights (27),Yorkdale-Glen Park (31),Lambton Baby Point (114),...,Humewood-Cedarvale (106),Ionview (125),Islington-City Centre West (14),Junction Area (90),Keelesdale-Eglinton West (110),Kennedy Park (124),Kensington-Chinatown (78),Kingsview Village-The Westway (6),Kingsway South (15),L'Amoreaux (117)
0,A PLUS ACADEMY OF ADVANCEMENT,43.732091,-79.267107,Schools,14163.392786,11412.015401,11628.510545,18249.615542,15431.71522,20235.0663,...,13709.465573,564.816698,24821.23546,17949.330095,17255.279217,905.953381,13645.704398,22922.579364,21481.94719,8017.727928
1,A R S ARMENIAN PRIVATE SCHOOL,43.770062,-79.323159,Schools,13233.26601,9733.395499,10938.550926,13352.411487,12421.614133,18728.572091,...,12139.27321,5614.98278,23350.929775,16481.693047,15183.212854,7072.447827,14255.258101,19750.004476,19895.041064,2942.530288
2,A Y J GLOBAL ACADEMY,43.774091,-79.32522,Schools,13485.957481,9965.801881,11227.524088,13210.08453,12507.844386,18909.105311,...,12353.885997,6046.614686,23519.538664,16675.19278,15335.332862,7506.772149,14596.110437,19782.065604,20064.994388,2564.446011
3,A Y JACKSON SECONDARY SCHOOL,43.80527,-79.366559,Schools,15030.690846,11566.358578,13286.791126,10781.764916,12429.749827,19459.843187,...,13579.06347,10849.656861,23817.397476,17446.398113,15740.946282,12310.056397,17036.969844,18779.152265,20467.422896,4354.216505
4,ABACUS MONTESSORI LEARNING CENTRE,43.725441,-79.319013,Schools,10137.218378,7195.376268,7604.844848,14396.549387,11192.152301,16149.884946,...,9541.647586,3908.611548,20774.394358,13851.852524,13053.967905,4724.212912,10176.931871,18676.307247,17389.221896,7818.188136


Health Centers -> Rename columns

In [766]:
health_df.head()

Unnamed: 0,AGENCY_NAME,health_lat,health_lon,Wychwood (94),Yonge-Eglinton (100),Yonge-St.Clair (97),York University Heights (27),Yorkdale-Glen Park (31),Lambton Baby Point (114),Lansing-Westgate (38),...,Ionview (125),Islington-City Centre West (14),Junction Area (90),Keelesdale-Eglinton West (110),Kennedy Park (124),Kensington-Chinatown (78),Kingsview Village-The Westway (6),Kingsway South (15),L'Amoreaux (117),Group
0,Rouge Valley Health System,43.780081,-79.204961,21146.148665,18060.148511,18615.338947,22914.459377,21570.504427,27128.474092,17927.881202,...,7364.930942,31766.392653,24828.054443,23891.520331,7524.957236,20921.909028,29055.05051,28360.153803,8953.60904,Health Centers
1,"University Health Network, Toronto Rehabilitat...",43.635,-79.433083,4697.354489,8100.053113,6523.711592,15206.176354,9061.570482,5657.768819,13268.850164,...,17089.515595,8897.176107,4788.435257,6429.415408,17180.204707,3551.522523,11672.691234,6582.405023,20268.888988,Health Centers
2,Cliffcrest Health Centre,43.72323,-79.235151,16184.748457,13730.007366,13691.63169,20976.570898,17911.402281,22268.501484,15657.955123,...,3295.265344,26776.665577,20016.226283,19495.885229,2049.400746,15189.191322,25346.283931,23518.067192,10260.003464,Health Centers
3,Golden Care Dental Services,43.809221,-79.269312,19349.302843,15869.096045,17001.238046,18322.164805,18413.240286,24862.336777,13922.570623,...,8209.951937,29476.038202,22621.080175,21291.147578,9323.592516,20133.774888,25558.919127,26021.231019,3902.861241,Health Centers
4,"Toronto, City of",43.773051,-79.25648,17308.655427,14075.945045,14810.950921,18729.821604,17413.806935,23190.237285,13709.283397,...,4380.868345,27840.347277,20894.151225,19851.538743,5286.473663,17463.215426,24874.489451,24407.471495,5276.577986,Health Centers


In [767]:
temp_health_df = health_df.rename(columns={"AGENCY_NAME": "Venue","health_lat": "Latitude" ,"health_lon": "Longitude"})
temp_health_df = temp_health_df[column_order]
temp_health_df.head()

Unnamed: 0,Venue,Latitude,Longitude,Group,Wychwood (94),Yonge-Eglinton (100),Yonge-St.Clair (97),York University Heights (27),Yorkdale-Glen Park (31),Lambton Baby Point (114),...,Humewood-Cedarvale (106),Ionview (125),Islington-City Centre West (14),Junction Area (90),Keelesdale-Eglinton West (110),Kennedy Park (124),Kensington-Chinatown (78),Kingsview Village-The Westway (6),Kingsway South (15),L'Amoreaux (117)
0,Rouge Valley Health System,43.780081,-79.204961,Health Centers,21146.148665,18060.148511,18615.338947,22914.459377,21570.504427,27128.474092,...,20471.79273,7364.930942,31766.392653,24828.054443,23891.520331,7524.957236,20921.909028,29055.05051,28360.153803,8953.60904
1,"University Health Network, Toronto Rehabilitat...",43.635,-79.433083,Health Centers,4697.354489,8100.053113,6523.711592,15206.176354,9061.570482,5657.768819,...,6278.180709,17089.515595,8897.176107,4788.435257,6429.415408,17180.204707,3551.522523,11672.691234,6582.405023,20268.888988
2,Cliffcrest Health Centre,43.72323,-79.235151,Health Centers,16184.748457,13730.007366,13691.63169,20976.570898,17911.402281,22268.501484,...,15916.617085,3295.265344,26776.665577,20016.226283,19495.885229,2049.400746,15189.191322,25346.283931,23518.067192,10260.003464
3,Golden Care Dental Services,43.809221,-79.269312,Health Centers,19349.302843,15869.096045,17001.238046,18322.164805,18413.240286,24862.336777,...,18279.998366,8209.951937,29476.038202,22621.080175,21291.147578,9323.592516,20133.774888,25558.919127,26021.231019,3902.861241
4,"Toronto, City of",43.773051,-79.25648,Health Centers,17308.655427,14075.945045,14810.950921,18729.821604,17413.806935,23190.237285,...,16511.030724,4380.868345,27840.347277,20894.151225,19851.538743,5286.473663,17463.215426,24874.489451,24407.471495,5276.577986


Transport -> Remove stop id, stop code; rename columns

In [768]:
transport_df.head()

Unnamed: 0,stop_name,stop_id,stop_code,stop_lat,stop_lon,Wychwood (94),Yonge-Eglinton (100),Yonge-St.Clair (97),York University Heights (27),Yorkdale-Glen Park (31),...,Ionview (125),Islington-City Centre West (14),Junction Area (90),Keelesdale-Eglinton West (110),Kennedy Park (124),Kensington-Chinatown (78),Kingsview Village-The Westway (6),Kingsway South (15),L'Amoreaux (117),Group
0,DANFORTH RD AT KENNEDY RD,262,662,43.714379,-79.260939,13904.800715,11547.471205,11424.284204,19228.329565,15809.248752,...,2509.941224,24482.471103,17739.447255,17264.101326,1242.619971,12901.609953,23189.263877,21233.852452,9999.456135,Public transport
1,DAVENPORT RD AT BEDFORD RD,263,929,43.674448,-79.399659,2103.070814,3374.918172,1496.98926,12432.616972,6435.998695,...,12284.403257,12451.500698,5834.650155,5922.013535,12581.315367,2329.61321,12256.501866,9243.568359,15135.05774,Public transport
2,DAVENPORT RD AT DUPONT ST,264,940,43.675511,-79.401938,1907.69798,3244.63267,1410.577189,12230.296127,6221.896689,...,12374.164828,12324.759554,5668.725638,5717.876679,12693.249333,2468.787893,12051.208111,9096.711805,15115.079353,Public transport
3,DAVISVILLE AVE AT CLEVELAND ST,265,1871,43.702088,-79.378112,4735.596475,2073.898644,2244.243836,11386.406514,6518.623581,...,9281.227445,15351.449123,8429.844494,7740.102342,9839.683901,5608.66763,13687.717366,11967.443023,11610.873704,Public transport
4,DISCO RD AT ATTWELL DR,266,11700,43.701362,-79.594843,13919.361183,15420.396066,15949.913848,11136.924568,11199.284397,...,26251.511566,8612.850717,10622.270897,10099.233499,27088.279614,16796.603856,3796.124921,8627.213242,24925.303401,Public transport


In [769]:
temp_transport_df = transport_df.drop(["stop_id",'stop_code'],axis=1)
temp_transport_df = temp_transport_df.rename(columns={"stop_name": "Venue","stop_lat": "Latitude" ,"stop_lon": "Longitude"})
temp_transport_df = temp_transport_df[column_order]
temp_transport_df.head()

Unnamed: 0,Venue,Latitude,Longitude,Group,Wychwood (94),Yonge-Eglinton (100),Yonge-St.Clair (97),York University Heights (27),Yorkdale-Glen Park (31),Lambton Baby Point (114),...,Humewood-Cedarvale (106),Ionview (125),Islington-City Centre West (14),Junction Area (90),Keelesdale-Eglinton West (110),Kennedy Park (124),Kensington-Chinatown (78),Kingsview Village-The Westway (6),Kingsway South (15),L'Amoreaux (117)
0,DANFORTH RD AT KENNEDY RD,43.714379,-79.260939,Public transport,13904.800715,11547.471205,11424.284204,19228.329565,15809.248752,19984.63947,...,13681.45672,2509.941224,24482.471103,17739.447255,17264.101326,1242.619971,12901.609953,23189.263877,21233.852452,9999.456135
1,DAVENPORT RD AT BEDFORD RD,43.674448,-79.399659,Public transport,2103.070814,3374.918172,1496.98926,12432.616972,6435.998695,8000.91398,...,2939.542744,12284.403257,12451.500698,5834.650155,5922.013535,12581.315367,2329.61321,12256.501866,9243.568359,15135.05774
2,DAVENPORT RD AT DUPONT ST,43.675511,-79.401938,Public transport,1907.69798,3244.63267,1410.577189,12230.296127,6221.896689,7851.77104,...,2722.861378,12374.164828,12324.759554,5668.725638,5717.876679,12693.249333,2468.787893,12051.208111,9096.711805,15115.079353
3,DAVISVILLE AVE AT CLEVELAND ST,43.702088,-79.378112,Public transport,4735.596475,2073.898644,2244.243836,11386.406514,6518.623581,10726.814282,...,4169.791624,9281.227445,15351.449123,8429.844494,7740.102342,9839.683901,5608.66763,13687.717366,11967.443023,11610.873704
4,DISCO RD AT ATTWELL DR,43.701362,-79.594843,Public transport,13919.361183,15420.396066,15949.913848,11136.924568,11199.284397,9343.703831,...,13521.076454,26251.511566,8612.850717,10622.270897,10099.233499,27088.279614,16796.603856,3796.124921,8627.213242,24925.303401


### 6.2 Concatenate datasets

In [771]:
concat_venues = pd.concat([temp_toronto_venues, temp_comm_ctr_df, temp_poi_df,temp_schools_df,temp_health_df,temp_transport_df])
print("The dataset contains {} rows and {} columns".format(concat_venues.shape[0],concat_venues.shape[1]))
concat_venues.head()

The dataset contains 15362 rows and 144 columns


Unnamed: 0,Venue,Latitude,Longitude,Group,Wychwood (94),Yonge-Eglinton (100),Yonge-St.Clair (97),York University Heights (27),Yorkdale-Glen Park (31),Lambton Baby Point (114),...,Humewood-Cedarvale (106),Ionview (125),Islington-City Centre West (14),Junction Area (90),Keelesdale-Eglinton West (110),Kennedy Park (124),Kensington-Chinatown (78),Kingsview Village-The Westway (6),Kingsway South (15),L'Amoreaux (117)
0,Wychwood Barns Farmers' Market,43.68001,-79.423849,Outdoor Attr,368.794665,3191.536781,2268.875527,10871.175669,4692.46875,6340.661378,...,1299.421379,13663.344828,10937.105455,4067.364508,3889.386717,14114.598661,3639.50963,10218.489484,7588.923034,15603.150195
1,Wychwood Barns,43.680028,-79.42381,Salon,371.776434,3188.240868,2265.219105,10870.967738,4692.652052,6344.324861,...,1298.26494,13659.65381,10940.803504,4070.980244,3892.166059,14110.959854,3639.245211,10221.157027,7592.581423,15599.751149
2,Pukka Restaurant,43.681055,-79.429187,Food Indo,546.646894,3339.736841,2635.756678,10566.987177,4360.766585,5997.584387,...,1152.430441,13998.37515,10616.876984,3708.11397,3446.019762,14476.955589,3996.890973,9773.353638,7242.20624,15756.791034
3,The Stockyards,43.68157,-79.42621,Food Gral,519.790579,3150.136931,2389.388809,10627.907583,4441.827253,6238.746856,...,1095.244545,13757.214632,10853.848663,3951.542755,3675.877431,14231.817084,3892.102904,9997.185665,7484.185515,15570.156688
4,CocoaLatte,43.681768,-79.425158,Cafe,539.513265,3083.635083,2301.847951,10649.181119,4471.940719,6324.807821,...,1086.062642,13671.290535,10938.538717,4038.318142,3757.399654,14144.613497,3859.596992,10076.193162,7570.482327,15503.185185


### 6.3 Clean the concatenated dataset (remove "Non usable" groups, remove duplicates)

In [781]:
clean_venues = concat_venues[concat_venues["Group"] != 'Non usable']
clean_venues.reset_index(drop=True,inplace=True)
print("The dataset contains {} rows and {} columns".format(clean_venues.shape[0],clean_venues.shape[1]))
clean_venues.head(2)

The dataset contains 15205 rows and 144 columns


Unnamed: 0,Venue,Latitude,Longitude,Group,Wychwood (94),Yonge-Eglinton (100),Yonge-St.Clair (97),York University Heights (27),Yorkdale-Glen Park (31),Lambton Baby Point (114),...,Humewood-Cedarvale (106),Ionview (125),Islington-City Centre West (14),Junction Area (90),Keelesdale-Eglinton West (110),Kennedy Park (124),Kensington-Chinatown (78),Kingsview Village-The Westway (6),Kingsway South (15),L'Amoreaux (117)
0,Wychwood Barns Farmers' Market,43.68001,-79.423849,Outdoor Attr,368.794665,3191.536781,2268.875527,10871.175669,4692.46875,6340.661378,...,1299.421379,13663.344828,10937.105455,4067.364508,3889.386717,14114.598661,3639.50963,10218.489484,7588.923034,15603.150195
1,Wychwood Barns,43.680028,-79.42381,Salon,371.776434,3188.240868,2265.219105,10870.967738,4692.652052,6344.324861,...,1298.26494,13659.65381,10940.803504,4070.980244,3892.166059,14110.959854,3639.245211,10221.157027,7592.581423,15599.751149


In [782]:
print("There are {} venues duplicated".format(clean_venues.duplicated(subset = ['Venue','Latitude','Longitude','Group']).sum()))

There are 25 venues duplicated


In [783]:
clean_venues = clean_venues.drop_duplicates(subset=['Venue','Latitude','Longitude','Group'])
clean_venues.reset_index(drop=True,inplace=True)
print("The dataset contains {} rows and {} columns".format(clean_venues.shape[0],clean_venues.shape[1]))
clean_venues.head(2)

The dataset contains 15180 rows and 144 columns


Unnamed: 0,Venue,Latitude,Longitude,Group,Wychwood (94),Yonge-Eglinton (100),Yonge-St.Clair (97),York University Heights (27),Yorkdale-Glen Park (31),Lambton Baby Point (114),...,Humewood-Cedarvale (106),Ionview (125),Islington-City Centre West (14),Junction Area (90),Keelesdale-Eglinton West (110),Kennedy Park (124),Kensington-Chinatown (78),Kingsview Village-The Westway (6),Kingsway South (15),L'Amoreaux (117)
0,Wychwood Barns Farmers' Market,43.68001,-79.423849,Outdoor Attr,368.794665,3191.536781,2268.875527,10871.175669,4692.46875,6340.661378,...,1299.421379,13663.344828,10937.105455,4067.364508,3889.386717,14114.598661,3639.50963,10218.489484,7588.923034,15603.150195
1,Wychwood Barns,43.680028,-79.42381,Salon,371.776434,3188.240868,2265.219105,10870.967738,4692.652052,6344.324861,...,1298.26494,13659.65381,10940.803504,4070.980244,3892.166059,14110.959854,3639.245211,10221.157027,7592.581423,15599.751149


## 7. Save the generated datasets

Files will be saved in Datasets/out/file_name.csv

In [785]:
toronto_venues.to_csv("Datasets/out/fq_venues.csv",index=False)
comm_ctr_df.to_csv("Datasets/out/comm_ctr.csv",index=False)
poi_df.to_csv("Datasets/out/poi.csv",index=False)
schools_df.to_csv("Datasets/out/schools.csv",index=False)
health_df.to_csv("Datasets/out/health_ctr.csv",index=False)
transport_df.to_csv("Datasets/out/transport.csv",index=False)
concat_venues.to_csv("Datasets/out/all_venues.csv",index=False)
clean_venues.to_csv("Datasets/out/clean_all_venues.csv",index=False)

neighborhood_df.to_csv("Datasets/out/neighborhoods.csv",index=False)
neigh_soc_df.to_csv("Datasets/out/social_metrics.csv",index=False)
neigh_crime_df.to_csv("Datasets/out/crime_rate.csv",index=False)