### Import All the required libraries

In [1]:
import pandas as pd
import folium
import geopandas as gp

### Initiate a map on any location in Toronto

In [2]:
# Centre the base map on Union Station in downtown Toronto (43.645195, -79.3806).
# The previous centre, [43.266775, -79.866222], is the West Harbour GO stop in
# Hamilton rather than a Toronto location.
# NOTE(review): the name `map` shadows Python's builtin; it is kept because the
# other cells add their layers to this name.
map = folium.Map(location=[43.645195, -79.3806], zoom_start=10, prefer_canvas=True)

---
---
## Hospital Dataset

In [4]:
# Load the Ministry of Health service-provider locations CSV (relative path).
df_hospitals = pd.read_csv('../Data/Ministry_of_Health_Service_Provider_Locations.csv')

### Checking the dataset

In [6]:
df_hospitals.head()

Unnamed: 0,X,Y,OGF_ID,MOH_SERVICE_PROVIDER_IDENT,SERVICE_TYPE,SERVICE_TYPE_DETAIL,ENGLISH_NAME,FRENCH_NAME,ENGLISH_NAME_ALT,FRENCH_NAME_ALT,ADDRESS_LINE_1,ADDRESS_LINE_2,ADDRESS_DESCRIPTOR,COMMUNITY,POSTAL_CODE,GEOMETRY_UPDATE_DATETIME,EFFECTIVE_DATETIME,SYSTEM_DATETIME,OBJECTID
0,-79.369347,43.649813,127907976,9999999990,AIDS Bureau,CBAESP,2-Spirited People of the 1st Nations,,,,145 Front Street East,Unit 105,Practice Location,Toronto,M5A1E3,1970/01/01 00:00:00+00,2020/04/03 20:42:44+00,2020/04/03 20:42:44+00,753601
1,-79.384047,43.664337,127907977,9999999991,AIDS Bureau,CBAESP,Action Positive VIH-SIDA,,,,543 Yonge Street,4th floor,Practice Location,Toronto,M4Y1Y5,1970/01/01 00:00:00+00,2020/04/03 20:42:44+00,2020/04/03 20:42:44+00,753602
2,-79.3772,43.64999,127907978,9999999992,AIDS Bureau,CBAESP,African and Caribbean Council on HIV/AIDS in O...,,,,20 Victoria Street,4th floor,Practice Location,Toronto,M5C2N8,1970/01/01 00:00:00+00,2020/04/03 20:42:44+00,2020/04/03 20:42:44+00,753603
3,-79.364552,43.654664,127907979,9999999993,AIDS Bureau,CBAESP,Africans in Partnership Against AIDS,,,,526 Richmond Street East,2nd Floor,Practice Location,Toronto,M5A1R3,1970/01/01 00:00:00+00,2020/04/03 20:42:44+00,2020/04/03 20:42:44+00,753604
4,-79.375083,43.66765,127907980,9999999994,AIDS Bureau,CBAESP,AIDS Bereavement & Resiliency Project of Ontar...,,,,490 Sherbourne Street,2nd Floor,Practice Location,Toronto,M4X1K9,1970/01/01 00:00:00+00,2020/04/03 20:42:44+00,2020/04/03 20:42:44+00,753605


In [5]:
df_hospitals.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11246 entries, 0 to 11245
Data columns (total 19 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   X                           11246 non-null  float64
 1   Y                           11246 non-null  float64
 2   OGF_ID                      11246 non-null  int64  
 3   MOH_SERVICE_PROVIDER_IDENT  11246 non-null  object 
 4   SERVICE_TYPE                11246 non-null  object 
 5   SERVICE_TYPE_DETAIL         9975 non-null   object 
 6   ENGLISH_NAME                11246 non-null  object 
 7   FRENCH_NAME                 5729 non-null   object 
 8   ENGLISH_NAME_ALT            7223 non-null   object 
 9   FRENCH_NAME_ALT             5395 non-null   object 
 10  ADDRESS_LINE_1              11246 non-null  object 
 11  ADDRESS_LINE_2              7511 non-null   object 
 12  ADDRESS_DESCRIPTOR          7782 non-null   object 
 13  COMMUNITY                   112

In [6]:
df_hospitals.COMMUNITY.value_counts()

Toronto        2301
Ottawa          684
Hamilton        472
Mississauga     447
London          310
               ... 
Berwick           1
Bonfield          1
Newington         1
Lombardy          1
Alliston          1
Name: COMMUNITY, Length: 521, dtype: int64

In [7]:
df_hospitals.columns

Index(['X', 'Y', 'OGF_ID', 'MOH_SERVICE_PROVIDER_IDENT', 'SERVICE_TYPE',
       'SERVICE_TYPE_DETAIL', 'ENGLISH_NAME', 'FRENCH_NAME',
       'ENGLISH_NAME_ALT', 'FRENCH_NAME_ALT', 'ADDRESS_LINE_1',
       'ADDRESS_LINE_2', 'ADDRESS_DESCRIPTOR', 'COMMUNITY', 'POSTAL_CODE',
       'GEOMETRY_UPDATE_DATETIME', 'EFFECTIVE_DATETIME', 'SYSTEM_DATETIME',
       'OBJECTID'],
      dtype='object')

*Dropping unwanted columns*

In [8]:
# Columns the notebook treats as unwanted: provider identifier/type fields,
# French and alternate names, and the timestamp columns.
unused_hospital_columns = [
    'MOH_SERVICE_PROVIDER_IDENT', 'SERVICE_TYPE', 'SERVICE_TYPE_DETAIL',
    'FRENCH_NAME', 'ENGLISH_NAME_ALT', 'FRENCH_NAME_ALT',
    'GEOMETRY_UPDATE_DATETIME', 'EFFECTIVE_DATETIME', 'SYSTEM_DATETIME',
]
df_hospitals = df_hospitals.drop(columns=unused_hospital_columns)

*Changing names of the columns*

In [9]:
# X holds the longitude and Y the latitude; give them self-describing names.
coordinate_names = {'X': 'Long', 'Y': 'Lat'}
df_hospitals = df_hospitals.rename(columns=coordinate_names)

In [10]:
df_hospitals.isna().sum()

Long                     0
Lat                      0
OGF_ID                   0
ENGLISH_NAME             0
ADDRESS_LINE_1           0
ADDRESS_LINE_2        3735
ADDRESS_DESCRIPTOR    3464
COMMUNITY                0
POSTAL_CODE              0
OBJECTID                 0
dtype: int64

*Drop the rows where the Community is not Toronto*

In [11]:
# Keep only the rows whose COMMUNITY is exactly 'Toronto'; every other row
# (including any with a missing COMMUNITY) is discarded.
in_toronto = df_hospitals['COMMUNITY'] == 'Toronto'
df_hospitals = df_hospitals.loc[in_toronto].copy()

*Reset the index for simplicity*

In [12]:
# Renumber the remaining rows 0..n-1 and discard the old index labels.
df_hospitals = df_hospitals.reset_index(drop=True)

In [13]:
df_hospitals.COMMUNITY.value_counts()

Toronto    2301
Name: COMMUNITY, dtype: int64

*Work on a copy (`dummy_hosp`) restricted to a latitude/longitude bounding box around Toronto*

In [14]:
# Latitude/longitude limits of the bounding box applied to the hospital points.
lat_south, lat_north = 43.45000, 43.84000
lon_west, lon_east = -79.70000, -79.20000

# True for rows whose coordinates fall inside the box (both ends inclusive).
inside_box = (
    df_hospitals['Lat'].between(lat_south, lat_north)
    & df_hospitals['Long'].between(lon_west, lon_east)
)

# Build the filtered frame separately (df_hospitals itself is not modified)
# and renumber its rows from 0.
dummy_hosp = df_hospitals.loc[inside_box].reset_index(drop=True)

In [15]:
hlocations = dummy_hosp[['Lat', 'Long']]
hlocationlist = hlocations.values.tolist()

# Pair every [lat, long] with its name by position. The earlier form looked the
# name up with dummy_hosp['ENGLISH_NAME'][point], a label lookup driven by a
# positional counter, which is only correct while the index is exactly 0..n-1.
for coords, name in zip(hlocationlist, dummy_hosp['ENGLISH_NAME'].tolist()):
    folium.Marker(
        coords,
        popup=name,
        icon=folium.Icon(color='red', icon_color='black', icon='h-square', prefix='fa'),
    ).add_to(map)

In [17]:
# Write the filtered hospital rows to CSV; index=False leaves the row index out of the file.
dummy_hosp.to_csv('../Data/Cleaned/Hospitals.csv', index=False)

*create a list for mapping data*

In [61]:
#locations = df_hospitals[['Lat', 'Long']]
#locationlist = locations.values.tolist()

In [66]:
#for point in range(0, len(locationlist)):
#    folium.Marker(locationlist[point], popup=df_hospitals['ENGLISH_NAME'][point],
#                 icon=folium.Icon(color='red', icon_color='black', icon='h-square', prefix='fa')).add_to(map)

---
---
## School Dataset

In [18]:
# Load the school locations CSV (relative path).
df_schools = pd.read_csv('../Data/School locations-all types data.csv')

In [22]:
df_schools.head()

Unnamed: 0,_id,OBJECTID,GEO_ID,NAME,BOARD_NAME,SOURCE_ADDRESS,SCHOOL_TYPE_DESC,ADDRESS_POINT_ID,ADDRESS_NUMBER,LINEAR_NAME_FULL,ADDRESS_FULL,POSTAL_CODE,MUNICIPALITY,CITY,PLACE_NAME,GENERAL_USE_CODE,CENTRELINE_ID,LATITUDE,LONGITUDE,geometry
0,28977,1,330677,A PLUS ACADEMY OF ADVANCEMENT,,2425 EGLINTON AVE E,Private,330677,2425,Eglinton Ave E,2425 Eglinton Ave E,M1K 5G8,Scarborough,Toronto,,104008,111254,,,"{u'type': u'Point', u'coordinates': (-79.26710..."
1,28978,2,524780,A R S ARMENIAN PRIVATE SCHOOL,,50 HALLCROWN PL,Private,524780,50,Hallcrown Pl,50 Hallcrown Pl,M2J 1P7,North York,Toronto,,104008,438652,,,"{u'type': u'Point', u'coordinates': (-79.32315..."
2,28979,3,20258267,A Y J GLOBAL ACADEMY,,4 LANSING SQ,Private,20258267,4,Lansing Sq,4 Lansing Sq,M2J 5A2,North York,Toronto,,104008,438287,,,"{u'type': u'Point', u'coordinates': (-79.32521..."
3,28980,4,517961,A Y JACKSON SECONDARY SCHOOL,Toronto District School Board,50 FRANCINE DR,English Public,517961,50,Francine Dr,50 Francine Dr,M2H 2G6,North York,Toronto,A. Y. Jackson Secondary School,102002,436260,,,"{u'type': u'Point', u'coordinates': (-79.36655..."
4,28981,5,13967401,ABACUS MONTESSORI LEARNING CENTRE,,4 CREDIT UNION DR,Private,13967401,4,Credit Union Dr,4 Credit Union Dr,M4A 2N8,North York,Toronto,Latvian Canadian Centre,106007,444179,,,"{u'type': u'Point', u'coordinates': (-79.31900..."


*Drop unwanted columns*

In [19]:
df_schools.columns

Index(['_id', 'OBJECTID', 'GEO_ID', 'NAME', 'SCHOOL_LEVEL', 'SCHOOL_TYPE',
       'BOARD_NAME', 'SOURCE_ADDRESS', 'SCHOOL_TYPE_DESC', 'ADDRESS_POINT_ID',
       'ADDRESS_NUMBER', 'LINEAR_NAME_FULL', 'ADDRESS_FULL', 'POSTAL_CODE',
       'MUNICIPALITY', 'CITY', 'PLACE_NAME', 'GENERAL_USE_CODE',
       'CENTRELINE_ID', 'LO_NUM', 'LO_NUM_SUF', 'HI_NUM', 'HI_NUM_SUF',
       'LINEAR_NAME_ID', 'X', 'Y', 'LATITUDE', 'LONGITUDE', 'geometry'],
      dtype='object')

In [20]:
# Columns the notebook treats as unwanted for the school data.
unused_school_columns = [
    'SCHOOL_LEVEL', 'SCHOOL_TYPE', 'GENERAL_USE_CODE', 'CENTRELINE_ID',
    'LO_NUM', 'LO_NUM_SUF', 'HI_NUM', 'HI_NUM_SUF', 'LINEAR_NAME_ID',
    'X', 'Y',
]
df_schools = df_schools.drop(columns=unused_school_columns)

*Parse the geometry column to fill the LONGITUDE and LATITUDE columns*

In [21]:
def longlat(row):
    """Extract longitude and latitude from a stringified point geometry.

    Parameters
    ----------
    row : str
        Text such as
        "{u'type': u'Point', u'coordinates': (-79.2671, 43.7320)}".
        The coordinates are read from between the first '(' and the next ')'.

    Returns
    -------
    pandas.Series
        Two strings, ``[longitude, latitude]``, with surrounding whitespace
        removed. They are left as text; callers cast to float where needed.

    Raises
    ------
    IndexError
        If ``row`` contains no '('.
    ValueError
        If the parenthesised part does not hold exactly two comma-separated values.
    """
    inside_parens = row.split('(')[1].split(')')[0]
    # Strip each part: the text after the comma otherwise keeps its leading
    # space, e.g. ' 43.7320'.
    long, lat = (part.strip() for part in inside_parens.split(','))
    return pd.Series([long, lat])

In [23]:
# longlat yields [longitude, latitude] for each row, so the target columns are
# listed in that same order.
df_schools[['LONGITUDE', 'LATITUDE']] = df_schools['geometry'].apply(longlat)

In [30]:
df_schools.head()

Unnamed: 0,_id,OBJECTID,GEO_ID,NAME,BOARD_NAME,SOURCE_ADDRESS,SCHOOL_TYPE_DESC,ADDRESS_POINT_ID,ADDRESS_NUMBER,LINEAR_NAME_FULL,ADDRESS_FULL,POSTAL_CODE,MUNICIPALITY,CITY,PLACE_NAME,LATITUDE,LONGITUDE,geometry
0,28977,1,330677,A PLUS ACADEMY OF ADVANCEMENT,,2425 EGLINTON AVE E,Private,330677,2425,Eglinton Ave E,2425 Eglinton Ave E,M1K 5G8,Scarborough,Toronto,,43.732081946,-79.2671039369,"{u'type': u'Point', u'coordinates': (-79.26710..."
1,28978,2,524780,A R S ARMENIAN PRIVATE SCHOOL,,50 HALLCROWN PL,Private,524780,50,Hallcrown Pl,50 Hallcrown Pl,M2J 1P7,North York,Toronto,,43.7700530625,-79.3231561477,"{u'type': u'Point', u'coordinates': (-79.32315..."
2,28979,3,20258267,A Y J GLOBAL ACADEMY,,4 LANSING SQ,Private,20258267,4,Lansing Sq,4 Lansing Sq,M2J 5A2,North York,Toronto,,43.7740825707,-79.3252164121,"{u'type': u'Point', u'coordinates': (-79.32521..."
3,28980,4,517961,A Y JACKSON SECONDARY SCHOOL,Toronto District School Board,50 FRANCINE DR,English Public,517961,50,Francine Dr,50 Francine Dr,M2H 2G6,North York,Toronto,A. Y. Jackson Secondary School,43.8052613014,-79.3665552934,"{u'type': u'Point', u'coordinates': (-79.36655..."
4,28981,5,13967401,ABACUS MONTESSORI LEARNING CENTRE,,4 CREDIT UNION DR,Private,13967401,4,Credit Union Dr,4 Credit Union Dr,M4A 2N8,North York,Toronto,Latvian Canadian Centre,43.7254322328,-79.3190099774,"{u'type': u'Point', u'coordinates': (-79.31900..."


*Include only schools within Toronto*

In [24]:
# Latitude/longitude limits of the bounding box applied to the school points.
lat_south, lat_north = 43.45000, 43.84000
lon_west, lon_east = -79.70000, -79.20000

# Cast the coordinate columns to float64 on a new frame (df_schools keeps its
# own dtypes) so they can be compared numerically.
schools_numeric = df_schools.astype({'LATITUDE': 'float64', 'LONGITUDE': 'float64'})

# True for rows whose coordinates fall inside the box (both ends inclusive).
inside_box = (
    schools_numeric['LATITUDE'].between(lat_south, lat_north)
    & schools_numeric['LONGITUDE'].between(lon_west, lon_east)
)

# Keep the rows inside the box and renumber them from 0.
dummy_schools = schools_numeric.loc[inside_box].reset_index(drop=True)

In [25]:
dschool_locations = dummy_schools[['LATITUDE','LONGITUDE']]
dschool_locationslist = dschool_locations.values.tolist()

# Pair every [lat, long] with its name by position. The earlier form looked the
# name up with dummy_schools['NAME'][point], a label lookup driven by a
# positional counter, which is only correct while the index is exactly 0..n-1.
for coords, name in zip(dschool_locationslist, dummy_schools['NAME'].tolist()):
    folium.Marker(
        coords,
        popup=name,
        icon=folium.Icon(color='lightgray', icon_color='black', icon='graduation-cap', prefix='fa'),
    ).add_to(map)


In [26]:
# Write the filtered school rows to CSV; index=False leaves the row index out of the file.
dummy_schools.to_csv('../Data/Cleaned/Schools.csv', index=False)

*create a list for mapping data*

In [71]:
#school_locations = df_schools[['LATITUDE','LONGITUDE']]
#school_locationslist = school_locations.values.tolist()

In [72]:
#for point in range(0, len(school_locationslist)):
#    folium.Marker(school_locationslist[point], popup=df_schools['NAME'][point],
#                 icon=folium.Icon(color='lightgray', icon_color='black', icon='graduation-cap', prefix='fa')).add_to(map)

---
---
## Park Dataset

In [27]:
# Read the city green-space GeoJSON with geopandas (relative path).
df_park = gp.read_file('../Data/CITY_GREEN_SPACE_WGS84.geojson')

In [37]:
df_park.head()

Unnamed: 0,OBJECTID,GEO_ID,TYPE_DESC,SCODE_NAME,LCODE_NAME,NAME,geometry
0,180,180,CGSP,,3017,CITY WIDE OPEN SPACE,"POLYGON ((-79.22672 43.80698, -79.22690 43.806..."
1,193,193,CGSP,,3050,PUBLIC ACCESS PROPERTY,"POLYGON ((-79.20123 43.80354, -79.20121 43.803..."
2,325,325,CGSP,,3017,CITY WIDE OPEN SPACE,"POLYGON ((-79.24170 43.77960, -79.24184 43.779..."
3,327,327,CGSP,,3024,EAST HIGHLAND CREEK WATERCOURSE,"POLYGON ((-79.23614 43.77942, -79.23617 43.779..."
4,328,328,CGSP,,3017,CITY WIDE OPEN SPACE,"POLYGON ((-79.41225 43.77916, -79.41228 43.779..."


*Add parks to map*

In [28]:
# One green shade for both the fill and the outline of every park polygon.
style = {'fillColor': '#1a6310', 'color': '#1a6310'}

# The style function ignores the feature it receives and always returns the
# shared `style` dict.
park_layer = folium.GeoJson(df_park, name="geojson", style_function=lambda feature: style)
park_layer.add_to(map)

<folium.features.GeoJson at 0x7fe95fd15460>

---
---
## Transit Data

In [29]:
# Load the stops table from the GO GTFS folder (comma-separated text, relative path).
df_stops = pd.read_csv('../Data/GO_GTFS/stops.txt')

In [39]:
df_stops.head()

Unnamed: 0,stop_id,stop_name,stop_lat,stop_lon,zone_id,stop_url,location_type,parent_station,wheelchair_boarding,stop_code
0,WR,West Harbour GO,43.266775,-79.866222,18,https://www.gotransit.com/en/stations-stops-pa...,0,,1,
1,WH,Whitby GO,43.86484,-78.93818,93,https://www.gotransit.com/en/stations-stops-pa...,0,,1,
2,WE,Weston GO,43.70022,-79.514671,4,https://www.gotransit.com/en/stations-stops-pa...,0,,1,
3,UN,Union Station,43.645195,-79.3806,2,https://www.gotransit.com/en/stations-stops-pa...,0,,1,
4,UI,Unionville GO,43.851689,-79.314332,71,https://www.gotransit.com/en/stations-stops-pa...,0,,1,


In [40]:
df_stops.isna().sum()

stop_id                   0
stop_name                 0
stop_lat                  0
stop_lon                  0
zone_id                   0
stop_url                  0
location_type             0
parent_station         1048
wheelchair_boarding       0
stop_code                66
dtype: int64

In [41]:
df_stops.shape

(1048, 10)

*Drop unwanted columns*

In [30]:
# Remove the two columns not needed for mapping. `columns=` already selects the
# column axis, so no separate `axis` argument is passed.
unused_stop_columns = ['parent_station', 'stop_code']
df_stops = df_stops.drop(columns=unused_stop_columns)

*Create a copy for filtering the stops to the Toronto region*

In [32]:
# North / south / east / west boundaries used for the Toronto region.
lat_south, lat_north = 43.45000, 43.84000
lon_west, lon_east = -79.70000, -79.20000

# True for stops whose coordinates fall inside the box (both ends inclusive).
inside_box = (
    df_stops['stop_lat'].between(lat_south, lat_north)
    & df_stops['stop_lon'].between(lon_west, lon_east)
)

# Filter into a separate frame; df_stops itself is not modified.
dummy_stops = df_stops.loc[inside_box].copy()

# Summarise before renumbering, so the report still shows the original row labels.
dummy_stops.info()

dummy_stops = dummy_stops.reset_index(drop=True)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 162 entries, 2 to 1046
Data columns (total 8 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   stop_id              162 non-null    object 
 1   stop_name            162 non-null    object 
 2   stop_lat             162 non-null    float64
 3   stop_lon             162 non-null    float64
 4   zone_id              162 non-null    int64  
 5   stop_url             162 non-null    object 
 6   location_type        162 non-null    int64  
 7   wheelchair_boarding  162 non-null    int64  
dtypes: float64(2), int64(3), object(3)
memory usage: 11.4+ KB


In [33]:
dlocations = dummy_stops[['stop_lat', 'stop_lon']]
dlocationlist = dlocations.values.tolist()

# Pair every [lat, lon] with its stop name by position. The earlier form looked
# the name up with dummy_stops['stop_name'][point], a label lookup driven by a
# positional counter, which is only correct while the index is exactly 0..n-1.
for coords, name in zip(dlocationlist, dummy_stops['stop_name'].tolist()):
    folium.Marker(
        coords,
        popup=name,
        icon=folium.Icon(color='cadetblue', icon_color='black', icon='bus', prefix='fa'),
    ).add_to(map)

In [34]:
# Write the filtered stop rows to CSV; index=False leaves the row index out of the file.
dummy_stops.to_csv('../Data/Cleaned/Stops.csv', index=False)

*create a list for mapping data*

In [68]:
#locations = df_stops[['stop_lat', 'stop_lon']]
#locationlist = locations.values.tolist()

In [73]:
#for point in range(0, len(locationlist)):
#    folium.Marker(locationlist[point], popup=df_stops['stop_name'][point],
#                icon=folium.Icon(color='cadetblue', icon_color='black', icon='bus', prefix='fa')).add_to(map) 

---
---
---
## Folium Map