# Get Data

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import json
import os
import sys
import warnings
from datetime import datetime
from glob import glob
from io import BytesIO
from zipfile import ZipFile

import duckdb
import geopandas as gpd
import holidays
import pandas as pd
import requests
from meteostat import Daily, Stations
from watermark import watermark

In [3]:
PROJ_ROOT = os.pardir
src_dir = os.path.join(PROJ_ROOT, "src")
sys.path.append(src_dir)

In [4]:
%aimport clean
import clean as cl

%aimport file_utils
import file_utils as flut

%aimport geopandas_helpers
import geopandas_helpers as gpu

%aimport open_data
import open_data as od

%aimport pandas_utils
import pandas_utils as pu

In [5]:
def run_sql_query(query: str, verbose: bool=False) -> pd.DataFrame:
    """Run SQL query using DuckDB."""
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", FutureWarning)
        df_query = duckdb.sql(query).df()
    if verbose:
        print(f"Query returned {len(df_query):,} rows")
    return df_query

## About

Retrieve or generate the following datasets

1. Bike Share Toronto ridership from 2018 to 2023, inclusive
2. City of Toronto Census Tract Boundaries Geodata
3. City of Toronto Neighbourhood Boundaries Geodata
4. Bike Share Toronto station info
5. City of Toronto colleges and universities
6. City of Toronto Daily Weather Data
7. City of Toronto Downtown and East-West Adjacent Neighbourhoods (generated)
8. Bike Share Toronto Network Expandsion Plans (2023 to 2025, generated)
9. City of Toronto Public Transit (Train) Routes Geodata
10. List of public holidays in Canada for 2018 to 2023. inclusive

## User Inputs

In [6]:
# weather
weather_start = datetime(2018, 1, 1)
weather_end = datetime(2023, 3, 31)
weather_station_near_coords = dict(lat=43.6771, lon=-79.6334)

# geodata
# # station info
url_spec = (
    'https://raw.githubusercontent.com/MobilityData/gbfs/master/systems.csv'
)
# # Toronto Open Data base URL
base_url = "https://ckan0.cf.opendata.inter.prod-toronto.ca"
# # Toronto Open Data geodata dataset parameters
neigh_boundary_params = {"id": "neighbourhoods"}
# # co-ordinate reference system and projection
crs = 4326
epsg = 4536
# # neighbourhoods located in or adjacent to downtown Toronto
neighs = [
    {"Neighbourhood": "University", "Location": "Downtown", 'is_downtown': True},
    {"Neighbourhood": "Kensington-Chinatown", "Location": "Downtown", 'is_downtown': True},
    {"Neighbourhood": "Wellington Place", "Location": "Downtown", 'is_downtown': True},
    {"Neighbourhood": "Harbourfront-CityPlace", "Location": "Downtown", 'is_downtown': True},
    {"Neighbourhood": "Bay-Cloverhill", "Location": "Downtown", 'is_downtown': True},
    {"Neighbourhood": "Yonge-Bay Corridor", "Location": "Downtown", 'is_downtown': True},
    {"Neighbourhood": "St Lawrence-East Bayfront-The Islands", "Location": "Downtown", 'is_downtown': True},
    {"Neighbourhood": "Church-Wellesley", "Location": "Downtown", 'is_downtown': True},
    {"Neighbourhood": "Downtown Yonge East", "Location": "Downtown", 'is_downtown': True},
    {"Neighbourhood": "North St.James Town", "Location": "Downtown", 'is_downtown': True},
    {"Neighbourhood": "Cabbagetown-South St.James Town", "Location": "Downtown", 'is_downtown': True},
    {"Neighbourhood": "Moss Park", "Location": "Downtown", 'is_downtown': True},
    {"Neighbourhood": "Regent Park", "Location": "Downtown", 'is_downtown': True},
    {"Neighbourhood": "Roncesvalles", "Location": "West of Downtown", 'is_downtown': False},
    {"Neighbourhood": "South Parkdale", "Location": "West of Downtown", 'is_downtown': False},
    {"Neighbourhood": "Dufferin Grove", "Location": "West of Downtown", 'is_downtown': False},
    {"Neighbourhood": "Little Portugal", "Location": "West of Downtown", 'is_downtown': False},
    {"Neighbourhood": "Palmerston-Little Italy", "Location": "West of Downtown", 'is_downtown': False},
    {"Neighbourhood": "Trinity-Bellwoods", "Location": "West of Downtown", 'is_downtown': False},
    {"Neighbourhood": "West Queen West", "Location": "West of Downtown", 'is_downtown': False},
    {"Neighbourhood": "Fort York-Liberty Village", "Location": "West of Downtown", 'is_downtown': False},
    {"Neighbourhood": "North Riverdale", "Location": "East of Downtown", 'is_downtown': False},
    {"Neighbourhood": "South Riverdale", "Location": "East of Downtown", 'is_downtown': False},
    {"Neighbourhood": "Blake-Jones", "Location": "East of Downtown", 'is_downtown': False},
]
# # college and university locations
coll_univ_locations = {
    "Centennial College (Progress Campus)": {"lat": 43.783936, "lon": -79.228288},
    "Centennial College (Morningside Campus)": {"lat": 43.786243, "lon": -79.193541},
    "George Brown College (St James Campus)": {"lat": 43.651708, "lon": -79.370745},
    "George Brown College (Casa Loma Campus)": {"lat": 43.676259, "lon": -79.410209},
    "George Brown College (Waterfront Campus)": {"lat": 43.644076, "lon": -79.365148},
    "Humber College (North Campus)": {"lat": 43.730310, "lon": -79.606184},
    "Humber College (Lakeshore Campus)": {"lat": 43.596058, "lon": -79.520169},
    "OCAD University": {"lat": 43.653133, "lon": -79.391391},
    "Seneca College (Markham Campus)": {"lat": 43.7955, "lon": -79.3496},
    "Seneca College (Newnham Campus)": {"lat": 43.7955, "lon": -79.3496},
    "Seneca College (York University Campus)": {"lat": 43.771106, "lon": -79.500216},
    "Seneca College (Yorkgate Campus)": {"lat": 43.758833, "lon": -79.518493},
    "University of Toronto": {"lat": 43.663985, "lon": -79.399905},
    "University of Toronto (Scarborough Campus)": {"lat": 43.783100, "lon": -79.187521},
    "Toronto Metropolitan University": {"lat": 43.657591, "lon": -79.378786},
    "York University": {"lat": 43.772831, "lon": -79.497522},
    "York University (Glendon Campus)": {"lat": 43.728159, "lon": -79.378038},
}
# # timezone
my_timezone = 'America/Toronto'
# # public transit train lines
base_url = "https://ckan0.cf.opendata.inter.prod-toronto.ca"
train_line_params = { "id": "ttc-subway-shapefiles"}
# # census tracts
fname_census_tracts = 'lct_000b21a_e.shp'

In [7]:
data_dir = os.path.join(PROJ_ROOT, 'data')
raw_data_dir = os.path.join(data_dir, 'raw', 'systems', 'toronto')

fpaths_bike_share_raw = glob(os.path.join(raw_data_dir, '*.csv'))
fpaths_weather = glob(
    os.path.join(raw_data_dir, 'daily_weather__*.parquet.gzip')
)
fpath_downtown_neighs = glob(
    os.path.join(raw_data_dir, 'downtown_neighbourhoods__*.parquet.gzip')
)
fpath_expansion = glob(
    os.path.join(raw_data_dir, 'network_expansion__*.parquet.gzip')
)
fpath_holidays = glob(
    os.path.join(raw_data_dir, 'fpath_holidays__*.parquet.gzip')
)
url = base_url + "/api/3/action/package_show"

## Get Data

### Bike Share Trips Data

Retrieve raw bike share trips (ridership) data from the Toronto Open Data Portal following the procedure below

1. Navigate to the [Toronto Open Data portal](https://www.toronto.ca/city-government/data-research-maps/open-data/)
2. Select **Open Data Portal**
3. Search for **bike share**
4. In the search results, select **Bike Share Toronto Ridership Data**
5. Select **DOWNLOAD DATA**
8. Click the **DOWNLOAD** button for each of the following **File**s
   - *bikeshare-ridership-2018*
   - *bikeshare-ridership-2019*
   - *bikeshare-ridership-2020*
   - *bikeshare-ridership-2021*
   - *bikeshare-ridership-2022*
   - *bikeshare-ridership-2023*

Verify that bike share ridership `.CSV` files were downloaded and exist locally in the correct filepath

In [8]:
assert len(fpaths_bike_share_raw) == 4+4+12+12+12+12

Extract path the latest `.csv` file with bike share ridership data

In [9]:
fpath_raw_trips = fpaths_bike_share_raw[-1]

Show all columns in the transit lines geodata

In [10]:
%%time
pu.show_df(pd.read_csv(fpath_raw_trips, encoding='unicode_escape').convert_dtypes())

column,Trip Id,Trip Duration,Start Station Id,Start Time,Start Station Name,End Station Id,End Time,End Station Name,Bike Id,User Type
dtype,Int64,Int64,Int64,string[python],string[python],Int64,string[python],string[python],Int64,string[python]
nans,0,0,0,0,47861,233,0,48003,0,0
0,25950721,487,7168,11/01/2023 00:00,Queens Quay / Yonge St,7064,11/01/2023 00:08,51 Parliament St,6069,Casual Member
1,25950722,199,7335,11/01/2023 00:00,Bay St / Bloor St W (West Side),7001,11/01/2023 00:03,Wellesley Station Green P,4350,Casual Member
2,25950723,912,7263,11/01/2023 00:00,Walton St / Elizabeth St - SMART,7176,11/01/2023 00:15,Bathurst St / Fort York Blvd,253,Casual Member
3,25950724,393,7155,11/01/2023 00:00,Bathurst St / Lennox St,7531,11/01/2023 00:06,541 Huron St - SMART,7117,Casual Member
4,25950725,294,7033,11/01/2023 00:00,Union Station,7059,11/01/2023 00:05,Front St W / Blue Jays Way,610,Casual Member
...,...,...,...,...,...,...,...,...,...,...
393868,26395694,405,7660,11/30/2023 23:59,285 Victoria St,7065,12/01/2023 00:05,Dundas St E / Parliament St,582,Casual Member
393869,26395695,10103,7311,11/30/2023 23:59,Sherbourne St / Isabella St,7311,12/01/2023 02:47,Sherbourne St / Isabella St,4358,Casual Member
393870,26395696,248,7007,11/30/2023 23:59,College St / Huron St,7274,12/01/2023 00:03,Queen's Park Cres E / Grosvenor St - SMART,2638,Casual Member
393871,26395698,65,7285,11/30/2023 23:59,Spadina Ave / Harbord St - SMART,7170,12/01/2023 00:00,Spadina Ave / Willcocks St,5184,Casual Member


CPU times: user 722 ms, sys: 63.3 ms, total: 786 ms
Wall time: 847 ms


### Toronto Census Tract Boundaries

Retrieve Census Tracts data from Statistics Canada (StatCan) following the procedure below

1. Navigate to the [StatCan page for Forward Sortation Area downloads](https://www150.statcan.gc.ca/n1/en/catalogue/92-179-X)
2. Select Title > Census Forward Sortation Area Boundary File, Census year 2021
3. Select Type > Cartographic Boundary Files (CBF)
4. In Administrative Boundaries, make no selection
5. Select Statistical Boundaries > Census tracts
6. In Non-standard boundaries, make no selection
7. Select Format > Downloadable > `Shapefile (.shp)`
8. Click Continue
9. Click the link to the ZIP document - *lcd_000b21a_e.zip (ZIP version, 136,594.0 kb)*
10. Extract the file into `data/raw/lct_000b21a_e`. The filepath of the `.shp` file should be `data/raw/lct_000b21a_e/lct_000b21a_e.shp`.

In [11]:
shp_filepath_census = os.path.join(
    raw_data_dir, os.path.splitext(fname_census_tracts)[0], fname_census_tracts
)

Verify that census tracts `.shp` file was downloaded and exists locally in the correct filepath

In [12]:
assert os.path.exists(shp_filepath_census)

Show all columns in the census tract boundaries geodata

In [13]:
%%time
pu.show_df(gpd.read_file(shp_filepath_census).convert_dtypes())

ERROR 1: PROJ: proj_create_from_database: Open of /opt/conda/envs/get-data/share/proj failed


column,CTUID,DGUID,CTNAME,LANDAREA,PRUID,geometry
dtype,string[python],string[python],string[python],Float64,string[python],geometry
nans,0,0,0,0,0,0
0,5370001.08,2021S05075370001.08,0001.08,1.6383,35,"POLYGON ((7196507.366 869787.991, 7196501.617 ..."
1,0010002.00,2021S05070010002.00,0002.00,1.9638,10,"POLYGON ((8980216.643 2151065.360, 8980377.609..."
2,5370001.09,2021S05075370001.09,0001.09,1.9699,35,"POLYGON ((7196437.003 869160.246, 7196434.403 ..."
3,5370120.02,2021S05075370120.02,0120.02,76.965,35,"POLYGON ((7189475.703 865662.849, 7189448.943 ..."
4,0010006.00,2021S05070010006.00,0006.00,1.0467,10,"POLYGON ((8980091.143 2152478.609, 8980100.254..."
...,...,...,...,...,...,...
6242,5591003.00,2021S05075591003.00,1003.00,227.6981,35,"POLYGON ((6984736.931 712991.680, 6984866.560 ..."
6243,5591004.00,2021S05075591004.00,1004.00,18.3792,35,"POLYGON ((6981553.974 701146.583, 6981728.169 ..."
6244,5800300.00,2021S05075800300.00,0300.00,314.4614,35,"POLYGON ((7084392.943 1223795.154, 7084451.317..."
6245,6020800.00,2021S05076020800.00,0800.00,8.6987,46,"POLYGON ((5828241.160 1509841.949, 5828231.780..."


CPU times: user 2.05 s, sys: 116 ms, total: 2.16 s
Wall time: 2.16 s


### Toronto Neighbourhood Boundaries

Retrieve link to [neighbourhood boundaries geodata](https://open.toronto.ca/dataset/neighbourhoods/)

In [14]:
%%time
shp_filepath = od.download_geo_open_data(
    raw_data_dir, base_url, neigh_boundary_params
)

Retrieved dataset neighbourhoods from filepath https://ckan0.cf.opendata.inter.prod-toronto.ca/dataset/neighbourhoods/resource/1d38e8b7-65a8-4dd0-88b0-ad2ce938126e/download/neighbourhoods-4326.geojson
CPU times: user 13.7 ms, sys: 4.54 ms, total: 18.2 ms
Wall time: 181 ms


Verify that retrieved boundaries link is valid

In [15]:
%%time
r = requests.get(shp_filepath)
assert r.status_code == 200

CPU times: user 20.9 ms, sys: 8.32 ms, total: 29.3 ms
Wall time: 383 ms


Show all columns in the Neighbourhoods boundaries geodata

In [16]:
%%time
pu.show_df(gpd.read_file(shp_filepath).convert_dtypes())

column,_id,AREA_ID,AREA_ATTR_ID,PARENT_AREA_ID,AREA_SHORT_CODE,AREA_LONG_CODE,AREA_NAME,AREA_DESC,CLASSIFICATION,CLASSIFICATION_CODE,OBJECTID,geometry
dtype,Int64,Int64,Int64,Int64,string[python],string[python],string[python],string[python],string[python],string[python],Int64,geometry
nans,0,0,0,0,0,0,0,0,0,0,0,0
0,1,2502366,26022881,0,174,174,South Eglinton-Davisville,South Eglinton-Davisville (174),Not an NIA or Emerging Neighbourhood,,17824737,"MULTIPOLYGON (((-79.38635 43.69783, -79.38623 ..."
1,2,2502365,26022880,0,173,173,North Toronto,North Toronto (173),Not an NIA or Emerging Neighbourhood,,17824753,"MULTIPOLYGON (((-79.39744 43.70693, -79.39837 ..."
2,3,2502364,26022879,0,172,172,Dovercourt Village,Dovercourt Village (172),Not an NIA or Emerging Neighbourhood,,17824769,"MULTIPOLYGON (((-79.43411 43.66015, -79.43537 ..."
3,4,2502363,26022878,0,171,171,Junction-Wallace Emerson,Junction-Wallace Emerson (171),Not an NIA or Emerging Neighbourhood,,17824785,"MULTIPOLYGON (((-79.43870 43.66766, -79.43841 ..."
4,5,2502362,26022877,0,170,170,Yonge-Bay Corridor,Yonge-Bay Corridor (170),Not an NIA or Emerging Neighbourhood,,17824801,"MULTIPOLYGON (((-79.38404 43.64497, -79.38502 ..."
...,...,...,...,...,...,...,...,...,...,...,...,...
153,154,2502213,26022728,0,001,001,West Humber-Clairville,West Humber-Clairville (1),Not an NIA or Emerging Neighbourhood,,17827185,"MULTIPOLYGON (((-79.59037 43.73401, -79.58942 ..."
154,155,2502212,26022727,0,024,024,Black Creek,Black Creek (24),Neighbourhood Improvement Area,NIA,17827201,"MULTIPOLYGON (((-79.51915 43.77399, -79.51901 ..."
155,156,2502211,26022726,0,023,023,Pelmo Park-Humberlea,Pelmo Park-Humberlea (23),Not an NIA or Emerging Neighbourhood,,17827217,"MULTIPOLYGON (((-79.53225 43.73505, -79.52938 ..."
156,157,2502210,26022725,0,022,022,Humbermede,Humbermede (22),Neighbourhood Improvement Area,NIA,17827233,"MULTIPOLYGON (((-79.52813 43.74425, -79.52721 ..."


CPU times: user 181 ms, sys: 123 µs, total: 182 ms
Wall time: 366 ms


### Bike Share Station Info

#### Extract

Get info for for currently active stations by querying the [GBFS](https://gbfs.org/#) [`/station_information`](https://github.com/MobilityData/gbfs/blob/v2.0/gbfs.md#station_informationjson) [endpoint](https://github.com/MobilityData/gbfs/blob/v2.0/gbfs.md#version-endpoints) from the [API for the Bike Share Toronto network](https://tor.publicbikesystem.net/customer/gbfs/v2/gbfs.json)

In [17]:
%%time
# get network info URL from feeds file
query = f"""
        WITH t1 AS (
            SELECT column0 AS country_code,
                   column1 AS name,
                   column2 AS location,
                   column3 AS system_id,
                   column4 AS url,
                   column5 AS auto_discovery_url,
                   column6 AS authentication_info
            FROM '{url_spec}'
            WHERE country_code <> 'Country Code'
            AND location LIKE '%Toronto%'
        )
        SELECT auto_discovery_url
        FROM t1
        """
url_feed = run_sql_query(query).squeeze()

# query network info URL to get station info URL
url_info = [
    f
    for f in json.loads(requests.get(url_feed).content)['data']['en']['feeds']
    if f['name'] == 'station_information'
][0]['url']

# query station info (API) URL
df_info_raw = (
    pd.DataFrame.from_records(
        json.loads(requests.get(url_info).content)['data']['stations']
    )
)
with pd.option_context('display.max_columns', None):
    pu.show_df(df_info_raw)

column,station_id,name,physical_configuration,lat,lon,altitude,address,capacity,is_charging_station,geofenced_capacity,rental_methods,is_virtual_station,groups,obcn,nearby_distance,_bluetooth_id,_ride_code_support,rental_uris,post_code,is_valet_station,cross_street
dtype,object,object,object,float64,float64,float64,object,int64,bool,int64,object,bool,object,object,float64,object,bool,object,object,object,object
nans,0,0,0,0,0,8,1,0,0,0,0,0,0,0,0,0,0,0,292,776,631
0,7000,Fort York Blvd / Capreol Ct,REGULAR,43.639832,-79.395954,0.0,Fort York Blvd / Capreol Ct,35,False,0,"[KEY, TRANSITCARD, CREDITCARD, PHONE]",False,[],647-643-9607,500.0,,True,{},,,
1,7001,Wellesley Station Green P,ELECTRICBIKESTATION,43.664964,-79.383550,0.0,Yonge / Wellesley,23,True,0,"[KEY, TRANSITCARD, CREDITCARD, PHONE]",False,[],416-617-9576,500.0,,True,{},M4Y 1G7,,
2,7002,St. George St / Bloor St W,REGULAR,43.667333,-79.399429,0.0,St. George St / Bloor St W,19,False,0,"[KEY, TRANSITCARD, CREDITCARD, PHONE]",False,[],647-643-9615,500.0,,True,{},,,
3,7003,Madison Ave / Bloor St W,REGULAR,43.667158,-79.402761,,Madison Ave / Bloor St W,15,False,0,"[KEY, TRANSITCARD, CREDITCARD, PHONE]",False,[],647-631-4587,500.0,,True,{},,,
4,7005,King St W / York St,REGULAR,43.648001,-79.383177,0.0,King St W / York St,23,False,0,"[KEY, TRANSITCARD, CREDITCARD, PHONE]",False,[],647-643-9693,500.0,,True,{},,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
788,7926,McRae Dr / Laird Dr - SMART,SMARTMAPFRAME,43.709793,-79.363892,0.0,McRae Dr / Laird Dr,24,False,0,"[KEY, TRANSITCARD, PHONE]",False,[],Ward - 15( 180 Laird Dr),300.0,,True,{},M4G 3V7,,
789,7927,Strachan Ave / East Liberty St - SMART,SMARTMAPFRAME,43.639065,-79.410810,0.0,Strachan Ave / East Liberty St,24,False,0,"[KEY, TRANSITCARD, PHONE]",False,[],Ward-10 (39 East Liberty )st,100.0,,True,{},M6K 0A7,,
790,7928,Simcoe St / Pullan Pl,REGULAR,43.651053,-79.387649,0.0,Simcoe St / Pullan Pl,31,False,0,"[KEY, TRANSITCARD, CREDITCARD, PHONE]",False,[],Ward- 10( 180 Simcoe St),100.0,,True,{},M5T 2W5,,
791,7929,Spadina Ave / Bulwer St- SMART,SMARTMAPFRAME,43.649354,-79.396757,0.0,Spadina Ave / Bulwer St- SMART,12,False,0,"[KEY, TRANSITCARD, PHONE]",False,[],Ward 10 -( 174 Spadina Ave),100.0,,True,{},M5T 2C2,,


CPU times: user 654 ms, sys: 128 ms, total: 782 ms
Wall time: 1.63 s


#### Transform

In order to save data with columns of a `struct` or `object` datatype, apply JSON encoding to all values in these columns in order to handle values with an empty `object` (eg. empty Python `list`)

In [18]:
%%time
df_info_raw = (
    df_info_raw
    .assign(
        rental_methods=lambda df: df['rental_methods'].apply(json.dumps),
        is_virtual_station=lambda df: df['is_virtual_station'].apply(json.dumps),
        rental_uris=lambda df: df['rental_uris'].apply(json.dumps),
        groups=lambda df: df['groups'].apply(json.dumps),
    )
    .convert_dtypes()
)
with pd.option_context('display.max_columns', None):
    pu.show_df(df_info_raw)

column,station_id,name,physical_configuration,lat,lon,altitude,address,capacity,is_charging_station,geofenced_capacity,rental_methods,is_virtual_station,groups,obcn,nearby_distance,_bluetooth_id,_ride_code_support,rental_uris,post_code,is_valet_station,cross_street
dtype,string[python],string[python],string[python],Float64,Float64,Float64,string[python],Int64,boolean,Int64,string[python],string[python],string[python],string[python],Float64,string[python],boolean,string[python],string[python],boolean,string[python]
nans,0,0,0,0,0,8,1,0,0,0,0,0,0,0,0,0,0,0,292,776,631
0,7000,Fort York Blvd / Capreol Ct,REGULAR,43.639832,-79.395954,0.0,Fort York Blvd / Capreol Ct,35,False,0,"[""KEY"", ""TRANSITCARD"", ""CREDITCARD"", ""PHONE""]",false,[],647-643-9607,500.0,,True,{},,,
1,7001,Wellesley Station Green P,ELECTRICBIKESTATION,43.664964,-79.38355,0.0,Yonge / Wellesley,23,True,0,"[""KEY"", ""TRANSITCARD"", ""CREDITCARD"", ""PHONE""]",false,[],416-617-9576,500.0,,True,{},M4Y 1G7,,
2,7002,St. George St / Bloor St W,REGULAR,43.667333,-79.399429,0.0,St. George St / Bloor St W,19,False,0,"[""KEY"", ""TRANSITCARD"", ""CREDITCARD"", ""PHONE""]",false,[],647-643-9615,500.0,,True,{},,,
3,7003,Madison Ave / Bloor St W,REGULAR,43.667158,-79.402761,,Madison Ave / Bloor St W,15,False,0,"[""KEY"", ""TRANSITCARD"", ""CREDITCARD"", ""PHONE""]",false,[],647-631-4587,500.0,,True,{},,,
4,7005,King St W / York St,REGULAR,43.648001,-79.383177,0.0,King St W / York St,23,False,0,"[""KEY"", ""TRANSITCARD"", ""CREDITCARD"", ""PHONE""]",false,[],647-643-9693,500.0,,True,{},,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
788,7926,McRae Dr / Laird Dr - SMART,SMARTMAPFRAME,43.709793,-79.363892,0.0,McRae Dr / Laird Dr,24,False,0,"[""KEY"", ""TRANSITCARD"", ""PHONE""]",false,[],Ward - 15( 180 Laird Dr),300.0,,True,{},M4G 3V7,,
789,7927,Strachan Ave / East Liberty St - SMART,SMARTMAPFRAME,43.639065,-79.41081,0.0,Strachan Ave / East Liberty St,24,False,0,"[""KEY"", ""TRANSITCARD"", ""PHONE""]",false,[],Ward-10 (39 East Liberty )st,100.0,,True,{},M6K 0A7,,
790,7928,Simcoe St / Pullan Pl,REGULAR,43.651053,-79.387649,0.0,Simcoe St / Pullan Pl,31,False,0,"[""KEY"", ""TRANSITCARD"", ""CREDITCARD"", ""PHONE""]",false,[],Ward- 10( 180 Simcoe St),100.0,,True,{},M5T 2W5,,
791,7929,Spadina Ave / Bulwer St- SMART,SMARTMAPFRAME,43.649354,-79.396757,0.0,Spadina Ave / Bulwer St- SMART,12,False,0,"[""KEY"", ""TRANSITCARD"", ""PHONE""]",false,[],Ward 10 -( 174 Spadina Ave),100.0,,True,{},M5T 2C2,,


CPU times: user 24.8 ms, sys: 287 µs, total: 25.1 ms
Wall time: 24.6 ms


Use the boundaries geodata retrieved above to append the neighbourhood and census tract to each bike share station

In [19]:
%%time
df_info = (
    gpu.get_data_with_neighbourhood(
        # load neighbourhoods for city
        gpd.read_file(shp_filepath),
        (
            gpu.get_data_with_neighbourhood(
                # load census tracts for city
                (
                    gpd.read_file(shp_filepath_census)
                    .query(
                        "(PRUID == '35') & "
                        "(CTUID.str.startswith('535')) & "
                        "(CTNAME.str.startswith('01') | "
                        "CTNAME.str.startswith('02') | "
                        "CTNAME.str.startswith('03') | "
                        "CTNAME.str.startswith('00'))"
                    )
                    .reset_index(drop=True)
                    .to_crs(crs)
                    .drop(columns=['DGUID', 'CTNAME', 'LANDAREA', 'PRUID'])
                    .convert_dtypes()
                ),
                df_info_raw.assign(row_id=lambda df: range(len(df))),
                "lat",
                "lon",
                "row_id",
                ['row_id', "CTUID", "geometry"],
                'CTUID',
                crs,
            )
            .drop(columns=['row_id'])
            .assign(row_id=lambda df: range(len(df)))
        ),
        "lat",
        "lon",
        "row_id",
        ['row_id', "AREA_NAME", "geometry"],
        'AREA_NAME',
        crs,
    )
    .rename(columns={"CTUID": "census_tract_id", "AREA_NAME": "Neighbourhood"})
    .sort_values(by=["station_id"], ignore_index=True)
    .astype({"census_tract_id": pd.StringDtype(), "Neighbourhood": pd.StringDtype()})
    .drop(columns=['row_id'])
    .convert_dtypes()
)
with pd.option_context('display.max_columns', None):
    pu.show_df(df_info)

Extracted neighbourhood name.
Dropped 0 rows with a missing CTUID (geodata) column
Added geodata to data.
Extracted neighbourhood name.
Dropped 0 rows with a missing AREA_NAME (geodata) column
Added geodata to data.


column,station_id,name,physical_configuration,lat,lon,altitude,address,capacity,is_charging_station,geofenced_capacity,rental_methods,is_virtual_station,groups,obcn,nearby_distance,_bluetooth_id,_ride_code_support,rental_uris,post_code,is_valet_station,cross_street,census_tract_id,Neighbourhood
dtype,string[python],string[python],string[python],Float64,Float64,Float64,string[python],Int64,boolean,Int64,string[python],string[python],string[python],string[python],Float64,string[python],boolean,string[python],string[python],boolean,string[python],string[python],string[python]
nans,0,0,0,0,0,8,1,0,0,0,0,0,0,0,0,0,0,0,292,773,628,0,0
0,7000,Fort York Blvd / Capreol Ct,REGULAR,43.639832,-79.395954,0.0,Fort York Blvd / Capreol Ct,35,False,0,"[""KEY"", ""TRANSITCARD"", ""CREDITCARD"", ""PHONE""]",false,[],647-643-9607,500.0,,True,{},,,,5350012.01,Harbourfront-CityPlace
1,7001,Wellesley Station Green P,ELECTRICBIKESTATION,43.664964,-79.38355,0.0,Yonge / Wellesley,23,True,0,"[""KEY"", ""TRANSITCARD"", ""CREDITCARD"", ""PHONE""]",false,[],416-617-9576,500.0,,True,{},M4Y 1G7,,,5350063.06,Church-Wellesley
2,7002,St. George St / Bloor St W,REGULAR,43.667333,-79.399429,0.0,St. George St / Bloor St W,19,False,0,"[""KEY"", ""TRANSITCARD"", ""CREDITCARD"", ""PHONE""]",false,[],647-643-9615,500.0,,True,{},,,,5350061.00,University
3,7003,Madison Ave / Bloor St W,REGULAR,43.667158,-79.402761,,Madison Ave / Bloor St W,15,False,0,"[""KEY"", ""TRANSITCARD"", ""CREDITCARD"", ""PHONE""]",false,[],647-631-4587,500.0,,True,{},,,,5350091.01,Annex
4,7005,King St W / York St,REGULAR,43.648001,-79.383177,0.0,King St W / York St,23,False,0,"[""KEY"", ""TRANSITCARD"", ""CREDITCARD"", ""PHONE""]",false,[],647-643-9693,500.0,,True,{},,,,5350014.00,Yonge-Bay Corridor
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
785,7926,McRae Dr / Laird Dr - SMART,SMARTMAPFRAME,43.709793,-79.363892,0.0,McRae Dr / Laird Dr,24,False,0,"[""KEY"", ""TRANSITCARD"", ""PHONE""]",false,[],Ward - 15( 180 Laird Dr),300.0,,True,{},M4G 3V7,,,5350195.02,Leaside-Bennington
786,7927,Strachan Ave / East Liberty St - SMART,SMARTMAPFRAME,43.639065,-79.41081,0.0,Strachan Ave / East Liberty St,24,False,0,"[""KEY"", ""TRANSITCARD"", ""PHONE""]",false,[],Ward-10 (39 East Liberty )st,100.0,,True,{},M6K 0A7,,,5350008.01,Fort York-Liberty Village
787,7928,Simcoe St / Pullan Pl,REGULAR,43.651053,-79.387649,0.0,Simcoe St / Pullan Pl,31,False,0,"[""KEY"", ""TRANSITCARD"", ""CREDITCARD"", ""PHONE""]",false,[],Ward- 10( 180 Simcoe St),100.0,,True,{},M5T 2W5,,,5350036.00,Kensington-Chinatown
788,7929,Spadina Ave / Bulwer St- SMART,SMARTMAPFRAME,43.649354,-79.396757,0.0,Spadina Ave / Bulwer St- SMART,12,False,0,"[""KEY"", ""TRANSITCARD"", ""PHONE""]",false,[],Ward 10 -( 174 Spadina Ave),100.0,,True,{},M5T 2C2,,,5350039.00,Kensington-Chinatown


CPU times: user 2.38 s, sys: 11.5 ms, total: 2.39 s
Wall time: 2.46 s


#### Load

Export station info to disk

In [20]:
fname_prefix = "stations_info_raw"
_ = df_info_raw.pipe(flut.load, raw_data_dir, fname_prefix, my_timezone, True)

Exported 793 rows of stations_info_raw data to /home/jovyan/data/raw/systems/toronto/stations_info_raw__20240310_172839.parquet.gzip


Export station info with neighbourhood and census tract to disk

In [21]:
fname_prefix = "stations_info"
_ = df_info.pipe(flut.load, raw_data_dir, fname_prefix, my_timezone, True)

Exported 790 rows of stations_info data to /home/jovyan/data/raw/systems/toronto/stations_info__20240310_172839.parquet.gzip


### Colleges and Universities

#### Extract

In [22]:
df_colleges_univs = (
    pd.DataFrame.from_dict(coll_univ_locations, orient="index")
    .reset_index()
    .reset_index()
    .rename(
        columns={
            "level_0": "cu_id",
            "index": "cu_name",
        }
    )
)

**Notes**

1. The campus locations are listed below
   - [Centennial College](https://www.centennialcollege.ca/locations)
   - [George Brown College](https://www.georgebrown.ca/about/campuses-locations)
   - [Humber College](https://humber.ca/about-humber/campuses-facilities/)
   - [Toronto Metropolitan University](https://www.torontomu.ca/maps/)
   - [Seneca College](https://www.senecapolytechnic.ca/campuses.html)
   - [University of Toronto](https://www.utoronto.ca/university-life/campuses)
   - [York University](https://www.yorku.ca/campuses/)
2. Only campuses within the city of Toronto are used here.

#### Transform

Use the boundaries geodata retrieved above to append the neighbourhood and census tract to each college and university location

In [23]:
%%time
df_colleges_univs = (
    gpu.get_data_with_neighbourhood(
        # load neighbourhoods for city
        gpd.read_file(shp_filepath),
        (
            gpu.get_data_with_neighbourhood(
                # load census tracts for city
                (
                    gpd.read_file(shp_filepath_census)
                    .query(
                        "(PRUID == '35') & "
                        "(CTUID.str.startswith('535')) & "
                        "(CTNAME.str.startswith('01') | "
                        "CTNAME.str.startswith('02') | "
                        "CTNAME.str.startswith('03') | "
                        "CTNAME.str.startswith('00'))"
                    )
                    .reset_index(drop=True)
                    .to_crs(crs)
                    .drop(columns=['DGUID', 'CTNAME', 'LANDAREA', 'PRUID'])
                    .convert_dtypes()
                ),
                df_colleges_univs,
                "lat",
                "lon",
                "cu_id",
                ['cu_id', "CTUID", "geometry"],
                'CTUID',
                crs,
            )
        ),
        "lat",
        "lon",
        "cu_id",
        ['cu_id', "AREA_NAME", "geometry"],
        'AREA_NAME',
        crs,
    )
    .rename(columns={"CTUID": "census_tract_id", "AREA_NAME": "Neighbourhood"})
    .sort_values(by=["cu_id"], ignore_index=True)
    .astype({"census_tract_id": pd.StringDtype(), "Neighbourhood": pd.StringDtype()})
    .convert_dtypes()
)
with pd.option_context('display.max_columns', None):
    pu.show_df(df_colleges_univs)

Extracted neighbourhood name.
Dropped 0 rows with a missing CTUID (geodata) column
Added geodata to data.
Extracted neighbourhood name.
Dropped 0 rows with a missing AREA_NAME (geodata) column
Added geodata to data.


column,cu_id,cu_name,lat,lon,census_tract_id,Neighbourhood
dtype,Int64,string[python],Float64,Float64,string[python],string[python]
nans,0,0,0,0,0,0
0,0,Centennial College (Progress Campus),43.783936,-79.228288,5350363.07,Woburn North
1,1,Centennial College (Morningside Campus),43.786243,-79.193541,5350362.03,Highland Creek
2,2,George Brown College (St James Campus),43.651708,-79.370745,5350016.0,Moss Park
3,3,George Brown College (Casa Loma Campus),43.676259,-79.410209,5350117.0,Casa Loma
4,4,George Brown College (Waterfront Campus),43.644076,-79.365148,5350017.02,St Lawrence-East Bayfront-The Islands
5,5,Humber College (North Campus),43.73031,-79.606184,5350248.04,West Humber-Clairville
6,6,Humber College (Lakeshore Campus),43.596058,-79.520169,5350205.0,New Toronto
7,7,OCAD University,43.653133,-79.391391,5350036.0,Kensington-Chinatown
8,8,Seneca College (Markham Campus),43.7955,-79.3496,5350324.03,Hillcrest Village
9,9,Seneca College (Newnham Campus),43.7955,-79.3496,5350324.03,Hillcrest Village


CPU times: user 2.38 s, sys: 994 µs, total: 2.38 s
Wall time: 2.43 s


#### Load

Export to disk

In [24]:
%%time
fname_prefix = "colleges_univs"
_ = df_colleges_univs.pipe(
    flut.load,
    raw_data_dir,
    fname_prefix,
    my_timezone,
    True,
)

Exported 17 rows of colleges_univs data to /home/jovyan/data/raw/systems/toronto/colleges_univs__20240310_172841.parquet.gzip
CPU times: user 1.94 ms, sys: 0 ns, total: 1.94 ms
Wall time: 1.74 ms


### Daily Weather Data

#### Extract

Get weather station ID

In [25]:
%%time
station = Stations().nearby(**weather_station_near_coords).fetch(1)
station_id = station.index.get_level_values(0)[0]

CPU times: user 318 ms, sys: 11.8 ms, total: 330 ms
Wall time: 528 ms


Run sanity checks to verify correct weather station is returned

In [26]:
for sub_string in ['Toronto', 'Airport']:
    assert sub_string in station['name'].squeeze()

Get daily weather data from selected weather station

In [27]:
%%time
df_weather = Daily(station_id, weather_start, weather_end).fetch()



CPU times: user 180 ms, sys: 9.21 ms, total: 189 ms
Wall time: 1.23 s


#### Transform

Append weather station ID to daily weather data

In [28]:
df_weather = (
    df_weather
    .assign(station_id=station_id)
    .reset_index()
    .convert_dtypes()
)
pu.show_df(df_weather)

column,time,tavg,tmin,tmax,prcp,snow,wdir,wspd,wpgt,pres,tsun,station_id
dtype,datetime64[ns],Float64,Float64,Float64,Float64,Int64,Int64,Float64,Int64,Float64,Int64,string[python]
nans,0,0,0,0,7,1448,75,9,721,15,1915,0
0,2018-01-01,-15.0,-21.3,-8.7,0.0,110,,19.5,,1029.1,,71624
1,2018-01-02,-10.5,-13.1,-7.8,1.0,110,241,25.6,,1025.0,,71624
2,2018-01-03,-9.9,-13.5,-6.3,0.0,100,235,23.2,,1017.4,,71624
3,2018-01-04,-14.7,-20.5,-8.9,0.0,90,314,27.5,,1013.8,,71624
4,2018-01-05,-19.0,-23.0,-15.0,0.0,80,311,26.7,,1019.1,,71624
...,...,...,...,...,...,...,...,...,...,...,...,...
1910,2023-03-27,2.9,2.0,5.0,4.9,,55,9.4,,1018.0,,71624
1911,2023-03-28,3.5,1.0,6.0,0.1,,234,10.9,,1021.9,,71624
1912,2023-03-29,1.5,-3.9,8.0,1.6,,250,24.6,,1018.4,,71624
1913,2023-03-30,-1.3,-6.5,4.0,0.0,,251,14.0,,1026.8,,71624


#### Load

Export to disk

In [29]:
if not fpaths_weather:
    flut.load(
        df_weather,
        raw_data_dir,
        'daily_weather',
        my_timezone,
        verbose=True,
    )

Exported 1,915 rows of daily_weather data to /home/jovyan/data/raw/systems/toronto/daily_weather__20240310_172843.parquet.gzip


### Downtown and East-West Adjacent Neighbourhoods

#### Extract

A [large number of Toronto's bike share stations are located in Downtown Toronto](https://bikesharetoronto.com/system-map/). Get neighbourhoods within downtown Toronto and those located immediately to the East and West of Downtown Toronto

In [30]:
df_downtown_neighs = pd.DataFrame.from_records(neighs)

#### Transform

In [31]:
df_downtown_neighs = df_downtown_neighs.convert_dtypes()
pu.show_df(df_downtown_neighs)

column,Neighbourhood,Location,is_downtown
dtype,string[python],string[python],boolean
nans,0,0,0
0,University,Downtown,True
1,Kensington-Chinatown,Downtown,True
2,Wellington Place,Downtown,True
3,Harbourfront-CityPlace,Downtown,True
4,Bay-Cloverhill,Downtown,True
5,Yonge-Bay Corridor,Downtown,True
6,St Lawrence-East Bayfront-The Islands,Downtown,True
7,Church-Wellesley,Downtown,True
8,Downtown Yonge East,Downtown,True
9,North St.James Town,Downtown,True


**Notes**

1. There are [several definitions of Downtown Toronto](https://www.blogto.com/city/2018/04/downtown-toronto-canada/)'s boundaries. For this project, the boundaries are taken by searching for *Downtown Toronto* on Google Maps.

#### Load

Export to disk

In [32]:
if not fpath_downtown_neighs:
    flut.load(
        df_downtown_neighs,
        raw_data_dir,
        'downtown_neighbourhoods',
        my_timezone,
        verbose=True,
    )

Exported 24 rows of downtown_neighbourhoods data to /home/jovyan/data/raw/systems/toronto/downtown_neighbourhoods__20240310_172843.parquet.gzip


### Bike Share Network Expansion Plans

#### Extract

Show the planned growth in station footprint that was announced as part of a four-year expansion plan on August 8, 2022 for

1. stations
   - [1](https://learn.sharedusemobilitycenter.org/casestudy/bike-share-torontos-four-year-growth-plan-a-data-driven-community-focused-network-expansion/) (**Operations** section)
   - [2](https://bikesharetoronto.com/news/4-year-growth/)
   - [3](https://bikesharetoronto.com/news/four-year-growth-plan/) (table 3 in section 3.1 on page 53)
   - [4](https://www.toronto.ca/wp-content/uploads/2023/06/97f5-2022-Cycling-Year-in-Review-Final.pdf) (page 3)
2. bikes
   - [1](https://bikesharetoronto.com/news/four-year-growth-plan/) (page 7)
   - [2](https://www.toronto.ca/wp-content/uploads/2023/06/97f5-2022-Cycling-Year-in-Review-Final.pdf) (page 3)

where the listed totals for stations in 1,2 and 3 and for bikes are assumed to correspond to the end of 2022

In [33]:
df_network_size = pd.DataFrame.from_records(
    [
        {
            'year': 2023,
            'trips': 5_800_000,
            'num_stations': 710+110,
            'num_bikes': 7165+945,
            'frac_neighs_with_bikeshare': None,
        },
        {
            'year': 2024,
            'trips': 7_000_000,
            'num_stations': 710+110+110,
            'num_bikes': 7165+(945*2),
            'frac_neighs_with_bikeshare': None,
        },
        {
            'year': 2025,
            'trips': 8_200_000,
            'num_stations': 710+110+110+115,
            'num_bikes': 7165+(945*3),
            'frac_neighs_with_bikeshare': None,
        },
    ]
)

**Notes**

1. Trips totals for future years are as of January 2023 and are based on a rough approximation performed personally and were not taken from published data.

Convert datatypes

In [34]:
df_network_size = (
    df_network_size
    .astype({"frac_neighs_with_bikeshare": pd.Float64Dtype()})
    .convert_dtypes()
)
pu.show_df(df_network_size)

column,year,trips,num_stations,num_bikes,frac_neighs_with_bikeshare
dtype,Int64,Int64,Int64,Int64,Float64
nans,0,0,0,0,3
0,2023,5800000,820,8110,
1,2024,7000000,930,9055,
2,2025,8200000,1045,10000,


#### Load

Export to disk

In [35]:
if not fpath_expansion:
    flut.load(
        df_network_size,
        raw_data_dir,
        'network_expansion',
        my_timezone,
        verbose=True,
    )

Exported 3 rows of network_expansion data to /home/jovyan/data/raw/systems/toronto/network_expansion__20240310_172843.parquet.gzip


### Public Transit (Train) Routes in Toronto

#### Extract and Transform

Get url of `.zip` file containing `.shp` file with geo data for train transit lines

In [36]:
%%time
package = requests.get(url, params=train_line_params).json()
files = package["result"]["resources"]
matching_records = []
for idx, resource in enumerate(files):
    if not resource["datastore_active"]:
        url = base_url + "/api/3/action/resource_show?id=" + resource["id"]
        resource_metadata = requests.get(url).json()
        matching_records.append(resource_metadata)
shp_url = matching_records[0]["result"]["url"]

CPU times: user 44.5 ms, sys: 669 µs, total: 45.1 ms
Wall time: 359 ms


Assemble path to local folder where `.zip` file contents will be extracted

In [37]:
local_fpath = os.path.join(raw_data_dir, os.path.basename(os.path.splitext(shp_url)[0]))

#### Load

Download `.zip` file and extract locally, if `.zip` file was not previously downloaded

In [38]:
if not os.path.exists(local_fpath):
    # Read .zip file containing .shp file for train lines from URL found above
    # (.zip file will be read into memory instead of downloading .zip file to local disk)
    r = requests.get(shp_url)
    myzip = ZipFile(BytesIO(r.content))

    # extract .zip file
    myzip.extractall(local_fpath)
    print(f"Extracted folder containing .shp file to {local_fpath}")
else:
    print(f"Found folder at {local_fpath}. Did nothing.")

Extracted folder containing .shp file to ../data/raw/systems/toronto/ttc-subway-shapefile-wgs84


Verify that train routes `.shp` file was downloaded and exists locally in the correct filepath

In [39]:
assert os.path.exists(local_fpath)

Extract path the `.shp` file with transit lines geodata

In [40]:
fpath_transit_lines = glob(os.path.join(local_fpath, '*.shp'))[0]

Show all columns in the transit lines geodata

In [41]:
%%time
pu.show_df(gpd.read_file(fpath_transit_lines).convert_dtypes())

column,OBJECTID,ROUTE_NAME,RID,geometry
dtype,Int64,string[python],Int64,geometry
nans,0,0,0,0
0,53420,LINE 1 (YONGE-UNIVERSITY),1,"LINESTRING (-79.52813 43.79677, -79.52689 43.7..."
1,53421,LINE 2 (BLOOR - DANFORTH),2,"LINESTRING (-79.53540 43.63781, -79.53386 43.6..."
2,53422,LINE 3 (SCARBOROUGH),3,"LINESTRING (-79.26332 43.73266, -79.26332 43.7..."
3,53423,LINE 4 (SHEPPARD),4,"LINESTRING (-79.41113 43.76145, -79.40981 43.7..."


CPU times: user 18.1 ms, sys: 0 ns, total: 18.1 ms
Wall time: 17.9 ms


### Public Holidays in Canada

#### Extract

In [42]:
%%time
ca_holidays = holidays.CA(
    years=pd.date_range(start=weather_start, end=weather_end, freq='365d').year,
    subdiv='ON',
)

CPU times: user 40.1 ms, sys: 4.02 ms, total: 44.2 ms
Wall time: 43.7 ms


#### Transform

In [43]:
%%time
df_holidays = (
    pd.DataFrame.from_records(
        [
            {
                "date": datetime.strptime(str(date), '%Y-%m-%d'),
                'holiday_name': holiday_name,
            }
            for date, holiday_name in ca_holidays.items()
        ]
    )
    .assign(is_holiday=True)
    .convert_dtypes()
)
pu.show_df(df_holidays)

column,date,holiday_name,is_holiday
dtype,datetime64[ns],string[python],boolean
nans,0,0,0
0,2018-01-01,New Year's Day,True
1,2018-03-30,Good Friday,True
2,2018-07-01,Canada Day,True
3,2018-09-03,Labor Day,True
4,2018-12-25,Christmas Day,True
5,2018-02-19,Family Day,True
6,2018-05-21,Victoria Day,True
7,2018-10-08,Thanksgiving Day,True
8,2018-12-26,Boxing Day,True
9,2019-01-01,New Year's Day,True


CPU times: user 7.34 ms, sys: 0 ns, total: 7.34 ms
Wall time: 7.1 ms


#### Load

Export to disk

In [44]:
if not fpath_holidays:
    flut.load(
        df_holidays,
        raw_data_dir,
        'holidays',
        my_timezone,
        verbose=True,
    )

Exported 48 rows of holidays data to /home/jovyan/data/raw/systems/toronto/holidays__20240310_172844.parquet.gzip


## Discussion

### Conclusion

1. This step has
   - downloaded data for
     - Bike Share Toronto ridership from 2018 to 2023, inclusive
     - City of Toronto Census Tract Boundaries Geodata
     - City of Toronto Neighbourhood Boundaries Geodata
     - Bike Share Toronto station info
     - City of Toronto colleges and universities
     - City of Toronto Daily Weather Data
     - City of Toronto Public Transit (Train) Routes Geodata
   - generated data for
     - City of Toronto Downtown Neighbourhoods
     - Bike Share Toronto Network Expandsion Plans (2023 to 2025)

## Next Step

The next step will process the raw bike share trips data that was retrieved during this step.

## Version Information

In [45]:
packages = [
    'requests',
    'meteostat',
    'geopandas',
    'holidays',
    'pandas',
    'pyarrow',
]
print(
    watermark(
        updated=True,
        current_date=True,
        current_time=True,
        timezone=True,
        custom_time="%Y-%m-%d %H:%M:%S %Z",
        python=True,
        machine=True,
        packages=','.join(packages),
    )
)

Last updated: 2024-03-10 21:28:44 UTC

Python implementation: CPython
Python version       : 3.11.8
IPython version      : 8.22.2

requests : 2.31.0
meteostat: 1.6.7
geopandas: 0.14.3
holidays : 0.44
pandas   : 2.2.1
pyarrow  : 15.0.0

Compiler    : GCC 12.3.0
OS          : Linux
Release     : 6.6.10-76060610-generic
Machine     : x86_64
Processor   : x86_64
CPU cores   : 12
Architecture: 64bit

