# Get Data Part 2/2

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
import shutil
import sys
from io import BytesIO
from itertools import product
from glob import glob
from typing import Dict
from zipfile import ZipFile

import geopandas as gpd
import pandas as pd
import requests
from tqdm.contrib import concurrent as concurrent_tq
from watermark import watermark

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
PROJ_ROOT = os.path.join(os.pardir)
src_dir = os.path.join(PROJ_ROOT, "src")
sys.path.append(src_dir)

In [4]:
%aimport file_utils
import file_utils as flut

%aimport pandas_utils
import pandas_utils as pu

## About

Retrieve the following datasets for the City of Toronto

1. Public Transit (Bus) Stops
2. Public Library Branch Locations
3. Places of Interest
4. Cultural Hotspots
5. Cycling Network

## User Inputs

In [5]:
# Toronto open datasets
base_url = "https://ckan0.cf.opendata.inter.prod-toronto.ca"
params_pub_trans_stops = {"id": "ttc-routes-and-schedules"}
params_library = {'id': "library-branch-general-information"}
params_poi = {"id": "places-of-interest-and-toronto-attractions"}
params_ch = {"id": "cultural-hotspot-points-of-interest"}
params_cycle = {"id": "cycling-network"}
cycle_network_cols = [
    '_id', 'OBJECTID', 'FROM_STREET', 'TO_STREET', 'STREET_NAME', 'geometry'
]

# geodata
crs = 4326
epsg = 4536  # gives distance in metres

sid_cols = [
    'station_id',
    'name',
    'physical_configuration',
    'lat',
    'lon',
    'is_charging_station',
    'capacity',
]

# export
my_timezone = 'US/Eastern'

In [6]:
data_dir = os.path.join(PROJ_ROOT, 'data')
raw_data_dir = os.path.join(data_dir, 'raw', 'systems', 'toronto')

In [7]:
def get_open_data_package_resources(
    base_url: str, params: Dict[str, str]
) -> pd.DataFrame:
    """."""
    url = base_url + "/api/3/action/package_show"
    package = requests.get(url, params=params).json()
    df = pd.DataFrame.from_records(package["result"]["resources"])
    return df


def download_zip_file(raw_data_dir: str, url: str) -> str:
    """."""
    url_fname = os.path.basename(url)
    zip_filepath = os.path.join(raw_data_dir, os.path.splitext(url_fname)[0])
    if not os.path.exists(zip_filepath):
        extracted_dir = os.path.join(
            raw_data_dir, os.path.splitext(url_fname)[0]
        )
        if not os.path.exists(extracted_dir):
            shp_file_fpath = os.path.join(raw_data_dir, url_fname)
            r = requests.get(url)
            with open(shp_file_fpath, "wb") as f:
                f.write(r.content)
        if not os.path.exists(extracted_dir):
            zip_file_fpath_destination = os.path.join(raw_data_dir, url_fname)
            shutil.unpack_archive(zip_file_fpath_destination, zip_filepath)
        print(f"Retrieved geodata & saved to {os.path.abspath(zip_filepath)}")
    else:
        print(
            f"Found existing geodata at {os.path.abspath(zip_filepath)}. Did "
            "not download."
        )
    return zip_filepath


def extract_coords_from_geometry(
    gdf: gpd.GeoDataFrame, lat_col_name: str, lon_col_name: str
) -> gpd.GeoDataFrame:
    """."""
    gdf[lat_col_name] = gdf['geometry'].explode(index_parts=False).x
    gdf[lon_col_name] = gdf['geometry'].explode(index_parts=False).y
    return gdf


def run_parallel(
    inputs_product: product,
    fn,
    chunk_size: int=100,
) -> pd.DataFrame:
    """Run function against multiple inputs in parallel."""
    iterables = list(inputs_product)
    outputs = list(
        concurrent_tq.process_map(
            fn,
            *zip(*iterables),
            max_workers=12,
            chunksize=chunk_size,
        )
    )
    return outputs

## Get Data

### Public Transit Stops

In [8]:
%%time
df_stops_geo = get_open_data_package_resources(base_url, params_pub_trans_stops)
url_geo_file = df_stops_geo['url'].squeeze()
zip_filepath = download_zip_file(raw_data_dir, url_geo_file)
df_public_transit_stops = pd.read_csv(
    os.path.join(zip_filepath, 'stops.txt'),
    usecols=['stop_id', 'stop_code', 'stop_name', 'stop_lat', 'stop_lon'],
).convert_dtypes()
with pd.option_context('display.max_columns', None):
    pu.show_df(df_public_transit_stops)

Found existing geodata at /home/jovyan/data/raw/systems/toronto/opendata_ttc_schedules. Did not download.


column,stop_id,stop_code,stop_name,stop_lat,stop_lon
dtype,Int64,Int64,string[python],Float64,Float64
nunique,9418,9418,7609,9183,9287
missing,0,0,0,0,0
0,262,662,Danforth Rd at Kennedy Rd,43.714379,-79.260939
1,263,929,Davenport Rd at Bedford Rd,43.674448,-79.399659
2,264,940,Davenport Rd at Dupont St,43.675511,-79.401938
3,265,1871,Davisville Ave at Cleveland St,43.702088,-79.378112
4,266,11700,Disco Rd at Attwell Dr,43.701362,-79.594843
...,...,...,...,...,...
9413,24782,16482,Bloor St West at Acorn Ave,43.640547,-79.541534
9414,24783,16483,Pape Station,43.679781,-79.344912
9415,24784,16484,Beecroft Rd at Sheppard Ave West North Side,43.761367,-79.413167
9416,24785,16485,Don Mills Rd at Van Horne Ave South Side,43.787194,-79.353094


CPU times: user 54.1 ms, sys: 8.11 ms, total: 62.2 ms
Wall time: 269 ms


### Libraries

#### Extract

In [9]:
df_libraries_geo = get_open_data_package_resources(base_url, params_library)
df_libraries_geo = df_libraries_geo.query(
    "(datastore_active == False) & (format == 'CSV')"
)
url_geo_file = df_libraries_geo['url'].iloc[0]
df_lib = pd.read_csv(url_geo_file)
with pd.option_context('display.max_columns', None):
    pu.show_df(df_lib)

column,_id,BranchCode,PhysicalBranch,BranchName,Address,PostalCode,Website,Telephone,SquareFootage,PublicParking,KidsStop,LeadingReading,CLC,DIH,TeenCouncil,YouthHub,AdultLiteracyProgram,Workstations,ServiceTier,Lat,Long,NBHDNo,NBHDName,TPLNIA,WardNo,WardName,PresentSiteYear
dtype,int64,object,int64,object,object,object,object,object,object,object,float64,float64,float64,float64,float64,float64,float64,float64,object,float64,float64,float64,object,float64,float64,object,float64
nunique,112,112,2,112,103,101,107,104,95,35,2,2,2,2,2,2,2,37,5,102,102,88,93,2,25,25,59
missing,0,0,0,0,9,9,4,7,2,12,12,12,12,12,12,12,12,12,0,10,10,12,12,12,12,12,12
0,1,AB,1,Albion,"1515 Albion Road, Toronto, ON, M9V 1B2",M9V 1B2,https://www.tpl.ca/albion,416-394-5170,29000,59,1.0,1.0,1.0,1.0,1.0,1.0,1.0,38.0,DL,43.739826,-79.584096,2.0,Mount Olive-Silverstone-Jamestown,1.0,1.0,Etobicoke North,2017.0
1,2,ACD,1,Albert Campbell,"496 Birchmount Road, Toronto, ON, M1K 1N8",M1K 1N8,https://www.tpl.ca/albertcampbell,416-396-8890,28957,45,0.0,1.0,1.0,1.0,1.0,1.0,0.0,36.0,DL,43.708019,-79.269252,120.0,Clairlea-Birchmount,1.0,20.0,Scarborough Southwest,1971.0
2,3,AD,1,Alderwood,"2 Orianna Drive, Toronto, ON, M8W 4Y1",M8W 4Y1,https://www.tpl.ca/alderwood,416-394-5310,7341,shared,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.0,NL,43.601944,-79.547252,20.0,Alderwood,0.0,3.0,Etobicoke-Lakeshore,1999.0
3,4,AG,1,Agincourt,"155 Bonis Avenue, Toronto, ON, M1T 3W6",M1T 3W6,https://www.tpl.ca/agincourt,416-396-8943,27000,86,0.0,1.0,1.0,1.0,0.0,1.0,0.0,42.0,DL,43.785167,-79.293430,118.0,Tam O'Shanter-Sullivan,0.0,22.0,Scarborough-Agincourt,1991.0
4,5,AH,1,Armour Heights,"2140 Avenue Road, Toronto, ON, M5M 4M7",M5M 4M7,https://www.tpl.ca/armourheights,416-395-5430,2988,shared,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,NL,43.739337,-79.421889,39.0,Bedford Park-Nortown,0.0,8.0,Eglinton-Lawrence,1982.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
107,108,WP,1,Woodview Park,"16 Bradstock Road, Toronto, ON, M9M 1M8",M9M 1M8,https://www.tpl.ca/woodview,416-395-5960,4110,shared,0.0,0.0,0.0,0.0,1.0,0.0,0.0,9.0,NL,43.739722,-79.538941,22.0,Humbermede,1.0,7.0,Humber River-Black Creek,1964.0
108,109,WS,1,Woodside Square,"Woodside Square Mall, 1571 Sandhurst Circle, T...",M1V 1V2,https://www.tpl.ca/woodside,416-396-8979,9944,shared,0.0,1.0,0.0,0.0,1.0,0.0,0.0,22.0,NL,43.809463,-79.269548,129.0,Agincourt North,0.0,23.0,Scarborough North,1977.0
109,110,WY,1,Wychwood,"1431 Bathurst Street, Toronto, ON, M5R 3J2",M5R 3J2,https://www.tpl.ca/wychwood,416-393-7683,15798,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,11.0,NL,43.682181,-79.417548,96.0,Casa Loma,0.0,12.0,Toronto-St. Paul's,1916.0
110,111,YO,1,Yorkville,"22 Yorkville Avenue, Toronto, ON, M4W 1L4",M4W 1L4,https://www.tpl.ca/yorkville,416-393-7660,9053,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.0,NL,43.671826,-79.388644,95.0,Annex,0.0,11.0,University-Rosedale,1907.0


#### Transform

In [10]:
df_lib = df_lib.convert_dtypes()
with pd.option_context('display.max_columns', None):
    pu.show_df(df_lib)

column,_id,BranchCode,PhysicalBranch,BranchName,Address,PostalCode,Website,Telephone,SquareFootage,PublicParking,KidsStop,LeadingReading,CLC,DIH,TeenCouncil,YouthHub,AdultLiteracyProgram,Workstations,ServiceTier,Lat,Long,NBHDNo,NBHDName,TPLNIA,WardNo,WardName,PresentSiteYear
dtype,Int64,string[python],Int64,string[python],string[python],string[python],string[python],string[python],string[python],string[python],Int64,Int64,Int64,Int64,Int64,Int64,Int64,Int64,string[python],Float64,Float64,Int64,string[python],Int64,Int64,string[python],Int64
nunique,112,112,2,112,103,101,107,104,95,35,2,2,2,2,2,2,2,37,5,102,102,88,93,2,25,25,59
missing,0,0,0,0,9,9,4,7,2,12,12,12,12,12,12,12,12,12,0,10,10,12,12,12,12,12,12
0,1,AB,1,Albion,"1515 Albion Road, Toronto, ON, M9V 1B2",M9V 1B2,https://www.tpl.ca/albion,416-394-5170,29000,59,1,1,1,1,1,1,1,38,DL,43.739826,-79.584096,2,Mount Olive-Silverstone-Jamestown,1,1,Etobicoke North,2017
1,2,ACD,1,Albert Campbell,"496 Birchmount Road, Toronto, ON, M1K 1N8",M1K 1N8,https://www.tpl.ca/albertcampbell,416-396-8890,28957,45,0,1,1,1,1,1,0,36,DL,43.708019,-79.269252,120,Clairlea-Birchmount,1,20,Scarborough Southwest,1971
2,3,AD,1,Alderwood,"2 Orianna Drive, Toronto, ON, M8W 4Y1",M8W 4Y1,https://www.tpl.ca/alderwood,416-394-5310,7341,shared,0,0,0,0,0,0,0,7,NL,43.601944,-79.547252,20,Alderwood,0,3,Etobicoke-Lakeshore,1999
3,4,AG,1,Agincourt,"155 Bonis Avenue, Toronto, ON, M1T 3W6",M1T 3W6,https://www.tpl.ca/agincourt,416-396-8943,27000,86,0,1,1,1,0,1,0,42,DL,43.785167,-79.29343,118,Tam O'Shanter-Sullivan,0,22,Scarborough-Agincourt,1991
4,5,AH,1,Armour Heights,"2140 Avenue Road, Toronto, ON, M5M 4M7",M5M 4M7,https://www.tpl.ca/armourheights,416-395-5430,2988,shared,0,0,0,0,0,0,0,5,NL,43.739337,-79.421889,39,Bedford Park-Nortown,0,8,Eglinton-Lawrence,1982
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
107,108,WP,1,Woodview Park,"16 Bradstock Road, Toronto, ON, M9M 1M8",M9M 1M8,https://www.tpl.ca/woodview,416-395-5960,4110,shared,0,0,0,0,1,0,0,9,NL,43.739722,-79.538941,22,Humbermede,1,7,Humber River-Black Creek,1964
108,109,WS,1,Woodside Square,"Woodside Square Mall, 1571 Sandhurst Circle, T...",M1V 1V2,https://www.tpl.ca/woodside,416-396-8979,9944,shared,0,1,0,0,1,0,0,22,NL,43.809463,-79.269548,129,Agincourt North,0,23,Scarborough North,1977
109,110,WY,1,Wychwood,"1431 Bathurst Street, Toronto, ON, M5R 3J2",M5R 3J2,https://www.tpl.ca/wychwood,416-393-7683,15798,0,0,0,0,0,0,0,0,11,NL,43.682181,-79.417548,96,Casa Loma,0,12,Toronto-St. Paul's,1916
110,111,YO,1,Yorkville,"22 Yorkville Avenue, Toronto, ON, M4W 1L4",M4W 1L4,https://www.tpl.ca/yorkville,416-393-7660,9053,0,0,0,0,0,0,0,0,7,NL,43.671826,-79.388644,95,Annex,0,11,University-Rosedale,1907


#### Load

Export to disk

In [11]:
fpath_public_transit_stops = glob(os.path.join(raw_data_dir, 'public_transit_stops__*.parquet.gzip'))
if not fpath_public_transit_stops:
    flut.load(
        df_lib,
        raw_data_dir,
        'libraries',
        my_timezone,
        verbose=True,
    )

Exported 112 rows of libraries data to /home/jovyan/data/raw/systems/toronto/libraries__20240423_111337.parquet.gzip


### Cultural Hotspots

#### Extract

In [12]:
%%time
df_ch_geo = get_open_data_package_resources(base_url, params_ch)
df_ch_geo = df_ch_geo.query(
    "(datastore_active == False) & (name.str.endswith('4326.geojson'))"
)
url_geo_file = df_ch_geo['url'].iloc[0]
gdf_ch = (
    gpd.read_file(url_geo_file)
    .pipe(extract_coords_from_geometry, 'ch_lat', 'ch_lon')
)
print(gdf_ch.crs)
with pd.option_context('display.max_columns', None):
    pu.show_df(
        gdf_ch[
            ['_id', 'SiteName', 'LoopTourName', 'Interests', 'ch_lat', 'ch_lon', 'geometry']
        ].head(2)
    )

ERROR 1: PROJ: proj_create_from_database: Open of /opt/conda/envs/get-data/share/proj failed


EPSG:4326


column,_id,SiteName,LoopTourName,Interests,ch_lat,ch_lon,geometry
dtype,int64,object,object,object,float64,float64,geometry
nunique,2,2,1,2,2,2,2
missing,0,0,0,0,0,0,0
0,1,Up To This Moment (2019),The Future in Mount Dennis,Art,-79.48458,43.68838,MULTIPOINT ((-79.48458 43.68838))
1,2,Untitled (2019),The Future in Mount Dennis,"Art, History",-79.48487,43.68835,MULTIPOINT ((-79.48487 43.68835))


CPU times: user 458 ms, sys: 23.7 ms, total: 481 ms
Wall time: 837 ms


#### Transform

In [13]:
gdf_ch = gdf_ch.convert_dtypes()
pu.show_df(gdf_ch)

column,_id,LoopsGuide,Loop,TourNum,OrderNum,LoopTourName,LoopTourURL,TourLabel,SiteName,Neighbourhood,...,ImageURL,ThumbURL,ImageOrientation,test1,test2,test3,ObjectId,geometry,ch_lat,ch_lon
dtype,Int64,string[python],Int64,Int64,Int64,string[python],string[python],string[python],string[python],string[python],...,string[python],string[python],string[python],string[python],string[python],string[python],Int64,geometry,Float64,Float64
nunique,895,5,3,5,42,49,2,13,874,56,...,820,811,2,60,3,2,895,880,876,872
missing,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
0,1,York,2,2,7,The Future in Mount Dennis,,,Up To This Moment (2019),Mount Dennis,...,https://contrib0.wp.intra.dev-toronto.ca/ext/e...,https://contrib0.wp.intra.dev-toronto.ca/ext/e...,Portrait,,,,1,MULTIPOINT ((-79.48458 43.68838)),-79.48458,43.68838
1,2,York,2,2,8,The Future in Mount Dennis,,,Untitled (2019),Mount Dennis,...,https://contrib0.wp.intra.dev-toronto.ca/ext/e...,https://contrib0.wp.intra.dev-toronto.ca/ext/e...,Portrait,,,,2,MULTIPOINT ((-79.48487 43.68835)),-79.48487,43.68835
2,3,York,2,2,12,The Future in Mount Dennis,,,John Dennis,Mount Dennis,...,https://contrib0.wp.intra.dev-toronto.ca/ext/e...,https://contrib0.wp.intra.dev-toronto.ca/ext/e...,Portrait,,,,3,MULTIPOINT ((-79.48888 43.68686)),-79.48888,43.68686
3,4,York,2,2,11,The Future in Mount Dennis,,,Mount Dennis Library,Mount Dennis,...,https://contrib0.wp.intra.dev-toronto.ca/ext/e...,https://contrib0.wp.intra.dev-toronto.ca/ext/e...,Landscape,,,,4,MULTIPOINT ((-79.48833 43.68655)),-79.48833,43.68655
4,5,York,2,2,9,The Future in Mount Dennis,,,Black Creek at Weston Rd.,Mount Dennis,...,https://contrib0.wp.intra.dev-toronto.ca/ext/e...,https://contrib0.wp.intra.dev-toronto.ca/ext/e...,Portrait,,,,5,MULTIPOINT ((-79.48012 43.68317)),-79.48012,43.68317
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
890,891,Etobicoke,1,2,14,Art Along the Lakeshore,,,The Evolution of Transportation (1996),Lake Shore Blvd. W.,...,https://contrib0.wp.intra.dev-toronto.ca/ext/e...,https://contrib0.wp.intra.dev-toronto.ca/ext/e...,Landscape,,,,891,MULTIPOINT ((-79.54441 43.59170)),-79.54441,43.5917
891,892,Etobicoke,1,2,15,Art Along the Lakeshore,,,501 Streetcar,Lake Shore Blvd. W.,...,https://contrib0.wp.intra.dev-toronto.ca/ext/e...,https://contrib0.wp.intra.dev-toronto.ca/ext/e...,Portrait,,,,892,MULTIPOINT ((-79.54444 43.59146)),-79.54444,43.59146
892,893,Etobicoke,1,2,16,Art Along the Lakeshore,,,"Land, Sea & Air (2001)",Lake Shore Blvd. W.,...,https://contrib0.wp.intra.dev-toronto.ca/ext/e...,https://contrib0.wp.intra.dev-toronto.ca/ext/e...,Landscape,,,,893,MULTIPOINT ((-79.54515 43.59101)),-79.54515,43.59101
893,894,Etobicoke,1,2,17,Art Along the Lakeshore,,,Etobicoke Creek & Marie Curtis Park,Lake Shore Blvd. W.,...,https://www.toronto.ca/data/parks/img/6/1.jpg,https://www.toronto.ca/data/parks/img/6/1.jpg,Landscape,,,,894,MULTIPOINT ((-79.54672 43.58685)),-79.54672,43.58685


#### Load

Export to disk

In [14]:
fpath_ch = glob(os.path.join(raw_data_dir, 'cultural_hotspots__*.parquet.gzip'))
if not fpath_ch:
    flut.load(
        gdf_ch,
        raw_data_dir,
        'cultural_hotspots',
        my_timezone,
        verbose=True,
    )

Exported 895 rows of cultural_hotspots data to /home/jovyan/data/raw/systems/toronto/cultural_hotspots__20240423_111338.parquet.gzip


### Places of Interest

#### Extract

In [15]:
%%time
df_poi_geo = get_open_data_package_resources(base_url, params_poi)
df_poi_geo = df_poi_geo.query(
    "(datastore_active == False) & (name.str.endswith('4326.geojson'))"
)
url_geo_file = df_poi_geo['url'].iloc[0]
gdf_poi = (
    gpd.read_file(url_geo_file)
    .pipe(extract_coords_from_geometry, 'poi_lat', 'poi_lon')
)
print(gdf_poi.crs)
with pd.option_context('display.max_columns', None):
    display(
        gdf_poi[
            ['_id', 'NAME', 'ADDRESS_FULL', 'CATEGORY', 'poi_lat', 'poi_lon', 'geometry']
        ].head(2)
    )

EPSG:4326


Unnamed: 0,_id,NAME,ADDRESS_FULL,CATEGORY,poi_lat,poi_lon,geometry
0,1,BMO Field,170 Princes' Blvd,Sports / Entertainment Venue,-79.418416,43.634663,MULTIPOINT ((-79.41842 43.63466))
1,2,Aga Khan Museum,77 Wynford Dr,Museum,-79.331804,43.727331,MULTIPOINT ((-79.33180 43.72733))


CPU times: user 137 ms, sys: 3.81 ms, total: 140 ms
Wall time: 513 ms


#### Transform

In [16]:
gdf_poi = gdf_poi.convert_dtypes()
pu.show_df(gdf_poi)

column,_id,ADDRESS_INFO,NAME,CATEGORY,PHONE,EMAIL,WEBSITE,GEOID,RECEIVED_DATE,ADDRESS_POINT_ID,...,LINEAR_NAME_ID,WARD,WARD_2003,WARD_2018,MI_PRINX,ATTRACTION,MAP_ACCESS,geometry,poi_lat,poi_lon
dtype,Int64,string[python],string[python],string[python],string[python],string[python],string[python],Int64,string[python],Int64,...,Int64,string[python],Int64,Int64,Int64,string[python],string[python],geometry,Float64,Float64
nunique,174,11,173,13,138,1,172,164,1,160,...,99,24,32,24,160,173,3,164,164,164
missing,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
0,1,,BMO Field,Sports / Entertainment Venue,416-815-5982,,https://www.bmofield.com/,20229243,,20229243,...,20228,Spadina-Fort York,19,10,4163950,BMO Field is home to the Toronto FC (Major Lea...,Y,MULTIPOINT ((-79.41842 43.63466)),-79.418416,43.634663
1,2,,Aga Khan Museum,Museum,416-646-4677,,https://www.agakhanmuseum.org/,10142948,,10142948,...,7128,Don Valley East,26,16,4094277,"Dedicated to sharing the artistic, intellectua...",Y,MULTIPOINT ((-79.33180 43.72733)),-79.331804,43.727331
2,3,,Scotiabank Arena (formerly Air Canada Centre),Sports / Entertainment Venue,416-815-5500,,https://www.scotiabankarena.com/,7929257,,7929257,...,2962,Spadina-Fort York,28,10,3176821,The Scotiabank Arena is a multi-purpose indoor...,Y,MULTIPOINT ((-79.37880 43.64544)),-79.378801,43.645437
3,4,,Al Green Theatre (at the Miles Nadal Jewish Co...,Performing Arts,416-924-6211,,http://www.algreentheatre.ca/,8418224,,8418224,...,4426,University-Rosedale,20,11,1951177,The Al Green Theatre is used for social and c...,N,MULTIPOINT ((-79.40386 43.66821)),-79.403863,43.668206
4,5,,Alexander Muir Memorial Gardens,Nature/ Park,416-338-4386,,https://www.toronto.ca/data/parks/prd/faciliti...,10154587,,10154587,...,4733,Don Valley West,25,15,4217141,Named after the famous composer Alexander Muir...,Y,MULTIPOINT ((-79.40097 43.72353)),-79.400965,43.723531
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
169,170,,Yonge-Dundas Square,Landmark,416-979-9960,,https://www.ydsquare.ca/,14229809,,14229809,...,3334,Toronto Centre,27,13,3149860,Yonge-Dundas Square is one of the city's large...,Y,MULTIPOINT ((-79.38024 43.65819)),-79.380239,43.658192
170,171,,York Quay Centre,Landmark,416-967-6425,,http://www.harbourfrontcentre.com/,11226853,,11226853,...,4244,Spadina-Fort York,20,10,1995833,The York Quay Centre is part of the Harbourfro...,N,MULTIPOINT ((-79.38291 43.64123)),-79.382906,43.641231
171,172,,Yorkdale Mall,Landmark,416-789-3261,,https://yorkdale.com/,509769,,509769,...,557,Eglinton-Lawrence,15,8,3035044,An upscale shopping centre in the northern par...,Y,MULTIPOINT ((-79.45127 43.72807)),-79.451271,43.728069
172,173,,Young Centre for the Performing Arts,Performing Arts,416-866-8666,,https://www.youngcentre.ca/,30032214,,30032214,...,19560,Spadina-Fort York,28,10,3339458,Opened in 2006 the Young Centre for the Perfor...,Y,MULTIPOINT ((-79.35726 43.65285)),-79.357258,43.65285


#### Load

Export to disk

In [17]:
fpath_poi = glob(os.path.join(raw_data_dir, 'places_of_interest__*.parquet.gzip'))
if not fpath_poi:
    flut.load(
        gdf_poi,
        raw_data_dir,
        'places_of_interest',
        my_timezone,
        verbose=True,
    )

Exported 174 rows of places_of_interest data to /home/jovyan/data/raw/systems/toronto/places_of_interest__20240423_111338.parquet.gzip


### Cycle Paths

#### Extract

In [18]:
%%time
df_params_cycle = get_open_data_package_resources(base_url, params_cycle)
df_params_cycle = df_params_cycle.query(
    "(datastore_active == False) & (name.str.endswith('4326.geojson'))"
)
url_geo_file = df_params_cycle['url'].iloc[0]
gdf_cycle = gpd.read_file(url_geo_file)#[cycle_network_cols]
print(gdf_cycle.crs)
with pd.option_context('display.max_columns', None):
    pu.show_df(gdf_cycle)

EPSG:4326


column,_id,OBJECTID,SEGMENT_ID,INSTALLED,UPGRADED,PRE_AMALGAMATION,STREET_NAME,FROM_STREET,TO_STREET,ROADCLASS,CNPCLASS,SURFACE,OWNER,DIR_LOWORDER,INFRA_LOWORDER,SEPA_LOWORDER,SEPB_LOWORDER,ORIG_LOWORDER_INFRA,DIR_HIGHORDER,INFRA_HIGHORDER,SEPA_HIGHORDER,SEPB_HIGHORDER,ORIG_HIGHORDER,BYLAWED,EDITOR,LAST_EDIT_DATE,UPGRADE_DESCRIPTION,CONVERTED,geometry
dtype,int64,int64,int64,int64,int64,object,object,object,object,object,object,object,object,object,object,object,object,object,object,object,object,object,object,object,object,datetime64[ns],object,object,geometry
nunique,1445,1445,1445,24,21,1,673,894,880,1,1,1,1,1,19,1,1,1,1,17,1,1,1,1,1,1,1,18,1445
missing,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
0,1,1,1,2001,2021,,Kilbarry Rd,Highbourne Rd,Oriole Pkwy,,,,,,Sharrows - Wayfinding,,,,,Sharrows - Wayfinding,,,,,,2024-02-28 20:11:16,,2007,"MULTILINESTRING ((-79.40351 43.69526, -79.4030..."
1,2,2,2,2001,2009,,Martin Goodman / Waterfront Trl,Lake Shore Blvd W,Martin Goodman Trl Branch,,,,,,Multi-Use Trail,,,,,Multi-Use Trail,,,,,,2024-02-28 20:11:16,,,"MULTILINESTRING ((-79.40364 43.63499, -79.4035..."
2,3,3,3,2001,2011,,Gatineau Hydro Corridor Trl,Givendale Rd,Kennedy Rd,,,,,,Multi-Use Trail,,,,,Multi-Use Trail,,,,,,2024-02-28 20:11:16,,,"MULTILINESTRING ((-79.27522 43.74158, -79.2751..."
3,4,4,4,2001,2011,,G Ross Lord Park Trl,Dufferin St,G Ross Lord Park Trl Branch,,,,,,Multi-Use Trail,,,,,Multi-Use Trail,,,,,,2024-02-28 20:11:16,,,"MULTILINESTRING ((-79.46772 43.77208, -79.4675..."
4,5,5,5,2001,2012,,Etobicoke Creek Trl,Etobicoke Creek Trl Branch,Forty Third St,,,,,,Multi-Use Trail - Entrance,,,,,Multi-Use Trail - Entrance,,,,,,2024-02-28 20:11:16,,,"MULTILINESTRING ((-79.54812 43.59027, -79.5480..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1440,1441,1441,1441,0,0,,Ontario Dr,Princes' Blvd,Princes' Blvd,,,,,,Bike Lane,,,,,Bike Lane,,,,,,2024-02-28 20:11:16,,,"MULTILINESTRING ((-79.41952 43.63201, -79.4198..."
1441,1442,1442,1442,0,0,,Royal York Rd,Eastbourne Cres,Lake Shore Blvd W,,,,,,Sharrows - Arterial - Connector,,,,,Sharrows - Arterial - Connector,,,,,,2024-02-28 20:11:16,,,"MULTILINESTRING ((-79.49350 43.60451, -79.4934..."
1442,1443,1443,1443,2005,2023,,Main St,Hamstead Ave,Lumsden Ave,,,,,,Sharrows,,,,,Sharrows,,,,,,2024-02-28 20:11:16,,,"MULTILINESTRING ((-79.30584 43.69705, -79.3057..."
1443,1444,1444,1444,2023,0,,Temperance St,Bay St,Yonge St,,,,,,Sharrows,,,,,Bike Lane - Contraflow,,,,,,2024-02-28 20:11:16,,,"MULTILINESTRING ((-79.38107 43.65057, -79.3803..."


CPU times: user 683 ms, sys: 25.2 ms, total: 708 ms
Wall time: 1.1 s


#### Transform

In [19]:
gdf_cycle = gdf_cycle.convert_dtypes()
pu.show_df(gdf_cycle)

column,_id,OBJECTID,SEGMENT_ID,INSTALLED,UPGRADED,PRE_AMALGAMATION,STREET_NAME,FROM_STREET,TO_STREET,ROADCLASS,...,INFRA_HIGHORDER,SEPA_HIGHORDER,SEPB_HIGHORDER,ORIG_HIGHORDER,BYLAWED,EDITOR,LAST_EDIT_DATE,UPGRADE_DESCRIPTION,CONVERTED,geometry
dtype,Int64,Int64,Int64,Int64,Int64,string[python],string[python],string[python],string[python],string[python],...,string[python],string[python],string[python],string[python],string[python],string[python],datetime64[ns],string[python],string[python],geometry
nunique,1445,1445,1445,24,21,1,673,894,880,1,...,17,1,1,1,1,1,1,1,18,1445
missing,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
0,1,1,1,2001,2021,,Kilbarry Rd,Highbourne Rd,Oriole Pkwy,,...,Sharrows - Wayfinding,,,,,,2024-02-28 20:11:16,,2007,"MULTILINESTRING ((-79.40351 43.69526, -79.4030..."
1,2,2,2,2001,2009,,Martin Goodman / Waterfront Trl,Lake Shore Blvd W,Martin Goodman Trl Branch,,...,Multi-Use Trail,,,,,,2024-02-28 20:11:16,,,"MULTILINESTRING ((-79.40364 43.63499, -79.4035..."
2,3,3,3,2001,2011,,Gatineau Hydro Corridor Trl,Givendale Rd,Kennedy Rd,,...,Multi-Use Trail,,,,,,2024-02-28 20:11:16,,,"MULTILINESTRING ((-79.27522 43.74158, -79.2751..."
3,4,4,4,2001,2011,,G Ross Lord Park Trl,Dufferin St,G Ross Lord Park Trl Branch,,...,Multi-Use Trail,,,,,,2024-02-28 20:11:16,,,"MULTILINESTRING ((-79.46772 43.77208, -79.4675..."
4,5,5,5,2001,2012,,Etobicoke Creek Trl,Etobicoke Creek Trl Branch,Forty Third St,,...,Multi-Use Trail - Entrance,,,,,,2024-02-28 20:11:16,,,"MULTILINESTRING ((-79.54812 43.59027, -79.5480..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1440,1441,1441,1441,0,0,,Ontario Dr,Princes' Blvd,Princes' Blvd,,...,Bike Lane,,,,,,2024-02-28 20:11:16,,,"MULTILINESTRING ((-79.41952 43.63201, -79.4198..."
1441,1442,1442,1442,0,0,,Royal York Rd,Eastbourne Cres,Lake Shore Blvd W,,...,Sharrows - Arterial - Connector,,,,,,2024-02-28 20:11:16,,,"MULTILINESTRING ((-79.49350 43.60451, -79.4934..."
1442,1443,1443,1443,2005,2023,,Main St,Hamstead Ave,Lumsden Ave,,...,Sharrows,,,,,,2024-02-28 20:11:16,,,"MULTILINESTRING ((-79.30584 43.69705, -79.3057..."
1443,1444,1444,1444,2023,0,,Temperance St,Bay St,Yonge St,,...,Bike Lane - Contraflow,,,,,,2024-02-28 20:11:16,,,"MULTILINESTRING ((-79.38107 43.65057, -79.3803..."


#### Load

Export to disk

In [20]:
fpath_cycle = glob(os.path.join(raw_data_dir, 'cycle_paths__*.parquet.gzip'))
if not fpath_cycle:
    flut.load(
        gdf_cycle,
        raw_data_dir,
        'cycle_paths',
        my_timezone,
        verbose=True,
    )

Exported 1,445 rows of cycle_paths data to /home/jovyan/data/raw/systems/toronto/cycle_paths__20240423_111339.parquet.gzip


## Discussion

### Conclusion

1. This step has downloaded data for
   - Public Transit (Bus) Stops
   - Public Library Branch Locations
   - Places of Interest
   - Cultural Hotspots
   - Cycling Network

## Next Step

The next step will process the raw bike share trips data that has been retrieved.

## Version Information

In [21]:
packages = [
    'requests',
    'tqdm',
    'geopandas',
    'pandas',
    'pyarrow',
]
print(
    watermark(
        updated=True,
        current_date=True,
        current_time=True,
        timezone=True,
        custom_time="%Y-%m-%d %H:%M:%S %Z",
        python=True,
        machine=True,
        packages=','.join(packages),
    )
)

Last updated: 2024-04-23 15:13:39 UTC

Python implementation: CPython
Python version       : 3.12.3
IPython version      : 8.22.2

requests : 2.31.0
tqdm     : 4.66.2
geopandas: 0.14.3
pandas   : 2.2.2
pyarrow  : 15.0.1

Compiler    : GCC 12.3.0
OS          : Linux
Release     : 6.8.0-76060800daily20240311-generic
Machine     : x86_64
Processor   : x86_64
CPU cores   : 12
Architecture: 64bit

