# Pick a random station

In [45]:
import io
import json
import pandas as pd
from random import randint
import requests
import zipfile

## Get all stations

Toronto's _TTC Routes and Schedules_ data source: https://open.toronto.ca/dataset/ttc-routes-and-schedules/

In [42]:
# Fetch the dataset's metadata through the CKAN `package_show` action; the
# response lists the downloadable resources of the package with this id.
url = "https://ckan0.cf.opendata.inter.prod-toronto.ca/api/3/action/package_show"
params = {"id": "7795b45e-e65a-4465-81fc-c36b9dfff169"}
# The timeout stops the cell from hanging indefinitely on a stalled connection,
# and raise_for_status() reports an HTTP error here instead of letting it show
# up later as a confusing JSON decode error or KeyError.
package_response = requests.get(url, params=params, timeout=30)
package_response.raise_for_status()
package = package_response.json()
# Uncomment to inspect the full metadata payload:
# print(package["result"])

In [5]:
# Show the download URL of the first resource listed in the package metadata.
print(package["result"]["resources"][0]["url"])

https://ckan0.cf.opendata.inter.prod-toronto.ca/dataset/7795b45e-e65a-4465-81fc-c36b9dfff169/resource/cfb6b2b8-6191-41e3-bda1-b175c51148cb/download/opendata_ttc_schedules.zip


#### Download zip file and get `stops.txt` file contents

In [6]:
# Take the archive URL from the package metadata rather than a pasted copy, so
# the download follows the dataset if the resource URL changes.
# NOTE(review): assumes the first resource is the schedules zip — confirm if
# the dataset ever lists more than one resource.
schedule_zip_url = package["result"]["resources"][0]["url"]
# Fail fast on a stalled connection or an HTTP error; otherwise an error page
# would only be noticed later as a BadZipFile when the bytes are opened.
r = requests.get(schedule_zip_url, timeout=30)
r.raise_for_status()

In [7]:
# Wrap the downloaded bytes in an in-memory buffer so the archive can be
# opened without writing it to disk.
archive_buffer = io.BytesIO(r.content)
zip_file = zipfile.ZipFile(archive_buffer)

In [39]:
# List the archive members (one ZipInfo per file) to see what the feed contains.
zip_file.infolist()

[<ZipInfo filename='agency.txt' compress_type=deflate filemode='-rw-rw-rw-' file_size=149 compress_size=107>,
 <ZipInfo filename='calendar.txt' compress_type=deflate filemode='-rw-rw-rw-' file_size=379 compress_size=121>,
 <ZipInfo filename='calendar_dates.txt' compress_type=deflate filemode='-rw-rw-rw-' file_size=553 compress_size=141>,
 <ZipInfo filename='routes.txt' compress_type=deflate filemode='-rw-rw-rw-' file_size=9301 compress_size=2318>,
 <ZipInfo filename='shapes.txt' compress_type=deflate filemode='-rw-rw-rw-' file_size=13986788 compress_size=3043106>,
 <ZipInfo filename='stops.txt' compress_type=deflate filemode='-rw-rw-rw-' file_size=680413 compress_size=216106>,
 <ZipInfo filename='stop_times.txt' compress_type=deflate filemode='-rw-rw-rw-' file_size=200072079 compress_size=30129290>,
 <ZipInfo filename='trips.txt' compress_type=deflate filemode='-rw-rw-rw-' file_size=11616799 compress_size=653096>]

In [8]:
# Pull the raw bytes of stops.txt out of the archive.
with zip_file.open("stops.txt") as stops_member:
    stops_raw = stops_member.read()

#### Keep only the stop records that are stations (names containing ` Station -`)

In [94]:
# Parse the stops file from memory. keep_default_na=False leaves blank fields
# as empty strings instead of converting them to NaN.
stops_buffer = io.BytesIO(stops_raw)
stops_df = pd.read_csv(stops_buffer, keep_default_na=False)

In [95]:
# Keep the stops whose name contains the literal text " Station -"
# (e.g. "York Mills Station - Southbound Platform").
is_station_stop = stops_df["stop_name"].str.contains(" Station -", regex=False)
# .copy() yields an independent frame, so adding a column to it afterwards
# does not touch stops_df.
stops_containing_station_df = stops_df.loc[is_station_stop].copy()

In [96]:
# The text before "Station -" is the station's name; strip the whitespace
# that is left around it after the split.
stop_name_parts = stops_containing_station_df["stop_name"].str.split("Station -")
stops_containing_station_df["station_name"] = stop_name_parts.str[0].str.strip()

In [97]:
# The same station_name can occur on several rows (presumably one per
# platform); keep only the first row seen for each name.
stations_df = stops_containing_station_df.drop_duplicates(
    subset="station_name",
    keep="first",
)

In [98]:
# Round-trip through JSON to get a plain list of dicts (one per station)
# holding built-in Python values.
stations_json = stations_df.to_json(orient="records")
stations = json.loads(stations_json)

In [99]:
# Peek at one record to check which fields each station dict carries.
stations[3]

{'stop_id': 14407,
 'stop_code': 13792,
 'stop_name': 'York Mills Station - Southbound Platform',
 'stop_desc': '',
 'stop_lat': 43.743248,
 'stop_lon': -79.405991,
 'zone_id': '',
 'stop_url': '',
 'location_type': '',
 'parent_station': '',
 'stop_timezone': '',
 'wheelchair_boarding': 1,
 'station_name': 'York Mills'}

#### Pick a random station

In [100]:
# Report how many distinct station names were found.
print(f"Number of stations: {len(stations)}")

Number of stations: 75


In [101]:
# randint includes both endpoints, so the upper bound is the last valid index.
last_station_index = len(stations) - 1
rand_i = randint(0, last_station_index)
print(f"Random index choice: {rand_i}")

Random index choice: 38


In [102]:
# Display the station record at the randomly chosen index.
stations[rand_i]

{'stop_id': 14474,
 'stop_code': 13773,
 'stop_name': 'High Park Station - Eastbound Platform',
 'stop_desc': '',
 'stop_lat': 43.653849,
 'stop_lon': -79.466991,
 'zone_id': '',
 'stop_url': '',
 'location_type': '',
 'parent_station': '',
 'stop_timezone': '',
 'wheelchair_boarding': 2,
 'station_name': 'High Park'}

In [83]:
# stations