In [3]:
%pip install pandas

/home/richie/.cache/uv/archive-v0/1ZtaX71Qj6Bi-JQKgZagt/bin/python: No module named pip
Note: you may need to restart the kernel to use updated packages.


In [1]:
import requests
from pprint import pprint
import os
import pandas as pd



APP_ID = os.environ.get("TFL_APP_ID")
APP_ID = "Halfway"
APP_KEY = os.environ.get("TFL_APP_KEY")
APP_KEY = "21dbb2eeb688456e817278669ba2c9d4"
base_url = "https://api.tfl.gov.uk"


def fetch_tfl_data(endpoint, params=None):
    url = f"{base_url}{endpoint}"
    params = params or {}
    params.update({"app_id": APP_ID, "app_key": APP_KEY})

    try:
        response = requests.get(url, params=params, timeout=15)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.HTTPError as e:
        print(f"\nHTTP Error {response.status_code}: {response.text}")
        return None
    except Exception as e:
        print(f"\nError fetching {url}: {str(e)}")
        return None

# Constructing the TFL network as a Graph
### Steps
1. Decide on the list of transport modes you want and ensure they are supported
2. For each mode, get the list of lines (e.g. tube, victoria)
3. For each line, get the ordered list of stations that serve that line, name + id
4. For each station+line, construct a node_id and save these nodes
5. From the ordered list of stations, construct a set of same line edges, one station apart
6. For each station, append the lines it serves, and construct interstation edges
7. For the interline edges, query tfl for the times to get from each station to the next
8. For the interstation egdes, identify a method for determining the transition time (one idea, try a query for one station before and one after on each side, and subtract the known train times to that station.
9. Construct the graph with our nodes and edges
10. Run Dijkstra and get all node->node times
11. Collapse routes with multipe platforms to the minimum distance.
12. YUUU$$$

In [2]:
all_modes = fetch_tfl_data("/Line/Meta/Modes")
for mode in all_modes:
    print(mode["modeName"])


bus
cable-car
coach
cycle
cycle-hire
dlr
elizabeth-line
interchange-keep-sitting
interchange-secure
national-rail
overground
replacement-bus
river-bus
river-tour
taxi
tram
tube
walking


In [3]:
modes = ["tube", "overground", "dlr", "elizabeth-line", "tram"]
lines = fetch_tfl_data("/Line/Mode/" + ",".join(modes))

In [5]:
line_ids = []
lines_ld = []
for line in lines:
    #print(line["id"], line["name"], line["modeName"])
    line_ids.append(line["id"])
    line_d = {}
    line_d["line_name"] = line["name"]
    line_d["line_id"] = line["id"]
    lines_ld.append(line_d)

df = pd.DataFrame(lines_ld)
print(df)


             line_name           line_id
0             Bakerloo          bakerloo
1              Central           central
2               Circle            circle
3             District          district
4                  DLR               dlr
5       Elizabeth line         elizabeth
6   Hammersmith & City  hammersmith-city
7              Jubilee           jubilee
8              Liberty           liberty
9              Lioness           lioness
10        Metropolitan      metropolitan
11             Mildmay           mildmay
12            Northern          northern
13          Piccadilly        piccadilly
14         Suffragette       suffragette
15                Tram              tram
16            Victoria          victoria
17     Waterloo & City     waterloo-city
18              Weaver            weaver
19            Windrush          windrush


In [6]:
station_id = {}
station_dict = []

for line_id in line_ids:
    stops = fetch_tfl_data(f"/Line/{line_id}/StopPoints?tflOperatedNationalRailStationsOnly=false")
    #print(stops[0].keys())
    #pprint(stops[1], depth=1, compact=True)
    #print("\n\n")
    #pprint(stops[0]["children"])
    for stop in stops:
        name = stop["commonName"]
        naptanId = stop["naptanId"]
        #print(f"{naptanId}: {name} - {line_id}")
        station_id[naptanId] = name
        station = {}
        station["station_id"] = naptanId
        station["station_name"] = name
        station["station_line"] = line_id
        station_dict.append(station)
df = pd.DataFrame(station_dict)
df.head()

Unnamed: 0,station_id,station_name,station_line
0,940GZZLUBST,Baker Street Underground Station,bakerloo
1,940GZZLUCHX,Charing Cross Underground Station,bakerloo
2,940GZZLUEAC,Elephant & Castle Underground Station,bakerloo
3,940GZZLUEMB,Embankment Underground Station,bakerloo
4,940GZZLUERB,Edgware Road (Bakerloo) Underground Station,bakerloo


In [12]:
id = "elizabeth"
direction = "outbound"
stops = fetch_tfl_data(f"/Line/{id}/Route/Sequence/{direction}")
pprint(stops, depth=1, compact=True)

{'$type': 'Tfl.Api.Presentation.Entities.RouteSequence, '
          'Tfl.Api.Presentation.Entities',
 'direction': 'outbound',
 'isOutboundOnly': False,
 'lineId': 'elizabeth',
 'lineName': 'Elizabeth line',
 'lineStrings': [...],
 'mode': 'elizabeth-line',
 'orderedLineRoutes': [...],
 'stations': [...],
 'stopPointSequences': [...]}


In [13]:
route_stations = []

for station in stops["orderedLineRoutes"][0]["naptanIds"]: # Routes with variations have multiple  entries here
    print(f"{id}: {station}-{station_id[station]}")
    route_stations.append(f"{station}-{id}")

elizabeth: 910GABWDXR-Abbey Wood
elizabeth: 910GWOLWXR-Woolwich
elizabeth: 910GCSTMHSXR-Custom House
elizabeth: 910GCANWHRF-Canary Wharf
elizabeth: 910GWCHAPXR-Whitechapel
elizabeth: 910GLIVSTLL-Liverpool Street
elizabeth: 910GFRNDXR-Farringdon
elizabeth: 910GTOTCTRD-Tottenham Court Road
elizabeth: 910GBONDST-Bond Street
elizabeth: 910GPADTLL-Paddington
elizabeth: 910GACTONML-Acton Main Line Rail Station
elizabeth: 910GEALINGB-Ealing Broadway Rail Station
elizabeth: 910GWEALING-West Ealing Rail Station
elizabeth: 910GHANWELL-Hanwell Rail Station
elizabeth: 910GSTHALL-Southall Rail Station
elizabeth: 910GHAYESAH-Hayes & Harlington Rail Station
elizabeth: 910GHTRWAPT-Heathrow Terminals 2 & 3 Rail Station
elizabeth: 910GHTRWTM4-Heathrow Terminal 4 Rail Station


In [14]:
print(route_stations)

['910GABWDXR-elizabeth', '910GWOLWXR-elizabeth', '910GCSTMHSXR-elizabeth', '910GCANWHRF-elizabeth', '910GWCHAPXR-elizabeth', '910GLIVSTLL-elizabeth', '910GFRNDXR-elizabeth', '910GTOTCTRD-elizabeth', '910GBONDST-elizabeth', '910GPADTLL-elizabeth', '910GACTONML-elizabeth', '910GEALINGB-elizabeth', '910GWEALING-elizabeth', '910GHANWELL-elizabeth', '910GSTHALL-elizabeth', '910GHAYESAH-elizabeth', '910GHTRWAPT-elizabeth', '910GHTRWTM4-elizabeth']


In [15]:
for x,y in zip(route_stations, route_stations[1:]):
    print(x, y)

910GABWDXR-elizabeth 910GWOLWXR-elizabeth
910GWOLWXR-elizabeth 910GCSTMHSXR-elizabeth
910GCSTMHSXR-elizabeth 910GCANWHRF-elizabeth
910GCANWHRF-elizabeth 910GWCHAPXR-elizabeth
910GWCHAPXR-elizabeth 910GLIVSTLL-elizabeth
910GLIVSTLL-elizabeth 910GFRNDXR-elizabeth
910GFRNDXR-elizabeth 910GTOTCTRD-elizabeth
910GTOTCTRD-elizabeth 910GBONDST-elizabeth
910GBONDST-elizabeth 910GPADTLL-elizabeth
910GPADTLL-elizabeth 910GACTONML-elizabeth
910GACTONML-elizabeth 910GEALINGB-elizabeth
910GEALINGB-elizabeth 910GWEALING-elizabeth
910GWEALING-elizabeth 910GHANWELL-elizabeth
910GHANWELL-elizabeth 910GSTHALL-elizabeth
910GSTHALL-elizabeth 910GHAYESAH-elizabeth
910GHAYESAH-elizabeth 910GHTRWAPT-elizabeth
910GHTRWAPT-elizabeth 910GHTRWTM4-elizabeth


In [8]:
#pprint(stops, depth=1, compact=True)
pprint(stops["orderedLineRoutes"][0], depth=2, compact=True)
route_stops = stops["orderedLineRoutes"][0]["naptanIds"]

{'$type': 'Tfl.Api.Presentation.Entities.OrderedRoute, '
          'Tfl.Api.Presentation.Entities',
 'name': 'Abbey Wood &harr;  Heathrow Terminal 4 ',
 'naptanIds': ['910GABWDXR', '910GWOLWXR', '910GCSTMHSXR', '910GCANWHRF',
               '910GWCHAPXR', '910GLIVSTLL', '910GFRNDXR', '910GTOTCTRD',
               '910GBONDST', '910GPADTLL', '910GACTONML', '910GEALINGB',
               '910GWEALING', '910GHANWELL', '910GSTHALL', '910GHAYESAH',
               '910GHTRWAPT', '910GHTRWTM4'],
 'serviceType': 'Regular'}


In [9]:
id = "elizabeth"
fromStopPointId = "910GPADTLL"
toStopPointId = "910GHTRWTM4"

timetable = fetch_tfl_data(f"/Line/{id}/Timetable/{fromStopPointId}/to/{toStopPointId}")

In [10]:
id = "elizabeth"
from_ = "910GABWDXR"
to_ = "910GWOLWXR"

for from_, to_ in zip(route_stops, route_stops[1:]):
    timetable = fetch_tfl_data(f"/Journey/JourneyResults/{from_}/to/{to_}?useRealTimeLiveArrivals=false")
    durations = []
    for journey in timetable["journeys"]:
        durations.append(journey["duration"])
    print(f"{from_}->{to_}={min(durations)}")





910GABWDXR->910GWOLWXR=3
910GWOLWXR->910GCSTMHSXR=4
910GCSTMHSXR->910GCANWHRF=4
910GCANWHRF->910GWCHAPXR=4
910GWCHAPXR->910GLIVSTLL=3
910GLIVSTLL->910GFRNDXR=2
910GFRNDXR->910GTOTCTRD=2
910GTOTCTRD->910GBONDST=2
910GBONDST->910GPADTLL=4
910GPADTLL->910GACTONML=6
910GACTONML->910GEALINGB=3
910GEALINGB->910GWEALING=2
910GWEALING->910GHANWELL=2
910GHANWELL->910GSTHALL=3
910GSTHALL->910GHAYESAH=3
910GHAYESAH->910GHTRWAPT=7
910GHTRWAPT->910GHTRWTM4=5


In [11]:
id = "elizabeth"
from_ = "910GABWDXR"
to_ = "910GWOLWXR"

for from_, to_ in zip(route_stops, route_stops[1:]):
    timetable = fetch_tfl_data(f"/Journey/JourneyResults/{from_}/to/{to_}?useRealTimeLiveArrivals=false")
    durations = []
    for journey in timetable["journeys"]:
        durations.append(journey["duration"])
    print(f"{from_}->{to_}={min(durations)}")





910GABWDXR->910GWOLWXR=3
910GWOLWXR->910GCSTMHSXR=4
910GCSTMHSXR->910GCANWHRF=4
910GCANWHRF->910GWCHAPXR=4
910GWCHAPXR->910GLIVSTLL=3
910GLIVSTLL->910GFRNDXR=2
910GFRNDXR->910GTOTCTRD=2
910GTOTCTRD->910GBONDST=2
910GBONDST->910GPADTLL=4
910GPADTLL->910GACTONML=6
910GACTONML->910GEALINGB=3
910GEALINGB->910GWEALING=2
910GWEALING->910GHANWELL=2
910GHANWELL->910GSTHALL=3
910GSTHALL->910GHAYESAH=3
910GHAYESAH->910GHTRWAPT=7
910GHTRWAPT->910GHTRWTM4=5


In [12]:
pprint(timetable, depth=3)

{'$type': 'Tfl.Api.Presentation.Entities.JourneyPlanner.ItineraryResult, '
          'Tfl.Api.Presentation.Entities',
 'journeyVector': {'$type': 'Tfl.Api.Presentation.Entities.JourneyPlanner.JourneyVector, '
                            'Tfl.Api.Presentation.Entities',
                   'from': '1001147',
                   'to': '1000104',
                   'uri': '/journey/journeyresults/910ghtrwapt/to/910ghtrwtm4?userealtimelivearrivals=false&app_id=halfway&app_key=21dbb2eeb688456e817278669ba2c9d4',
                   'via': ''},
 'journeys': [{'$type': 'Tfl.Api.Presentation.Entities.JourneyPlanner.Journey, '
                        'Tfl.Api.Presentation.Entities',
               'alternativeRoute': False,
               'arrivalDateTime': '2025-02-12T15:12:00',
               'duration': 5,
               'fare': {...},
               'legs': [...],
               'startDateTime': '2025-02-12T15:07:00'},
              {'$type': 'Tfl.Api.Presentation.Entities.JourneyPlanner.Journe

In [13]:
pprint(timetable["timetable"]["routes"][0]["schedules"][0]) # periods/frequency
# https://api-portal.tfl.gov.uk/api-details#api=Line&operation=Line_TimetableByPathFromStopPointIdPathId

KeyError: 'timetable'

In [None]:
pprint(timetable["timetable"]["routes"][0]["schedules"][0]["periods"][1]["frequency"]["highestFrequency"]) # periods/frequency
pprint(timetable["timetable"]["routes"][0]["schedules"][0]["periods"][1]["frequency"]["lowestFrequency"]) # periods/frequency

# https://api-portal.tfl.gov.uk/api-details#api=Line&operation=Line_TimetableByPathFromStopPointIdPathId

In [None]:

def time_from_journey(journey):
    return int(journey["hour"]) * 60 + int(journey["minute"])
    
first = timetable["timetable"]["routes"][0]["schedules"][0]["firstJourney"]
first_time = time_from_journey(first)
for schedule in timetable["timetable"]["routes"][0]["schedules"][0]["knownJourneys"][1:]:
    next_time = time_from_journey(schedule)
    print(f"Time between Trains = {next_time - first_time} mins")
    first_time = next_time

    

In [None]:
pprint(timetable["timetable"]["routes"][0]["stationIntervals"], depth=2)

In [None]:
for stop in timetable["timetable"]["routes"][0]["stationIntervals"][0]["intervals"]:
    name = station_id[stop["stopId"]]
    arrivalTime = stop["timeToArrival"]
    print(f"{name}: {arrivalTime}") # Why are there 2 for almost all stations?

In [None]:
for stop in timetable["timetable"]["routes"][0]["stationIntervals"][1]["intervals"]:
    name = station_id[stop["stopId"]]
    arrivalTime = stop["timeToArrival"]
    print(f"{name}: {arrivalTime}") # Why are there 2 for almost all stations?