In [1]:
!uv pip install pandas

[2mUsing Python 3.13.2 environment at: /home/richie/halfway/db-builder/.venv[0m
[2mAudited [1m1 package[0m [2min 1ms[0m[0m


In [2]:
import requests
from pprint import pprint
import os
import pandas as pd



APP_ID = os.environ.get("TFL_APP_ID")
APP_ID = "Halfway"
APP_KEY = os.environ.get("TFL_APP_KEY")
APP_KEY = "21dbb2eeb688456e817278669ba2c9d4"
base_url = "https://api.tfl.gov.uk"


def fetch_tfl_data(endpoint, params=None):
    url = f"{base_url}{endpoint}"
    params = params or {}
    params.update({"app_id": APP_ID, "app_key": APP_KEY})

    try:
        response = requests.get(url, params=params, timeout=15)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.HTTPError as e:
        print(f"\nHTTP Error {response.status_code}: {response.text}")
        return None
    except Exception as e:
        print(f"\nError fetching {url}: {str(e)}")
        return None

# Constructing the TFL network as a Graph
### Steps
1. Decide on the list of transport modes you want and ensure they are supported
2. For each mode, get the list of lines (e.g. tube, victoria)
3. For each line, get the ordered list of stations that serve that line, name + id
4. For each station+line, construct a node_id and save these nodes
5. From the ordered list of stations, construct a set of same line edges, one station apart
6. For each station, append the lines it serves, and construct interstation edges
7. For the interline edges, query tfl for the times to get from each station to the next
8. For the interstation egdes, identify a method for determining the transition time (one idea, try a query for one station before and one after on each side, and subtract the known train times to that station.
9. Construct the graph with our nodes and edges
10. Run Dijkstra and get all node->node times
11. Collapse routes with multipe platforms to the minimum distance.
12. YUUU$$$

In [3]:
all_modes = fetch_tfl_data("/Line/Meta/Modes")
for mode in all_modes:
    print(mode["modeName"])


bus
cable-car
coach
cycle
cycle-hire
dlr
elizabeth-line
interchange-keep-sitting
interchange-secure
national-rail
overground
replacement-bus
river-bus
river-tour
taxi
tram
tube
walking


In [4]:
modes = ["tube", "overground", "dlr", "elizabeth-line", "tram"]
lines = fetch_tfl_data("/Line/Mode/" + ",".join(modes))

In [5]:
line_ids = []
lines_ld = []
for line in lines:
    #print(line["id"], line["name"], line["modeName"])
    line_ids.append(line["id"])
    line_d = {}
    line_d["line_name"] = line["name"]
    line_d["line_id"] = line["id"]
    lines_ld.append(line_d)

df = pd.DataFrame(lines_ld)
print(df)


             line_name           line_id
0             Bakerloo          bakerloo
1              Central           central
2               Circle            circle
3             District          district
4                  DLR               dlr
5       Elizabeth line         elizabeth
6   Hammersmith & City  hammersmith-city
7              Jubilee           jubilee
8              Liberty           liberty
9              Lioness           lioness
10        Metropolitan      metropolitan
11             Mildmay           mildmay
12            Northern          northern
13          Piccadilly        piccadilly
14         Suffragette       suffragette
15                Tram              tram
16            Victoria          victoria
17     Waterloo & City     waterloo-city
18              Weaver            weaver
19            Windrush          windrush


In [17]:
id = "HUBEPH"
stops = fetch_tfl_data(f"/StopPoint/940GZZLUBST,{id},940GZZLULGT")
pprint(stops, depth=2)
#pprint(stops["children"], depth=2)
#for x in stops["children"]:
#    print(x["commonName"], x["naptanId"], x["stationNaptan"])


[{'$type': 'Tfl.Api.Presentation.Entities.StopPoint, '
           'Tfl.Api.Presentation.Entities',
  'additionalProperties': [...],
  'children': [...],
  'commonName': 'Baker Street Underground Station',
  'icsCode': '1000011',
  'id': '940GZZLUBST',
  'lat': 51.522883,
  'lineGroup': [...],
  'lineModeGroups': [...],
  'lines': [...],
  'lon': -0.15713,
  'modes': [...],
  'naptanId': '940GZZLUBST',
  'placeType': 'StopPoint',
  'stationNaptan': '940GZZLUBST',
  'status': True,
  'stopType': 'NaptanMetroStation'},
 {'$type': 'Tfl.Api.Presentation.Entities.StopPoint, '
           'Tfl.Api.Presentation.Entities',
  'additionalProperties': [...],
  'children': [...],
  'commonName': 'Lancaster Gate Underground Station',
  'icsCode': '1000133',
  'id': '940GZZLULGT',
  'lat': 51.511723,
  'lineGroup': [...],
  'lineModeGroups': [...],
  'lines': [...],
  'lon': -0.175494,
  'modes': [...],
  'naptanId': '940GZZLULGT',
  'placeType': 'StopPoint',
  'stationNaptan': '940GZZLULGT',
  'statu

In [14]:
station_id = {}
station_dict = []
hubs = []
strs = []

for line_id in line_ids:
    stops = fetch_tfl_data(f"/Line/{line_id}/StopPoints?tflOperatedNationalRailStationsOnly=false")
    #print(stops[0].keys())
    #pprint(stops, depth=2, compact=True)
    #print("\n\n")
    #pprint(stops[0]["children"])
    for stop in stops:
        name = stop["commonName"]
        naptanId = stop["stationNaptan"]
        hub_name = name
        if hub := stop.get("hubNaptanCode", None):
            hubs.append(hub)
            hub_name = None
        print(line_id, name, naptanId, hub, hub_name)

        
        #pprint(stop, depth=2)
        #for x in stop["children"]:
        #    print(line_id, x["commonName"], x["naptanId"], x["stationNaptan"], hub)

        #print(f"{naptanId}: {name} - {line_id} - {hub}")
        strs.append(f"{name} - {naptanId} - {line_id} = {hub}")
        station_id[naptanId] = name
        station = {}
        station["station_id"] = naptanId
        station["station_name"] = name
        station["station_line"] = line_id
        station_dict.append(station)
df = pd.DataFrame(station_dict)
strs = sorted(strs)
for s in strs:
    print(s)
#print(hubs)
#df.head()

bakerloo Baker Street Underground Station 940GZZLUBST None Baker Street Underground Station
bakerloo Charing Cross Underground Station 940GZZLUCHX HUBCHX None
bakerloo Elephant & Castle Underground Station 940GZZLUEAC HUBEPH None
bakerloo Embankment Underground Station 940GZZLUEMB None Embankment Underground Station
bakerloo Edgware Road (Bakerloo) Underground Station 940GZZLUERB None Edgware Road (Bakerloo) Underground Station
bakerloo Harrow & Wealdstone Underground Station 940GZZLUHAW HUBHRW None
bakerloo Harlesden Underground Station 940GZZLUHSN HUBHDN None
bakerloo Kenton Underground Station 940GZZLUKEN HUBKNT None
bakerloo Kilburn Park Underground Station 940GZZLUKPK None Kilburn Park Underground Station
bakerloo Kensal Green Underground Station 940GZZLUKSL HUBKNL None
bakerloo Lambeth North Underground Station 940GZZLULBN None Lambeth North Underground Station
bakerloo Maida Vale Underground Station 940GZZLUMVL None Maida Vale Underground Station
bakerloo Marylebone Underground 

In [16]:
id = "district"
direction = "outbound"
stops = fetch_tfl_data(f"/Line/{id}/Route/Sequence/{direction}")
#pprint(stops, depth=1, compact=True)
pprint(stops["stations"][-1], depth=3, compact=True)


{'$type': 'Tfl.Api.Presentation.Entities.MatchedStop, '
          'Tfl.Api.Presentation.Entities',
 'icsId': '1000268',
 'id': 'HUBZWL',
 'lat': 51.519498,
 'lines': [{'$type': 'Tfl.Api.Presentation.Entities.Identifier, '
                     'Tfl.Api.Presentation.Entities',
            'crowding': {...},
            'id': '205',
            'name': '205',
            'routeType': 'Unknown',
            'status': 'Unknown',
            'type': 'Line',
            'uri': '/Line/205'},
           {'$type': 'Tfl.Api.Presentation.Entities.Identifier, '
                     'Tfl.Api.Presentation.Entities',
            'crowding': {...},
            'id': '25',
            'name': '25',
            'routeType': 'Unknown',
            'status': 'Unknown',
            'type': 'Line',
            'uri': '/Line/25'},
           {'$type': 'Tfl.Api.Presentation.Entities.Identifier, '
                     'Tfl.Api.Presentation.Entities',
            'crowding': {...},
            'id': '254',
     

In [11]:
route_stations = []

for station in stops["orderedLineRoutes"][0]["naptanIds"]: # Routes with variations have multiple  entries here
    print(f"{id}: {station}-{station_id[station]}")
    route_stations.append(f"{station}-{id}")

district: 940GZZLUEBY-Ealing Broadway Underground Station
district: 940GZZLUECM-Ealing Common Underground Station
district: 940GZZLUACT-Acton Town Underground Station
district: 940GZZLUCWP-Chiswick Park Underground Station
district: 940GZZLUTNG-Turnham Green Underground Station
district: 940GZZLUSFB-Stamford Brook Underground Station
district: 940GZZLURVP-Ravenscourt Park Underground Station
district: 940GZZLUHSD-Hammersmith (Dist&Picc Line) Underground Station
district: 940GZZLUBSC-Barons Court Underground Station
district: 940GZZLUWKN-West Kensington Underground Station
district: 940GZZLUECT-Earl's Court Underground Station
district: 940GZZLUGTR-Gloucester Road Underground Station
district: 940GZZLUSKS-South Kensington Underground Station
district: 940GZZLUSSQ-Sloane Square Underground Station
district: 940GZZLUVIC-Victoria Underground Station
district: 940GZZLUSJP-St. James's Park Underground Station
district: 940GZZLUWSM-Westminster Underground Station
district: 940GZZLUEMB-Embankm

In [9]:
route_stations = []

for station in stops["orderedLineRoutes"][0]["naptanIds"]: # Routes with variations have multiple  entries here
    print(f"{id}: {station}-{station_id[station]}")
    route_stations.append(f"{station}-{id}")

windrush: 910GHGHI-Highbury & Islington Rail Station
windrush: 910GCNNB-Canonbury Rail Station
windrush: 910GDALS-Dalston Junction Rail Station
windrush: 910GHAGGERS-Haggerston Rail Station
windrush: 910GHOXTON-Hoxton Rail Station
windrush: 910GSHRDHST-Shoreditch High Street Rail Station
windrush: 910GWCHAPEL-Whitechapel Rail Station
windrush: 910GSHADWEL-Shadwell Rail Station
windrush: 910GWAPPING-Wapping Rail Station
windrush: 910GRTHERHI-Rotherhithe Rail Station
windrush: 910GCNDAW-Canada Water Rail Station
windrush: 910GSURREYQ-Surrey Quays Rail Station
windrush: 910GNWCRELL-New Cross ELL Rail Station


In [14]:
print(route_stations)

['910GABWDXR-elizabeth', '910GWOLWXR-elizabeth', '910GCSTMHSXR-elizabeth', '910GCANWHRF-elizabeth', '910GWCHAPXR-elizabeth', '910GLIVSTLL-elizabeth', '910GFRNDXR-elizabeth', '910GTOTCTRD-elizabeth', '910GBONDST-elizabeth', '910GPADTLL-elizabeth', '910GACTONML-elizabeth', '910GEALINGB-elizabeth', '910GWEALING-elizabeth', '910GHANWELL-elizabeth', '910GSTHALL-elizabeth', '910GHAYESAH-elizabeth', '910GHTRWAPT-elizabeth', '910GHTRWTM4-elizabeth']


In [15]:
for x,y in zip(route_stations, route_stations[1:]):
    print(x, y)

910GABWDXR-elizabeth 910GWOLWXR-elizabeth
910GWOLWXR-elizabeth 910GCSTMHSXR-elizabeth
910GCSTMHSXR-elizabeth 910GCANWHRF-elizabeth
910GCANWHRF-elizabeth 910GWCHAPXR-elizabeth
910GWCHAPXR-elizabeth 910GLIVSTLL-elizabeth
910GLIVSTLL-elizabeth 910GFRNDXR-elizabeth
910GFRNDXR-elizabeth 910GTOTCTRD-elizabeth
910GTOTCTRD-elizabeth 910GBONDST-elizabeth
910GBONDST-elizabeth 910GPADTLL-elizabeth
910GPADTLL-elizabeth 910GACTONML-elizabeth
910GACTONML-elizabeth 910GEALINGB-elizabeth
910GEALINGB-elizabeth 910GWEALING-elizabeth
910GWEALING-elizabeth 910GHANWELL-elizabeth
910GHANWELL-elizabeth 910GSTHALL-elizabeth
910GSTHALL-elizabeth 910GHAYESAH-elizabeth
910GHAYESAH-elizabeth 910GHTRWAPT-elizabeth
910GHTRWAPT-elizabeth 910GHTRWTM4-elizabeth


In [8]:
#pprint(stops, depth=1, compact=True)
pprint(stops["orderedLineRoutes"][0], depth=2, compact=True)
route_stops = stops["orderedLineRoutes"][0]["naptanIds"]

{'$type': 'Tfl.Api.Presentation.Entities.OrderedRoute, '
          'Tfl.Api.Presentation.Entities',
 'name': 'Abbey Wood &harr;  Heathrow Terminal 4 ',
 'naptanIds': ['910GABWDXR', '910GWOLWXR', '910GCSTMHSXR', '910GCANWHRF',
               '910GWCHAPXR', '910GLIVSTLL', '910GFRNDXR', '910GTOTCTRD',
               '910GBONDST', '910GPADTLL', '910GACTONML', '910GEALINGB',
               '910GWEALING', '910GHANWELL', '910GSTHALL', '910GHAYESAH',
               '910GHTRWAPT', '910GHTRWTM4'],
 'serviceType': 'Regular'}


In [24]:
id = "elizabeth"
fromStopPointId = "Whitechapel"
toStopPointId = "910GHTRWTM4"

timetable = fetch_tfl_data(f"/Line/{id}/Timetable/{fromStopPointId}/to/{toStopPointId}")


HTTP Error 400: {"$type":"Tfl.Api.Presentation.Entities.ApiError, Tfl.Api.Presentation.Entities","timestampUtc":"2025-03-21T22:03:08.9493123Z","exceptionType":"ApiArgumentException","httpStatusCode":400,"httpStatus":"BadRequest","relativeUri":"/Line/elizabeth/Timetable/Whitechapel/to/910GHTRWTM4?app_id=Halfway&app_key=21dbb2eeb688456e817278669ba2c9d4","message":"The following stop points are not recognised: Whitechapel"}


In [25]:
timetable = fetch_tfl_data(f"/Journey/JourneyResults/{fromStopPointId}/to/{toStopPointId}?useRealTimeLiveArrivals=false")
pprint(timetable, depth=2)

{'$type': 'Tfl.Api.Presentation.Entities.JourneyPlanner.DisambiguationResult, '
          'Tfl.Api.Presentation.Entities',
 'fromLocationDisambiguation': {'$type': 'Tfl.Api.Presentation.Entities.JourneyPlanner.Disambiguation, '
                                         'Tfl.Api.Presentation.Entities',
                                'disambiguationOptions': [...],
                                'matchStatus': 'list'},
 'journeyVector': {'$type': 'Tfl.Api.Presentation.Entities.JourneyPlanner.JourneyVector, '
                            'Tfl.Api.Presentation.Entities',
                   'from': 'Whitechapel',
                   'to': '1000104',
                   'uri': '/journey/journeyresults/whitechapel/to/910ghtrwtm4?userealtimelivearrivals=false&app_id=halfway&app_key=21dbb2eeb688456e817278669ba2c9d4',
                   'via': ''},
 'recommendedMaxAgeMinutes': 1440,
 'searchCriteria': {'$type': 'Tfl.Api.Presentation.Entities.JourneyPlanner.SearchCriteria, '
                       

In [19]:
id = "elizabeth"
from_ = "910GABWDXR"
to_ = "910GWOLWXR"

for from_, to_ in zip(route_stops, route_stops[1:]):
    timetable = fetch_tfl_data(f"/Journey/JourneyResults/{from_}/to/{to_}?useRealTimeLiveArrivals=false")
    durations = []
    for journey in timetable["journeys"]:
        durations.append(journey["duration"])
    print(f"{from_}->{to_}={min(durations)}")





NameError: name 'route_stops' is not defined

In [11]:
id = "elizabeth"
from_ = "910GABWDXR"
to_ = "910GWOLWXR"

for from_, to_ in zip(route_stops, route_stops[1:]):
    timetable = fetch_tfl_data(f"/Journey/JourneyResults/{from_}/to/{to_}?useRealTimeLiveArrivals=false")
    durations = []
    for journey in timetable["journeys"]:
        durations.append(journey["duration"])
    print(f"{from_}->{to_}={min(durations)}")





910GABWDXR->910GWOLWXR=3
910GWOLWXR->910GCSTMHSXR=4
910GCSTMHSXR->910GCANWHRF=4
910GCANWHRF->910GWCHAPXR=4
910GWCHAPXR->910GLIVSTLL=3
910GLIVSTLL->910GFRNDXR=2
910GFRNDXR->910GTOTCTRD=2
910GTOTCTRD->910GBONDST=2
910GBONDST->910GPADTLL=4
910GPADTLL->910GACTONML=6
910GACTONML->910GEALINGB=3
910GEALINGB->910GWEALING=2
910GWEALING->910GHANWELL=2
910GHANWELL->910GSTHALL=3
910GSTHALL->910GHAYESAH=3
910GHAYESAH->910GHTRWAPT=7
910GHTRWAPT->910GHTRWTM4=5


In [12]:
pprint(timetable, depth=3)

{'$type': 'Tfl.Api.Presentation.Entities.JourneyPlanner.ItineraryResult, '
          'Tfl.Api.Presentation.Entities',
 'journeyVector': {'$type': 'Tfl.Api.Presentation.Entities.JourneyPlanner.JourneyVector, '
                            'Tfl.Api.Presentation.Entities',
                   'from': '1001147',
                   'to': '1000104',
                   'uri': '/journey/journeyresults/910ghtrwapt/to/910ghtrwtm4?userealtimelivearrivals=false&app_id=halfway&app_key=21dbb2eeb688456e817278669ba2c9d4',
                   'via': ''},
 'journeys': [{'$type': 'Tfl.Api.Presentation.Entities.JourneyPlanner.Journey, '
                        'Tfl.Api.Presentation.Entities',
               'alternativeRoute': False,
               'arrivalDateTime': '2025-02-12T15:12:00',
               'duration': 5,
               'fare': {...},
               'legs': [...],
               'startDateTime': '2025-02-12T15:07:00'},
              {'$type': 'Tfl.Api.Presentation.Entities.JourneyPlanner.Journe

In [13]:
pprint(timetable["timetable"]["routes"][0]["schedules"][0]) # periods/frequency
# https://api-portal.tfl.gov.uk/api-details#api=Line&operation=Line_TimetableByPathFromStopPointIdPathId

KeyError: 'timetable'

In [None]:
pprint(timetable["timetable"]["routes"][0]["schedules"][0]["periods"][1]["frequency"]["highestFrequency"]) # periods/frequency
pprint(timetable["timetable"]["routes"][0]["schedules"][0]["periods"][1]["frequency"]["lowestFrequency"]) # periods/frequency

# https://api-portal.tfl.gov.uk/api-details#api=Line&operation=Line_TimetableByPathFromStopPointIdPathId

In [None]:

def time_from_journey(journey):
    return int(journey["hour"]) * 60 + int(journey["minute"])
    
first = timetable["timetable"]["routes"][0]["schedules"][0]["firstJourney"]
first_time = time_from_journey(first)
for schedule in timetable["timetable"]["routes"][0]["schedules"][0]["knownJourneys"][1:]:
    next_time = time_from_journey(schedule)
    print(f"Time between Trains = {next_time - first_time} mins")
    first_time = next_time

    

In [None]:
pprint(timetable["timetable"]["routes"][0]["stationIntervals"], depth=2)

In [None]:
for stop in timetable["timetable"]["routes"][0]["stationIntervals"][0]["intervals"]:
    name = station_id[stop["stopId"]]
    arrivalTime = stop["timeToArrival"]
    print(f"{name}: {arrivalTime}") # Why are there 2 for almost all stations?

In [None]:
for stop in timetable["timetable"]["routes"][0]["stationIntervals"][1]["intervals"]:
    name = station_id[stop["stopId"]]
    arrivalTime = stop["timeToArrival"]
    print(f"{name}: {arrivalTime}") # Why are there 2 for almost all stations?