In [1]:
import re

import pandas as pd
import requests

In [2]:
URL = 'http://data.itsfactory.fi/journeys/api/1/vehicle-activity'
# Upper bound (seconds) on how long the request may block; without a timeout
# requests.get() can wait indefinitely on an unresponsive server.
REQUEST_TIMEOUT_S = 10

http_response = requests.get(URL, timeout=REQUEST_TIMEOUT_S)
# Fail here with a clear HTTPError on 4xx/5xx instead of a confusing
# JSON-decoding or KeyError failure further down.
http_response.raise_for_status()

# `response` is the decoded JSON document; `body` is its 'body' entry.
response = http_response.json()
body = response['body']

In [3]:
# Inspect the Python type of the decoded 'body' entry of the response.
type(body)

list

In [4]:
# ISO 8601 duration, optionally signed, e.g. 'P0Y0M0DT0H1M53.000S', '-PT45S'.
_ISO_DURATION_RE = re.compile(
    r'^(?P<sign>-)?P'
    r'(?:(?P<years>\d+)Y)?'
    r'(?:(?P<months>\d+)M)?'
    r'(?:(?P<days>\d+)D)?'
    r'(?:T'
    r'(?:(?P<hours>\d+)H)?'
    r'(?:(?P<minutes>\d+)M)?'
    r'(?:(?P<seconds>\d+(?:\.\d+)?)S)?'
    r')?$'
)


def _delay_sec(s: str) -> int:
    """Convert a (possibly negative) ISO 8601 duration string to whole seconds.

    Days, hours, minutes and seconds are all honoured; fractional seconds are
    truncated. Raises ValueError for strings that are not durations, or that
    carry non-zero years/months (which have no fixed length in seconds).
    """
    match = _ISO_DURATION_RE.match(s)
    if match is None:
        raise ValueError(f'Unrecognised delay duration: {s!r}')

    parts = match.groupdict(default='0')
    if int(parts['years']) or int(parts['months']):
        raise ValueError(f'Delay with years/months cannot be converted to seconds: {s!r}')

    total = (
        86400 * int(parts['days'])
        + 3600 * int(parts['hours'])
        + 60 * int(parts['minutes'])
        + int(float(parts['seconds']))
    )
    return -total if match.group('sign') else total


def _stop_id(s: str) -> str:
    """Return the last four characters of a stop reference."""
    return s[-4:]


def body_to_df(body: list, logging: int = 0) -> pd.DataFrame:
    """
    Flatten the body of a GET request to the JourneysAPI Vehicle Activity
    endpoint into a dataframe with one row per (vehicle journey, onward call).

    params:
        - body: requests.get(URL).json()['body']
        - logging: <= 0 - no logging/printing, >= 1 - print number of errors
          when flattening, >= 2 - print every error message

    returns:
        DataFrame with columns Recorded_At, Line, Direction, Date, Lon, Lat,
        Delay (seconds, negative when the delay string is negative),
        Departure_Time, Stop, Stop_Order. An empty body yields an empty frame
        that still has these columns.

    Malformed records never raise: a journey whose shared fields cannot be
    read is skipped as a whole, and a malformed onward call is skipped on its
    own without discarding the other calls of the same journey. Each skipped
    journey or onward call counts as one broken record.
    """
    header = ['Recorded_At', 'Line', 'Direction', 'Date', 'Lon', 'Lat', 'Delay', 'Departure_Time', 'Stop', 'Stop_Order']

    lines = []
    broken_lines = 0
    for bus in body:
        # Fields shared by every onward call of this vehicle journey.
        try:
            journey = bus['monitoredVehicleJourney']
            const_line = [
                bus['recordedAtTime'],
                journey['lineRef'],
                journey['directionRef'],
                journey['framedVehicleJourneyRef']['dateFrameRef'],
                journey['vehicleLocation']['longitude'],
                journey['vehicleLocation']['latitude'],
                _delay_sec(journey['delay']),
                journey['originAimedDepartureTime'],
            ]
            # Materialise inside the try so a missing/non-iterable value is
            # counted as a broken journey rather than escaping the handler.
            onward_calls = list(journey['onwardCalls'])
        except Exception as e:  # deliberate best-effort: skip and count
            if logging > 1:
                print(e)
            broken_lines += 1
            continue

        for onward_call in onward_calls:
            # Guard each call separately so one bad call does not drop the
            # remaining valid calls of the journey.
            try:
                row = const_line + [_stop_id(onward_call['stopPointRef']), int(onward_call['order'])]
            except Exception as e:  # deliberate best-effort: skip and count
                if logging > 1:
                    print(e)
                broken_lines += 1
                continue
            lines.append(row)

    if logging > 0:
        print('Total # of broken records (entire vehicle journeys or onward calls):', broken_lines)

    return pd.DataFrame(lines, columns=header)

In [6]:
# Flatten the fetched payload with the most verbose error reporting
# (logging level 2), then show the resulting frame as the cell output.
df = body_to_df(body, logging=2)
df

Total # of broken records (entire vehicle journeys or onward calls): 0


Unnamed: 0,Recorded_At,Line,Direction,Date,Lon,Lat,Delay,Departure_Time,Stop,Stop_Order
0,2023-12-09T10:39:48.235+02:00,13,2,2023-12-09,23.8491402,61.4508362,113,0935,3699,56
1,2023-12-09T10:39:48.235+02:00,13,2,2023-12-09,23.8491402,61.4508362,113,0935,3704,57
2,2023-12-09T10:39:48.235+02:00,13,2,2023-12-09,23.8491402,61.4508362,113,0935,3714,58
3,2023-12-09T10:39:48.235+02:00,13,2,2023-12-09,23.8491402,61.4508362,113,0935,3152,59
4,2023-12-09T10:39:48.235+02:00,13,2,2023-12-09,23.8491402,61.4508362,113,0935,3617,60
...,...,...,...,...,...,...,...,...,...,...
3941,2023-12-09T10:39:48.226+02:00,10,2,2023-12-09,23.7879391,61.4879532,335,1015,3583,31
3942,2023-12-09T10:39:48.226+02:00,10,2,2023-12-09,23.7879391,61.4879532,335,1015,4072,32
3943,2023-12-09T10:39:48.226+02:00,10,2,2023-12-09,23.7879391,61.4879532,335,1015,4074,33
3944,2023-12-09T10:39:48.226+02:00,10,2,2023-12-09,23.7879391,61.4879532,335,1015,4076,34
