In [1]:
import requests
import pandas as pd

In [2]:
URL = 'http://data.itsfactory.fi/journeys/api/1/vehicle-activity'
# Upper bound (seconds) for connecting and for each read; without a timeout
# requests.get() may block forever if the server stops answering.
REQUEST_TIMEOUT_S = 30

http_response = requests.get(URL, timeout=REQUEST_TIMEOUT_S)
# Fail loudly on 4xx/5xx instead of trying to decode an error page as JSON.
http_response.raise_for_status()

# `response` is the decoded JSON document; `body` is its 'body' entry,
# which the following cells inspect and flatten.
response = http_response.json()
body = response['body']

In [3]:
# Inspect the container type of the payload before flattening it below.
type(body)

list

In [4]:
def body_to_df(body: list, logging: int = 0) -> pd.DataFrame:
    """
    Flatten the body of a JourneysAPI Vehicle Activity response into a dataframe.

    Each onward call of each vehicle becomes one row; the vehicle-level fields
    (timestamp, line, direction, date, position, delay, departure time) are
    repeated on every row of that vehicle.

    params:
        - body: requests.get(URL).json()['body']
        - logging: <= 0 - no logging/printing, >= 1 - print number of errors when flattening, >= 2 print every error message

    returns:
        DataFrame with the columns Recorded_At, Line, Direction, Date, Lon, Lat,
        Delay (whole seconds, negative when the duration is prefixed with '-'),
        Departure_Time, Stop (last four characters of stopPointRef) and
        Stop_Order (int).

    Records that cannot be parsed are never raised to the caller: a vehicle
    whose journey-level fields are broken is skipped entirely, a broken onward
    call is skipped on its own, and every skip is counted as one broken record.
    """
    import re  # local import keeps this cell independent of other import cells

    # ISO 8601 style duration. The API presumably sends values shaped like
    # 'P0Y0M0DT0H3M48.000S' (optionally prefixed with '-') -- TODO confirm
    # against the JourneysAPI documentation. Every component is optional so
    # shorter forms such as 'PT3M48S' are accepted as well.
    duration_re = re.compile(
        r'^(?P<sign>-)?P'
        r'(?:(?P<years>\d+)Y)?(?:(?P<months>\d+)M)?(?:(?P<days>\d+)D)?'
        r'(?:T(?:(?P<hours>\d+)H)?(?:(?P<minutes>\d+)M)?'
        r'(?:(?P<seconds>\d+)(?:\.\d+)?S)?)?$'
    )

    def delay_sec(s: str) -> int:
        """Convert a duration string to signed whole seconds (fraction truncated)."""
        match = duration_re.match(s)
        if match is None:
            raise ValueError('Unrecognised delay duration: {!r}'.format(s))
        parts = {
            name: int(value)
            for name, value in match.groupdict().items()
            if name != 'sign' and value is not None
        }
        if not parts:
            # A bare 'P' / '-P' carries no information at all.
            raise ValueError('Empty delay duration: {!r}'.format(s))
        if parts.get('years') or parts.get('months'):
            # Years and months have no fixed length in seconds.
            raise ValueError('Delay with years/months is not supported: {!r}'.format(s))
        hours = parts.get('days', 0) * 24 + parts.get('hours', 0)
        minutes = hours * 60 + parts.get('minutes', 0)
        total = minutes * 60 + parts.get('seconds', 0)
        return -total if match.group('sign') else total

    def stop_id(s: str) -> str:
        """Return the last four characters of the stop reference."""
        return s[-4:]

    header = ['Recorded_At', 'Line', 'Direction', 'Date', 'Lon', 'Lat', 'Delay', 'Departure_Time', 'Stop', 'Stop_Order']

    lines = []
    broken_lines = 0

    def note_broken(error: Exception) -> None:
        """Count one unusable record and echo its error at logging level 2+."""
        nonlocal broken_lines
        if logging > 1:
            print(error)
        broken_lines += 1

    for bus in body:
        # Vehicle-level fields: any failure here invalidates the whole vehicle.
        try:
            journey = bus['monitoredVehicleJourney']
            const_line = [
                bus['recordedAtTime'],
                journey['lineRef'],
                journey['directionRef'],
                journey['framedVehicleJourneyRef']['dateFrameRef'],
                journey['vehicleLocation']['longitude'],
                journey['vehicleLocation']['latitude'],
                delay_sec(journey['delay']),
                journey['originAimedDepartureTime'],
            ]
            # Materialise inside the try so a missing/non-iterable value is
            # reported as a broken vehicle instead of escaping the function.
            onward_calls = list(journey['onwardCalls'])
        except Exception as e:
            note_broken(e)
            continue

        # Onward calls are handled one by one so a single malformed call does
        # not discard the remaining calls of the same vehicle.
        for onward_call in onward_calls:
            try:
                lines.append(const_line + [stop_id(onward_call['stopPointRef']), int(onward_call['order'])])
            except Exception as e:
                note_broken(e)

    if logging > 0:
        print('Total # of broken records (entire vehicle journeys or onward calls):', broken_lines)

    return pd.DataFrame(lines, columns=header)

In [5]:
# Flatten the freshly fetched payload; logging=2 prints every parsing error
# followed by the total number of broken records.
df = body_to_df(body, logging=2)
df

Total # of broken records (entire vehicle journeys or onward calls): 0


Unnamed: 0,Recorded_At,Line,Direction,Date,Lon,Lat,Delay,Departure_Time,Stop,Stop_Order
0,2023-12-09T12:40:11.245+02:00,13,2,2023-12-09,23.7545147,61.4910851,228,1205,0085,29
1,2023-12-09T12:40:11.245+02:00,13,2,2023-12-09,23.7545147,61.4910851,228,1205,2500,30
2,2023-12-09T12:40:11.245+02:00,13,2,2023-12-09,23.7545147,61.4910851,228,1205,2526,31
3,2023-12-09T12:40:11.245+02:00,13,2,2023-12-09,23.7545147,61.4910851,228,1205,2556,32
4,2023-12-09T12:40:11.245+02:00,13,2,2023-12-09,23.7545147,61.4910851,228,1205,2506,33
...,...,...,...,...,...,...,...,...,...,...
3628,2023-12-09T12:40:11.190+02:00,10,1,2023-12-09,23.8471432,61.4744263,221,1230,2012,32
3629,2023-12-09T12:40:11.190+02:00,10,1,2023-12-09,23.8471432,61.4744263,221,1230,2014,33
3630,2023-12-09T12:40:11.190+02:00,10,1,2023-12-09,23.8471432,61.4744263,221,1230,2016,34
3631,2023-12-09T12:40:11.190+02:00,10,1,2023-12-09,23.8471432,61.4744263,221,1230,2024,35


In [6]:
import json

In [7]:
# Read a previously saved API response from disk. The handle is only needed
# for the read itself, so parsing happens after the file has been closed.
with open('data/20231202T151821-api-call.txt') as f:
    contents = f.read()

x = json.loads(contents)

In [9]:
# Flatten the saved snapshot the same way as the live response; logging=2
# prints every parsing error followed by the total number of broken records.
df = body_to_df(x['body'], logging=2)
df

Total # of broken records (entire vehicle journeys or onward calls): 0


Unnamed: 0,Recorded_At,Line,Direction,Date,Lon,Lat,Delay,Departure_Time,Stop,Stop_Order
0,2023-12-02T17:18:18.227+02:00,19,1,2023-12-02,23.8561306,61.4816513,236,1651,4019,23
1,2023-12-02T17:18:18.227+02:00,19,1,2023-12-02,23.8561306,61.4816513,236,1651,4017,24
2,2023-12-02T17:18:18.227+02:00,19,1,2023-12-02,23.8561306,61.4816513,236,1651,4015,25
3,2023-12-02T17:18:18.227+02:00,19,1,2023-12-02,23.8561306,61.4816513,236,1651,4013,26
4,2023-12-02T17:18:18.227+02:00,19,1,2023-12-02,23.8561306,61.4816513,236,1651,4011,27
...,...,...,...,...,...,...,...,...,...,...
3639,2023-12-02T17:18:18.263+02:00,90,1,2023-12-02,23.9137268,61.5088387,149,1650,5003,42
3640,2023-12-02T17:18:18.263+02:00,90,1,2023-12-02,23.9137268,61.5088387,149,1650,5149,43
3641,2023-12-02T17:18:18.263+02:00,90,1,2023-12-02,23.9137268,61.5088387,149,1650,0511,44
3642,2023-12-02T17:18:18.263+02:00,90,1,2023-12-02,23.9137268,61.5088387,149,1650,0505,45
