In [1]:
import re

import pandas as pd
import requests

In [16]:
# JourneysAPI endpoint that reports the current activity of every monitored vehicle.
URL = 'http://data.itsfactory.fi/journeys/api/1/vehicle-activity'

# Plain request with the default headers (the recorded run of this cell shows 403).
# The timeout stops the cell from hanging indefinitely if the server never answers.
response = requests.get(URL, timeout=10)
response.status_code

403

In [17]:
# Repeat the request while presenting a browser User-Agent (the recorded run of
# this cell shows 200). The timeout stops the cell from hanging indefinitely if
# the server never answers.
response = requests.get(
    URL,
    headers={'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:120.0) Gecko/20100101 Firefox/120.0'},
    timeout=10,
)
response.status_code

200

In [18]:
# Fail loudly on an HTTP error instead of on a confusing missing-key error below.
response.raise_for_status()

# Decode into a separate name: rebinding `response` to the decoded dict would make
# this cell fail on a second run, because a dict has no .json() method.
payload = response.json()
body = payload['body']

In [19]:
# Check which container type the payload body is before flattening it.
type(body)

list

In [20]:
def body_to_df(body: list, logging: int = 0) -> pd.DataFrame:
    """
    Function that takes the body of the GET request to JourneysAPI Vehicle Activity endpoint and flattens it to a dataframe.

    One row is produced per onward call of every vehicle; the vehicle-level
    fields are repeated on each of that vehicle's rows.

    params:
        - body: requests.get(URL).json()['body']
        - logging: <= 0 - no logging/printing, >= 1 - print number of errors when flattening, >= 2 print every error message

    returns:
        DataFrame with the columns Recorded_At, Line, Direction, Date, Lon, Lat,
        Delay (signed whole seconds), Departure_Time, Stop and Stop_Order.

    Malformed input never raises: a vehicle whose own fields cannot be read is
    skipped entirely, and a malformed onward call is skipped on its own without
    discarding the vehicle's other calls. Each skip counts as one broken record.
    """
    # Duration in the form 'P<n>Y<n>M<n>DT<n>H<n>M<s>.<fff>S' with an optional
    # leading '-', e.g. '-P0Y0M0DT0H3M26.000S'. Every component is optional.
    duration_re = re.compile(
        r'(?P<sign>-)?P'
        r'(?:(?P<years>\d+)Y)?(?:(?P<months>\d+)M)?(?:(?P<days>\d+)D)?'
        r'(?:T(?:(?P<hours>\d+)H)?(?:(?P<minutes>\d+)M)?'
        r'(?:(?P<seconds>\d+)(?:\.\d+)?S)?)?'
    )

    def delay_sec(s: str) -> int:
        """Convert a duration string to signed whole seconds (fraction truncated)."""
        match = duration_re.fullmatch(s)
        if match is None:
            raise ValueError('unrecognised delay duration: %r' % (s,))
        components = match.groupdict()
        sign = components.pop('sign')
        if all(value is None for value in components.values()):
            raise ValueError('delay duration has no components: %r' % (s,))
        parts = {name: int(value or 0) for name, value in components.items()}
        # Years and months have no fixed length in seconds, so refuse to guess.
        if parts['years'] or parts['months']:
            raise ValueError('cannot convert years/months to seconds: %r' % (s,))
        total = ((parts['days'] * 24 + parts['hours']) * 60 + parts['minutes']) * 60 + parts['seconds']
        return -total if sign else total

    def stop_id(s: str) -> str:
        # Keeps the last four characters of the reference.
        # NOTE(review): assumes every stop id is exactly 4 characters - confirm.
        return s[-4:]

    header = ['Recorded_At', 'Line', 'Direction', 'Date', 'Lon', 'Lat', 'Delay', 'Departure_Time', 'Stop', 'Stop_Order']

    lines = []
    broken_lines = 0

    def record_error(error: Exception) -> None:
        """Count one broken record and print it at the highest logging level."""
        nonlocal broken_lines
        if logging > 1:
            print(error)
        broken_lines += 1

    for bus in body:
        try:
            journey = bus['monitoredVehicleJourney']
            rec_ts = bus['recordedAtTime']
            line = journey['lineRef']
            direction = journey['directionRef']
            date = journey['framedVehicleJourneyRef']['dateFrameRef']
            lon = journey['vehicleLocation']['longitude']
            lat = journey['vehicleLocation']['latitude']
            delay = delay_sec(journey['delay'])
            dep_time = journey['originAimedDepartureTime']

            const_line = [rec_ts, line, direction, date, lon, lat, delay, dep_time]

            for onward_call in journey['onwardCalls']:
                # Guard each call separately so one bad call does not drop the
                # calls that follow it for the same vehicle.
                try:
                    row = const_line + [stop_id(onward_call['stopPointRef']), int(onward_call['order'])]
                except Exception as e:
                    record_error(e)
                    continue
                lines.append(row)
        except Exception as e:
            # Deliberately broad: this is a best-effort flattening of external data.
            record_error(e)
    if logging > 0:
        print('Total # of broken records (entire vehicle journeys or onward calls):', broken_lines)

    return pd.DataFrame(lines, columns=header)

In [21]:
# Flatten the live payload; logging level 2 prints every parsing error and the total.
df = body_to_df(body, logging=2)
df

Total # of broken records (entire vehicle journeys or onward calls): 0


Unnamed: 0,Recorded_At,Line,Direction,Date,Lon,Lat,Delay,Departure_Time,Stop,Stop_Order
0,2023-12-16T08:40:23.180Z,14,1,2023-12-16,23.6269417,61.5179062,-206,0750,1665,51
1,2023-12-16T08:40:23.180Z,14,1,2023-12-16,23.6269417,61.5179062,-206,0750,1663,52
2,2023-12-16T08:40:23.180Z,14,1,2023-12-16,23.6269417,61.5179062,-206,0750,1672,53
3,2023-12-16T08:40:25.216Z,13,2,2023-12-16,23.7711124,61.4909782,77,0805,2500,30
4,2023-12-16T08:40:25.216Z,13,2,2023-12-16,23.7711124,61.4909782,77,0805,2526,31
...,...,...,...,...,...,...,...,...,...,...
3860,2023-12-16T08:40:24.210Z,91,1,2023-12-16,23.8271275,61.5636063,22,0840,5409,6
3861,2023-12-16T08:40:24.210Z,91,1,2023-12-16,23.8271275,61.5636063,22,0840,5407,7
3862,2023-12-16T08:40:24.210Z,91,1,2023-12-16,23.8271275,61.5636063,22,0840,5405,8
3863,2023-12-16T08:40:24.210Z,91,1,2023-12-16,23.8271275,61.5636063,22,0840,5403,9


In [13]:
import json

In [14]:
# Read a previously saved API response from disk, then parse it once the file is closed.
with open('data/20231202T151821-api-call.txt') as f:
    contents = f.read()

x = json.loads(contents)

In [15]:
# Flatten the saved payload with the same settings as the live one (logging level 2).
df = body_to_df(x['body'], logging=2)
df

Total # of broken records (entire vehicle journeys or onward calls): 0


Unnamed: 0,Recorded_At,Line,Direction,Date,Lon,Lat,Delay,Departure_Time,Stop,Stop_Order
0,2023-12-02T17:18:18.227+02:00,19,1,2023-12-02,23.8561306,61.4816513,236,1651,4019,23
1,2023-12-02T17:18:18.227+02:00,19,1,2023-12-02,23.8561306,61.4816513,236,1651,4017,24
2,2023-12-02T17:18:18.227+02:00,19,1,2023-12-02,23.8561306,61.4816513,236,1651,4015,25
3,2023-12-02T17:18:18.227+02:00,19,1,2023-12-02,23.8561306,61.4816513,236,1651,4013,26
4,2023-12-02T17:18:18.227+02:00,19,1,2023-12-02,23.8561306,61.4816513,236,1651,4011,27
...,...,...,...,...,...,...,...,...,...,...
3639,2023-12-02T17:18:18.263+02:00,90,1,2023-12-02,23.9137268,61.5088387,149,1650,5003,42
3640,2023-12-02T17:18:18.263+02:00,90,1,2023-12-02,23.9137268,61.5088387,149,1650,5149,43
3641,2023-12-02T17:18:18.263+02:00,90,1,2023-12-02,23.9137268,61.5088387,149,1650,0511,44
3642,2023-12-02T17:18:18.263+02:00,90,1,2023-12-02,23.9137268,61.5088387,149,1650,0505,45
