In [8]:
# import useful libraries and packages
from reading_data import get_traffic_data, get_demand_data
from utils import get_OTP, get_PP2_A_to_B, get_direction
from plotting import plot_OTP

import pandas as pd

## Reading traffic and demand data

In [2]:
# Load the two inputs used throughout the notebook: traffic data first, then demand data
df_traffic, df_demand = get_traffic_data(), get_demand_data()

## 1. Line passenger punctuality

### 1.1. OTP (excl. cancellations) for different delay thresholds and time periods

In [None]:
# Compute the OTP table from the traffic data
# (the original note said "in seconds" — presumably the unit of the delay
# thresholds; confirm in utils.get_OTP)
df_OTP = get_OTP(df_traffic)

# Show a random sample of at most 10 rows. display() is required here: Jupyter
# only renders a bare expression when it is the last statement of the cell, so
# a plain `df_OTP.sample(10)` in this position would be silently discarded.
# The min() keeps the cell from failing when the table has fewer than 10 rows.
display(df_OTP.sample(min(10, len(df_OTP))))

# Plot the OTP for the different thresholds, with daily and then hourly periodicity
plot_OTP(df_OTP, periodicity='day')
plot_OTP(df_OTP, periodicity='hour')

### 1.2. CPM (incl. cancellations)

In [None]:
# TODO: the CPM plot is disabled; plot_CPM is not imported in the import cell of this notebook.
# plot_CPM(df_traffic, also="OTP")

### 1.3. PP2 (including passengers)

In [3]:
# Peek at three random traffic rows, transposed so that every column name is shown as a row
df_traffic.sample(n=3).transpose()

Unnamed: 0,81940,25651,106744
Tåguppdrag,2309,2875,2842
Tågnr,2309,2875,2842
Tågordning uppdrag,1,1,1
Datum (PAU),2015-10-06,2015-09-20,2015-10-12
Tågslag,RST,RST,RST
UppehållstypAvgång,Passage,Uppehåll,Uppehåll
UppehållstypAnkomst,Uppehåll,Uppehåll,Passage
Delsträckanummer,8,16,30
Första platssignatur,BRO,BÅL,NYH
Första platssignatur för uppdrag,BRO,BÅL,NYH


In [21]:
# Inline computation of PP2 for passengers travelling from station_A to
# station_B on one departure day: the share of the demanded trips whose
# promised train (or its replacement) reaches B within the delay threshold.

# List of stations from Nyh to Bal
stations_order = ['Nyh', 'Gdv', 'Ngd', 'Öso', 'Ssä', 'Hfa', 'Ts', 'Kda', 'Vhe', 'Jbo', 'Hnd', 'Skg', 'Tåd', 'Fas', 'Äs', 'Åbe', 'Sst', 'Cst', 'Ke', 'Sub', 'Spå', 'Bkb', 'Jkb', 'Khä', 'Kän', 'Bro', 'Bål']
# Upper-case copy, used below with the 'Första/Sista platssignatur' columns
stations_order_upper = [station.upper() for station in stations_order]

station_A = 'Äs'
station_B = 'Nyh'
delay_threshold = 5
period='off_peak'

# Time window as a range of 15-minute slots t. The morning peak is the
# default; any other value of `period` selects the off-peak window.
# NOTE(review): the departure filter further down maps a clock time to the
# slot (hour - 1) * 4 + minute / 15. Under that mapping 9 * 4 and 15 * 4 are
# 10:00 and 16:00, not the 9.00 / 15.00 these windows are meant to describe.
# Confirm the slot convention of df_demand before relying on the window edges.
t_first = (6-1) * 4
t_last = 9 * 4
if period != 'morning_peak':
    t_first = 9 * 4
    t_last = 15 * 4
t_period = range(t_first, t_last)

delay_threshold = delay_threshold * 60  # 5 minutes (by default) delay thresholds in seconds

# Number of passengers from A to B for every slot of the window, looked up
# once and reused for both T_total and T_promised
demand_A_B = {t: df_demand[t][station_A][station_B] for t in t_period}

# Calculate the total number of trips (T_total)
T_total = sum(demand_A_B.values())

# Copy of the traffic data, drop all the cancelled departures
df_PP2 = df_traffic[df_traffic['Inställtflagga'] == 'N'].copy()

# Departure day
dep_day = pd.to_datetime('2015-10-14')

# Parse the date/time columns. errors='coerce' turns every unparseable value
# (including placeholders such as 'Saknas -') into NaT, so missing times must
# be detected with pd.isna()/notna() from here on, never by string comparison.
for time_column in ['Datum (PAU)', 'Ankomsttid', 'Avgångstid', 'Planerad ankomsttid', 'Planerad avgångstid']:
    df_PP2[time_column] = pd.to_datetime(df_PP2[time_column], errors='coerce')

# Keep only the traffic of the departure day (the column is already datetime)
df_PP2 = df_PP2[df_PP2['Datum (PAU)'] == dep_day]

# Drop departures in the other direction than from A to B
dir_A_B = get_direction(station_A, station_B, stations_order)
df_PP2 = df_PP2[df_PP2.apply(lambda x: get_direction(x['Första platssignatur'], x['Sista platssignatur'], stations_order_upper) == dir_A_B, axis=1)]

# Drop departures not serving the arrival station B
# (presumably get_direction returns a signed value, so a non-negative product
# means the final stop lies at or beyond B in the travel direction — confirm
# against utils.get_direction)
df_PP2 = df_PP2[df_PP2.apply(lambda x: get_direction(station_B.upper(), x['Sista platssignatur'], stations_order_upper)* dir_A_B>=0, axis=1)]

# Departures from A on dep_day, ordered by planned departure time. This does
# not depend on t, so it is prepared once instead of once per slot; the stable
# sort keeps rows with equal planned times in their original order.
departures_from_A = df_PP2[df_PP2['Från platssignatur'] == station_A].sort_values(by=['Planerad avgångstid'], kind='mergesort')

# Slot value of every planned departure, shifted by 4 minutes for
# boarding/walking to the departure platform
planned_departure = departures_from_A['Planerad avgångstid']
departure_slot = (planned_departure.dt.hour - 1) * 4 + (planned_departure.dt.minute - 4) / 15

# Calculate the promised trips (T_promised)
T_promised = 0
for t in t_period:
    # Get number of passengers from A to B at time period t
    nb_pass = demand_A_B[t]

    # Drop all passed departures, happening earlier than t
    upcoming_departures = departures_from_A[departure_slot > t]
    if upcoming_departures.empty:
        continue

    # Get the closest next scheduled/promised departure from A
    promised_departure_row = upcoming_departures.iloc[0]

    # Find the corresponding promised/scheduled arrival time to B using Tåguppdrag
    promised_train_arrival = df_PP2[(df_PP2['Tåguppdrag'] == promised_departure_row['Tåguppdrag']) & (df_PP2['Till platssignatur'] == station_B)]
    if promised_train_arrival.empty:
        # No arrival row at B for the promised train: the slot is skipped.
        # The marker keeps skipped slots visible in the cell output.
        print(None)
        continue
    promised_arrival_time = promised_train_arrival['Planerad ankomsttid'].values[0]

    # Find the actual departure and arrival times
    actual_departure_time = promised_departure_row['Avgångstid']
    actual_arrival_time = promised_train_arrival['Ankomsttid'].values[0]

    if pd.isna(actual_departure_time):  # no registered departure (NaT after coercion)
        # Find the next departure from A that actually has a departure time
        departed = upcoming_departures[upcoming_departures['Avgångstid'].notna()]
        if not departed.empty:
            next_departure_row = departed.iloc[0]
            actual_departure_time = next_departure_row['Avgångstid']
            # Find the corresponding arrival; None when that train has no
            # arrival row at B, so the slot is not counted
            next_train_arrival = df_PP2[(df_PP2['Tåguppdrag'] == next_departure_row['Tåguppdrag']) & (df_PP2['Till platssignatur'] == station_B)]
            actual_arrival_time = next_train_arrival['Ankomsttid'].values[0] if not next_train_arrival.empty else None

    if actual_arrival_time is not None:
        # Calculate the eventual delay in the arrival. A missing (NaT) time
        # gives a NaN delay, which fails the threshold comparison below.
        delay = (pd.to_datetime(actual_arrival_time) - pd.to_datetime(promised_arrival_time)).total_seconds()
        print(delay)

        # Accumulate if delay within threshold
        if delay <= delay_threshold:
            T_promised += nb_pass

# Calculate the punctuality measure PP2 (None when there is no demand at all)
PP2 = T_promised / T_total if T_total > 0 else None
print(f'PP2 = {PP2}')

-120.0
None
None
-60.0
-60.0
None
None
-60.0
-60.0
-120.0
-120.0
-60.0
-60.0
-60.0
-60.0
-60.0
-60.0
-60.0
-60.0
-60.0
-60.0
-60.0
-60.0
60.0
PP2 = 0.8806008152349986


In [5]:
# Compute PP2 from station_A to station_B with the helper imported from utils.
# The four parameters are set explicitly in this cell before the call.
station_A, station_B = 'Äs', 'Nyh'
delay_threshold, period = 5, 'off_peak'
PP2 = get_PP2_A_to_B(station_A, station_B, df_traffic, df_demand, delay_threshold, period)

-60.0
-60.0
-60.0
-60.0
-60.0
-60.0
-60.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
60.0
60.0
0.0
PP2 = 0.7883400387864961
