# Visualisation of the passenger punctuality results

### definition of functions for reading and preprocessing data

In [17]:
######################
##  Reading traffic data
#######################
def get_traffic_data(input_file='C:/Users/AbdouAA/Work Folders/Documents/GitHub/k2-respunkt/data/RST_2015_v38_42.csv'):
    """Load the raw traffic records from a semicolon-delimited CSV file.

    Args:
        input_file: Path of the CSV file to read. The default is the
            location that used to be hard-coded in the function body, so
            existing zero-argument calls keep working unchanged.

    Returns:
        The DataFrame produced by ``pd.read_csv`` with ``delimiter=';'``.
    """
    return pd.read_csv(input_file, delimiter=';')


######################
##  Reading demand data
#######################
def get_demand_data(
    input_file='C:/Users/AbdouAA/Work Folders/Documents/GitHub/k2-respunkt/data/OD_data_dynamic.xlsx',
    nb_time_periods=96,
):
    """Load one origin-destination table per time period from an Excel workbook.

    The sheet named 'Static' supplies the column headers and the row index.
    The sheets "Sheet1" ... "Sheet<nb_time_periods>" are read without headers
    and relabelled with those headers and that index.

    Args:
        input_file: Path of the workbook. The default is the location that
            used to be hard-coded in the function body.
        nb_time_periods: Number of period sheets to read. The default, 96,
            is the number of 15-minute periods in a day (24 * 60 / 15).

    Returns:
        A dict mapping the zero-based period number ``t`` to the DataFrame
        read from sheet ``f"Sheet{t+1}"``.
    """
    df_sheets = {}

    # Open the workbook once and reuse the handle for every sheet; passing
    # the path to pd.read_excel for each sheet re-opens the file every time.
    with pd.ExcelFile(input_file) as workbook:
        df_static = pd.read_excel(workbook, sheet_name='Static', index_col=0, header=0)

        # Labels shared by every period sheet
        headers = df_static.columns
        index = df_static.index

        # Read the period sheets by name
        for t in range(nb_time_periods):
            sheet_name = f"Sheet{t+1}"  # Assuming sheet names are "Sheet1", "Sheet2", ..., "Sheet96"
            df_temp = pd.read_excel(workbook, sheet_name=sheet_name, header=None)  # Read without headers

            # Assign the headers and index from the static data
            df_temp.columns = headers
            df_temp.index = index

            df_sheets[t] = df_temp
    return df_sheets

import pandas as pd 

## Function to update/adjust static OD data for the specific studied line between Nyh and Bål
# Groups of stations
#C_stations = ['Äs', 'Åbe', 'Sst', 'Cst', 'Ke'] # central stations
def update_OD_Nyh_Bal(df):
    """Fold demand between line 35 and line 36 stations onto 'Ke' and 'Äs'.

    For every (origin, destination) pair that links a line 35 group with a
    line 36 group, the value read from ``df`` is added to a cell of the
    returned copy and the pair's own cell is set to 0:

    * origin on line 35: the value is added to ``[origin, hub]``;
    * origin on line 36: the value is added to ``[hub, destination]``.

    The hub is 'Ke' or 'Äs' as listed in the rule table below.

    Args:
        df: OD matrix indexed by origin station with destination stations
            as columns. It is only read, never modified.

    Returns:
        An adjusted copy of ``df``.
    """
    right_35 = ['Nyh', 'Gdv', 'Ngd', 'Öso', 'Ssä', 'Hfa', 'Ts', 'Kda', 'Vhe', 'Jbo', 'Hnd', 'Skg', 'Tåd', 'Fas']  # eastern/right stations of line 35
    left_35 = ['Sub', 'Spå', 'Bkb', 'Jkb', 'Khä', 'Kän', 'Bro', 'Bål']  # western/left stations of line 35
    right_36 = ['Söc', 'Söd', 'Söu', 'Tul', 'Tu', 'Öte', 'Flb', 'Gn', 'Hu', 'Mö', 'Rön', 'Sta']  # eastern/right stations of line 36
    left_36 = ['Sol', 'So', 'Udl', 'Upv', 'U', 'Hel', 'Hgv', 'Kn', 'Mr', 'Nvk', 'Rs', 'R']  # western/left stations of line 36

    # (origins, destinations, hub, hub_replaces_origin), processed in this order
    transfer_rules = (
        (left_35, left_36, 'Ke', False),    # 35L -> 36L
        (left_35, right_36, 'Äs', False),   # 35L -> 36R
        (right_35, left_36, 'Ke', False),   # 35R -> 36L
        (right_35, right_36, 'Äs', False),  # 35R -> 36R
        (left_36, left_35, 'Ke', True),     # 36L -> 35L
        (left_36, right_35, 'Äs', True),    # 36L -> 35R
        (right_36, left_35, 'Ke', True),    # 36R -> 35L
        (right_36, right_35, 'Äs', True),   # 36R -> 35R
    )

    # Work on a copy; all reads come from the untouched input
    adjusted = df.copy()

    for origins, destinations, hub, hub_replaces_origin in transfer_rules:
        for origin in origins:
            for destination in destinations:
                moved = df.at[origin, destination]
                if hub_replaces_origin:
                    adjusted.at[hub, destination] += moved
                else:
                    adjusted.at[origin, hub] += moved
                adjusted.at[origin, destination] = 0

    return adjusted

### Reading and preprocessing of traffic data

In [18]:
# Load the raw traffic records
df_traffic = get_traffic_data()

# Columns removed from the traffic data before further processing
columns_to_drop = [
    "Tågordning uppdrag",
    "Tågslag",
    "Avgångsplats",
    "Ankomstplats",
    "Första platssignatur för uppdrag",
    "Sista platssignatur för uppdrag",
    "Inställelseorsakskod",
    "Inställelseorsak",
    "Dragfordonsid",
    "Framförda tågkm",
    "Rapporterad tågvikt",
    "Rapporterad tåglängd",
    "Antal rapporterade hjulaxlar",
    "Antal rapporterade vagnar",
    "Inställtflagga"
]
df_traffic_filtered = df_traffic.drop(columns=columns_to_drop)

# Old name -> new name. The new names contain no spaces and no Swedish
# letters, which makes them easier to use with pandas indexing.
columns_rename = {
    "Tåguppdrag": "Taguppdrag",
    "Tågnr": "Tagnr",
    "Datum (PAU)": "Datum",
    "UppehållstypAvgång": "UppehallstypAvgang",
    "UppehållstypAnkomst": "UppehallstypAnkomst",
    "Delsträckanummer": "Delstrackanummer",
    "Första platssignatur": "Forsta_platssignatur",
    "Sista platssignatur": "Sista_platssignatur",
    "Från platssignatur": "Fran_platssignatur",
    "Till platssignatur": "Till_platssignatur",
    "Sträcka med riktning": "Stracka_med_riktning",
    "Ankomsttid": "Ankomsttid",
    "Avgångstid": "Avgangstid",
    "Planerad ankomsttid": "Planerad_ankomsttid",
    "Planerad avgångstid": "Planerad_avgangstid",
    "Planeringsstatus": "Planeringsstatus"
}
df_traffic_renamed_all = df_traffic_filtered.rename(columns=columns_rename)

# Parse the four time columns; values that cannot be parsed become NaT
for time_column in ('Ankomsttid', 'Avgangstid', 'Planerad_ankomsttid', 'Planerad_avgangstid'):
    df_traffic_renamed_all[time_column] = pd.to_datetime(df_traffic_renamed_all[time_column], errors='coerce')

### Reading and preprocessing of demand data

In [19]:
#import numpy as np

# Stations of the studied line between Nyh and Bål
studied_line_35 = ['Nyh', 'Gdv', 'Ngd', 'Öso', 'Ssä', 'Hfa', 'Ts', 'Kda', 'Vhe', 'Jbo', 'Hnd', 'Skg', 'Tåd', 'Fas', 'Äs',
                'Åbe', 'Sst', 'Cst', 'Ke', 'Sub', 'Spå', 'Bkb', 'Jkb', 'Khä', 'Kän', 'Bro', 'Bål']

# One OD table per time period, keyed by period number
df_demand = get_demand_data()

# Number of time periods (every 15 minutes during a full day)
nb_time_periods = int(24 * 60 / 15)  # 96 periods

# Flatten the adjusted OD tables into one record per (origin, destination, period)
results = []
for t in range(nb_time_periods):
    # Adjust the OD matrix of this period for the studied line
    df_temp = update_OD_Nyh_Bal(df_demand[t])

    results.extend(
        {'from': origin, 'to': destination, 'time_period': t, 'n_pass': df_temp.at[origin, destination]}
        for origin in studied_line_35
        for destination in studied_line_35
    )
    # list-based alternative: results.append([origin, destination, t, n_pass])

# Long-format demand table built from the records
df_demand_updated = pd.DataFrame(results)
#df_demand_updated_numpy = np.array(results)

### Function for getting direction and timestep

In [20]:
# Helper function to get direction
# +1 if going north
# -1 if going south
def get_direction(from_station, to_station, stations_south_to_north):
    """Return the travel direction between two stations.

    Args:
        from_station: Station where the leg starts.
        to_station: Station where the leg ends.
        stations_south_to_north: Ordered list of stations; a later position
            counts as further north.

    Returns:
        1 when ``to_station`` comes after ``from_station`` in the list,
        -1 when it comes before, and 0 when both are the same position.

    Raises:
        ValueError: If either station is not in the list.
    """
    from_pos = stations_south_to_north.index(from_station)
    to_pos = stations_south_to_north.index(to_station)
    # bool - bool gives the sign of (to_pos - from_pos) as an int
    return (to_pos > from_pos) - (to_pos < from_pos)
    
# Functions returning the index of the 15-minute time period for a given time
def get_t_float(time):
    """Return the position of *time* within the day, counted in 15-minute periods.

    Only ``time.hour`` and ``time.minute`` are read; the result is a float
    whose fractional part is the share of the current period already elapsed.
    """
    periods_from_hours = time.hour * 4.
    periods_from_minutes = time.minute / 15.
    return periods_from_hours + periods_from_minutes


def get_t_int(time):
    """Return the whole-number index of the 15-minute period containing *time*."""
    return int(get_t_float(time))

### Remove and merge train passages

In [21]:
# Initial filtering: remove rows where both 'UppehallstypAvgang' and 'UppehallstypAnkomst' are 'Passage'
df_traffic_renamed = df_traffic_renamed_all[
    ~((df_traffic_renamed_all['UppehallstypAvgang'] == 'Passage') &
      (df_traffic_renamed_all['UppehallstypAnkomst'] == 'Passage'))
]

# Rows that still have 'Passage' on the departure side / on the arrival side
passage_rows_avg = df_traffic_renamed[df_traffic_renamed['UppehallstypAvgang'] == 'Passage']
passage_rows_ank = df_traffic_renamed[df_traffic_renamed['UppehallstypAnkomst'] == 'Passage']

# Trains (Taguppdrag, Datum, Tagnr) that have both kinds of passage rows
unique_ids = set(zip(passage_rows_avg['Taguppdrag'], passage_rows_avg['Datum'], passage_rows_avg['Tagnr'])) & \
             set(zip(passage_rows_ank['Taguppdrag'], passage_rows_ank['Datum'], passage_rows_ank['Tagnr']))

merged_rows = []

# Merge the passage rows train by train
for tag_id, datum, tagnr in unique_ids:
    # Both sets are sorted by 'Delstrackanummer' so that they can be paired by position
    rows_avg = passage_rows_avg[(passage_rows_avg['Taguppdrag'] == tag_id) &
                                (passage_rows_avg['Datum'] == datum) &
                                (passage_rows_avg['Tagnr'] == tagnr)].sort_values(by="Delstrackanummer")

    rows_ank = passage_rows_ank[(passage_rows_ank['Taguppdrag'] == tag_id) &
                                (passage_rows_ank['Datum'] == datum) &
                                (passage_rows_ank['Tagnr'] == tagnr)].sort_values(by="Delstrackanummer")

    # Only complete pairs are merged. Looping over len(rows_ank) alone raised
    # IndexError on rows_avg.iloc[i] whenever a train had fewer
    # departure-passage rows than arrival-passage rows.
    # NOTE(review): unmatched passage rows are dropped without a message (this
    # was already the case for surplus departure-passage rows) - confirm that
    # this is acceptable for the data set.
    n_pairs = min(len(rows_avg), len(rows_ank))
    for i in range(n_pairs):
        # Start from the departure-passage row: it already holds the arrival
        # side of the merged leg (UppehallstypAnkomst, Till_platssignatur,
        # Ankomsttid, Planerad_ankomsttid).
        merged_row = rows_avg.iloc[i].copy()

        # Take the departure side and the leg number from the arrival-passage row
        merged_row['UppehallstypAvgang'] = rows_ank['UppehallstypAvgang'].iloc[i]
        merged_row['Delstrackanummer'] = rows_ank['Delstrackanummer'].iloc[i]
        merged_row['Fran_platssignatur'] = rows_ank['Fran_platssignatur'].iloc[i]
        merged_row['Avgangstid'] = rows_ank['Avgangstid'].iloc[i]
        merged_row['Planerad_avgangstid'] = rows_ank['Planerad_avgangstid'].iloc[i]

        merged_rows.append(merged_row)

# Create a DataFrame from the merged rows
merged_rows_df = pd.DataFrame(merged_rows)

# Remove the original 'Passage' rows from the DataFrame
traffic_data_filtered = df_traffic_renamed[
    ~((df_traffic_renamed['UppehallstypAvgang'] == 'Passage') |
      (df_traffic_renamed['UppehallstypAnkomst'] == 'Passage'))
]

# Add the merged rows back into the DataFrame
traffic_data_filtered = pd.concat([traffic_data_filtered, merged_rows_df], ignore_index=True)

# Sort by 'Datum', 'Taguppdrag', 'Tagnr', and 'Delstrackanummer'
traffic_data_filtered.sort_values(by=['Datum', 'Taguppdrag', 'Tagnr', 'Delstrackanummer'], inplace=True)

# Re-number 'Delstrackanummer' within each train so that it is consecutive, starting at 1
traffic_data_filtered['Delstrackanummer'] = traffic_data_filtered.groupby(['Datum', 'Taguppdrag', 'Tagnr']).cumcount() + 1

### Remove trains with departures on the day before 2015-09-14

In [22]:
# Planned departure timestamps and their date part
planned_departure = traffic_data_filtered['Planerad_avgangstid']
dates_planned = planned_departure.dt.date

# Rows with a planned departure before 2 am on 2015-09-14
first_day = pd.to_datetime('2015-09-14').date()
is_early_morning = (dates_planned == first_day) & (planned_departure.dt.hour < 2)
early_morning_trains = traffic_data_filtered[is_early_morning]

# Drop the identified rows
rows_to_remove = traffic_data_filtered.index.isin(early_morning_trains.index)
traffic_data_filtered = traffic_data_filtered.loc[~rows_to_remove]


# ## just for testing smaller instance
# traffic_data_filtered = traffic_data_filtered[
#     ((traffic_data_filtered['Datum'] == '2015-09-14') | 
#      (traffic_data_filtered['Datum'] == '2015-09-15') |
#      (traffic_data_filtered['Datum'] == '2015-09-16') |
#      (traffic_data_filtered['Datum'] == '2015-09-17') | 
#      (traffic_data_filtered['Datum'] == '2015-09-18') | 
#      (traffic_data_filtered['Datum'] == '2015-09-21'))
# ]

### Remove cancelled trains and correct missing times

In [23]:
## Fill in a missing Ankomsttid from Avgangstid plus the planned running time
# Step 1: keep only the rows whose Planeringsstatus is "P" (planned, non-cancelled trains)
is_planned = traffic_data_filtered["Planeringsstatus"] == "P"
df_traffic_no_cancellation = traffic_data_filtered[is_planned].copy()

# Step 2: rows where Ankomsttid is missing but Avgangstid exists
arrival_missing = df_traffic_no_cancellation['Ankomsttid'].isna()
departure_known = df_traffic_no_cancellation['Avgangstid'].notna()
missing_arrival_mask = arrival_missing & departure_known

# Planned running time of every row, in minutes
planned_duration = df_traffic_no_cancellation['Planerad_ankomsttid'] - df_traffic_no_cancellation['Planerad_avgangstid']
df_traffic_no_cancellation['planned_runtime'] = planned_duration.dt.total_seconds() / 60

# Estimated arrival = actual departure + planned running time
estimated_arrival = (
    df_traffic_no_cancellation.loc[missing_arrival_mask, 'Avgangstid']
    + pd.to_timedelta(df_traffic_no_cancellation.loc[missing_arrival_mask, 'planned_runtime'], unit='m')
)
df_traffic_no_cancellation.loc[missing_arrival_mask, 'Ankomsttid'] = estimated_arrival

### Set whether the calculations use actual or planned times

In [24]:
actual_or_planned = 'actual'  # either 'actual' or 'planned'
if actual_or_planned != 'planned':
    # Actual-time mode: trains with an incomplete set of actual times are dropped

    # Step 1: group the rows train by train
    grouped = df_traffic_no_cancellation.groupby(['Datum', 'Taguppdrag', 'Tagnr'])

    # Step 2: collect all rows of every train where any 'Ankomsttid' or 'Avgangstid' is missing
    time_columns = ['Ankomsttid', 'Avgangstid']
    groups_with_missing_values = grouped.filter(
        lambda train_rows: train_rows[time_columns].isnull().any().any()
    )

    # Step 3: remove those rows and renumber the index from zero
    incomplete_rows = df_traffic_no_cancellation.index.isin(groups_with_missing_values.index)
    df_traffic_no_cancellation = df_traffic_no_cancellation[~incomplete_rows].reset_index(drop=True)

### Calculate delays for non-cancelled trains

In [25]:
## Delay columns: actual time minus planned time
df_traffic_no_cancellation.loc[:, "dep_delay"] = (
    df_traffic_no_cancellation["Avgangstid"] - df_traffic_no_cancellation["Planerad_avgangstid"]
)
df_traffic_no_cancellation.loc[:, "arr_delay"] = (
    df_traffic_no_cancellation["Ankomsttid"] - df_traffic_no_cancellation["Planerad_ankomsttid"]
)

# Add columns for the time period of departure and arrival, based on the
# planned or the actual times depending on the chosen mode
if actual_or_planned == 'planned':
    dep_time_column, arr_time_column = 'Planerad_avgangstid', 'Planerad_ankomsttid'
else:
    dep_time_column, arr_time_column = 'Avgangstid', 'Ankomsttid'

df_traffic_no_cancellation.loc[:, 'dep_t_float'] = df_traffic_no_cancellation[dep_time_column].apply(get_t_float)
df_traffic_no_cancellation.loc[:, 'arr_t_float'] = df_traffic_no_cancellation[arr_time_column].apply(get_t_float)

# Function to convert timedelta to minutes, negative values and NaT to zero
def timedelta_to_minutes(td):
    """Convert a timedelta to minutes, clamping missing and negative values to 0.

    Args:
        td: A timedelta-like value with ``total_seconds()``, or a missing
            value recognised by ``pd.isna``.

    Returns:
        0 for a missing or negative duration, otherwise the duration in
        minutes as a float.
    """
    if pd.isna(td):
        return 0
    seconds = td.total_seconds()
    if seconds < 0:
        return 0
    return seconds / 60

# Express both delay columns in minutes (missing and negative delays become 0)
for delay_column, minutes_column in (('dep_delay', 'dep_delay_minutes'), ('arr_delay', 'arr_delay_minutes')):
    df_traffic_no_cancellation.loc[:, minutes_column] = df_traffic_no_cancellation[delay_column].apply(timedelta_to_minutes)

### Split dataframes into north and south-going trains

In [26]:
# Station order used by get_direction (listed from south to north)
stations_south_to_north = ['Bål', 'Bro', 'Kän', 'Khä', 'Jkb', 'Bkb', 'Spå', 'Sub', 'Ke', 'Cst', 'Sst', 'Åbe', 'Äs',
                           'Fas', 'Tåd', 'Skg', 'Hnd', 'Jbo', 'Vhe', 'Kda', 'Ts', 'Hfa', 'Ssä', 'Öso', 'Ngd', 'Gdv', 'Nyh']

# Direction of every leg: 1, -1 or 0 as returned by get_direction
df_traffic_no_cancellation.loc[:, 'direction'] = df_traffic_no_cancellation.apply(
    lambda leg: get_direction(leg['Fran_platssignatur'], leg['Till_platssignatur'], stations_south_to_north),
    axis=1,
)

# Independent copies of the legs with direction 1 and with direction -1
is_northbound = df_traffic_no_cancellation['direction'] == 1
is_southbound = df_traffic_no_cancellation['direction'] == -1
df_to_north = df_traffic_no_cancellation[is_northbound].copy()
df_to_south = df_traffic_no_cancellation[is_southbound].copy()

# Step 1: Process for each dataframe (e.g., df_to_north)
def process_directional_dataframe(df):
    """Add 'prev_dep_t_float': the departure period of the preceding train at the same station.

    Rows are ordered by departure station and then by departure time
    (planned or actual, depending on the module-level ``actual_or_planned``).
    Within each station the previous row's 'dep_t_float' is used, with 0 for
    the first row. Whenever that previous value is larger than the row's own
    'dep_t_float', it is reset to 0.

    Args:
        df: Legs of one direction; needs 'Fran_platssignatur', 'dep_t_float'
            and the departure time column of the active mode.

    Returns:
        A sorted copy of ``df`` with the extra column.
    """
    if actual_or_planned == 'planned':
        departure_column = 'Planerad_avgangstid'
    else:
        departure_column = 'Avgangstid'
    ordered = df.sort_values(by=['Fran_platssignatur', departure_column])

    # Departure period of the preceding row at the same station (0 for the first row)
    previous_departure = ordered.groupby('Fran_platssignatur')['dep_t_float'].shift(1, fill_value=0)
    ordered['prev_dep_t_float'] = previous_departure

    # The first train of a day has its predecessor on the day before, so
    # prev_t > curr_t; such rows start counting from period 0 instead.
    # If trains flagged with A or I are removed, prev_t == curr_t does not occur.
    predecessor_is_later = ordered['prev_dep_t_float'] > ordered['dep_t_float']
    ordered.loc[predecessor_is_later, 'prev_dep_t_float'] = 0.

    return ordered.copy()

# Add the previous-departure column to both directional frames
df_to_north, df_to_south = [
    process_directional_dataframe(frame) for frame in (df_to_north, df_to_south)
]

### Populate with boarding passengers

In [27]:
def get_n_boarding_vectorized_all_stations():
    """Build the boarding table used for the per-train boarding calculation.

    Reads the module-level ``df_demand_updated`` and sums 'n_pass' per
    ('from', 'time_period', 'to').

    Returns:
        A DataFrame indexed by ('from', 'time_period') with one column per
        station of ``studied_line_35``; combinations without data are 0.
    """
    demand = df_demand_updated.copy()

    # Total passengers per origin, time period and destination
    totals = demand.groupby(['from', 'time_period', 'to'])['n_pass'].sum()

    # Destinations become columns, restricted to and ordered like the studied line
    per_destination = totals.unstack(fill_value=0)
    return per_destination.reindex(columns=studied_line_35, fill_value=0)

def get_n_boarding_float(data_boarding, t_prev, t_curr):
    """Sum the boardings between two fractional time periods.

    Args:
        data_boarding: Table indexed by integer time period; must contain
            the periods ``int(t_prev)`` and ``int(t_curr)``.
        t_prev: Start of the interval, in periods (may be fractional).
        t_curr: End of the interval, in periods (may be fractional).

    Returns:
        Per column, the sum of all periods from ``int(t_prev)`` to
        ``int(t_curr)`` inclusive, minus the part of the first period that
        lies before ``t_prev`` and the part of the last period that lies
        after ``t_curr``.
    """
    first_period = int(t_prev)
    last_period = int(t_curr)

    # Label-based slice: both end periods are included
    whole_periods = data_boarding.loc[first_period:last_period].sum()

    before_start = data_boarding.loc[first_period] * (t_prev - first_period)
    after_end = data_boarding.loc[last_period] * (last_period + 1 - t_curr)

    return whole_periods - before_start - after_end

# Step 2: Calculate passengers boarding
def calculate_boarding_passengers(df, boarding_sum):
    """Fill one column per station with the passengers boarding each train leg.

    For every row, the boardings at the row's departure station between
    'prev_dep_t_float' and 'dep_t_float' are computed per destination and
    written into the ``studied_line_35`` columns of that row.

    Args:
        df: Legs of one travel direction; modified in place.
        boarding_sum: Boarding table whose first index level is the
            departure station.

    Returns:
        The same ``df`` object, with the station columns filled.
    """
    # Create (or reset) the per-destination boarding columns
    df[studied_line_35] = 0.0

    for from_station, station_rows in df.groupby('Fran_platssignatur'):
        station_boardings = boarding_sum.loc[from_station]

        # Boarding window of every leg: previous departure -> own departure
        window_starts = station_rows['prev_dep_t_float'].values  # use float instead
        window_ends = station_rows['dep_t_float'].values

        for row_label, t_start, t_end in zip(station_rows.index, window_starts, window_ends):
            boardings = get_n_boarding_float(station_boardings, t_start, t_end)
            df.loc[row_label, studied_line_35] = boardings[studied_line_35]

    return df

# Precompute the boarding sums for all stations once; the resulting table is
# indexed by ('from', 'time_period') and has one column per destination station
boarding_sum = get_n_boarding_vectorized_all_stations()

# Apply the calculation to the northbound and southbound dataframes
#df_to_north_boardings = calculate_boarding_passengers(df_to_north, boarding_sum)
# df_to_south_boardings = calculate_boarding_passengers(df_to_south, boarding_sum)

In [28]:
from joblib import Parallel, delayed

# One boarding job per travel direction, run with two workers
boarding_jobs = [
    delayed(calculate_boarding_passengers)(frame, boarding_sum)
    for frame in (df_to_north, df_to_south)
]
results = Parallel(n_jobs=2)(boarding_jobs)

# Results come back in job order: northbound first, then southbound
df_to_north_boardings, df_to_south_boardings = results

### Calculate and populate with passengers onboard

In [29]:
def calculate_onboard_passengers(df):
    """Compute boarding, alighting and onboard passenger counts per train leg.

    The rows are grouped by ('Taguppdrag', 'Tagnr', 'Datum') and each group
    is processed in 'Delstrackanummer' order. Four columns are added:
    'n_boarding', 'n_alighting', 'n_onboard' and 'n_alighting_next'. In
    addition, each row's station columns (``studied_line_35``) are set to 0
    for every station that is not a 'Till_platssignatur' of the row itself
    or of a later row of the same group.

    Assumes ``df`` carries one column per station of the module-level
    ``studied_line_35`` holding the boardings per destination, as filled in
    by ``calculate_boarding_passengers`` - TODO confirm against the caller.

    Args:
        df: Legs of one travel direction.

    Returns:
        A new DataFrame with the grouping keys restored as columns.
    """
    # Step 3: calculate the passengers onboard using the boarding passengers
    #df['n_onboard'] = 0.0  # Initialize the onboard column
    
    def calculate_onboard(group):
        """Process the legs of a single group and return them sorted, with the new columns."""
        # Sort by 'Delstrackanummer' to get the correct order of stops
        group_sorted = group.sort_values(by='Delstrackanummer').copy()
        
        # Initialize the onboard count
        group_sorted['n_onboard'] = 0.0
        
        for i, index in enumerate(group_sorted.index):
            # Current station data (read before this row's station columns are zeroed below)
            curr_row = group_sorted.iloc[i]
            # Arrival stations of this leg and of all later legs of the group
            next_stations = group_sorted.iloc[i:]['Till_platssignatur'].tolist()

            # Total passengers boarding at this stop to any future stop
            total_boarding = curr_row[next_stations].sum()
            group_sorted.loc[index, 'n_boarding'] = total_boarding
            # Create a boolean array indicating which stations are not in the next_stations list
            not_next_stations = ~pd.Series(studied_line_35).isin(next_stations)
            # Set the values to 0 for the stations not in the next_stations list
            group_sorted.loc[index, pd.Series(studied_line_35)[not_next_stations]] = 0
            if i == 0:
                # First stop, no previous passengers onboard
                group_sorted.loc[index, 'n_onboard'] = total_boarding
                group_sorted.loc[index, 'n_alighting'] = 0. 
            else:
                # Passengers onboard from the previous stop
                prev_onboard = group_sorted.iloc[i-1]['n_onboard']
                
                # Passengers alighting at this stop: boarding from previous stops to this one,
                # i.e. the earlier rows' values in the column named after this leg's departure station
                n_alighting = group_sorted.iloc[:i][curr_row['Fran_platssignatur']].sum()
                group_sorted.loc[index, 'n_alighting'] =n_alighting

                # Update the onboard count
                n_onboard = prev_onboard + total_boarding - n_alighting
                group_sorted.loc[index, 'n_onboard'] = n_onboard
 
            # Save the alighting at the next station: the values of this row and all
            # earlier rows in the column named after this leg's arrival station
            n_alighting_next = group_sorted.iloc[:i+1][curr_row['Till_platssignatur']].sum()
            group_sorted.loc[index, 'n_alighting_next'] = n_alighting_next

        return group_sorted
    
    # Apply the calculation for each train and date combination
    # NOTE(review): the include_groups keyword is only accepted by recent pandas
    # releases - confirm the minimum pandas version used for this notebook
    df_res = df.groupby(['Taguppdrag', 'Tagnr', 'Datum']).apply(calculate_onboard, include_groups=False)
    # Turn the group keys back into columns; 'level_3' is presumably the unnamed
    # row-index level left over from the groups - verify against the output
    df_res = df_res.reset_index().drop(columns=['level_3'])
    
    return df_res

# Apply the calculation to the northbound and southbound dataframes
#df_to_north_onboard = calculate_onboard_passengers(df_to_north_boardings)
# df_to_south_onboard = calculate_onboard_passengers(df_to_south_boardings)

In [30]:
# One onboard job per travel direction, run with two workers
onboard_jobs = [
    delayed(calculate_onboard_passengers)(frame)
    for frame in (df_to_north_boardings, df_to_south_boardings)
]
results = Parallel(n_jobs=2)(onboard_jobs)

# Results come back in job order: northbound first, then southbound
df_to_north_onboard, df_to_south_onboard = results

# Stack the northbound and southbound results into one frame
df_onboard_concat = pd.concat([df_to_north_onboard, df_to_south_onboard])

In [31]:
# Renumber the rows of the combined frame
df_onboard = df_onboard_concat.reset_index(drop=True)#.drop(columns=studied_line_35, axis=1)

# Write the result to the CSV file that matches the chosen mode
if actual_or_planned == 'planned':
    output_file = 'df_onboard_planned.csv'
else:
    output_file = 'df_onboard_actual.csv'
df_onboard.to_csv(output_file, index=False)