In [42]:
import pandas as pd
from datetime import datetime, timedelta
import numpy as np
import random
import json
import re

# 1. Establishing Basic Helper Functions
Some basic functions are declared to get certain basic information.

In [None]:
def getServicePollDate(file_path):
    """
    Purpose: 
        Get basic information surrounding a dataset.

    Args:
        - file_path: Path to the csv file.

    Returns:
        The service, its polling rate and date of the dataset.
    """

    file_name = re.search(r'[^/\\]+$', file_path).group()

    all_info = file_name.split("_")
    service = all_info[2]
    poll_rate = all_info[3]
    date = datetime.strptime(all_info[4].split(".")[0], '%d-%m-%Y')

    return [service, poll_rate, date]

In [43]:
def distinguish_one_trip_data(df,tripID):
    """
    Purpose: 
        Get a trip's information, sorted by the timestamp and stopsequences in the dataframe.

    Args:
        - df: dataframe containing different trips information.
        - tripID: ID of the trip interested.

    Returns:
        A sorted dataframe of a particular trip. 
    """
    result = df[df["tripId"] == tripID].sort_values(by=['vehicleTimestamp', 'vehicleStopSequence'])
    return result[["tripId","vehicleTimestamp","vehicleCurrentStatus", "vehicleStopSequence", "vehicleStopID"]]

In [50]:
def getUniqueTripIDs(df):
    """
    Purpose: 
        Get a list of unique TripIds in a df.

    Args:
        - df: Takes in a df containing information of multiple trips.

    Returns:
        A list of unique tripIds.
    """
    return df["tripId"].unique()

# 2. Read the CSV

In [46]:
# Read Stops.txt and csv
stopsTxt = pd.read_csv("stops.txt", delimiter='\t')
file_path = "./raw_data/gtfs_data_2_15s_09-10-2023.csv"



orig_df = pd.read_csv(file_path)
orig_df["vehicleTimestamp"] = pd.to_datetime(orig_df["vehicleTimestamp"], unit="s")


service_poll_date = getServicePollDate(file_path)
service = service_poll_date[0]
poll_rate = service_poll_date[1]
date = service_poll_date[2]

print("Service is:", service)
print("Poll rate is:", poll_rate)
print("Date is:", date)

Service is: 2
Poll rate is: 15s
Date is: 2023-10-09 00:00:00


# 3. Cleaning and Transformation

In [47]:
# Cleaning & Transforming #
orig_df.drop(["pippenId", "pippenCreatedAt", "pippenPollingRate", "tripStartTime", "tripStartDate", "tripRouteId"], axis=1,inplace = True)
orig_df["tripDirectionId"] = np.where((orig_df["vehicleLabel"] == "FX2 To Portland") | (orig_df["vehicleLabel"] == "FX2 To NW 5th & Hoyt") | (orig_df["vehicleLabel"] == "FX2 To NW Irving & 5th"), 1, 0)
orig_df.drop_duplicates(inplace=True)
orig_df = orig_df[orig_df.vehicleStopSequence != 0]
orig_df.sort_values(by=['tripId', 'vehicleTimestamp'], inplace=True)

# Create a new DataFrame to store the sorted results
sorted_df = pd.DataFrame(columns=orig_df.columns)

for trip_id, group in orig_df.groupby('tripId'):
    sorted_group = group.sort_values(by='vehicleTimestamp', ascending=True)
    sorted_df = pd.concat([sorted_df, sorted_group], ignore_index=True)

# Reset the index of the sorted DataFrame
sorted_df.reset_index(drop=True, inplace=True)

  sorted_df = pd.concat([sorted_df, sorted_group], ignore_index=True)


# 4. Splitting the dataset into respective directions

In [48]:
# full_seq_dfs = orig_df.groupby('tripId').filter(lambda x: ((x.vehicleStopSequence.max() >= 42) & (x.vehicleStopSequence.min() <= 2)))
full_seq_dfs = sorted_df.groupby('tripId').filter(lambda x: ((x.vehicleStopSequence.max() >= 42) & (x.vehicleStopSequence.min() <= 2)))
print("Total number of records in both (0 & 1) directions:",len(full_seq_dfs))
print("Total number of trips in both (0 & 1) directions:",len(full_seq_dfs["tripId"].unique()))

Total number of records in both (0 & 1) directions: 29878
Total number of trips in both (0 & 1) directions: 171


In [49]:
zeroTrips = full_seq_dfs[full_seq_dfs["tripDirectionId"] == 0].reset_index(drop=True)
oneTrips = full_seq_dfs[full_seq_dfs["tripDirectionId"] == 1].reset_index(drop=True)

zeroTripIDs = zeroTrips["tripId"].unique()
oneTripIDs = oneTrips["tripId"].unique()

print("Number of Trips in 0 direction:",len(zeroTripIDs))
print("Number of Trips in 1 direction:",len(oneTripIDs))

Number of Trips in 0 direction: 85
Number of Trips in 1 direction: 86


# 5. Get Timings and Impute the missing Timings

In [44]:
def get_timings(df, tripID, expectedNumSequence):
    """
    Purpose: 
        Get a trip's arrival and departure timing. For 1 trip only.

    Args:
        - df: dataframe containing different trips information.
        - tripID: ID of the trip interested.
        - expectedNumSequence: Number of stops expected.

    Returns:
        A 2d list containing all stops, and in each stop, a list of arrival and departure timings. 
    """

    one_trip_df = distinguish_one_trip_data(df,tripID).copy()
    one_trip_df.reset_index(drop=True,inplace=True)
    one_trip_df2 = one_trip_df.groupby("vehicleStopSequence")

    counter = 1 # Starting sequence will be 1
    timings = [] # Output
    curStopArr = 0
    prevStopDep = 0
    
    for stopSequence, group in one_trip_df2:
        # This level deals with each stopSequece group as a whole. [Eg. All sequences from 1 to expectedNumSequence]

        # Ignore if 0. There were some off cases where stopSequence = 0. Ignore
        if stopSequence == 0: 
            continue
        
        # Add placeholder timestamps for missing sequences until counter matches the stopSequence. 
        # [Eg. Data only has stop sequence 20 to 30. This WHILE loop adds in [0,0] from 1 to 19]
        while counter < stopSequence:
            timings.append([0, 0])
            counter += 1

        for row_index, row in group.iterrows():
            # This level deals with a individual sequenceGroup's entries. [Eg. All entries within stopSequence = 5]

            # Stiutation 1: Only have one entry [Eg. stopSequence = 5 only have 1 entry]
            if len(group) == 1:
                # Status = IN_TRANSIT_TO, prevStopDep not set. Take the current StopSequence timestamp as the departure time of the previous stopSequence.
                # [Eg. stopSequence = 5 timestamp is stopSequence 4 departure]
                if row["vehicleStopSequence"] == counter and row["vehicleCurrentStatus"] == "IN_TRANSIT_TO" and prevStopDep == 0:
                    prevStopDep = row["vehicleTimestamp"]
                    timings[stopSequence-2][1] = prevStopDep

                # Status = STOPPED_AT, curStopArr not set. Take the current StopSequence timestamp as its arrival time.
                if row["vehicleCurrentStatus"] == "STOPPED_AT" and curStopArr == 0:
                    curStopArr = row["vehicleTimestamp"]

            # Situation 2: Multiple entries [Eg. stopSequence = 5 have 10 entries]
            else: 
                # Status = IN_TRANSIT_TO, prevStopDep not set. Continue is used here to take the current StopSequence first entry's timestamp as the departure time of the previous stopSequence.
                # [Eg. stopSequence = 5 have 10 entries, take first timestamp as stopSequence = 4 departure]
                if row["vehicleStopSequence"] == counter and row["vehicleCurrentStatus"] == "IN_TRANSIT_TO" and prevStopDep == 0:
                    prevStopDep = row["vehicleTimestamp"]
                    timings[stopSequence-2][1] = prevStopDep
                    continue

                # Get last transit as pass by timing
                if row["vehicleStopSequence"] == counter and row["vehicleCurrentStatus"] == "IN_TRANSIT_TO" and curStopArr == 0:
                    curStopArr = 0

                # Status = STOPPED_AT, curStopArr not set. Break is used here to take the current StopSequence first entry's timestamp as its arrival.
                # [Eg. stopSequence = 5 have 10 entries, take first timestamp as its arrival]
                elif row["vehicleCurrentStatus"] == "STOPPED_AT" and curStopArr == 0:
                    curStopArr = row["vehicleTimestamp"]
                    break
        
        # Reset the curStopArr & prevStopDep, increment the counter, append curStopArr for the current stopSequence.
        # [Eg. Append the curStopArr and departure = 0 for stopSequence = 5]
        if stopSequence <= expectedNumSequence:
            timings.append([curStopArr,0])
            curStopArr = 0
            prevStopDep = 0
            counter += 1

    # Add placeholder timings for missing stopSequence until it hits expectedNumSequence
    while len(timings) < expectedNumSequence:
        timings.append([0, 0])

    # Return the timings
    return timings

In [53]:
def direction_trip_timings(directedTrips, num_stops):
    """
    Purpose: 
        Get all trip's arrival and departure timing for a particular direction. For multiple trips.

    Args:
        - directedTripIds: TripIds of all trips going in the same direction.
        - expectedNumSequence: Number of stops expected.

    Returns:
        A 3d list containing all trips. For all trips, it will contain a 2d list of all stops, and in each stop, a list of arrival and departure timings. 
    """
    result = []
    directedTripIds = getUniqueTripIDs(directedTrips)
    for i in range(len(directedTripIds)):
        result.append(get_timings(directedTrips, directedTripIds[i], num_stops))

    return result

In [51]:
def fix_timing(timing):
    """
    Purpose: 
        Imputes missing timings by taking the difference between last known departure timing and the next known arrival timing, 
        subsequently divided by the number of missing stop sequences. Fixes for 1 trip only.

        All imputed timings for a missing stop will treat the stop as a passby stop.

    Args:
        - timing: Takes in a 2d list of timings containing all stops, and in each stop, a list of arrival and departure time.

    Returns:
        A 2d list in the same shape with the imputed times.
    """
    for i in range(len(timing)):
        departure = timing[i][1]
        if departure == 0:
            last_known_departure = timing[i - 1][1] if i > 0 else None
            next_known_arrival = None
            for j in range(i + 1, len(timing)):
                next_arr = timing[j][0]
                next_dep = timing[j][1]
                if next_arr != 0:
                    next_known_arrival = next_arr
                    break
                elif next_arr == 0 and next_dep != 0:
                    next_known_arrival = next_dep
                    break

            # Fix for missing first stop departure
            if i == 0:
                if timing[i+1][0] != 0:
                    timing[i][1] = timing[i+1][0] - timedelta(seconds = 30)
                elif timing[i+1][1] != 0: 
                    timing[i][1] = timing[i+1][1] - timedelta(seconds = 30)
                else:
                    timing[i][1] = timing[i][0] + timedelta(seconds = 30)

            # Fix for missing last stop arrival
            if i == len(timing)-1:
                if timing[i-1][1] != 0:
                    timing[i][0] = timing[i-1][1] + timedelta(seconds = 30)

            
            if last_known_departure is not None and next_known_arrival is not None:
                num_missing_stops = j - i
                time_difference = (next_known_arrival - last_known_departure).total_seconds()
                time_interval = timedelta(seconds=time_difference / (num_missing_stops + 1))

                for k in range(i, j):
                    if timing[k][1] == 0:
                        last_known_departure += time_interval
                        if timing[k][0]!= 0 and timing[k][0] > last_known_departure:
                            difference = (timing[k][0] - last_known_departure).total_seconds()
                            timing[k][1] = last_known_departure.replace(microsecond=0) + timedelta(seconds=difference)
                        else:
                            timing[k][1] = last_known_departure.replace(microsecond=0)

In [52]:
def fix_direction_timings(all_trip_timings):
    """
    Purpose: 
        Fix the timings for multiple trips by calling the fix_timing() function. For multiple trips.

    Args:
        - all_trip_timings: A 3d list of multiple trips with their respective stop's arrival and departure timings.

    Returns:
        A 3d list in the same shape, with the imputed timings.
    """
    for each_trip_timings in all_trip_timings:
        fix_timing(each_trip_timings)

In [54]:
zeroTimings = direction_trip_timings(zeroTrips,42)
oneTimings = direction_trip_timings(oneTrips,42)
fix_direction_timings(zeroTimings)
fix_direction_timings(oneTimings)

# 6. Get Interstation

In [None]:
def get_interstation(tripTimings):
    """
    Purpose: 
        Get the interstation travel time between stops. For 1 trip only.

    Args:
        - tripTimings: A 2d list of arrival and departure timings for each stop.

    Returns:
        Returns a list of interstation travel timings in seconds. 
    """

    result = []
    
    for i in range(len(tripTimings)-1):
        stopAArr = tripTimings[i][0]
        stopADep = tripTimings[i][1]
        stopBArr = tripTimings[i+1][0]
        stopBDep = tripTimings[i+1][1]
        
        if stopBArr == 0 and stopBDep != 0:
            stopBArr = stopBDep

        if stopADep == 0 and stopAArr != 0:
            stopADep = stopAArr + timedelta(seconds=30)

        if stopADep == 0 and stopAArr == 0:
            stopADep = stopBArr - timedelta(seconds=30)

        if stopBArr == 0 and stopBDep == 0:
            stopBArr = stopADep + timedelta(seconds=30)
        
        interstation = stopBArr - stopADep

        result.append(int(interstation.total_seconds()))
    return result

In [None]:
def direction_trips_interstation(directedTrips):
    """
    Purpose: 
        Get the multiple trip's interstation travel time between stops. For multiple trips.

    Args:
        - directedTrips: A 3d list of multiple trips and its respective arrival and departure timings for each stop.

    Returns:
        Returns a list of interstation travel timings in seconds for all trupss. 
    """
    result = []
    for each_trip in directedTrips:
        each_trip_interstation = get_interstation(each_trip)
        result.append(each_trip_interstation)
    return result


In [None]:
zero_interstation = direction_trips_interstation(zeroTimings)
one_interstation = direction_trips_interstation(oneTimings)

# 7. Establishing Additional Helper Functions
Additional helper functions are declared to generate values for the "input.json".

In [None]:
def generate_arrival_rate(directedDF, num_stops, input_key_stops):
    """
    Generate arrival rates of passengers arrival per second for stops in a bus route.
    It is used to get information to be converted into JSON format for use in the model.


    This function calculates arrival rates for each stop in a bus route based on specific criteria, including key stops and stop order.
    Arrival rates represent the expected rate of passengers arriving at each stop per second.

    Args:
        df (pd.DataFrame): A DataFrame containing bus route data, including columns like "vehicleStopID" and "tripDirectionId."
        num_stops (int): The total number of stops in the route.
        input_key_stops (list): A list of stop IDs considered as key stops that should have higher arrival rates.

    Returns:
        arrival_rate_list (list): A list of arrival rates of passengers for each stop in the route.

    Notes:
        - key stops are deterministically higher than non-key stops
        - Last stop has 0 passengers arriving
        - Range of 0 to 0.1 (aka maximum of 6pax/min)
    """ 

    arrival_rate_list = []

    stops_in_route = directedDF["vehicleStopID"].unique()
    for i in range(num_stops-1):
        arr = 0

        if stops_in_route[i] in input_key_stops:
            arr += 0.05

        if i <(num_stops//2):
            arr += round(random.uniform(0.005, 0.03),3)
        else:
            arr += round(random.uniform(0.005, 0.049),3)

        arrival_rate_list.append(arr)

    arrival_rate_list.append(0)
    return arrival_rate_list

In [None]:
def generate_alighting_percentage(directedDF, num_stops, input_key_stops):
    """
    Generate alighting percentages of passengers for stops in a bus route.

    This function calculates alighting percentages for each stop in a bus route based on specific criteria, including key stops, direction, and stop order.
    Alighting percentages represent the expected percentage of passengers alighting at each stop, ranging from 0 to 1.

    Args:
        df (pd.DataFrame): A DataFrame containing bus route data, including columns like "vehicleStopID" and "tripDirectionId."
        num_stops (int): The total number of stops in the route.
        input_key_stops (list): A list of stop IDs considered as key stops that should have higher alighting percentages.
        directionId (int): The direction identifier for the bus route.

    Returns:
        alighting_percentage_list (list): A list of alighting percentages for each stop in the route.

    """

    alighting_percentage_list = []

    stops_in_route = directedDF["vehicleStopID"].unique()

    alighting_percentage_list.append(0)

    for i in range(num_stops-3):
        a = 0

        if stops_in_route[i] in input_key_stops:
            a += 0.02

        if i <(num_stops//2):
            a += round(random.uniform(0, 0.6),3)
        else:
            a += round(random.uniform(0, 0.97),3)
            
        alighting_percentage_list.append(a)

    alighting_percentage_list.append(1)

    return alighting_percentage_list

In [None]:
def generate_initial_passengers(directedDF, num_stops, input_key_stops):
    """
    Generate initial passenger counts for stops in a bus route.

    This function calculates initial passenger counts for each stop in a bus route based on specific criteria, including key stops and stop order.
    Initial passenger counts represent the number of passengers present when the bus starts its route.

    Args:
        df (pd.DataFrame): A DataFrame containing bus route data, including columns like "vehicleStopID."
        num_stops (int): The total number of stops in the route.
        input_key_stops (list): A list of stop IDs considered as key stops that should have higher initial passenger counts.

    Returns:
        initial_passengers_list (list): A list of initial passenger counts for each stop in the route.

    Notes:
        - Manual set rnge of 0 to 6
    """

    initial_passengers_list = []

    stops_in_route = directedDF["vehicleStopID"].unique()
    for i in range(num_stops-1):
        p = 0

        if stops_in_route[i] in input_key_stops:
            p += 4

        if i <(num_stops//2):
            p += random.randint(0, 6)
        else:
            p += random.randint(0, 4)
            
        initial_passengers_list.append(p)

    initial_passengers_list.append(0)
    return initial_passengers_list

In [None]:
def generate_weights(directedDF, num_stops, input_key_stops):
    """
    Generate weights for stops in a bus trip based on specific criteria.
    It is used to get information to be converted into JSON format for use in the model.

    This function calculates weights for each stop in a bus trip based on provided criteria, such as key stops and the bus's current status.
    The weights are used to represent the significance of each stop in the trip.

    Args:
        df (pd.DataFrame): A DataFrame containing bus trip data, including columns like "vehicleStopSequence," "vehicleStopID," and "vehicleCurrentStatus."
        num_stops (int): The total number of stops in the trip.
        input_key_stops (list): A list of stop IDs considered as key stops that should have higher weights.

    Returns:
        weights_list (list): A list of weights for each stop in the trip, with higher values indicating higher significance.

    Notes:
        - Key stops, when present, receive higher weights.
    """

    stop_sequence = 1
    row = 0

    weights_list = [0.5] * num_stops
    for i in range(num_stops):
        green = 1
        while stop_sequence == int(directedDF["vehicleStopSequence"][row]):
            if int(directedDF["vehicleStopID"][row]) in input_key_stops and green:
                weights_list[stop_sequence-1] += 0.2
                green = 0
            if weights_list[stop_sequence-1] <=1 and directedDF["vehicleCurrentStatus"][row] == "STOPPED_AT":
                weights_list[stop_sequence-1] +=0.05
            row+=1
        stop_sequence +=1

    return weights_list

In [None]:
def get_stop_info(directedDF, num_stops ,staticStopsFile, actualStopNameSeq, stops_to_ignore):
    """
    Extracts information about bus stops from a DataFrame and a static stops file.
    It is used to get information to be converted into JSON format for use in the model.


    This function takes a DataFrame containing bus trip data, a static stops file in CSV format, and a direction ID
    as input, and extracts information about bus stops that match the provided direction ID. The extracted information
    includes stop coordinates, stop IDs, and stop names.

    Args:
        directedDF (pd.DataFrame): A DataFrame containing bus trip data, including columns like "vehicleStopID" and "tripDirectionId".
        num_stops: Expected number of stops.
        staticStopsFile (str): Path to a CSV file containing static bus stop information.
        actualStopNameSeq: Scraped data of the actual stop sequence.
        stops_to_ignore: List of stops to ignore.

    Returns:
        coordinates_list (list): A list of lists containing stop coordinates in the format [(lat1, lon1), (lat2, lon2), ...].
        stop_ids_list (list): A list of unique stop IDs as strings.
        stop_names_list (list): A list of unique stop names as strings.

    Notes:
        - The function filters bus stops in the DataFrame based on the provided directionId.
        - It then reads static stop information from the CSV file and matches it with the filtered bus stops.
        - Coordinates (latitude and longitude), stop IDs, and stop names for the matching bus stops are extracted.
        - Stops with IDs listed in the 'stop_to_ignore' variable are excluded from the result.

    Example:
        df = pd.DataFrame({
                            "vehicleStopID": ["1001", "2002", "3003"],
                            "tripDirectionId": [1, 2, 1]
                        })
        staticStopsFile = "static_stops.csv"
        directionId = 1
        coords, ids, names = get_stop_info(df, staticStopsFile, directionId)
    """

    stopsTxt = pd.read_csv(staticStopsFile, delimiter='\t')

    bus_stops = directedDF["vehicleStopID"].unique().astype(int).astype(str)

    rand_coordinates_list = []
    rand_stop_ids_list = []
    rand_stop_names_list = []

    for i in range(num_stops):
        rand_coordinates_list.append([])

    # Randomly Take
    for idx, each_stop in enumerate(bus_stops):
        for i in range(len(stopsTxt)):
            row = stopsTxt["stop_id,stop_code,stop_name,tts_stop_name,stop_desc,stop_lat,stop_lon,zone_id,stop_url,location_type,parent_station,direction,position"][i]
            one_stop = row.split(",")
            stop_id = str(one_stop[0])
            stop_name = str(one_stop[2])

            if stop_id in bus_stops and stop_id not in stops_to_ignore and each_stop == stop_id:
                try:
                    stop_location_lat = float(one_stop[5])
                    stop_location_long = float(one_stop[6])
                except ValueError:
                    stop_location_lat = float(one_stop[6])
                    stop_location_long = float(one_stop[7])                                     
                
                rand_coordinates_list[idx].append(stop_location_lat)
                rand_coordinates_list[idx].append(stop_location_long)
                rand_stop_ids_list.append(stop_id)
                rand_stop_names_list.append(stop_name)

    # Fix Randomness 
    coordinates_list = []
    stop_ids_list = []
    stop_names_list = []

    for correctStopName in actualStopNameSeq:
        for i in range(len(rand_stop_names_list)):
            if correctStopName == rand_stop_names_list[i]:
                coordinates_list.append(rand_coordinates_list[i])
                stop_ids_list.append(rand_stop_ids_list[i])
                stop_names_list.append(rand_stop_names_list[i])
    
    return coordinates_list, stop_ids_list, stop_names_list

In [None]:
def get_prev_dwell(firstTripTimings):
    """
    Purpose: 
        Get the dwelling times at each stop for Trip 0.

    Args:
        - firstTripTimings: Takes in the first trip timings of all trips. This first trip will be excluded from any optimisation
        as it is treated as trip 0 in the model. 

    Returns:
        A list of dwelling times at each stop.
    """
    result = []
    
    for i in range(len(firstTripTimings)-1): # For each stop in a trip
        stopArr = firstTripTimings[i][0]
        stopDep = firstTripTimings[i][1]

        # No arrival and departure time
        if stopArr == 0 and stopDep == 0:
            result.append(0)

        # No arrival time, but have departure time and is a datetime object. Bus passed by = No dwell time
        if stopArr == 0 and stopDep != 0:
            result.append(0)

        # Have both arrival and departure time and is a datetime object.
        if stopArr != 0 and stopDep != 0:
            dwell = stopDep - stopArr
            result.append(int(dwell.total_seconds()))

    return result

In [None]:
def get_prev_arrival(firstTripTimings):
    """
    Purpose: 
        Get the arrival times at each stop for Trip 0.

    Args:
        - firstTripTimings: Takes in the first trip timings of all trips. This first trip will be excluded from any optimisation
        as it is treated as trip 0 in the model. 

    Returns:
        A list of arrival times at each stop.
    """

    result = []
    for each_stop in firstTripTimings:
        stopArr = each_stop[0]
        stopDep = each_stop[1]

        if stopArr != 0: # Arrival time is present and is a datetime object.
            result.append(int((stopArr - date).total_seconds()))

        elif stopArr == 0 and stopDep != 0: # No arrival time, but have departure time and is a datetime object. Treat as bus pass by the stop.
            result.append(int((stopDep - date).total_seconds()))

        else:
            result.append(0)

    return result

In [None]:
def generate_target_headway(num_trips, num_stops, target_headway):
    """
    Purpose: 
        Generate a list of target headways for multiple trips and stops.

    Args:
        - num_trips: Total number of trips to iterate.
        - num_stops: Total number of stops for each trip.
        - target_headway: The value to be iteratively appended.

    Returns:
        A 2d list of all trips, and in each trip, a list of target headways for all stop.
    """
    result = []
    for i in range(num_trips):
        result.append([])
        for j in range(num_stops):
            result[i].append(target_headway)
    return result



In [None]:
### Not in used, can be built upon ###

# def get_bus_availability(directed_timings, date):
#     """

#     Purpose: 
#         Generate a mocked list of schedule dispatch timings.

#     Args:
#         - directedTimings: List of timings for all going in the same direction.
#         - data: Date you are optimising for. Used to format timestamps into seconds, relative to the date's 0000 hrs.

#     Returns:
#         A list of original dispatch timings for all trips going in the same direction.
#     """
#     result = []

#     for i in range(len(directed_timings)): # For each trip
#         lastRecordIndex = len(directed_timings[i])-1
#         lastRecord = directed_timings[i][lastRecordIndex]
#         while True:
#             if lastRecord[1] != 0: # Last record departure is a datetime object.
#                 result.append(int((lastRecord[1] - date).total_seconds()))
#                 break
#             elif lastRecord[0] != 0: # Last record departure is a datetime object. Missing departure
#                 result.append(int((lastRecord[0] - date).total_seconds()))
#                 break

#             lastRecordIndex = lastRecordIndex-1
#             lastRecord = directed_timings[i][lastRecordIndex]

#     return result

In [None]:
def createOriginalDispatch(directedTimings, date):
    """
    Purpose: 
        Generate a mocked list of schedule dispatch timings.

    Args:
        - directedTimings: List of timings for all going in the same direction.
        - data: Date you are optimising for. Used to format timestamps into seconds, relative to the date's 0000 hrs.

    Returns:
        A list of original dispatch timings for all trips going in the same direction.
    """
    dispatchList = []
    result = []
    for i in range(len(directedTimings)):
        dispatch = directedTimings[i][0][1] + timedelta(seconds=random.randint(-120, 120))
        dispatchList.append(dispatch)

    for eachDispatch in dispatchList:
        result.append(int((eachDispatch - date).total_seconds()))
    return result

In [None]:
def createBusAvailabilities(num_trips):
    """
    Purpose: 
        Generate a mocked list of bus availabilites.

    Args:
        - directedTimings: List of timings for all going in the same direction.
        - data: Date you are optimising for. Used to format timestamps into seconds, relative to the date's 0000 hrs.

    Returns:
        A list of bus available to be dispatched for all trips going in the same direction.
    """
    result = [0] * num_trips
    return result 

## 8. Common Values
Certain attriute's values can not possibly be extracted from the dataset. Hence, they are prefixed at the moment. Users with the neccessary data can supplement it will real world operational data.

In [None]:
bus_capacity = 100
boarding_duration = 2
alighting_duration = 2
max_allowed_deviation = 600
target_headway = 800 # All 82 trips including Trip 0 takes 65000s, or about 800s between each bus
penalty_coefficient = 10000

## 9. Example for Zero Bound Trips [Demo Purposes]

In [None]:
zeroStopsSequence = pd.read_csv("./Scraper/zeroStops.csv")
zeroStopsNames = zeroStopsSequence["0"].unique()
zero_num_stops = len(zeroStopsNames)

zero_stops_to_ignore = ["0", "3007"]
zeroStopsInfo = get_stop_info(zeroTrips, zero_num_stops, "stops.txt", zeroStopsNames, zero_stops_to_ignore)

In [None]:
zero_num_trips = len(zeroTimings) - 4
zero_original_dispatch_list = createOriginalDispatch(zeroTimings[4:],date)
zero_prev_arrival_list = get_prev_arrival(zeroTimings[3])
zero_prev_dwell_list = get_prev_dwell(zeroTimings[3])
zero_bus_availability_list = createBusAvailabilities(zero_num_trips)
zero_target_headway_2dlist = generate_target_headway(zero_num_trips, zero_num_stops, target_headway)
zero_interstation_travel_2dlist = zero_interstation[4:]


zero_input_key_stops = [9302, 7634, 1458, 1497, 1379, 1415, 8199, 14230]
zero_weights_list = generate_weights(zeroTrips, zero_num_stops, zero_input_key_stops) 
zero_arrival_rate_list = generate_arrival_rate(zeroTrips, zero_num_stops, zero_input_key_stops)
zero_initial_passengers_list = generate_initial_passengers(zeroTrips, zero_num_stops, zero_input_key_stops)
zero_initial_passengers_list = generate_initial_passengers(zeroTrips, zero_num_stops, zero_input_key_stops)
zero_alighting_percentage_list = generate_alighting_percentage(zeroTrips, zero_num_stops, zero_input_key_stops)

zero_coordinates_list = zeroStopsInfo[0]
zero_stop_ids_list = zeroStopsInfo[1]
zero_stop_names_list = zeroStopsInfo[2]


# 10. Creating "input.json" File

In [None]:
def jsonOutput(num_trips, num_stops, bus_capacity, original_dispatch_list, coordinates_list, stop_ids_list, stop_names_list, prev_arrival_list, prev_dwell_list, arrival_rate_list, alighting_percentage_list, boarding_duration, alighting_duration, weights_list, bus_availability_list, initial_passengers_list, max_allowed_deviation, target_headway_2dlist, interstation_travel_2dlist):
    output = {
        "num_trips": num_trips,
        "num_stops": num_stops,
        "bus_capacity": bus_capacity,
        "original_dispatch_list": original_dispatch_list,
        "coordinates_list": coordinates_list,
        "stop_ids_list": stop_ids_list,
        "stop_names_list": stop_names_list,
        "prev_arrival_list": prev_arrival_list,
        "prev_dwell_list": prev_dwell_list,
        "arrival_rate_list": arrival_rate_list,
        "alighting_percentage_list": alighting_percentage_list,
        "boarding_duration": boarding_duration,
        "alighting_duration": alighting_duration,
        "weights_list": weights_list, 
        "bus_availability_list": bus_availability_list,
        "initial_passengers_list": initial_passengers_list,
        "max_allowed_deviation": max_allowed_deviation,
        "penalty_coefficient": penalty_coefficient,
        "target_headway_2dlist": target_headway_2dlist,
        "interstation_travel_2dlist": interstation_travel_2dlist 
    }

    # Serializing json
    json_object = json.dumps(output, indent=4)
    
    # Writing to input.json
    with open("input.json", "w") as outfile:
        outfile.write(json_object)


In [None]:
jsonOutput(zero_num_trips, zero_num_stops, bus_capacity, zero_original_dispatch_list, zero_coordinates_list, zero_stop_ids_list, zero_stop_names_list, zero_prev_arrival_list, zero_prev_dwell_list, zero_arrival_rate_list, zero_alighting_percentage_list, boarding_duration, alighting_duration, zero_weights_list, zero_bus_availability_list, zero_initial_passengers_list, max_allowed_deviation, zero_target_headway_2dlist, zero_interstation_travel_2dlist)