In [5]:
import sys

sys.path.append('..')
from utils.coordinates import calculate_haversine_distance, split_line_between_coordinates
import json

print(split_line_between_coordinates((1, 1), (10, 10), 9))


[(2.0, 2.0), (3.0, 3.0), (4.0, 4.0), (5.0, 5.0), (6.0, 6.0), (7.0, 7.0), (8.0, 8.0), (9.0, 9.0)]


In [6]:
input_file_path = "../data/outputs/json/v1.1_output.json"

with open(input_file_path, "r") as f:
        data = json.load(f)

print(data)

num_trips = data["num_trips"]
num_stops = data["num_stops"]


coordinates = {f"{i+1}": (data["coordinates_list"][i][0], data["coordinates_list"][i][1]) for i in range(num_trips)}


cumulative_distances = {
        "1": 0
}

for i in range(2, num_stops+1):
        cumulative_distances[f"{i}"] = cumulative_distances[f"{i-1}"] + calculate_haversine_distance(coordinates[f"{i-1}"], coordinates[f"{i}"])

print(cumulative_distances)

{'num_trips': 5, 'num_stops': 5, 'original_dispatch_list': [600, 1200, 1800, 2400, 3000], 'coordinates_list': [[45.492786, -122.401855], [45.500866, -122.42799], [45.50276, -122.42709], [45.49802, -122.421265], [45.49153, -122.39884]], 'dwell_matrix': {'1,1': 0, '1,2': 11, '1,3': 30, '1,4': 28, '1,5': 0, '2,1': 0, '2,2': 53, '2,3': 233, '2,4': 109, '2,5': 0, '3,1': 0, '3,2': 61, '3,3': 221, '3,4': 95, '3,5': 0, '4,1': 0, '4,2': 53, '4,3': 189, '4,4': 81, '4,5': 0, '5,1': 0, '5,2': 67, '5,3': 257, '5,4': 115, '5,5': 0}, 'busload_matrix': {'1,1': 0, '1,2': 5, '1,3': 10, '1,4': 15, '1,5': 23, '2,1': 0, '2,2': 24, '2,3': 46, '2,4': 117, '2,5': 125, '3,1': 0, '3,2': 29, '3,3': 54, '3,4': 110, '3,5': 114, '4,1': 0, '4,2': 26, '4,3': 47, '4,4': 94, '4,5': 97, '5,1': 0, '5,2': 32, '5,3': 59, '5,4': 128, '5,5': 134}, 'arrival_matrix': {'1,2': 1096, '1,3': 1307, '1,4': 1637, '1,5': 2065, '2,2': 1863, '2,3': 2116, '2,4': 2649, '2,5': 3159, '3,2': 2783, '3,3': 3044, '3,4': 3565, '3,5': 4060, '4,2'

In [7]:
import pandas as pd
pd.set_option("display.max_rows", 20)

POLLING_RATE = 1

num_trips = 5
num_stops = 5



def initialise_dataframe(current_trip):

    # Initialize empty lists for each column
    timestamps = []
    bus_trip_nos = []
    statuses = []
    bus_stop_nos = []
    latitudes = []
    longitudes = []
    distances = []

    # dispatch from the bus depot
    timestamps.append(data["dispatch_list"][f"{current_trip}"])
    bus_trip_nos.append(current_trip)
    statuses.append("DISPATCHED_FROM")
    bus_stop_nos.append(1)
    latitudes.append(coordinates[f"1"][0])
    longitudes.append(coordinates[f"1"][1])
    distances.append(0)

    # Iterate through the keys in the arrival_matrix (assuming it contains all necessary keys)
    for key in data["arrival_matrix"]:
        # Split the key into trip number and stop number
        trip_no, stop_no = map(int, key.split(','))

        # Get the timestamp from the arrival_matrix
        timestamp_seconds = data["arrival_matrix"][key]

        # Append data to respective lists for stops
        if trip_no == current_trip:
            timestamps.append(timestamp_seconds)
            bus_trip_nos.append(trip_no)
            statuses.append("STOPPED_AT")
            bus_stop_nos.append(stop_no)
            latitudes.append(coordinates[f"{stop_no}"][0])
            longitudes.append(coordinates[f"{stop_no}"][1])
            distances.append(cumulative_distances[f"{stop_no}"])

    timestamp_list = timestamps
    for i in range(len(timestamp_list)-1): # timestamp_list[i] = every stop's timestamp
        dwell_count = POLLING_RATE

        num_intermediate_segments = timestamp_list[i+1] - timestamp_list[i]
        print(timestamp_list[i], timestamp_list[i+1], num_intermediate_segments)
        segments = split_line_between_coordinates(
            (coordinates[f"{i+1}"][0], coordinates[f"{i+1}"][1]),
            (coordinates[f"{i+2}"][0], coordinates[f"{i+2}"][1]),
            num_intermediate_segments
            )

        
        interstation_distance = cumulative_distances[f"{i+2}"] - cumulative_distances[f"{i+1}"]
        distance_per_timestep = interstation_distance / num_intermediate_segments

        segment_count = 0
        
        for intermediate_time in range(timestamp_list[i]+1, (timestamp_list[i+1]), POLLING_RATE): #intermediate_time = timestamp at intermediates
            if dwell_count <= data["dwell_matrix"][f"{current_trip},{i+1}"]:
                timestamps.append(intermediate_time)
                bus_trip_nos.append(current_trip)
                statuses.append("DWELL_AT")
                latitudes.append(coordinates[f"{i+1}"][0])
                longitudes.append(coordinates[f"{i+1}"][1])
                bus_stop_nos.append(i+1)
                distances.append(cumulative_distances[f"{i+1}"])
                dwell_count += POLLING_RATE

            else:
                timestamps.append(intermediate_time)
                bus_trip_nos.append(current_trip)
                statuses.append("TRANSIT_TO")
                latitudes.append(segments[segment_count][0])
                longitudes.append(segments[segment_count][1])
                bus_stop_nos.append(i+2)
                covered_distance = cumulative_distances[f"{i+1}"] + distance_per_timestep * (segment_count+1)
                distances.append(covered_distance)  # Placeholder for NaN
                segment_count += 1

    # Create a DataFrame from the lists
    df = pd.DataFrame({
        "timestamp (in seconds)": timestamps,
        "bus_trip_no": bus_trip_nos,
        "status": statuses,
        "bus_stop_no": bus_stop_nos,
        "latitude": latitudes,
        "longitude": longitudes,
        "distance": distances
    })

    return df

dataframes_list = []
for trip in range(1, num_trips+1): #TODO: change back
    dataframes_list.append(initialise_dataframe(trip))
    
df = pd.concat(dataframes_list)



# Print the resulting DataFrame
df

996 1096 100
1096 1307 211
1307 1637 330
1637 2065 428
1763 1863 100
1863 2116 253
2116 2649 533
2649 3159 510
2683 2783 100
2783 3044 261
3044 3565 521
3565 4060 495
3500 3600 100
3600 3853 253
3853 4342 489
4342 4823 481
4500 4600 100
4600 4867 267
4867 5423 556
5423 5938 515


Unnamed: 0,timestamp (in seconds),bus_trip_no,status,bus_stop_no,latitude,longitude,distance
0,996,1,DISPATCHED_FROM,1,45.492786,-122.401855,0.000000
1,1096,1,STOPPED_AT,2,45.500866,-122.427990,2226.351833
2,1307,1,STOPPED_AT,3,45.502760,-122.427090,2448.328273
3,1637,1,STOPPED_AT,4,45.498020,-122.421265,3143.955380
4,2065,1,STOPPED_AT,5,45.491530,-122.398840,5034.982032
...,...,...,...,...,...,...,...
1434,5933,5,TRANSIT_TO,5,45.493042,-122.404065,4594.354463
1435,5934,5,TRANSIT_TO,5,45.493030,-122.404022,4598.026359
1436,5935,5,TRANSIT_TO,5,45.493017,-122.403978,4601.698256
1437,5936,5,TRANSIT_TO,5,45.493004,-122.403935,4605.370152


In [215]:
pd.set_option("display.max_rows", None)
df = df.sort_values(by=["timestamp (in seconds)"])
df = df.reset_index(drop=True)
df

Unnamed: 0,timestamp (in seconds),bus_trip_no,status,bus_stop_no,latitude,longitude,distance
0,996,1,DISPATCHED_FROM,1,45.492786,-122.401855,0.0
1,997,1,TRANSIT_TO,2,45.492867,-122.402116,22.263518
2,998,1,TRANSIT_TO,2,45.492948,-122.402378,44.527037
3,999,1,TRANSIT_TO,2,45.493028,-122.402639,66.790555
4,1000,1,TRANSIT_TO,2,45.493109,-122.4029,89.054073
5,1001,1,TRANSIT_TO,2,45.49319,-122.403162,111.317592
6,1002,1,TRANSIT_TO,2,45.493271,-122.403423,133.58111
7,1003,1,TRANSIT_TO,2,45.493352,-122.403684,155.844628
8,1004,1,TRANSIT_TO,2,45.493432,-122.403946,178.108147
9,1005,1,TRANSIT_TO,2,45.493513,-122.404207,200.371665
