In [None]:
from ast import literal_eval
from typing import Dict, List, Tuple

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import ticker
from sklearn.cluster import KMeans
from shapely.geometry import box, Point

from db_utils import get_rect_to_rect_data, DatabaseConnection

In [None]:
# Path to the SUMO simulation output; it is read further down as a
# semicolon-delimited CSV and compared against the SimRa rides.
sumo_sim_file = "./sim_data/prinzenstr_new_params.csv"

In [None]:
# Rectangles delimiting the analysed street section. Each tuple is later
# unpacked into shapely's box(), so the order is
# (min lon, min lat, max lon, max lat).

# Prinzenstr (Annenstr. to Moritzplatz)
start_rect_coords = (
    13.413598, 52.507573,  # lower-left corner (lon, lat)
    13.414164, 52.507822,  # upper-right corner (lon, lat)
)
end_rect_coords = (
    13.410895, 52.504089,  # lower-left corner (lon, lat)
    13.411461, 52.504338,  # upper-right corner (lon, lat)
)

# Alternative section: Prinzenstr (Dresdener Str. to Sebastianstr.)
# start_rect_coords = (13.412877,52.506571,13.413298,52.506765)
# end_rect_coords = (13.411595,52.504951,13.412016,52.505145)

In [None]:
# Fetch the SimRa data for the start/end rectangles (see
# db_utils.get_rect_to_rect_data) and keep only the columns used below.
df_simra = get_rect_to_rect_data(start_rect_coords, end_rect_coords)
# Select and rename in one chain: rename() without inplace returns a new
# frame, so nothing is mutated on a slice of df_simra (avoids the
# SettingWithCopyWarning) and re-running the cell gives the same result.
df_simra_paths = (
    df_simra[['filename', 'ts', 'lon', 'lat', 'velo', 'time_diff']]
    .rename(columns={'filename': 'ride_id', 'time_diff': 'duration'})
)

In [None]:
# Read the SUMO simulation output (semicolon-delimited CSV).
df_sumo = pd.read_csv(sumo_sim_file, delimiter=';')
# Keep the trajectory columns and give them the same names as the SimRa frame.
# rename() without inplace returns a new frame instead of mutating a slice of
# df_sumo (avoids the SettingWithCopyWarning, keeps the cell idempotent).
df_sumo_paths = (
    df_sumo[['vehicle_id', 'timestep_time', 'vehicle_x', 'vehicle_y', 'vehicle_speed']]
    .rename(columns={
        'vehicle_id': 'ride_id',
        'timestep_time': 'ts',
        'vehicle_x': 'lon',
        'vehicle_y': 'lat',
        'vehicle_speed': 'velo',
    })
)

In [None]:
# Number of distinct ride ids per data source (keys: 'SUMO', 'SimRa').
n_rides = {
    source: len(paths.groupby('ride_id'))
    for source, paths in (('SUMO', df_sumo_paths), ('SimRa', df_simra_paths))
}

# Only the SUMO count is reported here.
print(f"Number of rides: {n_rides['SUMO']}")

In [None]:
# Build the shapely rectangles used for the point-in-box checks below.
start_rect, end_rect = (
    box(*coords) for coords in (start_rect_coords, end_rect_coords)
)

def get_indices_to_delete(ride_group: pd.DataFrame) -> list:
    """Return the index labels of a ride's rows outside the start-to-end section.

    The section starts at the first row located inside the module-level
    ``start_rect`` and ends at the last row located inside ``end_rect``.

    Args:
        ride_group: Rows of a single ride with ``lon`` and ``lat`` columns.
            The index labels are compared with ``<`` / ``>``, so they are
            expected to increase along the ride.

    Returns:
        Index labels before the section start or after the section end. If the
        ride never has a row inside the start box or never has one inside the
        end box, every index label of the ride is returned.
    """
    def _rows_inside(rect):
        # Row-wise containment test of each (lon, lat) sample against ``rect``.
        return ride_group.apply(
            lambda coord: rect.contains(Point(coord['lon'], coord['lat'])), axis=1
        )

    mask_first = _rows_inside(start_rect)
    mask_end = _rows_inside(end_rect)
    try:
        start_idx = mask_first[mask_first == True].index[0]
        end_idx = mask_end[mask_end == True].index[-1]
    except IndexError:
        # No row inside one of the boxes: probably the vehicle (ride) does not
        # arrive in the end box because the simulation ended beforehand.
        # Only this expected case is handled; other errors are not swallowed.
        return list(ride_group.index)
    return [idx for idx in ride_group.index if idx < start_idx or idx > end_idx]

# Collect the rows to discard for every ride first and drop them in one go:
# each ride is analysed only once, and the frame is not mutated while its
# groupby object is still being iterated.
grouped = df_sumo_paths.groupby('ride_id')
indices_to_delete = []
for _, ride_group in grouped:
    indices_to_delete.extend(get_indices_to_delete(ride_group))
# drop() without inplace returns a new frame, so no slice is modified in place.
df_sumo_paths = df_sumo_paths.drop(index=indices_to_delete)

In [None]:
# Normalised histogram of the SimRa velocity samples.
df_simra_paths['velo'].hist(density=True)

In [None]:
# Normalised histogram of the SUMO velocity samples (orange, as in the other SUMO plots).
df_sumo_paths['velo'].hist(color='orange', density=True)

In [None]:
def plot_ride_paths(ride_data: Dict[str, pd.DataFrame], show_legend: bool = False):
    """Plot the lon/lat trace of every ride, one colour per data source.

    Args:
        ride_data: Maps a data-source name (used as legend label) to a frame
            with the columns ``ride_id``, ``lon`` and ``lat``.
        show_legend: If True, add a legend with one entry per data source.
            Defaults to False, which draws no legend.
    """
    _, ax = plt.subplots(figsize=(12, 12))
    ax.set_aspect(1.5)

    colors = ['blue', 'orange', 'green', 'pink']

    # plot rides for each dataframe
    for data_idx, (data_name, df) in enumerate(ride_data.items()):
        # Cycle through the palette so that more sources than colours do not
        # raise an IndexError.
        color = colors[data_idx % len(colors)]
        for ride_idx, (_, df_ride_group) in enumerate(df.groupby('ride_id')):
            # Label only the first trace of a source; labels starting with an
            # underscore are skipped by matplotlib's legend, so the legend
            # shows each source once instead of once per ride.
            label = data_name if ride_idx == 0 else '_nolegend_'
            ax.plot(df_ride_group.lon, df_ride_group.lat, color=color, label=label, linewidth=1)

    # Four evenly spaced ticks per axis, printed as full coordinates (no offset notation).
    ax.xaxis.set_major_locator(ticker.LinearLocator(4))
    ax.xaxis.set_major_formatter(ticker.ScalarFormatter(useOffset=False))
    ax.yaxis.set_major_locator(ticker.LinearLocator(4))
    ax.yaxis.set_major_formatter(ticker.ScalarFormatter(useOffset=False))
    ax.set_xlabel('Longitude in decimal degrees')
    ax.set_ylabel('Latitude in decimal degrees')

    if show_legend:
        ax.legend()

    # plt.savefig('simra_vs_sumo_ride_path.png', transparent=True)
    plt.show()

In [None]:
# Data sources to plot, keyed by display name (SimRa first, then SUMO).
ride_data = dict(SimRa=df_simra_paths, SUMO=df_sumo_paths)

In [None]:
# Draw the ride traces of both data sources into one figure.
plot_ride_paths(ride_data=ride_data)

In [None]:
# SUMO: duration of a ride = range (max - min) of its timestamps.
sumo_ts_range_per_ride = df_sumo_paths.groupby('ride_id')['ts'].agg(np.ptp)
sumo_path_durations = list(sumo_ts_range_per_ride)

# SimRa: take the first 'duration' value of each ride and convert it to seconds.
simra_first_durations = df_simra_paths.groupby('ride_id').first()['duration']
simra_path_durations = list(map(lambda td: td.total_seconds(), simra_first_durations))

In [None]:
# Overlay both duration distributions. Labels and a legend make the figure
# readable on its own (orange = SUMO, blue = SimRa).
fig, ax = plt.subplots()
ax.hist(sumo_path_durations, color='orange', density=True, bins=10, label='SUMO')
ax.hist(simra_path_durations, color='blue', alpha=0.5, density=True, bins=60, label='SimRa')
# Restrict the view to the first 100 units of the duration axis.
ax.set_xlim(0, 100)
# SimRa durations come from total_seconds(); the SUMO timestamps are assumed
# to be in seconds as well - TODO confirm.
ax.set_xlabel('Ride duration in seconds')
ax.set_ylabel('Density')
ax.legend();
