In [33]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

import math

def haversine_distance(lat1, lon1, lat2, lon2, radius=6371.0):
    """
    Calculate the great circle distance between two points on the earth
    specified in decimal degrees using the Haversine formula.

    Parameters
    ----------
    lat1, lon1 : float
        Latitude and longitude of the first point, in decimal degrees.
    lat2, lon2 : float
        Latitude and longitude of the second point, in decimal degrees.
    radius : float, optional
        Radius of the sphere; the result is expressed in the same unit.
        Defaults to 6371 (earth radius in kilometers). Use 3956 for miles.

    Returns
    -------
    float
        Great circle distance between the two points.
    """
    # Convert decimal degrees to radians
    lon1, lat1, lon2, lat2 = map(math.radians, [lon1, lat1, lon2, lat2])

    # Haversine formula
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = math.sin(dlat / 2) ** 2 + math.cos(lat1) * math.cos(lat2) * math.sin(dlon / 2) ** 2

    # Floating-point rounding can push `a` marginally outside [0, 1] for
    # (near-)antipodal points, which would make math.asin raise ValueError.
    a = min(1.0, max(0.0, a))

    c = 2 * math.asin(math.sqrt(a))
    return c * radius

def calculate_s_values(positions):
    """
    Calculate cumulative and normalized cumulative distances (s values) for a
    sequence of positions.

    Parameters
    ----------
    positions : sequence of (latitude, longitude)
        Consecutive positions in decimal degrees. Anything that supports
        ``len`` and slicing works (list of tuples, 2-column array, ...).

    Returns
    -------
    cumulative_distances : list of float
        Distance travelled from the first position up to each position, as
        returned by ``haversine_distance`` (kilometers with its default radius).
        The first entry is always 0.
    s_values : list of float
        ``cumulative_distances`` divided by the total distance, so values run
        from 0 to 1. If the total distance is 0 (single position, or all
        positions identical) every s value is 0.0 instead of dividing by zero.

    For an empty ``positions`` both lists are empty.
    """
    if len(positions) == 0:
        return [], []

    cumulative_distances = [0.0]  # Start with 0 for the first position

    # Accumulate the distance between each pair of consecutive points
    for (lat1, lon1), (lat2, lon2) in zip(positions[:-1], positions[1:]):
        step = haversine_distance(lat1, lon1, lat2, lon2)
        cumulative_distances.append(cumulative_distances[-1] + step)

    # Normalize cumulative distances, guarding against a zero-length track
    total_distance = cumulative_distances[-1]
    if total_distance == 0:
        s_values = [0.0] * len(cumulative_distances)
    else:
        s_values = [cd / total_distance for cd in cumulative_distances]

    return cumulative_distances, s_values




In [46]:
# NOTE: unpickling can execute arbitrary code. Only load result files that
# were produced by a trusted run of this project.
df, summary = pd.read_pickle('Teconer_results/2024-07-04-21-58-44.pkl')
# Other result files:
# 2024-07-04-21-30-15.pkl
# 2024-07-04-21-32-04.pkl 1M Jan
# 2024-07-04-21-32-04.pkl 100K Jan
# 2024-07-04-21-31-14.pkl 100K downtown

# Re-base timestamps so the earliest record is at 0.
df['AbsoluteTime'] = df['AbsoluteTime'] - df['AbsoluteTime'].min()

# Group by TripID and keep each trip as a separate DataFrame. Each group is
# copied so that per-trip columns added later are written to an independent
# frame rather than to a slice handed out by the groupby.
grouped = df.groupby('TripID')

trip_dfs = [group.copy() for _, group in grouped]

# Number of records in each trip
trip_lengths = [len(trip) for trip in trip_dfs]

# Columns of df holding the predictions to compare with 'Friction (measured)'
estimators = ['TMI']




# calculate the MAE, RMSE and R^2 across all records in df
def calculate_error_metrics(df, estimator):
    """
    Calculate error metrics between the measured friction and the values
    predicted by one estimator.

    Parameters
    ----------
    df : DataFrame
        Must contain the column 'Friction (measured)' and a column named
        ``estimator``.
    estimator : str
        Name of the column holding the predicted values.

    Returns
    -------
    mae : float
        Mean absolute error.
    rmse : float
        Root mean squared error.
    r2 : float
        Coefficient of determination, ``1 - ss_res / ss_tot``. NaN when the
        measured values have zero variance (``ss_tot == 0``), where R^2 is
        undefined.
    """
    measured = df['Friction (measured)']
    # Residuals are shared by all three metrics, so compute them once
    residual = measured - df[estimator]

    # Calculate mean absolute error
    mae = np.mean(np.abs(residual))

    # Calculate root mean squared error
    rmse = np.sqrt(np.mean(residual ** 2))

    # Calculate R^2
    ss_res = np.sum(residual ** 2)
    ss_tot = np.sum((measured - np.mean(measured)) ** 2)
    # Avoid dividing by zero when every measured value is identical
    r2 = 1 - ss_res / ss_tot if ss_tot != 0 else np.nan

    return mae, rmse, r2

# Dump the run summary, one "key value" pair per line
for name, setting in summary.items():
    print(name, setting)

# Report the error metrics of every estimator over all records in df
for estimator in estimators:
    print("Estimator: ", estimator)
    mae, rmse, r2 = calculate_error_metrics(df, estimator)
    for label, score in (("MAE", mae), ("RMSE", rmse), ("R2", r2)):
        print(f"{label}: {score:.2f}")




average_records_per_trip 200.4551442198501
average_ahead_records 107.38581335928946
preview_duration 600
learning_model DecisionTreeRegressor
dataset Teconer_Downtown_1M
online_models ['TMI']
metric_MAE 0.061104813743999495
metric_RMSE 0.1144246398148579
metric_R2 0.24974997415575428
TMI {'epsilon': 0.6, 'prior': 0.5, 'num_sub_predictions': 20, 'min_memory_len': 10, 'max_elimination_per_pruning': 10, 'probabilistic_prediction': False, 'method_name': 'TMI'}
Estimator:  TMI
MAE: 0.06
RMSE: 0.11
R2: 0.25


In [47]:
%matplotlib qt

plt.close('all')

# Trips with fewer records than this are skipped when browsing.
MIN_TRIP_RECORDS = 300

# True until the colorbar has been added (done once, on the first plotted trip)
virgin = True
fig, ax = plt.subplots(2, 1, figsize=(8, 7))


# Browse the trips one at a time in a single, reused figure:
#   top panel    - measured friction on a longitude/latitude map
#   bottom panel - measured vs. predicted friction along the trip
for trip in trip_dfs:
    if len(trip) < MIN_TRIP_RECORDS:
        continue

    # Stop browsing once the figure window has been closed
    if not plt.fignum_exists(fig.number):
        break

    cum_dists, s_values = calculate_s_values(trip[['Latitude', 'Longitude']].values)
    trip['trip_station'] = s_values
    trip['cum_dist'] = cum_dists

    # Wipe what the previous trip drew
    ax[0].cla()
    ax[1].cla()

    # scatter plot of friction in longitude/latitude space
    sc = ax[0].scatter(trip['Longitude'], trip['Latitude'], c=trip['Friction (measured)'], cmap='viridis', s=3, alpha=0.7, vmin=.2, vmax=.8)
    # mark the first point of the trip with a red star
    ax[0].plot(trip['Longitude'].iloc[0], trip['Latitude'].iloc[0], 'r*', markersize=10)
    ax[0].set_title('Measured Friction')
    ax[0].set_xlabel('Longitude')
    ax[0].set_ylabel('Latitude')
    ax[0].grid(True)

    if virgin:
        virgin = False
        # The colour scale is fixed (vmin/vmax above), so a single colorbar
        # created from the first scatter is valid for every later trip.
        fig.colorbar(sc, ax=ax[0], label='Friction')

    # plot measured and predicted friction against cumulative trip distance
    ax[1].plot(trip['cum_dist'], trip['Friction (measured)'], label='Measured')
    for method_name in estimators:
        ax[1].plot(trip['cum_dist'], trip[method_name], label=method_name)
    ax[1].set_title('Friction (Measured vs. Predicted)')
    ax[1].legend()
    ax[1].set_xlabel('Cumulative Trip Distance [Km]')
    ax[1].set_ylabel('Friction')
    ax[1].grid(True)

    # fixed friction axis from 0 to 1 so trips are comparable
    ax[1].set_ylim(0, 1)

    # Operate on `fig` explicitly: the pyplot-level calls act on the *current*
    # figure and would create a new empty one if this window had been closed.
    fig.tight_layout()
    plt.pause(0.01)

    # Block until a key press or mouse click in the figure, then show the next trip
    fig.waitforbuttonpress()



KeyboardInterrupt: 

In [27]:
# Scratch inspection of the (latitude, longitude) values of `trip`, i.e. whichever
# trip DataFrame the most recently executed cell left bound to that name.
# NOTE(review): the recorded output, (1360, 2), looks like a shape rather than the
# array itself - presumably left over from an earlier `.shape` version of this cell; confirm.
trip[['Latitude', 'Longitude']].values

(1360, 2)