In [25]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

import math

def haversine_distance(lat1, lon1, lat2, lon2):
    """
    Calculate the great circle distance in kilometers between two points 
    on the earth specified in decimal degrees using the Haversine formula.
    """
    # Convert decimal degrees to radians 
    lon1, lat1, lon2, lat2 = map(math.radians, [lon1, lat1, lon2, lat2])

    # Haversine formula 
    dlon = lon2 - lon1 
    dlat = lat2 - lat1 
    a = math.sin(dlat/2)**2 + math.cos(lat1) * math.cos(lat2) * math.sin(dlon/2)**2
    c = 2 * math.asin(math.sqrt(a)) 
    r = 6371 # Radius of earth in kilometers. Use 3956 for miles
    return c * r

def calculate_distances(positions):
    """
    Calculate normalized cumulative distances (s values) for a list of positions.
    Each position is a tuple (latitude, longitude).
    """
    distances = []
    cumulative_distances = [0]  # Start with 0 for the first position

    # Calculate distances between consecutive points
    for i in range(1, len(positions)):
        lat1, lon1 = positions[i - 1]
        lat2, lon2 = positions[i]
        dist = haversine_distance(lat1, lon1, lat2, lon2)
        
        distances.append(dist)
        cumulative_distances.append(cumulative_distances[-1] + dist)

    # Normalize cumulative distances
    total_distance = cumulative_distances[-1]
    
    # s_values = [cd / total_distance for cd in cumulative_distances]

    return cumulative_distances



In [131]:
# Full dataset
# df, summary = pd.read_pickle('Teconer_results/firm-elevator-1052.pkl')
# df_30, summary_30 = pd.read_pickle('Teconer_results/jumping-gorge-1050.pkl')

# 100K
df, summary = pd.read_pickle('Teconer_results/peach-plant-1022.pkl')
df_30, summary_30 = pd.read_pickle('Teconer_results/northern-dream-1031.pkl')



df['UnixTime'] = df['UnixTime'] - df['UnixTime'].min()
# calculate trip station using calculate_s_values
df['TMI (1 min preview)'] = df['TMI']
df['TMI (30 min preview)'] = df_30['TMI']
# group by trip_id and make each group a separate df
grouped = df.groupby('TripID')

trip_dfs = [group for _, group in grouped]

# print average length of trip
trip_lengths = [len(trip) for trip in trip_dfs]

estimators = ['TMI']




# calculate the MAE and RMSE across all records in df
def calculate_error_metrics(df, estimator):
    """
    Calculate mean absolute error (MAE) and root mean squared error (RMSE) between Friction (meansured) and predicted value by each estimator.
    """
    # Calculate mean absolute error
    mae = np.mean(np.abs(df['Friction (measured)'] - df[estimator]))

    # Calculate root mean squared error
    rmse = np.sqrt(np.mean((df['Friction (measured)'] - df[estimator]) ** 2))

    # Calculate R^2
    ss_res = np.sum((df['Friction (measured)'] - df[estimator]) ** 2)
    ss_tot = np.sum((df['Friction (measured)'] - np.mean(df['Friction (measured)'])) ** 2)
    r2 = 1 - ss_res / ss_tot

    return mae, rmse, r2

# print the summary dictionary make sure each item appears in a separate line
for key, value in summary.items():
    print(key, value)
for estimator in estimators:

    print("Estimator: ", estimator)
    mae, rmse, r2 = calculate_error_metrics(df, estimator)
    print(f"MAE: {mae:.2f}")
    print(f"RMSE: {rmse:.2f}")
    print(f"R2: {r2:.2f}")




average_records_per_trip 115.2073732718894
average_preview_records 3.7747244451155066
training_size_log [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 92, 93, 94, 95, 96, 91, 92, 93, 94, 92, 92, 93, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 117, 118, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 124, 125, 126, 127, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 136, 137, 138, 136, 134, 135, 136, 137, 133, 134, 135, 136, 127, 128, 123, 124, 125, 126, 126, 127, 128, 129, 130, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 133, 130, 131, 132, 133, 134, 135, 

In [117]:
# plot the training_size_log
# full with epsilon 0.55

_, summary_ep55 = pd.read_pickle('Teconer_results/winter-shape-1046.pkl')

# full with epsilon 0.7

_, summary_ep7 = pd.read_pickle('Teconer_results/autumn-bee-1037.pkl')

# full with epsilon 0.9

_, summary_ep9 = pd.read_pickle('Teconer_results/firm-elevator-1052.pkl')


plt.close('all')
fig, ax = plt.subplots(figsize=(8, 4))
plt.plot(summary_ep55['training_size_log'])
plt.plot(summary_ep7['training_size_log'])
plt.plot(summary_ep9['training_size_log'])
plt.legend(['epsilon = 0.55', 'epsilon = 0.7', 'epsilon = 0.9'])
plt.xlabel('Number of Received Samples')
plt.ylabel('Number of Samples in the Memory')
plt.xlim(0, len(summary_ep55['training_size_log']) - 1)
plt.grid()
plt.tight_layout()

FileNotFoundError: [Errno 2] No such file or directory: 'Teconer_results/autumn-bee-1037.pkl'

In [132]:
%matplotlib qt
from matplotlib.gridspec import GridSpec
plt.close('all')
# make defaul font of the plots arial
plt.rcParams['font.sans-serif'] = "Arial"
plt.rcParams['font.family'] = "sans-serif"
# make the font size 12
plt.rcParams.update({'font.size': 13})

virgin = True
# fig, ax = plt.subplots(1, 2, figsize=(15, 4))
fig = plt.figure(figsize=(15, 4))
gs = GridSpec(1, 2, width_ratios=[1, 2])  # Adjust width ratios to make the first subplot narrower
ax0 = fig.add_subplot(gs[0])
ax1 = fig.add_subplot(gs[1])
ax = [ax0, ax1]
for trip in trip_dfs:
    trip['distance'] = calculate_distances(trip[['Latitude', 'Longitude']].values)
    if len(trip) < 500:
        # or trip['TripID'].iloc[0] not in [606, 617, 8304, 8307]

        continue

    ax[0].cla()
    ax[1].cla()

    # scatter plot of friction in a lat long 2D space using plt
    sc = ax[0].scatter(trip['Longitude'], trip['Latitude'], c=trip['Friction (measured)'], cmap='viridis', s=3, alpha=0.7, vmin=.2, vmax=.8)
    # mark the first and last point of the trip with a red and green star
    ax[0].plot(trip['Longitude'].iloc[0], trip['Latitude'].iloc[0], 'g*', markersize=10)
    ax[0].plot(trip['Longitude'].iloc[-1], trip['Latitude'].iloc[-1], 'r*', markersize=10)

    ax[0].set_title('Trip data (Trip ID: '+str(trip['TripID'].iloc[0])+')')
    ax[0].set_xlabel('Longitude')
    ax[0].set_ylabel('Latitude')
    # make axis equal and box square
    ax[0].axis('equal')
    ax[0].grid(True)


    if virgin:
        virgin = False
        # add colorbar to the scatter plot
        fig.colorbar(sc, ax=ax[0], label='Measured friction')
        fig.tight_layout()

    # plot friction over time using plt
    ax[1].plot(trip['distance'], trip['Friction (measured)'], label='Measured')
    # for method_name in estimators:
    ax[1].plot(trip['distance'], trip['TMI (1 min preview)'], label='TMI (1 min preview)', alpha=0.7)
    ax[1].plot(trip['distance'], trip['TMI (30 min preview)'], label='TMI (30 min preview)', alpha=0.7)
    # ax[1].set_title('Measured vs. Predicted - '+str(trip_id) + ' - ' + str(trip[1][0,0]))
    ax[1].set_title('Friction (Measured vs. Predicted)')
    ax[1].legend(loc='lower center', fontsize=12, ncol=3)
    ax[1].set_xlabel('Trip Distance [Km]')
    ax[1].set_ylabel('Friction')
    ax[1].grid(True)
    # set the figure name to the trip_id


    # make ylim .2 to 08
    ax[1].set_ylim(0, 1)

    # set the left right space 
    plt.subplots_adjust(left=0.06, right=0.99, top=0.9, bottom=0.13, wspace=0.12, hspace=0.4)
    # save png figure in \final_trips
    plt.savefig('final_trips/100K/trip_example_'+str(trip['TripID'].iloc[0])+'.png')
    # plt.tight_layout()
    plt.pause(0.01)


    # if key n is pressed, go to next trip
    # if plt.waitforbuttonpress():            
    #     continue