In [12]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns


In [15]:
# unpickle trips.pkl
trips = pd.read_pickle('trips_downtown_full.pkl')
print(len(trips))

28


In [16]:
import math

def haversine_distance(lat1, lon1, lat2, lon2):
    """
    Calculate the great circle distance in kilometers between two points 
    on the earth specified in decimal degrees using the Haversine formula.
    """
    # Convert decimal degrees to radians 
    lon1, lat1, lon2, lat2 = map(math.radians, [lon1, lat1, lon2, lat2])

    # Haversine formula 
    dlon = lon2 - lon1 
    dlat = lat2 - lat1 
    a = math.sin(dlat/2)**2 + math.cos(lat1) * math.cos(lat2) * math.sin(dlon/2)**2
    c = 2 * math.asin(math.sqrt(a)) 
    r = 6371 # Radius of earth in kilometers. Use 3956 for miles
    return c * r

def calculate_s_values(positions):
    """
    Calculate normalized cumulative distances (s values) for a list of positions.
    Each position is a tuple (latitude, longitude).
    """
    distances = []
    cumulative_distances = [0]  # Start with 0 for the first position

    # Calculate distances between consecutive points
    for i in range(1, len(positions)):
        lat1, lon1 = positions[i - 1]
        lat2, lon2 = positions[i]
        dist = haversine_distance(lat1, lon1, lat2, lon2)
        distances.append(dist)
        cumulative_distances.append(cumulative_distances[-1] + dist)

    # Normalize cumulative distances
    total_distance = cumulative_distances[-1]
    s_values = [cd / total_distance for cd in cumulative_distances]

    return s_values

# Example positions (latitude, longitude)
positions = [
    (34.052235, -118.243683),  # Los Angeles
    (36.169941, -115.139832),  # Las Vegas
    (37.774929, -122.419416)   # San Francisco
]

# Calculate s values
s_values = calculate_s_values(positions)
s_values

[0.0, 0.35405627435077325, 1.0]

In [17]:
%matplotlib qt
# trip = trips[3000]
plt.close('all')
virgin = True
fig, ax = plt.subplots(1, 3, figsize=(15, 5))
for trip in trips[1:]:
    
    # calculate the station s value where s=0 is the start of the trip and s=1 is the end of the trip



        
    # if len(trip[1]) < 500:
    #     continue
    trip_id = trip[0]
    trip_X = trip[1]
    trip_y = trip[2]
    trip_y_pred = trip[3]
    trip_mem_size = trip[4]
    trip_s = calculate_s_values(trip_X[:,1:3])

    # convert trip_X to dataframe
    print(trip_X.shape)
    df = pd.DataFrame(trip_X, columns=['AbsoluteTime','Latitude', 'Longitude','Tsurf', 'Ta','Hours','Speed','Months'])



   # convert trip_X to dataframe
    # df = pd.DataFrame(trip_X, columns=['Latitude', 'Longitude'])
    df['AbsoluteTime'] = df['AbsoluteTime'] - df['AbsoluteTime'].min()
    df['friction'] = trip_y
    df['friction_pred'] = trip_y_pred
    df['s'] = trip_s

    

    # create subplots

    ax[0].cla()
    ax[1].cla()
    ax[2].cla()
    # # scatter plot of friction in a lat long 2D space
    # sns.scatterplot(x='Longitude', y='Latitude', data=df, hue='friction', ax=ax[0], s=10, palette='Spectral', hue_norm=(0.1,.9),legend=False)
    # ax[0].set_title('Measured - '+str(trip_id))

    # # scatter plot of friction in a lat long 2D space
    # sns.scatterplot(x='Longitude', y='Latitude', data=df, hue='friction_pred', ax=ax[1], s=10, palette='Spectral', hue_norm=(0.1,.9), legend=False)
    # ax[1].set_title('Predicted - '+str(trip_id))

    # scatter plot of friction in a lat long 2D space using plt
    sc = ax[0].scatter(df['Longitude'], df['Latitude'], c=df['friction'], cmap='viridis', s=3, alpha=0.7, vmin=.6, vmax=.8)
    ax[0].set_title('Measured - trip_id:'+str(trip_id))
    
    # scatter plot of friction in a lat long 2D space using plt
    sc = ax[1].scatter(df['Longitude'], df['Latitude'], c=df['friction_pred'], cmap='viridis', s=3, alpha=0.7, vmin=.6, vmax=.8)
    ax[0].set_xlabel('Longitude')
    ax[0].set_ylabel('Latitude')
    ax[1].set_xlabel('Longitude')
    ax[1].set_ylabel('Latitude')
    ax[1].set_title('Predicted')
    # ax[0].set_xlim(23.1, 24.10)
    # ax[0].set_ylim(60.30, 60.45)

    # ax[1].set_xlim(23.1, 24.10)
    # ax[1].set_ylim(60.30, 60.45)
    if virgin:
        virgin = False


        # add colorbar to the scatter plot
        fig.colorbar(sc, ax=ax[0], label='Friction')
        fig.colorbar(sc, ax=ax[1], label='Friction')
        fig.tight_layout()
    # # Add a colorbar to the plot with the information from the scatter plot
    # norm = plt.Normalize(.1, .9)
    # sm = plt.cm.ScalarMappable(cmap="Spectral", norm=norm)
    # sm.set_array([])
    # # Add the colorbar to the figure
    # fig.colorbar(sm, ax=ax[0], label='Hue Value')
    # fig.colorbar(sm, ax=ax[1], label='Hue Value')

    # # line plot of friction over time
    # sns.lineplot(x='AbsoluteTime', y='friction', data=df, ax=ax[2], label='Measured')
    # ax[2].set_title('Measured vs. Predicted - '+str(trip_id) + ' - ' + str(trip[1][0,0]))
    # # line plot of friction over time
    # sns.lineplot(x='AbsoluteTime', y='friction_pred', data=df, ax=ax[2], label='Predicted')

    # plot friction over time using plt
    ax[2].plot(df['s'], df['friction'], label='Measured')
    ax[2].plot(df['s'], df['friction_pred'], label='Predicted')
    # ax[2].set_title('Measured vs. Predicted - '+str(trip_id) + ' - ' + str(trip[1][0,0]))
    ax[2].set_title('Friction (Measured vs. Predicted)')
    ax[2].legend()
    ax[2].set_xlabel('Station (s)')
    ax[2].set_ylabel('Friction')



    # make ylim .2 to 08
    ax[2].set_ylim(0, 1)

    plt.tight_layout()
      
    
    plt.pause(0.01)

    # save the png figure in \figures directory
    plt.savefig('figures/fig_'+str(trip_id)+'.png')

    # if key n is pressed, go to next trip
    if plt.waitforbuttonpress():            
        continue


(2375, 8)
(151, 8)
(332, 8)
(6664, 8)
(7021, 8)
(4, 8)
(7, 8)


ZeroDivisionError: float division by zero

1001

In [14]:
# calculate the MAE of friction and friction_pred for each trip in trips
errors = []
for trip in trips[1:]:
    trip_id = np.array(trip[0]).squeeze()
    trip_X = np.array(trip[1]).squeeze()
    trip_y = np.array(trip[2]).squeeze()
    trip_y_pred = np.array(trip[3]).squeeze()
    # print(trip_y.shape, trip_y_pred.shape)
    errors.append(np.mean(np.abs(trip_y - trip_y_pred)))

print(np.mean(errors))

0.03830959162302333


In [10]:
# print number of records in all the trips
print(np.sum([len(trip[1]) for trip in trips]))

print(len(trips))


101646
55
