In [1]:
import math
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

def haversine_distance(lat1, lon1, lat2, lon2, radius=6371):
    """
    Calculate the great circle distance between two points on a sphere
    using the Haversine formula.

    Parameters
    ----------
    lat1, lon1 : float
        Latitude and longitude of the first point, in decimal degrees.
    lat2, lon2 : float
        Latitude and longitude of the second point, in decimal degrees.
    radius : float, optional
        Radius of the sphere. The result is expressed in the same unit.
        Defaults to 6371 (earth radius in kilometers); use 3956 for miles.

    Returns
    -------
    float
        Great circle distance between the two points, in the unit of `radius`.
    """
    # Convert decimal degrees to radians
    lat1, lon1, lat2, lon2 = map(math.radians, (lat1, lon1, lat2, lon2))

    # Haversine formula
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    a = math.sin(dlat / 2) ** 2 + math.cos(lat1) * math.cos(lat2) * math.sin(dlon / 2) ** 2

    # Rounding can push `a` marginally above 1 for (near-)antipodal points,
    # which would make asin() raise ValueError. Written as a comparison so a
    # NaN input still propagates as NaN instead of being clamped.
    if a > 1.0:
        a = 1.0

    c = 2 * math.asin(math.sqrt(a))
    return c * radius

def calculate_s_values(positions):
    """
    Calculate normalized cumulative distances (s values) for a sequence of positions.

    Parameters
    ----------
    positions : sequence of (latitude, longitude)
        Consecutive positions in decimal degrees. Anything that supports
        `len()` and slicing and yields 2-item rows works (list of tuples,
        2-column array).

    Returns
    -------
    list of float
        One value per position: the distance travelled up to that position
        divided by the total distance, so the first value is 0 and, for a
        path with non-zero length, the last value is 1.
        An empty input gives an empty list. When the total distance is zero
        (a single position, or all positions identical) every value is 0.0,
        because the normalization is undefined in that case.
    """
    if len(positions) == 0:
        return []

    cumulative_distances = [0.0]  # Start with 0 for the first position

    # Accumulate the distance between each pair of consecutive points
    for (lat1, lon1), (lat2, lon2) in zip(positions[:-1], positions[1:]):
        step = haversine_distance(lat1, lon1, lat2, lon2)
        cumulative_distances.append(cumulative_distances[-1] + step)

    total_distance = cumulative_distances[-1]
    if total_distance == 0:
        # Nothing to normalize by; avoid dividing by zero.
        return [0.0] * len(cumulative_distances)

    # Normalize cumulative distances
    return [cd / total_distance for cd in cumulative_distances]



In [21]:
# Load the pickled trips collection.
# NOTE: unpickling can execute arbitrary code, so only load files that come
# from a trusted source.
#
# Other result files that can be loaded here instead (swap the path):
#   'trips_100K_msmsa.pkl'
#   'trips_100K_kswin.pkl'
#   'trips_100K_naive.pkl'
#   'trips_downtown_full.pkl'
#   'trips_downtown_100K_kswin.pkl'
trips_path = 'trips_100K_rf_tmi_and_ptmi.pkl'
trips = pd.read_pickle(trips_path)

n_trips = len(trips)
print('Number of Trips: ', n_trips)


Number of Trips:  217


Number of Estimators:  8


In [22]:
# collect all the records from the trips into a pandas dataframe
df = pd.DataFrame()
num_estimators = len(trips[0][3])
for trip in trips:
    trip_id = np.array(trip[0]).squeeze()
    trip_X = np.array(trip[1]).reshape(-1,8)
    trip_y = np.array(trip[2]).squeeze()
    trip_y_pred = np.array(trip[3]).reshape(-1, num_estimators)
    trip_mem_size = np.array(trip[4]).reshape(-1, num_estimators)
    # trip_s = calculate_s_values(trip_X[:,1:3])
    
    # print(trip_X.shape, trip_y.shape, trip_y_pred.shape, trip_mem_size.shape)
    # convert trip_X to dataframe
    df_temp = pd.DataFrame(trip_X, columns=['AbsoluteTime','Latitude', 'Longitude','Tsurf', 'Ta','Hours','Speed','Months'])
    # df_temp = pd.DataFrame(trip_X, columns=['AbsoluteTime','Latitude', 'Longitude','Tsurf', 'Ta','Hours','Speed'])
                    #     tmi.TMI(epsilon=0.9),
                    # tmi.TMI(probabilistic_prediction='ensemble', epsilon=0.9),
                    # msmsa.MSMSA(),
                    # kswin_reg.KSWIN(),
                    # adwin_reg.ADWIN(),
                    # ddm_reg.DDM(),
                    # ph_reg.PH(),
                    # naive_reg.Naive(),

    # df_temp['Measured'] = trip_y
    # df_temp['TMI'] = trip_y_pred[:,0]
    # df_temp['PTMI'] = trip_y_pred[:,1]
    # df_temp['MSMSA'] = trip_y_pred[:,2]
    # df_temp['KSWIN'] = trip_y_pred[:,3]
    # df_temp['ADWIN'] = trip_y_pred[:,4]
    # df_temp['DDM'] = trip_y_pred[:,5]
    # df_temp['PH'] = trip_y_pred[:,6]
    # df_temp['Naive'] = trip_y_pred[:,7]


    
    df_temp['Measured'] = trip_y
    df_temp['TMI'] = trip_y_pred[:,0]
    df_temp['PTMI'] = trip_y_pred[:,1]
    
    # df_temp['MSMSA'] = trip_y_pred[:,1]
    # df_temp['KSWIN'] = trip_y_pred[:,2]
    # df_temp['ADWIN'] = trip_y_pred[:,3]
    # df_temp['DDM'] = trip_y_pred[:,4]
    # df_temp['PH'] = trip_y_pred[:,5]
    # df_temp['Naive'] = trip_y_pred[:,6]


    # df_temp['s'] = trip_s
    df_temp['trip_id'] = trip_id
    # df_temp['Train Set Size (TMI)'] = trip_mem_size[:,0]
    # df_temp['Train Set Size (PTMI)'] = trip_mem_size[:,1]
    # df_temp['Train Set Size (MSMSA)'] = trip_mem_size[:,2]
    # df_temp['Train Set Size (KSWIN)'] = trip_mem_size[:,3]
    # df_temp['Train Set Size (ADWIN)'] = trip_mem_size[:,4]
    # df_temp['Train Set Size (DDM)'] = trip_mem_size[:,5]
    # df_temp['Train Set Size (PH)'] = trip_mem_size[:,6]
    # df_temp['Train Set Size (Naive)'] = trip_mem_size[:,7]


    df = pd.concat([df, df_temp])

# df['abs_error'] = np.abs(df['friction'] - df['friction_pred'])
# df['sqrd_error'] = (df['friction'] - df['friction_pred'])**2

# # print MAE and RMSE of all records
# print('MAE:', df['abs_error'].mean())
# print('RMSE:', np.sqrt(df['sqrd_error'].mean()))
# # print R^2 of all records
# print('R^2:', 1 - np.sum(df['sqrd_error'])/np.sum((df['friction'] - df['friction'].mean())**2))

# convert AbsoluteTime to datetime where the first record is 2018-01-01
df['DateTime'] = pd.to_datetime(df['AbsoluteTime'], unit='s', origin='2018-01-01')


%matplotlib qt
# plot a histogram of records vs datatime
df['DateTime'].hist(bins=60)
# make x ticks rotate 45 degrees
plt.xticks(rotation=45)
plt.ylabel('Number of Records')
# make the first and last on the month limit the x axis
plt.tight_layout()
# Set 'Datetime' as the index
df.set_index('DateTime', inplace=True)

# Resample by week, creating a list of DataFrames
weekly_dfs = [group for _, group in df.resample('W')]

In [24]:
%matplotlib qt

plt.close('all')
virgin = True
fig, ax = plt.subplots(1, 3, figsize=(15, 5))
for trip in trips[1:]:
    
    print(len(trip[2]))
    if len(trip[2]) < 200:
        continue
        
    trip_id = trip[0]
    trip_X = trip[1]
    # trip_y = trip[2]
    # trip_y_pred = trip[3]
    # trip_mem_size = trip[4]
    trip_y = np.array(trip[2]).squeeze()
    trip_y_pred = np.array(trip[3]).reshape(-1, num_estimators)
    trip_mem_size = np.array(trip[4]).reshape(-1, num_estimators)
    trip_s = calculate_s_values(trip_X[:,1:3])

    # convert trip_X to dataframe
    # print(trip_X.shape)
    df = pd.DataFrame(trip_X, columns=['AbsoluteTime','Latitude', 'Longitude','Tsurf', 'Ta','Hours','Speed','Months'])
    # df = pd.DataFrame(trip_X, columns=['AbsoluteTime','Latitude', 'Longitude','Tsurf', 'Ta','Hours','Speed'])

   # convert trip_X to dataframe
    # df = pd.DataFrame(trip_X, columns=['Latitude', 'Longitude'])
    df['AbsoluteTime'] = df['AbsoluteTime'] - df['AbsoluteTime'].min()
    df['friction'] = trip_y
    # df['friction_pred'] = trip_y_pred
    df['Measured'] = trip_y

    # df['TMI'] = trip_y_pred[:,0]
    # df['PTMI'] = trip_y_pred[:,1]
    # df['MSMSA'] = trip_y_pred[:,2]
    # df['KSWIN'] = trip_y_pred[:,3]
    # df['ADWIN'] = trip_y_pred[:,4]
    # df['DDM'] = trip_y_pred[:,5]
    # df['PH'] = trip_y_pred[:,6]
    # df['Naive'] = trip_y_pred[:,7]

    df['TMI'] = trip_y_pred[:,0]
    df['PTMI'] = trip_y_pred[:,1]

    # df['TMI'] = trip_y_pred[:,0]
    # df['MSMSA'] = trip_y_pred[:,1]
    # df['KSWIN'] = trip_y_pred[:,2]
    # df['ADWIN'] = trip_y_pred[:,3]
    # df['DDM'] = trip_y_pred[:,4]
    # df['PH'] = trip_y_pred[:,5]
    # df['Naive'] = trip_y_pred[:,6]


    df['s'] = trip_s
    
    



    # method_names = ['TMI', 'PTMI', 'MSMSA', 'KSWIN', 'ADWIN', 'DDM', 'PH', 'Naive']
    # method_names = ['TMI', 'MSMSA', 'KSWIN', 'ADWIN', 'DDM', 'PH', 'Naive']
    method_names = ['TMI', 'PTMI']
    # create subplots

    ax[0].cla()
    ax[1].cla()
    ax[2].cla()

    # # scatter plot of friction in a lat long 2D space
    # sns.scatterplot(x='Longitude', y='Latitude', data=df, hue='friction', ax=ax[0], s=10, palette='Spectral', hue_norm=(0.1,.9),legend=False)
    # ax[0].set_title('Measured - '+str(trip_id))

    # # scatter plot of friction in a lat long 2D space
    # sns.scatterplot(x='Longitude', y='Latitude', data=df, hue='friction_pred', ax=ax[1], s=10, palette='Spectral', hue_norm=(0.1,.9), legend=False)
    # ax[1].set_title('Predicted - '+str(trip_id))

    # scatter plot of friction in a lat long 2D space using plt
    sc = ax[0].scatter(df['Longitude'], df['Latitude'], c=df['friction'], cmap='viridis', s=3, alpha=0.7, vmin=.6, vmax=.8)
    ax[0].set_title('Measured - trip_id:'+str(trip_id))
    
    # scatter plot of friction in a lat long 2D space using plt
    sc = ax[1].scatter(df['Longitude'], df['Latitude'], c=df['TMI'], cmap='viridis', s=3, alpha=0.7, vmin=.6, vmax=.8)
    ax[0].set_xlabel('Longitude')
    ax[0].set_ylabel('Latitude')
    ax[1].set_xlabel('Longitude')
    ax[1].set_ylabel('Latitude')
    ax[1].set_title('Predicted')
    # ax[0].set_xlim(23.1, 24.10)
    # ax[0].set_ylim(60.30, 60.45)

    # ax[1].set_xlim(23.1, 24.10)
    # ax[1].set_ylim(60.30, 60.45)
    if virgin:
        virgin = False
        # add colorbar to the scatter plot
        fig.colorbar(sc, ax=ax[0], label='Friction')
        fig.colorbar(sc, ax=ax[1], label='Friction')
        fig.tight_layout()

    # plot friction over time using plt
    ax[2].plot(df['s'], df['friction'], label='Measured')

    ax[2].plot(df['s'], df['TMI'], label='TMI')
    ax[2].plot(df['s'], df['PTMI'], label='PTMI')
    # set the grid on
    ax[2].grid()
    # ax[2].plot(df['s'], df['PTMI'], label='PTMI')
    # ax[2].plot(df['s'], df['MSMSA'], label='MSMSA')
    # ax[2].plot(df['s'], df['KSWIN'], label='KSWIN')
    # ax[2].set_title('Measured vs. Predicted - '+str(trip_id) + ' - ' + str(trip[1][0,0]))
    ax[2].set_title('Friction (Measured vs. Predicted)')
    ax[2].legend()
    ax[2].set_xlabel('Trip Station (s)')
    ax[2].set_ylabel('Friction')

    # make ylim .2 to 08
    ax[2].set_ylim(0, 1)

    plt.tight_layout()
    plt.pause(0.01)

    # save the png figure in \figures directory
    plt.savefig('figures/fig_'+str(trip_id)+'_kswin.png')
    # if key n is pressed, go to next trip
    if plt.waitforbuttonpress():            
        continue


370
95
378
197
69
175
100
317
6
211
8
7
9
130
7
190
20
16
8
166
7
63
12
65
46
75
9
8
110
45
13
24
16
152
384
17
404
205
97
9
97
46
9
17
11
8
14
99
26
197
369
13
1
163
10
116
203
15
16
6
134
7
35
9
13
16
55
25
5
128
28
9
11
111
16
124
11
175
357
119
324
192
64
20
151
415
23
108
95
172
80
12
151
21
12
15
5
137
337
127
11
165
353
128
376
193
8
159
142
11
8
6
11
176
152
6
257
20
132
7
118
362
46
106
7
122
13
10
277
386
159
129
154
4
70
132
14
8
16
162
66
253
24
203
91
200
14
115
388
132
19
9
12
237
9
28
163
7
156
18
5
13
93
355
10
136
415
123
24
38
18
90
125
7
9
201
78
91
117
22
104
16
116
431
54
154
12
12
23
16
10
12
8
97
18
11
150
114
14
24
7
10
6
145
11
177
21
30
39
7
135
15
14
4
117
1
