In [56]:
import pandas as pd
pd.set_option("display.max_columns", 100)
import numpy as np
np.set_printoptions(suppress=True)
from pathlib import Path
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from typing import Optional, Any, Union
import pickle

In [57]:
# Input directories, located two levels above the current working directory.
IN_CSV_DATA = Path.cwd().parent.parent / "data" / "4_data_split"   # train/test CSV files
IN_MODEL_DATA = Path.cwd().parent.parent / "data" / "5_models"     # pickled model files

In [58]:
# Ride-summary frames for the train/test split.
df_train = pd.read_csv(IN_CSV_DATA / 'ride_summary_train.csv')
df_test = pd.read_csv(IN_CSV_DATA / 'ride_summary_test.csv')

# "Prepared" counterparts of the same split, read from the same directory.
df_train_prepped = pd.read_csv(IN_CSV_DATA / 'prepared_train.csv')
df_test_prepped = pd.read_csv(IN_CSV_DATA / 'prepared_test.csv')

In [59]:
# Numerical model inputs, one column name per entry.
numerical_feature_cols = [
    'total_distance_mi',
    'total_weight_lbs',
    'avg_cruising_speed',
    'log_hours_since_last_ride',
    'active_time_ratio',
    'avg_climb_rate',
    'distance_training_intensity',
    'prior_training_load',
]
# No categorical inputs at present ('year' is commented out).
categorical_feature_cols = []  # ['year']
# Full feature list: numerical columns first, then categorical ones.
feature_cols = [*numerical_feature_cols, *categorical_feature_cols]

# Targets: one best-power column per duration, ordered from 4 seconds to 2 hours.
target_cols = [
    f'best_power_{duration}'
    for duration in (
        '4s', '5s', '10s', '20s', '30s',
        '1m', '2m', '3m', '4m', '5m', '6m', '10m', '20m', '30m', '40m',
        '1h', '2h',
    )
]

In [60]:
# Feature and target arrays for the full train/test splits.
# NOTE(review): these are built from df_train/df_test, whereas the per-ride arrays in this
# notebook are taken from df_test_prepped — confirm that the unprepared frames are intended here.
X_train = df_train[feature_cols].to_numpy()
y_train = df_train[target_cols].to_numpy()
X_test = df_test[feature_cols].to_numpy()
y_test = df_test[target_cols].to_numpy()

## Choosing Test Rides to View

In [61]:
# Ride 1: longer, yet faster training ride.
ride1 = df_test[df_test['ride_id'] == '0x66b8afe9']
ride1_index = ride1.index.values[0]  # row label of the first (expected only) match

# Look the same row label up in the prepared test frame and shape both features and
# targets as column vectors.
# NOTE(review): assumes df_test and df_test_prepped share row labels — TODO confirm.
X_ride1 = df_test_prepped.loc[ride1_index, feature_cols].to_numpy().reshape(-1, 1)
y_ride1 = df_test_prepped.loc[ride1_index, target_cols].to_numpy().reshape(-1, 1)

print(ride1_index)
ride1

8


Unnamed: 0,start_time,ride_id,total_distance_mi,total_weight_lbs,avg_cruising_speed,log_hours_since_last_ride,active_time_ratio,avg_climb_rate,distance_training_intensity,prior_training_load,year,best_power_4s,best_power_5s,best_power_10s,best_power_20s,best_power_30s,best_power_1m,best_power_2m,best_power_3m,best_power_4m,best_power_5m,best_power_6m,best_power_10m,best_power_20m,best_power_30m,best_power_40m,best_power_1h,best_power_2h
8,2024-08-11 12:34:49,0x66b8afe9,46.299179,235,15.895488,1.551606,0.739214,3.806591,1.0,0.405683,2024,532.65708,530.964034,521.218087,487.249073,433.071645,297.11166,231.833562,206.012703,195.7583,190.001457,183.873143,170.22464,163.255655,158.254779,152.983849,150.682753,144.963584


In [62]:
# Ride 2: medium length, but slower training ride.
ride2 = df_test[df_test['ride_id'] == '0x66a663d9']
ride2_index = ride2.index.values[0]  # row label of the first (expected only) match

# Look the same row label up in the prepared test frame and shape both features and
# targets as column vectors.
# NOTE(review): assumes df_test and df_test_prepped share row labels — TODO confirm.
X_ride2 = df_test_prepped.loc[ride2_index, feature_cols].to_numpy().reshape(-1, 1)
y_ride2 = df_test_prepped.loc[ride2_index, target_cols].to_numpy().reshape(-1, 1)

print(ride2_index)
ride2

0


Unnamed: 0,start_time,ride_id,total_distance_mi,total_weight_lbs,avg_cruising_speed,log_hours_since_last_ride,active_time_ratio,avg_climb_rate,distance_training_intensity,prior_training_load,year,best_power_4s,best_power_5s,best_power_10s,best_power_20s,best_power_30s,best_power_1m,best_power_2m,best_power_3m,best_power_4m,best_power_5m,best_power_6m,best_power_10m,best_power_20m,best_power_30m,best_power_40m,best_power_1h,best_power_2h
0,2024-07-28 15:29:29,0x66a663d9,18.693982,235,13.108158,1.590235,0.573706,9.54074,1.0,1.113042,2024,577.740871,569.04048,509.104453,413.119335,336.610169,220.235206,194.840634,178.408365,166.465069,165.042394,161.540179,145.218199,136.721616,119.053977,107.963093,77.8944,64.671311


In [63]:
# Ride 3: first day of OTET.
ride3 = df_test[df_test['ride_id'] == '0x67092425']
ride3_index = ride3.index.values[0]  # row label of the first (expected only) match

# Look the same row label up in the prepared test frame and shape both features and
# targets as column vectors.
# NOTE(review): assumes df_test and df_test_prepped share row labels — TODO confirm.
X_ride3 = df_test_prepped.loc[ride3_index, feature_cols].to_numpy().reshape(-1, 1)
y_ride3 = df_test_prepped.loc[ride3_index, target_cols].to_numpy().reshape(-1, 1)

print(ride3_index)
ride3

29


Unnamed: 0,start_time,ride_id,total_distance_mi,total_weight_lbs,avg_cruising_speed,log_hours_since_last_ride,active_time_ratio,avg_climb_rate,distance_training_intensity,prior_training_load,year,best_power_4s,best_power_5s,best_power_10s,best_power_20s,best_power_30s,best_power_1m,best_power_2m,best_power_3m,best_power_4m,best_power_5m,best_power_6m,best_power_10m,best_power_20m,best_power_30m,best_power_40m,best_power_1h,best_power_2h
29,2024-10-11 13:12:05,0x67092425,63.397111,265,11.791958,2.369794,0.681709,4.60517,1.0,0.016042,2024,502.81467,497.711775,463.83542,352.990245,253.719356,190.860033,149.463907,122.680457,115.935246,113.108256,113.288784,106.655962,94.35849,90.698001,87.647581,85.129643,77.059328


## Predict Ride Power Curves

In [64]:
# Load the pickled model stored as 'lasso_reg_RegChain.pkl' in the models directory.
# The context manager guarantees the file handle is closed once the model is loaded
# (the bare `open(...)` call left it open until garbage collection).
# NOTE: unpickling can execute arbitrary code — only load model files from a trusted source.
with open(IN_MODEL_DATA / 'lasso_reg_RegChain.pkl', 'rb') as model_file:
    model_RC = pickle.load(model_file)

In [68]:
# Predict targets for each selected ride. The ride features are stored as column
# vectors, so each is transposed into a single-row array before calling predict.
y_ride1_pred, y_ride2_pred, y_ride3_pred = (
    model_RC.predict(ride_features.T)
    for ride_features in (X_ride1, X_ride2, X_ride3)
)