In [17]:
import os

import pandas as pd

from connections import AWS

$\textbf{Epidemiology: Model Retraining}$

The model from Aim 2 is retrained on the entire dataset. RMSE is reported relative to the estimated out-of-sample error (~4.61 Nm) from the previous study.

In [18]:
""" INITIALIZE AWS CONNECTION """
# Create the project's AWS helper and open its connection. The cell output
# reports a port check and a connection to the RDS endpoint.
# `aws_connection` is reused by later cells: load_subject_info() when building
# the model data, and .s3 / .bucket_name when uploading the trained models.
aws_connection = AWS()
aws_connection.connect()

[AWS]: Port 5433 is free.
[AWS]: Connected to RDS endpoint.


In [None]:
# read the raw model-development data from the local CSV
model_dev_data = pd.read_csv('storage/model_dev_raw.csv')

# drop the outlier subject (event detection issue)
keep_rows = model_dev_data['subject_id'] != 2676
model_dev_data = model_dev_data.loc[keep_rows]

# attach subject info; the left join keeps every remaining model-dev row
subject_info = aws_connection.load_subject_info()
model_data = pd.merge(model_dev_data, subject_info, how='left', on='subject_id')

# normalize the peak value by height * mass * 9.81 and place the result
# at column position 4
normalizer = model_data['height'] * model_data['mass'] * 9.81
model_data.insert(4, 'peak_value_normalized', model_data['peak_value'] / normalizer)


$\textbf{Model Training}$

Note the need to separate by year:
- __2015__: Trained w/out spin axis
- __> 2016__: Trained w/ spin axis

The number of estimators was held consistent with the LOOCV version from D2. Both retrained models slightly outperformed the LOOCV model, improving RMSE by 0.20 Nm (2015 model) and 0.28 Nm (all other years), respectively.

In [21]:
from services.training import train_rf_model

In [None]:
# Both models are fit on the full model_data frame with the same target and
# forest settings; the only difference is whether 'spin_axis' is a feature.
target_column = 'peak_value_normalized'
n_trees, seed = 250, 22

# feature order is kept exactly as originally specified
features_without_spin_axis = [
    'rel_speed', 'rel_side', 'rel_ht', 'spin_rate',
    'ax0', 'ay0', 'az0',
]
features_with_spin_axis = [
    'rel_speed', 'rel_side', 'rel_ht', 'spin_rate', 'spin_axis',
    'ax0', 'ay0', 'az0',
]

# 2015 model (no spin axis) -- recorded RMSE: 4.41 Nm
retrained_model_2015 = train_rf_model(
    train_data=model_data,
    features=features_without_spin_axis,
    target=target_column,
    params={'n_estimators': n_trees, 'random_state': seed},
)

# all other years (with spin axis) -- recorded RMSE: 4.33 Nm
retrained_model = train_rf_model(
    train_data=model_data,
    features=features_with_spin_axis,
    target=target_column,
    params={'n_estimators': n_trees, 'random_state': seed},
)

$\textbf{Uploads}$

In [42]:
import pickle

In [43]:
# Persist both retrained models locally, then mirror each file to S3.
# Every entry maps a file name to its fitted model; the same file name is used
# for the local copy (models/<name>) and the S3 key (epidemiology/models/<name>),
# so the two locations cannot drift apart.
model_dir = 'models'
s3_prefix = 'epidemiology/models'
model_files = {
    'evt_2015.pkl': retrained_model_2015,
    'evt_modern.pkl': retrained_model,
}

# create the output folder if needed so open() does not fail on a fresh checkout
os.makedirs(model_dir, exist_ok=True)

# save models to disk
for file_name, fitted_model in model_files.items():
    with open(f'{model_dir}/{file_name}', 'wb') as f:
        pickle.dump(fitted_model, f)

# upload to S3 (only after every model has been written locally)
for file_name in model_files:
    aws_connection.s3.upload_file(
        f'{model_dir}/{file_name}',
        aws_connection.bucket_name,
        f'{s3_prefix}/{file_name}'
    )
