In [18]:
import sys
sys.path.insert(0, '/home/zahradnik/PycharmProjects/master_thesis')

In [19]:
from src.modules import conf, fit, df_columns, evl, preprocess
import pandas as pd
import warnings
import matplotlib.pyplot as plt

from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import FunctionTransformer
from scipy.ndimage import uniform_filter1d

import numpy as np
from feature_engine.datetime import DatetimeFeatures
from feature_engine.timeseries.forecasting import WindowFeatures
from sklearn.linear_model import Ridge, LinearRegression, RidgeCV,ElasticNetCV, LassoCV
from tqdm import tqdm
warnings.simplefilter("ignore")

In [20]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [21]:
%matplotlib inline
plt.rcParams['figure.figsize'] = [24, 8]
plt.rcParams['figure.dpi']

100.0

In [195]:
data = fit.load_pcls(
    athlete_name='zahradnik',
    activity_type='running',
    path_to_load=conf["Paths"]["pcl"],
)

100%|██████████| 386/386 [00:00<00:00, 2264.53it/s]


In [196]:
# ZONES ADD
for x in range(len(data)):
    data[x] = data[x][df_columns]
    mean_hr = np.mean(data[x].heart_rate)
    if mean_hr < 140:
        zone = 1
    elif mean_hr > 140 and mean_hr < 156:
        zone = 2
    elif mean_hr > 156 and mean_hr < 166:
        zone = 3
    elif mean_hr > 166 and mean_hr < 175:
        zone = 4
    elif mean_hr > 175:
        zone = 5
    data[x]['hr_zone'] = zone

In [197]:
low_dist = []
high_dist = []
for act in data:
    if np.max(act.distance) > 10000:
        high_dist.append(act)
    else:
        low_dist.append(act)
len(low_dist),len(high_dist)

(149, 237)

In [198]:
# race_index = fit.get_race_index(high_dist, "2022-10-02-10-30") # HRADEC - 1:16 - 1:14
# race_index = fit.get_race_index(low_dist, "2022-11-13-10-59") # VELKA KUNRATICKA 15:20 - 13:20
# race_index = fit.get_race_index(high_dist, "2022-10-08-11-18") # BEROUNKA RUN 90:30 - 95:30
race_index = fit.get_race_index(high_dist, "2022-12-31-11-00") # SILVESTRAK 18:20 - 17:20
print(race_index)
train_df = fit.clean_data(pd.concat(high_dist[0:race_index]))
test_df = data[fit.get_race_index(data, "2022-12-31-11-00")]
len(train_df),len(test_df)

224


(701542, 1037)

In [199]:
def drop_features(df: pd.DataFrame, endog: str):

    true_values = df[endog]
    df.drop(endog,axis=1,inplace=True)
    return df, true_values

In [200]:
train_df, speed_train = drop_features(train_df, "enhanced_speed")
test_df, speed_test = drop_features(test_df, "enhanced_speed")

train_df ,heart_train = drop_features(train_df, "heart_rate")
test_df, heart_test = drop_features(test_df, "heart_rate")

train_df ,cad_train = drop_features(train_df, "cadence")
test_df, cad_test = drop_features(test_df, "cadence")

In [201]:
train_df['date'] = train_df.index
test_df['date'] = test_df.index

In [202]:
proc_pipeline = Pipeline([
    ("dt_features",
     DatetimeFeatures(features_to_extract=['month','week','hour','minute','second']))
])

In [203]:
lagged_cols = ['slope_steep','slope_ascent','slope_descent']
lagged = 18
for lag in range(1,lagged):
    proc_pipeline.steps.append([f"lag_feature_{lag}",
                           WindowFeatures(
                           variables=lagged_cols,
                           window=lag)])

In [282]:
pipelined_train_df = proc_pipeline.fit_transform(train_df)
pipelined_test_df = proc_pipeline.fit_transform(test_df)

In [283]:
imputer = SimpleImputer(strategy="constant",fill_value=0)

pipelined_train_df = pd.DataFrame(imputer.fit_transform(pipelined_train_df),columns=pipelined_train_df.columns)
pipelined_test_df = pd.DataFrame(imputer.fit_transform(pipelined_test_df),columns=pipelined_test_df.columns)

In [263]:
import itertools
models = [
    ['LassoCV', LassoCV],
    ['RidgeCV', RidgeCV],
    ['ElasticNetCV', ElasticNetCV]]


pred_cols = [
    ['cadence', cad_train, cad_test],
    ['heart_rate', heart_train, heart_test],
    ['enhanced_speed', speed_train, speed_test]]
percent_delay = int(len(test_df) * 0.02)

In [264]:
def get_comb(comb: list, pred_cols: list):
    return [sub[item] for item in range(len(pred_cols)) for sub in [comb, pred_cols]]

In [305]:
# heart_rate_window_17_mean_x
def calc_lag(df: pd.DataFrame, col: list, lagged = 18):
    for lag in range(1,lagged):
        wft = WindowFeatures(variables=col,
                         window=lag)
        df = wft.fit_transform(df)
    return df

In [266]:
def fillna_df(train: pd.DataFrame):
    return pd.DataFrame(imputer.fit_transform(train),columns=train.columns)

# Cadence

In [267]:
rmse_cad = []
pred_cad = []
for model_name, model_type in models:
    model = model_type()
    model.fit(pipelined_train_df, cad_train)
    y_pred = model.predict(pipelined_test_df)
    rmse_cad.append(evl.rmse(actual=cad_test,predicted=y_pred))
    pred_cad.append(y_pred)
list(zip(rmse_cad))

[(6.880891292715592,), (1.7506372782594255,), (6.880742863450284,)]

In [284]:
pipelined_train_df['cadence'] = cad_train
pipelined_train_df = fillna_df(pipelined_train_df)
pipelined_train_df = calc_lag(pipelined_train_df, ['cadence'])
pipelined_train_df = fillna_df(pipelined_train_df)

# Heart Rate

In [274]:
rmse_hr = []
pred_hr = []
#  pred_cad[rmse_cad.index(np.min(rmse))]
for model_name, model_type in models:
    for pred in pred_cad:

        model_test_df = pipelined_test_df

        model_test_df['cadence'] = pred
        model_test_df = fillna_df(model_test_df)
        model_test_df = calc_lag(model_test_df, ['cadence'])
        model_test_df = fillna_df(model_test_df)

        model = model_type()
        model.fit(pipelined_train_df, heart_train)
        y_pred = model.predict(model_test_df)
        rmse_hr.append(evl.rmse(actual=heart_test,predicted=y_pred))
        pred_hr.append(y_pred)
list(rmse_hr)

[42.22019426863714,
 42.22019426863714,
 42.22019426863714,
 10.509164244160708,
 10.509164244160708,
 10.509164244160708,
 42.22017617281818,
 42.22017617281818,
 42.22017617281818]

In [285]:
pipelined_train_df['heart_rate'] = heart_train
pipelined_train_df = fillna_df(pipelined_train_df)
pipelined_train_df = calc_lag(pipelined_train_df, ['heart_rate'])
pipelined_train_df = fillna_df(pipelined_train_df)
for x in range(10,110,10):
    pipelined_train_df[f'moved_heart_rate_{x}'] = uniform_filter1d(pipelined_train_df.heart_rate, size=x)

In [306]:
rmse_speed = []
pred_speed = []

for model_name, model_type in models:
    for pred_c in pred_cad:

        model_test_df = pipelined_test_df

        model_test_df['cadence'] = pred_c
        model_test_df = fillna_df(model_test_df)
        model_test_df = calc_lag(model_test_df, ['cadence'])
        model_test_df = fillna_df(model_test_df)

        for pred in pred_hr:

            model_test_df_hr = model_test_df

            model_test_df_hr['heart_rate'] = pred
            model_test_df_hr = fillna_df(model_test_df_hr)
            model_test_df_hr = calc_lag(model_test_df_hr, ['heart_rate'])
            model_test_df_hr = fillna_df(model_test_df_hr)

            for x in range(10,110,10):
                model_test_df_hr[f'moved_heart_rate_{x}'] = uniform_filter1d(model_test_df_hr.heart_rate, size=x)

            model = model_type()
            model.fit(pipelined_train_df, heart_train)
            y_pred = model.predict(model_test_df_hr)
            rmse_speed.append(evl.rmse(actual=speed_test,predicted=y_pred))
            pred_speed.append(y_pred)
    break
list(rmse_speed)

[123.99995967087614,
 123.99995967087614,
 123.99995967087614,
 123.99995967087614,
 123.99995967087614,
 123.99995967087614,
 123.99995967087614,
 123.99995967087614,
 123.99995967087614,
 123.99995967087614,
 123.99995967087614,
 123.99995967087614,
 123.99995967087614,
 123.99995967087614,
 123.99995967087614,
 123.99995967087614,
 123.99995967087614,
 123.99995967087614,
 123.99995967087614,
 123.99995967087614,
 123.99995967087614,
 123.99995967087614,
 123.99995967087614,
 123.99995967087614,
 123.99995967087614,
 123.99995967087614,
 123.99995967087614]

In [None]:
import math
np.mean(speed_test),np.mean(pred_speed),max(test_df.distance)
time = ((max(test_df.distance)/1000) / np.mean(pred_speed))*60
minutes = math.floor(time)
seconds = round((time-minutes)*60)
f'Final time: {minutes}:{seconds}'