In [None]:
#%pip install numpy
#%pip install pandas
#%pip install matplotlib
#%pip install scikit-learn
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sklearn as sk

In [None]:
# Summary table; parsed with pandas' default (comma) separator.
summary_csv = "garmin_edge_820/summary.csv"
summary = pd.read_csv(summary_csv)

# Per-record activity table; this file is semicolon-separated.
details_csv = "garmin_edge_820/4557226804_ACTIVITY_data.csv"
details = pd.read_csv(details_csv, sep=";")

In [None]:
# Strip the 'record.' prefix from every column name.
details = details.rename(columns=lambda col: col.replace('record.', ''))

# Turn the 'timestamp[s]' column (seconds) into a time-of-day value and use it
# as the index, named 'time'; the raw timestamp column is not kept.
timestamps = pd.to_datetime(details['timestamp[s]'], unit='s')
details = (
    details
    .drop(columns='timestamp[s]')
    .assign(time=timestamps.dt.time)
    .set_index("time")
)

In [None]:
# Training zones as inclusive (lower, upper) bounds. A zone's number is its
# 1-based position in the list; the last zone has no upper limit.
# hr_zones is applied to 'heart_rate[bpm]', power_zones to 'power[watts]'.
hr_zones = [(0, 128), (129, 146), (147, 156), (157, 165), (166, 174), (175, 179), (180, float('inf'))]
power_zones = [(0, 157), (158, 186), (187, 200), (201, 218), (219, 247), (248, 287), (288, float('inf'))]


def get_zone(rate, zones):
    """Return the 1-based number of the zone that ``rate`` falls into.

    Parameters
    ----------
    rate : number
        The measurement to classify (e.g. a heart rate or a power reading).
    zones : sequence of (lower, upper) pairs
        Inclusive zone bounds, ordered from the lowest zone to the highest.

    Returns
    -------
    int or None
        The zone number, or ``None`` when ``rate`` belongs to no zone
        (below the first zone, above the last one, or NaN).

    Notes
    -----
    The zone tables use integer bounds, which leaves a gap between one zone's
    upper bound and the next zone's lower bound (e.g. 128 and 129). A value
    inside such a gap, like 128.5, is assigned to the lower of the two zones
    instead of being left unclassified.
    """
    # Exact match against the inclusive bounds; the first matching zone wins.
    for zone, (lower, upper) in enumerate(zones, start=1):
        if lower <= rate <= upper:
            return zone

    # Fractional values lying strictly between two consecutive zones belong
    # to the lower one: zone k effectively extends up to zone k+1's lower bound.
    for zone, ((_, upper), (next_lower, _)) in enumerate(zip(zones, zones[1:]), start=1):
        if upper < rate < next_lower:
            return zone

    # Out of range or NaN (every comparison with NaN is False).
    return None

In [None]:
# Label every record with the zone number of its heart rate and of its power.
details = details.assign(
    hr_zone=lambda df: df['heart_rate[bpm]'].apply(lambda bpm: get_zone(bpm, hr_zones)),
    pwr_zone=lambda df: df['power[watts]'].apply(lambda watts: get_zone(watts, power_zones)),
)

In [None]:
# Zero-based running counter over the rows (0, 1, 2, ...).
# NOTE(review): this equals elapsed seconds only if the device logged exactly
# one record per second - TODO confirm against the raw timestamps.
details['time_since_start'] = np.arange(len(details), dtype=np.int64)

In [None]:
# Change in altitude and in distance with respect to the previous row.
# The first row has no predecessor, so its differences are set to 0.
for source_col, diff_col in (('altitude[m]', 'altitude_diff'),
                             ('distance[m]', 'distance_diff')):
    details[diff_col] = details[source_col].diff().fillna(0)

# Slope in percent: altitude change over distance change, times 100.
# Rows where the distance did not change get a slope of 0.
no_progress = details['distance_diff'] == 0
gradient = details['altitude_diff'] / details['distance_diff'] * 100
details['slope_percent'] = np.where(no_progress, 0, gradient)

In [None]:
# Show the dtype of every column of `details` (rendered as the cell output).
details.dtypes

In [None]:
# Count the missing values in each column of `details`.
details.isna().sum()

In [None]:
# Replace missing speed samples with the mean of the available ones.
speed = details['speed[m/s]']
details['speed[m/s]'] = speed.fillna(speed.mean())

### Data Visualization

In [None]:
# Overlay the main signals on one wide axis, all plotted against the row counter.
fig, ax = plt.subplots(figsize=(24, 6))
elapsed = details['time_since_start']
for column, label in (
    ('speed[m/s]', "speed"),
    ('heart_rate[bpm]', "bpm"),
    ('cadence[rpm]', "cadence"),
    ('altitude[m]', "altitude"),
    ('power[watts]', "power"),
):
    ax.plot(elapsed, details[column], label=label)
ax.set_xlabel("tempo")
ax.legend()
ax.grid(True)
plt.show()

## Models

#### Regressione lineare

In [None]:
# Target: the heart rate in bpm.
y = details['heart_rate[bpm]']

# Features: every remaining column except
#   * the four power-phase columns (excluded by the original analysis;
#     NOTE(review): reason not visible here - TODO confirm),
#   * the target itself,
#   * 'hr_zone', which is computed directly from the heart rate and would
#     therefore leak the target into the features.
X = details.drop(columns=['left_power_phase[degrees]',
                          'left_power_phase_peak[degrees]',
                          'right_power_phase[degrees]',
                          'right_power_phase_peak[degrees]',
                          'heart_rate[bpm]',
                          'hr_zone'])


In [None]:
from sklearn.metrics import mean_squared_error, r2_score

def relative_error(y_true, y_pred):
    """Return the mean absolute relative error ``mean(|y_true - y_pred| / |y_true|)``.

    Parameters
    ----------
    y_true : array-like
        Observed values (list, NumPy array or pandas Series).
    y_pred : array-like
        Predicted values, in the same order and of the same length as ``y_true``.

    Returns
    -------
    numpy.float64
        The error as a fraction (0.1 means 10 %).

    Notes
    -----
    Both inputs are converted to plain float arrays first, so the values are
    compared position by position. Without this, two pandas Series carrying
    different indexes would be aligned by label and produce NaN.
    A zero in ``y_true`` makes the corresponding term infinite (or NaN).
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    return np.mean(np.abs((y_true - y_pred) / y_true))

def print_eval(X, y, model):
    """Print three quality metrics of ``model`` evaluated on ``(X, y)``.

    Parameters
    ----------
    X : feature data accepted by ``model.predict``.
    y : true target values matching the rows of ``X``.
    model : fitted estimator exposing ``predict``.

    Prints the mean squared error, the mean relative error (as a percentage)
    and the R-squared coefficient. Nothing is returned.
    """
    y_hat = model.predict(X)
    mse, re, r2 = (
        mean_squared_error(y, y_hat),
        relative_error(y, y_hat),
        r2_score(y, y_hat),
    )
    report = (
        f"   Mean squared error: {mse:.5}",
        f"       Relative error: {re:.5%}",
        f"R-squared coefficient: {r2:.5}",
    )
    for line in report:
        print(line)

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20 % of the rows for validation; the fixed seed makes the
# shuffled split reproducible.
split_options = dict(test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X, y, **split_options)

In [None]:
from sklearn.linear_model import LinearRegression
# Plain least-squares linear regression, fitted on the training split
# and scored on the validation split.
lrm = LinearRegression().fit(X_train, y_train)
print_eval(X_val, y_val, lrm)

In [None]:
from sklearn.linear_model import Lasso
# Lasso (L1-penalised) regression with alpha=0.3, scored on the validation split.
lsm = Lasso(alpha=0.3).fit(X_train, y_train)
print_eval(X_val, y_val, lsm)

In [None]:
from sklearn.linear_model import Ridge
# Ridge (L2-penalised) regression with alpha=0.5, scored on the validation split.
rrm = Ridge(alpha=0.5).fit(X_train, y_train)
print_eval(X_val, y_val, rrm)

In [None]:
from sklearn.linear_model import ElasticNet
# Elastic net with alpha=0.1 and an even L1/L2 mix (l1_ratio=0.5),
# scored on the validation split.
enm = ElasticNet(alpha=0.1, l1_ratio=0.5).fit(X_train, y_train)
print_eval(X_val, y_val, enm)

In [None]:
# Side-by-side coefficients of all four linear models fitted above, one row
# per feature. The elastic-net column is included so that every model that
# was fitted on the plain features appears in the comparison.
coefficients = {
    "linear": lrm.coef_,
    "ridge": rrm.coef_,
    "lasso": lsm.coef_,
    "elasticnet": enm.coef_,
}
pd.DataFrame(coefficients, index=X_train.columns)

#### Regressione Polinomiale

In [None]:
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import Pipeline

In [None]:
# Degree-4 polynomial feature expansion (no bias column) feeding a plain
# linear regression; scored on the validation split.
lrm_poly = Pipeline(
    steps=[
        ("poly", PolynomialFeatures(degree=4, include_bias=False)),
        ("linear", LinearRegression()),
    ]
).fit(X_train, y_train)
print_eval(X_val, y_val, lrm_poly)

In [None]:
# Degree-2 polynomial feature expansion (no bias column) feeding a Lasso
# with alpha=0.7; scored on the validation split.
lsm_poly = Pipeline(
    steps=[
        ("poly", PolynomialFeatures(degree=2, include_bias=False)),
        ("lasso", Lasso(alpha=0.7)),
    ]
).fit(X_train, y_train)
print_eval(X_val, y_val, lsm_poly)

In [None]:
# Degree-3 polynomial feature expansion (no bias column) feeding a Ridge
# with alpha=1; scored on the validation split.
rrm_poly = Pipeline(
    steps=[
        ("poly", PolynomialFeatures(degree=3, include_bias=False)),
        ("ridge", Ridge(alpha=1)),
    ]
).fit(X_train, y_train)
print_eval(X_val, y_val, rrm_poly)

In [None]:
# Degree-2 polynomial feature expansion (no bias column) feeding an elastic
# net (alpha=0.1, l1_ratio=0.5); scored on the validation split.
enm_poly = Pipeline(
    steps=[
        ("poly", PolynomialFeatures(degree=2, include_bias=False)),
        ("elasticnet", ElasticNet(alpha=0.1, l1_ratio=0.5)),
    ]
).fit(X_train, y_train)
print_eval(X_val, y_val, enm_poly)

#### Regressione polinomiale con standardizzazione

In [None]:
from sklearn.preprocessing import StandardScaler

# Degree-3 polynomial features, standardised, then a plain linear regression;
# scored on the validation split.
lrm_poly_std = Pipeline(
    steps=[
        ("poly", PolynomialFeatures(degree=3, include_bias=False)),
        ("std", StandardScaler()),
        ("linear", LinearRegression()),
    ]
).fit(X_train, y_train)
print_eval(X_val, y_val, lrm_poly_std)

In [None]:
# Degree-3 polynomial features, standardised, then a Lasso with alpha=0.7;
# scored on the validation split.
lsm_poly_std = Pipeline(
    steps=[
        ("poly", PolynomialFeatures(degree=3, include_bias=False)),
        ("std", StandardScaler()),
        ("lasso", Lasso(alpha=0.7)),
    ]
).fit(X_train, y_train)
print_eval(X_val, y_val, lsm_poly_std)

In [None]:
# Degree-3 polynomial features, standardised, then a Ridge with alpha=1;
# scored on the validation split.
rrm_poly_std = Pipeline(
    steps=[
        ("poly", PolynomialFeatures(degree=3, include_bias=False)),
        ("std", StandardScaler()),
        ("ridge", Ridge(alpha=1)),
    ]
).fit(X_train, y_train)
print_eval(X_val, y_val, rrm_poly_std)

In [None]:
# Degree-3 polynomial features, standardised, then an elastic net
# (alpha=0.1, l1_ratio=0.5); scored on the validation split.
enm_poly_std = Pipeline(
    steps=[
        ("poly", PolynomialFeatures(degree=3, include_bias=False)),
        ("std", StandardScaler()),
        ("elasticnet", ElasticNet(alpha=0.1, l1_ratio=0.5)),
    ]
).fit(X_train, y_train)
print_eval(X_val, y_val, enm_poly_std)

#### Regressione con funzioni kernel

In [None]:
from sklearn.kernel_ridge import KernelRidge

In [None]:
# Standardised features feeding a kernel ridge regression with a degree-6
# polynomial kernel and alpha=20; scored on the validation split.
krm_poly = Pipeline(
    steps=[
        ("std", StandardScaler()),
        ("kernel", KernelRidge(alpha=20, kernel="poly", degree=6)),
    ]
).fit(X_train, y_train)
print_eval(X_val, y_val, krm_poly)

In [None]:
# Standardised features feeding a kernel ridge regression with an RBF kernel
# (alpha=0.1, gamma=0.1); scored on the validation split.
krm_rbf = Pipeline(
    steps=[
        ("std", StandardScaler()),
        ("kernel", KernelRidge(alpha=0.1, kernel="rbf", gamma=0.1)),
    ]
).fit(X_train, y_train)
print_eval(X_val, y_val, krm_rbf)