# Inizializzazione

In [None]:
# %pip install numpy
# %pip install pandas
# %pip install matplotlib
# %pip install scikit-learn
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sklearn as sk

In [None]:
# Load the summary file and the activity data file; both are read as
# semicolon-separated CSV.
summary = pd.read_csv("garmin_edge_820/summary.csv", sep=";")
details = pd.read_csv("garmin_edge_820/4557226804_ACTIVITY_data.csv", sep=";")

In [None]:
# Data cleaning: remove the power-phase and left/right balance columns.
details = details.drop(columns=[
    'left_power_phase[degrees]',
    'left_power_phase_peak[degrees]',
    'right_power_phase[degrees]',
    'right_power_phase_peak[degrees]',
    'left_right_balance',
])
# Replace the raw timestamp column (seconds) with a time-of-day value and
# use it as the index.
details['time'] = pd.to_datetime(
    details.pop('timestamp[s]'), unit='s'
).dt.time
details = details.set_index("time")

# Elapsed counter since the start of the activity: 0 for the first row,
# growing by one per row.
details['time_since_start'] = range(len(details))

# Heart-rate and power zone tables used to classify each reading.
# Each entry is a (lower, upper) pair; get_zone treats both bounds as
# inclusive, and the last zone has no upper limit.

hr_zones = [(0, 128), (129, 146), (147, 156), (157, 165),
            (166, 174), (175, 179), (180, float('inf'))]
power_zones = [(0, 157), (158, 186), (187, 200), (201, 218),
               (219, 247), (248, 287), (288, float('inf'))]

def get_zone(rate, zones):
    """Return the 1-based number of the zone that contains ``rate``.

    Args:
        rate: The reading to classify.
        zones: Sliceable sequence of ``(lower, upper)`` pairs with inclusive
            bounds; zone numbers follow the order of the sequence.

    Returns:
        The number of the first zone whose inclusive range contains
        ``rate``. A reading lying strictly between the upper bound of one
        zone and the lower bound of the following one (e.g. 128.5 when the
        zones are 0-128 and 129-146) is assigned to the lower of the two.
        ``None`` is returned when nothing matches, which includes NaN
        readings and values outside the whole table.
    """
    # Exact match first, so every value that was classified before keeps
    # the same zone.
    for zone, (lower, upper) in enumerate(zones, start=1):
        if lower <= rate <= upper:
            return zone
    # Fractional readings can fall into the gap left by integer boundaries.
    neighbours = zip(zones, zones[1:])
    for zone, ((_, upper), (next_lower, _)) in enumerate(neighbours, start=1):
        if upper < rate < next_lower:
            return zone
    return None

# Label every row with its heart-rate zone and its power zone.
details['hr_zone'] = details['heart_rate[bpm]'].apply(
    get_zone, zones=hr_zones)
details['pwr_zone'] = details['power[watts]'].apply(
    get_zone, zones=power_zones)

# Row-to-row change in altitude and distance; undefined differences
# (such as the first row, which has no predecessor) are set to 0.
details['altitude_diff'] = details['altitude[m]'].diff().fillna(0)
details['distance_diff'] = details['distance[m]'].diff().fillna(0)

# Slope in percent: rise over run times 100, forced to 0 where no distance
# was covered.
details['slope_percent'] = np.where(
    details['distance_diff'].eq(0),
    0,
    details['altitude_diff'].div(details['distance_diff']).mul(100),
)

In [None]:
# Summary statistics of the cleaned frame.
details.describe()

In [None]:
# Column data types.
details.dtypes

In [None]:
# Number of missing values per column.
details.isna().sum()

In [None]:
# Fill missing speed readings with the mean of the speed column.
details['speed[m/s]'] = details['speed[m/s]'].fillna(details['speed[m/s]'].mean())

# Data Visualization

In [None]:
# Overlay the main signals on one wide chart, all plotted against the
# elapsed counter.
plt.figure(figsize=(24, 6))
for column, label in (
    ('power[watts]', "power"),
    ('cadence[rpm]', "cadence"),
    ('speed[m/s]', "speed"),
    ('heart_rate[bpm]', "bpm"),
    ('altitude[m]', "altitude"),
):
    plt.plot(details['time_since_start'], details[column], label=label)
plt.xlabel("tempo")
plt.legend()
plt.grid(True)
plt.show()

# Previsioni

In [None]:
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Lasso, Ridge, ElasticNet
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.kernel_ridge import KernelRidge
from sklearn.model_selection import KFold, cross_validate, GridSearchCV

In [None]:
# Relative error metric
def relative_error(y_true, y_pred):
    """Return the mean absolute relative error of ``y_pred`` w.r.t. ``y_true``.

    Both inputs are converted to float arrays first, so plain sequences are
    accepted and two pandas Series are compared element by element in
    positional order rather than being aligned on their index labels.

    Args:
        y_true: Array-like of reference values.
        y_pred: Array-like of predictions, same length as ``y_true``.

    Returns:
        ``mean(|y_true - y_pred| / |y_true|)`` as a float. Entries where
        ``y_true`` is 0 make the result ``inf`` or ``nan``.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    # Division by a zero reference value is left to produce inf/nan without
    # emitting a runtime warning.
    with np.errstate(divide="ignore", invalid="ignore"):
        return np.mean(np.abs((y_true - y_pred) / y_true))


def print_eval(X, y, model):
    """Print MSE, relative error and R-squared of ``model`` on ``(X, y)``.

    Args:
        X: Feature matrix passed to ``model.predict``.
        y: True target values for the rows of ``X``.
        model: Fitted estimator exposing ``predict``.
    """
    predicted = model.predict(X)
    # All three metrics are computed before anything is printed.
    mse, rel_err, r2 = (
        mean_squared_error(y, predicted),
        relative_error(y, predicted),
        r2_score(y, predicted),
    )
    print(f"   Mean squared error: {mse:.5}")
    print(f"       Relative error: {rel_err:.5%}")
    print(f"R-squared coefficient: {r2:.5}")


# Shared 5-fold splitter (shuffled, fixed seed) reused by every
# cross-validation and grid search below.
kf = KFold(n_splits=5, shuffle=True, random_state=42)

## Previsione battito

In [None]:
# Features and target for the heart-rate regression. 'hr_zone' is computed
# directly from the heart rate, so it is removed together with the target:
# keeping it would leak the answer into the features.
X = details.drop(['heart_rate[bpm]', 'hr_zone'], axis=1)
y = details['heart_rate[bpm]']
# Hold out 20% of the rows for validation (fixed seed for reproducibility).
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42)

### Regressione lineare

In [None]:
# Baseline: plain linear regression fitted on the training split and
# evaluated on the validation split.
lrm = LinearRegression()
lrm.fit(X_train, y_train)
print_eval(X_val, y_val, lrm)

In [None]:
# Lasso regression (alpha=0.3), evaluated on the validation split.
lsm = Lasso(alpha=0.3)
lsm.fit(X_train, y_train)
print_eval(X_val, y_val, lsm)

In [None]:
# Ridge regression (alpha=0.5), evaluated on the validation split.
rrm = Ridge(alpha=0.5)
rrm.fit(X_train, y_train)
print_eval(X_val, y_val, rrm)

In [None]:
# Elastic net (alpha=0.1, l1_ratio=0.5), evaluated on the validation split.
enm = ElasticNet(alpha=0.1, l1_ratio=0.5)
enm.fit(X_train, y_train)
print_eval(X_val, y_val, enm)

In [None]:
# Side-by-side coefficients of every linear model fitted above, one row per
# feature. The elastic net is included so that all four models of this
# section can be compared.
pd.DataFrame({
    "linear": lrm.coef_,
    "ridge": rrm.coef_,
    "lasso": lsm.coef_,
    "elasticnet": enm.coef_,
}, index=X_train.columns)

### Regressione Polinomiale

In [None]:
# Degree-4 polynomial feature expansion followed by linear regression.
lrm_poly = Pipeline([
    ("poly", PolynomialFeatures(degree=4, include_bias=False)),
    ("linear", LinearRegression())
])
lrm_poly.fit(X_train, y_train)
print_eval(X_val, y_val, lrm_poly)

In [None]:
# Degree-2 polynomial feature expansion followed by Lasso (alpha=0.7).
lsm_poly = Pipeline([
    ("poly", PolynomialFeatures(degree=2, include_bias=False)),
    ("lasso", Lasso(alpha=0.7))
])
lsm_poly.fit(X_train, y_train)
print_eval(X_val, y_val, lsm_poly)

In [None]:
# Degree-3 polynomial feature expansion followed by Ridge (alpha=1).
rrm_poly = Pipeline([
    ("poly", PolynomialFeatures(degree=3, include_bias=False)),
    ("ridge", Ridge(alpha=1))
])
rrm_poly.fit(X_train, y_train)
print_eval(X_val, y_val, rrm_poly)

In [None]:
# Degree-2 polynomial feature expansion followed by elastic net
# (alpha=0.1, l1_ratio=0.5).
enm_poly = Pipeline([
    ("poly", PolynomialFeatures(degree=2, include_bias=False)),
    ("elasticnet", ElasticNet(alpha=0.1, l1_ratio=0.5))
])
enm_poly.fit(X_train, y_train)
print_eval(X_val, y_val, enm_poly)

### Regressione polinomiale con standardizzazione

In [None]:
# Degree-3 polynomial features, standardized, then linear regression.
lrm_poly_std = Pipeline([
    ("poly", PolynomialFeatures(degree=3, include_bias=False)),
    ("std", StandardScaler()),
    ("linear", LinearRegression())
])
lrm_poly_std.fit(X_train, y_train)
print_eval(X_val, y_val, lrm_poly_std)

In [None]:
# Degree-3 polynomial features, standardized, then Lasso (alpha=0.7).
lsm_poly_std = Pipeline([
    ("poly", PolynomialFeatures(degree=3, include_bias=False)),
    ("std", StandardScaler()),
    ("lasso", Lasso(alpha=0.7))
])
lsm_poly_std.fit(X_train, y_train)
print_eval(X_val, y_val, lsm_poly_std)

In [None]:
# Degree-3 polynomial features, standardized, then Ridge (alpha=1).
rrm_poly_std = Pipeline([
    ("poly", PolynomialFeatures(degree=3, include_bias=False)),
    ("std", StandardScaler()),
    ("ridge", Ridge(alpha=1))
])
rrm_poly_std.fit(X_train, y_train)
print_eval(X_val, y_val, rrm_poly_std)

In [None]:
# Degree-3 polynomial features, standardized, then elastic net
# (alpha=0.1, l1_ratio=0.5).
enm_poly_std = Pipeline([
    ("poly", PolynomialFeatures(degree=3, include_bias=False)),
    ("std", StandardScaler()),
    ("elasticnet", ElasticNet(alpha=0.1, l1_ratio=0.5))
])
enm_poly_std.fit(X_train, y_train)
print_eval(X_val, y_val, enm_poly_std)

### Regressione con funzioni kernel

In [None]:
# Standardized inputs fed to kernel ridge regression with a degree-6
# polynomial kernel (alpha=20).
krm_poly = Pipeline([
    ("std", StandardScaler()),
    ("kernel", KernelRidge(alpha=20, kernel="poly", degree=6))
])
krm_poly.fit(X_train, y_train)
print_eval(X_val, y_val, krm_poly)

In [None]:
# Standardized inputs fed to kernel ridge regression with an RBF kernel
# (alpha=0.1, gamma=0.1).
krm_rbf = Pipeline([
    ("std", StandardScaler()),
    ("kernel", KernelRidge(alpha=0.1, kernel="rbf", gamma=0.1))
])
krm_rbf.fit(X_train, y_train)
print_eval(X_val, y_val, krm_rbf)

### Cross validation

In [None]:
# Cross-validate the linear model on the full X/y with the shared splitter
# and keep only the mean and standard deviation of each reported column.
pd.DataFrame(cross_validate(lrm, X, y, cv=kf, return_train_score=True)
             ).describe().loc[["mean", "std"]]

In [None]:
# Same cross-validation summary (mean and std per column) for the
# polynomial linear-regression pipeline.
pd.DataFrame(cross_validate(lrm_poly, X, y, cv=kf,
             return_train_score=True)).describe().loc[["mean", "std"]]

In [None]:
# Same cross-validation summary (mean and std per column) for the RBF
# kernel ridge pipeline.
pd.DataFrame(cross_validate(krm_rbf, X, y, cv=kf,
             return_train_score=True)).describe().loc[["mean", "std"]]

#### Fine tuning iperparametri

In [None]:
# Pipeline whose polynomial degree, optional standardization step and Lasso
# alpha are tuned by grid search.
lassoCV = Pipeline([
    ("poly", PolynomialFeatures(include_bias=False)),
    ("std", None),
    ("lasso", Lasso())
])

# Search space: degrees 1-4, with or without scaling, and 10 alphas spaced
# logarithmically between 1e-3 and 1.
grid = {
    "poly__degree": np.arange(1, 5),
    "std": [None, StandardScaler()],
    "lasso__alpha": np.logspace(-3, 0, 10)
}

lasso_gs = GridSearchCV(lassoCV, grid, cv=kf, return_train_score=True)
lasso_gs.fit(X_train, y_train)
print_eval(X_val, y_val, lasso_gs)

# Ranked results go last: a notebook cell only renders its final
# expression, so a table built mid-cell would be silently discarded.
lasso_results = pd.DataFrame(lasso_gs.cv_results_).sort_values(
    "mean_test_score", ascending=False)
lasso_results

In [None]:
def kernel_ridgeCV(models):
    """Grid-search every kernel ridge pipeline and collect the CV results.

    Uses the module-level ``kf`` splitter and the ``X_train`` / ``y_train``
    split, scoring each candidate with R2.

    Args:
        models: Iterable of ``(pipeline, param_grid)`` pairs. Each pipeline
            must contain a step named ``"kernel_ridge"``.

    Returns:
        Dict mapping the kernel name of each pipeline (e.g. ``"poly"``) to
        the ``cv_results_`` of its grid search.
    """
    scores = {}
    for mod, grid in models:
        gs = GridSearchCV(mod, grid, cv=kf, scoring='r2',
                          return_train_score=True, n_jobs=-1)
        gs.fit(X_train, y_train)
        # Indexing a Pipeline with a string looks up a step name, so the
        # kernel has to be read from the "kernel_ridge" step itself.
        kernel_name = str(mod.named_steps['kernel_ridge'].kernel)
        scores[kernel_name] = gs.cv_results_
    return scores

# Candidate models: one pipeline and parameter grid per kernel
models = [
    (
        Pipeline([
            ("std", None),
            ("kernel_ridge", KernelRidge(kernel='poly'))
        ]),
        {
            'kernel_ridge__degree': range(1, 6),
            'kernel_ridge__alpha': np.logspace(-3, 0, 10)
        }
    ),
    (
        Pipeline([
            ("std", None),
            ("kernel_ridge", KernelRidge(kernel='rbf'))
        ]),
        {
            'kernel_ridge__alpha': np.logspace(-3, 0, 10),
            'kernel_ridge__gamma': np.logspace(-3, 0, 10)
        }
    )
]

# Run the search and keep the results returned by the function
scores = kernel_ridgeCV(models)

# Report the results of every kernel
for model, result in scores.items():
    print("Model:", model)
    print("Mean R2 score:", result['mean_test_score'])
    print("Params:", result['params'])
    print("-------------------------")


## Previsione Potenza

In [None]:
# Power regression is restricted to the rows where power is non-zero.
copy_details = details[details['power[watts]'] != 0]
# Features: every column except the target and 'pwr_zone'. The zone is
# computed directly from the power, so keeping it would leak the target
# into the inputs.
features = copy_details.drop(['power[watts]', 'pwr_zone'], axis=1)

# Target column
target = copy_details['power[watts]']

# Split into training and test sets (20% held out, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(
    features, target, test_size=0.2, random_state=42)

# Generic pipeline and parameter grids for the grid search that is
# currently disabled below; the regressor step is filled in by the grid.
pipe = Pipeline([
    ("poly", PolynomialFeatures(include_bias=False)),
    ("std", None),
    ("regressor", None)
])

grid_common = {
    "poly__degree": np.arange(1, 5),
    "std": [None, StandardScaler()],
}

grid_regressors = [
    {
        "regressor": [LinearRegression()],
    },
    {
        "regressor": [Lasso()],
        "regressor__alpha": np.logspace(-3, 0, 10),
    },
    {
        "regressor": [Ridge()],
        "regressor__alpha": np.logspace(-3, 0, 10),
    },
    {
        "regressor": [ElasticNet()],
        "regressor__alpha": np.logspace(-3, 0, 10),
        "regressor__l1_ratio": np.linspace(0, 1, 5)
    },
]

# One grid per regressor, each merged with the common settings
grid = [dict(grid_common, **params) for params in grid_regressors]

# Grid-search alternative (disabled):
#model = GridSearchCV(pipe, grid, cv=kf, scoring='r2', n_jobs=-1)
# Model actually used: standardized inputs + RBF kernel ridge regression
model = Pipeline([
    ("std", StandardScaler()),
    ("kernel", KernelRidge(alpha=0.1, kernel="rbf", gamma=0.1))
])
# Fit on the training data and report the metrics on the test set
model.fit(X_train, y_train)
print_eval(X_test, y_test, model)
# Predictions on the test set
y_pred = model.predict(X_test)




# Ora puoi utilizzare il modello addestrato per predire nuovi dati
#new_data = pd.DataFrame([[4, 140, 90]], columns=features)
#watts_pred = model.predict(new_data)
#print("Potenza predetta:", watts_pred)

# Classificazione

## Classificazione della potenza

In [None]:
# Power-zone classification data: elapsed counter and power as features,
# the power zone as label.
X = details[["time_since_start", "power[watts]"]]
y = details['pwr_zone']

# Hold out 20% of the rows (fixed seed).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)

# One colour per zone number (1-7), used to paint the training samples in
# the scatter plot below.
diagnosis_color_map = {1: "yellow", 2: "orange", 3: "red",
                       4: "purple", 5: "blue", 6: "green", 7: "black"}
X_train.plot.scatter("time_since_start", "power[watts]",
                     c=y_train.map(diagnosis_color_map),
                     figsize=(24, 6))

In [None]:
# Number of rows recorded in each power zone.
zone_counts = details['pwr_zone'].value_counts()

# Horizontal bar chart of the counts, one bar per zone.
zone_counts.plot.barh(figsize=(24, 6), legend=None)

plt.xlabel('tempo')
plt.ylabel('Zona')
plt.title('Conteggio del tempo a seconda delle zone')
plt.show()

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import plot_tree

# Decision-tree classifier for the power zone, using every other column of
# the frame as a feature.
# NOTE(review): X still contains 'power[watts]', the column 'pwr_zone' was
# derived from -- confirm that this is intended.
X = details.drop(['pwr_zone'], axis=1)
y = details['pwr_zone']

# Hold out 20% of the rows (fixed seed).
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42)

# Tree depth is capped at 7 levels.
model = DecisionTreeClassifier(max_depth=7)
model.fit(X_train, y_train)

# Draw the fitted tree with the feature names on the split nodes.
plt.figure(figsize=(24, 9))
plot_tree(model, feature_names=X_train.columns.to_list())