# Inizializzazione

In [None]:
# %pip install numpy
# %pip install pandas
# %pip install matplotlib
# %pip install scikit-learn
# %pip install lightgbm
# %pip install xgboost
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sklearn as sk

In [None]:
# Load the two semicolon-separated CSV exports: the summary table and the
# data file of a single activity.
summary = pd.read_csv("garmin_edge_820/summary.csv", sep=";")
details = pd.read_csv("garmin_edge_820/4557226804_ACTIVITY_data.csv", sep=";")

In [None]:
# Data cleaning: remove the power-phase and left/right balance columns.
dropped_columns = [
    'left_power_phase[degrees]',
    'left_power_phase_peak[degrees]',
    'right_power_phase[degrees]',
    'right_power_phase_peak[degrees]',
    'left_right_balance',
]
details = details.drop(columns=dropped_columns)

# Convert the timestamp (seconds) to a time of day and use it as the row index.
raw_timestamps = details.pop('timestamp[s]')
details['time'] = pd.to_datetime(raw_timestamps, unit='s').dt.time
details = details.set_index("time")

def convert_brackets(string):
    """Return ``string`` with every '[' turned into '(' and every ']' into ')'."""
    bracket_table = str.maketrans('[]', '()')
    return string.translate(bracket_table)

# Rename the columns so that square brackets become parentheses.
details.columns = list(map(convert_brackets, details.columns))

# Row counter starting at 0 and increasing by one per row.
# NOTE(review): this equals the elapsed seconds only if consecutive rows are
# exactly one second apart - confirm against the recording interval.
details['time_since_start'] = np.arange(len(details), dtype='int64')

# Heart-rate and power zones as (lower, upper) bounds; the position in the
# list, counted from 1, is the zone number.

hr_zones = [
    (0, 128),
    (129, 146),
    (147, 156),
    (157, 165),
    (166, 174),
    (175, 179),
    (180, float('inf')),
]
power_zones = [
    (0, 157),
    (158, 186),
    (187, 200),
    (201, 218),
    (219, 247),
    (248, 287),
    (288, float('inf')),
]

def get_zone(rate, zones):
    """Return the 1-based number of the zone that ``rate`` belongs to.

    Args:
        rate: Measured value to classify.
        zones: Sequence of inclusive ``(lower, upper)`` bounds, one per zone,
            in zone order.

    Returns:
        The position (counted from 1) of the first zone whose bounds contain
        ``rate``. A value that lies strictly between one zone's upper bound
        and the next zone's lower bound (for example 128.5 between 128 and
        129) is assigned to the lower of the two zones. ``None`` when no zone
        applies, which is also the result for NaN.
    """
    zones = list(zones)

    # Exact match first: keeps the result unchanged for every value that
    # already fell inside a zone.
    for zone, (lower, upper) in enumerate(zones, start=1):
        if lower <= rate <= upper:
            return zone

    # Bounds given as integers leave gaps between consecutive zones; a
    # fractional value in such a gap belongs to the zone it has not yet left.
    for zone, ((_, upper), (next_lower, _)) in enumerate(zip(zones, zones[1:]), start=1):
        if upper < rate < next_lower:
            return zone

    return None

# Label every row with its heart-rate zone and its power zone.
for zone_column, measured_column, bounds in (
    ('hr_zone', 'heart_rate(bpm)', hr_zones),
    ('pwr_zone', 'power(watts)', power_zones),
):
    details[zone_column] = details[measured_column].apply(get_zone, zones=bounds)

# Change in altitude and in distance with respect to the previous row; the
# first row (and any other missing difference) is set to 0.
altitude_step = details['altitude(m)'].diff().fillna(0)
distance_step = details['distance(m)'].diff().fillna(0)
details['altitude_diff'] = altitude_step
details['distance_diff'] = distance_step

# Slope in percent; rows with no change in distance get a slope of 0 instead
# of a division by zero.
no_distance = details['distance_diff'] == 0
slope_ratio = details['altitude_diff'] / details['distance_diff'] * 100
details['slope_percent'] = np.where(no_distance, 0, slope_ratio)

In [None]:
# Summary statistics of the cleaned columns.
details.describe()

In [None]:
# Column dtypes and non-null counts, used to spot missing values.
details.info()

In [None]:
# Replace missing speed values with the mean of the available ones.
speed = details['speed(m/s)']
details['speed(m/s)'] = speed.fillna(speed.mean())

# Data Visualization

In [None]:
# Draw the main signals on one wide figure, all against the row counter.
plt.figure(figsize=(24, 6))
elapsed = details['time_since_start']
for column_name, legend_label in (
    ('power(watts)', "power"),
    ('cadence(rpm)', "cadence"),
    ('speed(m/s)', "speed"),
    ('heart_rate(bpm)', "bpm"),
    ('altitude(m)', "altitude"),
):
    plt.plot(elapsed, details[column_name], label=legend_label)
plt.xlabel("tempo")
plt.legend()
plt.grid(True)
plt.show()

# Previsioni

In [None]:
from sklearn.model_selection import train_test_split, KFold, cross_validate, GridSearchCV, RandomizedSearchCV
from sklearn.linear_model import LinearRegression, Lasso, Ridge, ElasticNet
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.kernel_ridge import KernelRidge
from sklearn.tree import DecisionTreeRegressor, plot_tree, export_text
from sklearn.ensemble import RandomForestRegressor
from lightgbm import LGBMRegressor
from xgboost import XGBRegressor
from utilities import print_eval

In [None]:
# 3-fold splitter with shuffling and a fixed seed; passed as `cv` to the
# hyper-parameter searches in this notebook.
kf = KFold(n_splits=3, shuffle=True, random_state=42)

## Previsione battito

In [None]:
# Target: heart rate. `hr_zone` is computed directly from the heart rate, so
# it is removed from the features together with the target; keeping it would
# leak the target into the models and inflate the scores.
X = details.drop(['heart_rate(bpm)', 'hr_zone'], axis=1)
y = details['heart_rate(bpm)']
# Hold out one third of the rows for validation, with a fixed seed.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=1/3, random_state=42)

### Regressione lineare

In [None]:
# Pipeline: polynomial expansion, optional scaling, then a regressor. The
# scaler and the regressor are placeholders filled in by the grid below.
pipe = Pipeline(steps=[
    ("poly", PolynomialFeatures(include_bias=False)),
    ("std", None),
    ("model", None),
])

# Settings tried for every regressor.
common_grid = {"poly__degree": [1, 2], "std": [None, StandardScaler()]}

# One entry per regressor with its own hyper-parameters.
regressor_grid = [
    {"model": [LinearRegression()]},
    {"model": [Lasso()], "model__alpha": [0.1]},
    {"model": [Ridge()], "model__alpha": [0.1]},
    {"model": [ElasticNet()], "model__alpha": [0.1], "model__l1_ratio": [0.1]},
]

# Combine the shared settings with each regressor-specific entry.
grid = [{**common_grid, **regressor_params} for regressor_params in regressor_grid]

In [None]:
# Exhaustive search over the grid, scored by R^2 with the shared splitter and
# run on all available cores.
# NOTE(review): the name is spelled `liner_models_gs`; it is left as is
# because the following cells refer to it by this name.
liner_models_gs = GridSearchCV(pipe, grid, cv=kf, scoring="r2", n_jobs=-1)
liner_models_gs.fit(X_train, y_train)

In [None]:
# Table of the search results, highest mean test score first.
linear_models_gs_res = pd.DataFrame(liner_models_gs.cv_results_)
linear_models_gs_res = linear_models_gs_res.sort_values(by="mean_test_score", ascending=False)
linear_models_gs_res

In [None]:
# Evaluate the fitted grid search on the validation split. `print_eval` comes
# from the local `utilities` module; what it reports is not visible here.
print_eval(X_val, y_val, liner_models_gs)

In [None]:
# Randomized search over the same pipeline and grid. The fixed random_state
# makes the sampled candidates reproducible, in line with the seeded KFold
# and train/validation split used in this notebook.
linear_models_rs = RandomizedSearchCV(pipe, grid, cv=kf, scoring="r2", n_jobs=-1, random_state=42)
linear_models_rs.fit(X_train, y_train)

In [None]:
# Table of the randomized-search results, highest mean test score first.
linear_models_rs_res = pd.DataFrame(linear_models_rs.cv_results_)
linear_models_rs_res = linear_models_rs_res.sort_values(by="mean_test_score", ascending=False)
linear_models_rs_res

In [None]:
# Evaluate the fitted randomized search on the validation split. `print_eval`
# comes from the local `utilities` module; what it reports is not visible here.
print_eval(X_val, y_val, linear_models_rs)

In [None]:
# Scatter of the mean test score of each candidate. `DataFrame.plot.scatter`
# expects a column name (or position) for x, so the row index is first exposed
# as a "candidate" column instead of being passed as an Index object, which
# would be looked up as a list of column labels and fail.
linear_models_gs_res.rename_axis("candidate").reset_index().plot.scatter("candidate", "mean_test_score")

### Regressione con funzioni kernel

In [None]:
# Kernel ridge regression preceded by a scaling step that the grid fills in.
pipe = Pipeline(steps=[
    ("std", None),
    ("model", KernelRidge()),
])

# Settings shared by both kernels.
common_grid = {"std": [StandardScaler()], "model__alpha": [0.1]}

# Kernel-specific settings: polynomial of degree 1 or 2, or RBF with gamma 0.1.
model_grid = [
    {"model__kernel": ["poly"], 'model__degree': [1, 2]},
    {"model__kernel": ["rbf"], "model__gamma": [0.1]},
]

# Combine the shared settings with each kernel-specific entry.
grid = [{**common_grid, **kernel_params} for kernel_params in model_grid]

In [None]:
# Exhaustive search over the kernel-ridge grid, scored by R^2 with the shared
# splitter and run on all available cores.
krm_gs = GridSearchCV(pipe, grid, cv=kf, scoring="r2", n_jobs=-1)
krm_gs.fit(X_train, y_train)

In [None]:
# Table of the search results, highest mean test score first.
krm_gs_res = pd.DataFrame(krm_gs.cv_results_)
krm_gs_res = krm_gs_res.sort_values(by="mean_test_score", ascending=False)
krm_gs_res

In [None]:
# Evaluate the fitted grid search on the validation split. `print_eval` comes
# from the local `utilities` module; what it reports is not visible here.
print_eval(X_val, y_val, krm_gs)

In [None]:
# Randomized search over the kernel-ridge grid. The fixed random_state makes
# the order in which candidates are drawn reproducible, in line with the
# seeded KFold and train/validation split used in this notebook.
krm_rs = RandomizedSearchCV(pipe, grid, cv=kf, scoring="r2", n_jobs=-1, random_state=42)
krm_rs.fit(X_train, y_train)

In [None]:
# Table of the randomized-search results, highest mean test score first.
krm_rs_res = pd.DataFrame(krm_rs.cv_results_)
krm_rs_res = krm_rs_res.sort_values(by="mean_test_score", ascending=False)
krm_rs_res

In [None]:
# Evaluate the fitted randomized search on the validation split. `print_eval`
# comes from the local `utilities` module; what it reports is not visible here.
print_eval(X_val, y_val, krm_rs)

### Alberi di regressione

In [None]:
# Optional scaling followed by a tree-based regressor; both steps are
# placeholders filled in by the grid below.
pipe = Pipeline(steps=[
    ("std", None),
    ("model", None),
])

# Settings tried for every tree-based model.
grid_common = {"std": [None, StandardScaler()], "model__max_depth": [10]}

# One entry per model with its own hyper-parameters.
model_grid = [
    {"model": [DecisionTreeRegressor()]},
    {
        "model": [RandomForestRegressor()],
        "model__max_samples": [0.1],
        "model__max_features": ["sqrt", "log2"],
        "model__n_estimators": [1000],
    },
    {
        "model": [LGBMRegressor()],
        "model__n_estimators": [1000],
        "model__learning_rate": [0.05],
        "model__num_leaves": [10],
    },
    {
        "model": [XGBRegressor()],
        "model__n_estimators": [1000],
        "model__learning_rate": [0.05],
    },
]

# Combine the shared settings with each model-specific entry.
grid = [{**grid_common, **model_params} for model_params in model_grid]

In [None]:
# Exhaustive search over the tree-model grid, scored by R^2 with the shared
# splitter and run on all available cores.
tree_gs = GridSearchCV(pipe, grid, cv=kf, scoring="r2", n_jobs=-1)
tree_gs.fit(X_train, y_train)

In [None]:
# Table of the search results, highest mean test score first.
tree_gs_res = pd.DataFrame(tree_gs.cv_results_)
tree_gs_res = tree_gs_res.sort_values(by="mean_test_score", ascending=False)
tree_gs_res

In [None]:
# Evaluate the fitted grid search on the validation split. `print_eval` comes
# from the local `utilities` module; the effect of `tree=True` is not visible
# here.
print_eval(X_val, y_val, tree_gs, tree=True)

In [None]:
# Randomized search over the tree-model grid. The fixed random_state makes
# the order in which candidates are drawn reproducible, in line with the
# seeded KFold and train/validation split used in this notebook.
tree_rs = RandomizedSearchCV(pipe, grid, cv=kf, scoring="r2", n_jobs=-1, random_state=42)
tree_rs.fit(X_train, y_train)

In [None]:
# Table of the randomized-search results, highest mean test score first.
tree_rs_res = pd.DataFrame(tree_rs.cv_results_)
tree_rs_res = tree_rs_res.sort_values(by="mean_test_score", ascending=False)
tree_rs_res

In [None]:
# Evaluate the fitted randomized search on the validation split. `print_eval`
# comes from the local `utilities` module; the effect of `tree=True` is not
# visible here.
print_eval(X_val, y_val, tree_rs, tree=True)

## Previsione Potenza

In [None]:
# Keep only the rows with a non-zero power reading.
copy_details = details[details['power(watts)'] != 0]
# Target: power. `pwr_zone` is computed directly from the power, so it is
# removed from the features together with the target; keeping it would leak
# the target into the models and inflate the scores.
features = copy_details.drop(['power(watts)', 'pwr_zone'], axis=1)
target = copy_details['power(watts)']
# Hold out one third of the rows for testing, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=1/3, random_state=42)

### Regressione con modelli lineari e polinomiali

In [None]:
# Pipeline: polynomial expansion, scaling, then a regressor. The scaler and
# the regressor are placeholders filled in by the grid below.
pipe = Pipeline(steps=[
    ("poly", PolynomialFeatures(include_bias=False)),
    ("std", None),
    ("regressor", None),
])

# Settings tried for every regressor.
grid_common = {"poly__degree": [1, 2, 3], "std": [StandardScaler()]}

# One entry per regressor with its own hyper-parameters.
grid_regressors = [
    {"regressor": [LinearRegression()]},
    {"regressor": [Lasso()], "regressor__alpha": [0.1, 1]},
    {"regressor": [Ridge()], "regressor__alpha": [0.1, 1]},
    {
        "regressor": [ElasticNet()],
        "regressor__alpha": [0.1, 1],
        "regressor__l1_ratio": [0.1],
    },
]

# Combine the shared settings with each regressor-specific entry, run the
# search and show the results with the highest mean test score first.
grid = [{**grid_common, **regressor_params} for regressor_params in grid_regressors]
model = GridSearchCV(pipe, grid, cv=kf, scoring='r2', n_jobs=-1)
model.fit(X_train, y_train)
pd.DataFrame(model.cv_results_).sort_values(by="mean_test_score", ascending=False)
#y_pred = model.predict(X_test)
#new_data = pd.DataFrame([[4, 140, 90]], columns=features)
#watts_pred = model.predict(new_data)
#print("Potenza predetta:", watts_pred)

### Regressione con funzioni kernel

In [None]:
# Scaling followed by a regressor; both steps are placeholders filled in by
# the grid below.
pipe = Pipeline(steps=[
    ("std", None),
    ("regressor", None),
])

# Settings shared by both kernels: standardized inputs and kernel ridge with
# two regularization strengths.
grid_common = {
    "std": [StandardScaler()],
    "regressor": [KernelRidge()],
    'regressor__alpha': [0.1, 1],
}

# Kernel-specific settings.
grid_regressors = [
    {"regressor__kernel": ["poly"], 'regressor__degree': [1, 2, 3]},
    {"regressor__kernel": ["rbf"], "regressor__gamma": [0.1, 1]},
]

# Combine the shared settings with each kernel-specific entry, run the search
# and show the results with the highest mean test score first.
grid = [{**grid_common, **kernel_params} for kernel_params in grid_regressors]
model = GridSearchCV(pipe, grid, cv=kf, scoring='r2', n_jobs=-1)
model.fit(X_train, y_train)
pd.DataFrame(model.cv_results_).sort_values(by="mean_test_score", ascending=False)

### Regressione con alberi

In [None]:
# Scaling followed by a tree-based regressor; both steps are placeholders
# filled in by the grid below.
pipe = Pipeline(steps=[
    ("std", None),
    ("regressor", None),
])

# Settings tried for every tree-based model.
grid_common = {"std": [StandardScaler()], "regressor__max_depth": [5, 10, 15]}

# One entry per model with its own hyper-parameters.
grid_regressors = [
    {"regressor": [DecisionTreeRegressor()]},
    {
        "regressor": [RandomForestRegressor()],
        "regressor__n_estimators": [100, 200, 300],
    },
    {
        "regressor": [LGBMRegressor()],
        "regressor__n_estimators": [100, 200, 300],
        "regressor__learning_rate": [0.01, 0.05, 0.1],
    },
    {
        "regressor": [XGBRegressor()],
        "regressor__n_estimators": [100, 200, 300],
        "regressor__learning_rate": [0.01, 0.05, 0.1],
    },
]

# Combine the shared settings with each model-specific entry, run the search
# and show the results with the highest mean test score first.
grid = [{**grid_common, **model_params} for model_params in grid_regressors]
model = GridSearchCV(pipe, grid, cv=kf, scoring='r2', n_jobs=-1)
model.fit(X_train, y_train)
pd.DataFrame(model.cv_results_).sort_values(by="mean_test_score", ascending=False)

# Classificazione

## Classificazione della potenza

In [None]:
# Two features (row counter and power) with the power zone as the label.
X = details[["time_since_start", "power(watts)"]]
y = details['pwr_zone']

# Hold out one third of the rows for testing, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=1/3, random_state=42)

# One colour per zone number, used to colour the training points.
diagnosis_color_map = dict(
    zip(range(1, 8), ["yellow", "orange", "red", "purple", "blue", "green", "black"])
)
point_colors = y_train.map(diagnosis_color_map)
X_train.plot.scatter("time_since_start", "power(watts)", c=point_colors, figsize=(24, 6))

In [None]:
# Number of rows recorded in each power zone, drawn as horizontal bars.
zone_counts = details['pwr_zone'].value_counts()

zone_counts.plot(kind='barh', figsize=(24, 6), legend=None)

plt.xlabel('tempo')
plt.ylabel('Zona')
plt.title('Conteggio del tempo a seconda delle zone')
plt.show()

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, plot_tree, export_text

# Classify the power zone from every other column.
# NOTE(review): the features still contain `power(watts)`, from which
# `pwr_zone` is computed earlier in this notebook - confirm this is intended.
X = details.drop(columns=['pwr_zone'])
y = details['pwr_zone']

# Hold out one third of the rows for validation, with a fixed seed.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=1/3, random_state=42)

# Decision tree with no depth limit.
model = DecisionTreeClassifier(max_depth=None)
model.fit(X_train, y_train)

# Draw the fitted tree and print its rules as text, using the column names.
feature_names = X_train.columns.to_list()
plt.figure(figsize=(24, 9))
plot_tree(model, feature_names=feature_names)
print(export_text(model, feature_names=feature_names))