# Inizializzazione

In [None]:
# %pip install numpy
# %pip install pandas
# %pip install matplotlib
# %pip install scikit-learn
# %pip install lightgbm
# %pip install xgboost
# %pip install flask
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sklearn as sk
import os
import pickle
import flask
from sklearn.model_selection import train_test_split, KFold, cross_validate, GridSearchCV, RandomizedSearchCV
from sklearn.linear_model import LinearRegression, Lasso, Ridge, ElasticNet
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.kernel_ridge import KernelRidge
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.tree import DecisionTreeRegressor, plot_tree, export_text,DecisionTreeClassifier
from sklearn.ensemble import RandomForestRegressor
from lightgbm import LGBMRegressor
from xgboost import XGBRegressor
from utilities import print_eval


In [None]:
def convert_brackets(string):
    """Return ``string`` with square brackets swapped for parentheses.

    Every ``[`` becomes ``(`` and every ``]`` becomes ``)``; all other
    characters are left untouched.
    """
    return string.translate(str.maketrans("[]", "()"))

# Table read from the summary export that sits next to the detail files.
summary = pd.read_csv("garmin_edge_820/summary.csv", sep=";")

# Detail exports to load, one CSV per recorded activity.
# Activity 5191513011 used to appear twice in this list, which loaded the same
# ride two times: every one of its rows was duplicated, and identical samples
# could end up on both sides of the random train/validation splits made later.
# Each activity is now listed exactly once.
file_list = ["garmin_edge_820/4557226804_ACTIVITY_data.csv",
             "garmin_edge_820/4593452980_ACTIVITY_data.csv",
             "garmin_edge_820/5191513011_ACTIVITY_data.csv",
             "garmin_edge_820/3993730634_ACTIVITY_data.csv"]

# Filled with one cleaned frame per activity file.
dataframes = []

# Inclusive (lower, upper) bounds of each zone; a zone's number is its 1-based
# position in the list. hr_zones is applied to the heart-rate (bpm) column and
# power_zones to the power (watts) column.
hr_zones = [(0, 128), (129, 146), (147, 156), (157, 165),(166, 174), (175, 179), (180, float('inf'))]
power_zones = [(0, 157), (158, 186), (187, 200), (201, 218),(219, 247), (248, 287), (288, float('inf'))]

def get_zone(rate, zones):
    """Return the 1-based number of the first zone that contains ``rate``.

    ``zones`` is an iterable of ``(lower, upper)`` pairs and both bounds are
    inclusive. ``None`` is returned when no pair contains ``rate``; this
    includes NaN, because every comparison against NaN is false.
    """
    matching_numbers = (
        number
        for number, (low, high) in enumerate(zones, start=1)
        if low <= rate <= high
    )
    return next(matching_numbers, None)

# Load every activity file, clean it, derive the extra feature columns and
# finally stack all activities into one frame named `details`.
for file in file_list:
    details = pd.read_csv(file, sep=";")
    
    # From here on column names use parentheses instead of square brackets.
    details.columns = [convert_brackets(col) for col in details.columns]
    # Discard samples whose speed is zero or missing.
    details = details[details['speed(m/s)'] != 0]
    details = details.dropna(subset=['speed(m/s)'])

    # Index the rows by timestamp; the raw column is interpreted as seconds.
    details['time'] = pd.to_datetime(details.pop('timestamp(s)'), unit='s')
    details.set_index("time", inplace=True)
    
    # Running row counter 0, 1, 2, ... over the rows kept so far.
    # NOTE(review): this equals elapsed seconds only if samples are one second
    # apart and no rows were removed above - confirm before reading it as time.
    details['time_since_start'] = 1
    details['time_since_start'] = details['time_since_start'].cumsum().sub(1)
    
    # Zone number per sample; get_zone gives None when no zone matches.
    details['hr_zone'] = details['heart_rate(bpm)'].apply(get_zone, zones=hr_zones)
    details['pwr_zone'] = details['power(watts)'].apply(get_zone, zones=power_zones)
    
    # Change in altitude and distance relative to the previous row; the first
    # row has no predecessor and is set to 0.
    details['altitude_diff'] = details['altitude(m)'] - details['altitude(m)'].shift(1)
    details['distance_diff'] = details['distance(m)'] - details['distance(m)'].shift(1)
    details[['altitude_diff', 'distance_diff']] = details[['altitude_diff', 'distance_diff']].fillna(0)
    
    # Remove rows with heart rate below 110. This happens after the differences
    # above were computed, so those still refer to the unfiltered neighbours.
    details.drop(details[details['heart_rate(bpm)'] < 110].index, inplace=True)

    # Slope in percent (rise over run * 100); 0 where no distance was covered.
    details['slope_percent'] = np.where(details['distance_diff'] == 0, 0, details['altitude_diff'] / details['distance_diff'] * 100)
    
    # NOTE(review): presumably the 128 offset strips a flag from the device's
    # balance encoding, leaving a percentage - confirm, and confirm which leg
    # the remaining value refers to.
    details['power_left'] = details['left_right_balance'] - 128
    details['power_right'] = 100 - details['power_left']
    
    # Centered 3-sample rolling mean of power; the first and last rows have an
    # incomplete window and come out as NaN.
    window_size = 3 
    details['potenza_media'] = details['power(watts)'].rolling(window=int(window_size), center=True).mean()
    
    # Drop the pedalling-phase columns and the raw balance value.
    details = details.drop(['left_power_phase(degrees)',
                            'left_power_phase_peak(degrees)',
                            'right_power_phase(degrees)',
                            'right_power_phase_peak(degrees)',
                            'left_right_balance'], axis=1)
    dataframes.append(details)
# Stack all activities; the per-activity time index is kept.
details = pd.concat(dataframes, ignore_index=False)

In [None]:
# Descriptive statistics of the combined frame.
details.describe()

In [None]:
# Column dtypes and non-null counts of the combined frame.
details.info()

In [None]:
# Replace the missing rolling-mean values with the mean of the whole column.
details['potenza_media'] = details['potenza_media'].fillna(details['potenza_media'].mean())

# Data Visualization

In [None]:
# Overlay the main recorded signals on one wide figure, all plotted against
# the per-activity row counter.
plt.figure(figsize=(24, 6))
elapsed = details['time_since_start']
for column, legend_name in (
    ('power(watts)', "power"),
    ('cadence(rpm)', "cadence"),
    ('speed(m/s)', "speed"),
    ('heart_rate(bpm)', "bpm"),
    ('altitude(m)', "altitude"),
):
    plt.plot(elapsed, details[column], label=legend_name)
plt.xlabel("tempo")
plt.legend()
plt.grid(True)
plt.show()

# Previsioni

In [None]:
# Shared 5-fold splitter (shuffled, fixed seed) used by every search below.
kf = KFold(n_splits=5, shuffle=True, random_state=42)

### Definizione dei modelli

In [None]:
def get_pipe_grid(type):
    """Return the pipeline skeleton and search grids for one model family.

    Parameters
    ----------
    type : str
        Family to build: ``"linear"``, ``"kernel"`` or ``"tree"``. Any other
        key raises ``KeyError``.

    Returns
    -------
    tuple
        ``(pipe, grids)``. ``pipe`` is an unfitted ``Pipeline`` whose ``std``
        and ``regressor`` steps are ``None`` placeholders. ``grids`` is a list
        of parameter dictionaries, one per candidate, each made of the
        family's shared settings followed by the candidate's own settings.
    """
    def candidate(estimator, **search_space):
        # {"regressor": [estimator], "regressor__<name>": values, ...}
        entry = {"regressor": [estimator]}
        for name, values in search_space.items():
            entry[f"regressor__{name}"] = values
        return entry

    def scaled_regressor_pipe():
        return Pipeline([("std", None), ("regressor", None)])

    # Polynomial expansion -> scaling -> plain or regularised linear model.
    linear_family = (
        Pipeline([
            ("poly", PolynomialFeatures(include_bias=False)),
            ("std", None),
            ("regressor", None),
        ]),
        {"poly__degree": [3, 4], "std": [StandardScaler()]},
        [
            candidate(LinearRegression()),
            candidate(Lasso(), alpha=[0.01, 0.1]),
            candidate(Ridge(), alpha=[0.01, 0.1]),
            candidate(ElasticNet(), alpha=[0.01, 0.1], l1_ratio=[0.1]),
        ],
    )

    # Kernel ridge regression: one regressor, only the kernel settings vary.
    kernel_family = (
        scaled_regressor_pipe(),
        {
            "std": [StandardScaler()],
            "regressor": [KernelRidge()],
            "regressor__alpha": [0.01, 0.1],
        },
        [
            {"regressor__kernel": ["poly"], "regressor__degree": [4, 5]},
            {"regressor__kernel": ["rbf"], "regressor__gamma": [0.01, 0.1]},
        ],
    )

    # Single tree and tree ensembles, all searched over the same depths.
    tree_family = (
        scaled_regressor_pipe(),
        {"std": [StandardScaler()], "regressor__max_depth": [5, 10]},
        [
            candidate(DecisionTreeRegressor()),
            candidate(RandomForestRegressor(), n_estimators=[100, 1000]),
            candidate(LGBMRegressor(), n_estimators=[100, 1000], learning_rate=[0.01, 0.1]),
            candidate(XGBRegressor(), n_estimators=[100, 1000], learning_rate=[0.01, 0.1]),
        ],
    )

    families = {"linear": linear_family, "kernel": kernel_family, "tree": tree_family}
    pipe, shared, per_candidate = families[type]
    return pipe, [{**shared, **specific} for specific in per_candidate]

In [None]:
def get_trained_model(type, X_train, y_train, randomized=False):
    """Fit a cross-validated hyper-parameter search for one model family.

    Parameters
    ----------
    type : str
        Family name forwarded to ``get_pipe_grid``.
    X_train, y_train
        Training features and target handed to the search's ``fit``.
    randomized : bool, default False
        Use ``RandomizedSearchCV`` when true, ``GridSearchCV`` otherwise.

    Returns
    -------
    The fitted search object. Candidates are scored with R2 on the
    module-level ``kf`` splitter, using all available cores.
    """
    pipe, grid = get_pipe_grid(type)
    search_cls = RandomizedSearchCV if randomized else GridSearchCV
    model = search_cls(pipe, grid, cv=kf, scoring="r2", n_jobs=-1)
    model.fit(X_train, y_train)
    return model

## Previsione battito

In [None]:
# Heart-rate regression data: the target is the heart-rate column, the
# features are the columns left after removing the target and the columns
# listed below.
X_hr = details.drop(
    columns=[
        'heart_rate(bpm)',
        'potenza_media',
        'temperature(C)',
        'hr_zone',
        'pwr_zone',
        'altitude_diff',
        'distance_diff',
        'left_pco(mm)',
        'right_pco(mm)',
        'power_left',
        'power_right',
        'accumulated_power(watts)',
    ]
)
y_hr = details['heart_rate(bpm)']
# One third of the rows is held out for validation.
X_train, X_val, y_train, y_val = train_test_split(
    X_hr, y_hr, test_size=1/3, random_state=42
)

### Regressione lineare

In [None]:
# Grid search over the linear/polynomial family for the heart-rate target.
# (The search object is spelled `liner_models_gs`; the name is kept as is.)
liner_models_gs = get_trained_model('linear', X_train, y_train)
# Cross-validation results, best mean test score first.
linear_models_gs_res = pd.DataFrame(liner_models_gs.cv_results_).sort_values("mean_test_score", ascending=False)
# print_eval comes from the project's `utilities` module; presumably it
# reports metrics on the validation split - confirm there.
print_eval(X_val, y_val, liner_models_gs)
linear_models_gs_res

In [None]:
# Disabled cell: randomized-search variant of the linear family, kept inside
# a string literal so that it does not run.
"""
linear_models_rs = get_trained_model('linear', X_train, y_train, randomized=True)
linear_models_rs_res = pd.DataFrame(linear_models_rs.cv_results_).sort_values("mean_test_score", ascending=False)
print_eval(X_val, y_val, linear_models_rs)
linear_models_rs_res
"""

### Regressione con funzioni kernel

In [None]:
# Grid search over the kernel-ridge family for the heart-rate target.
krm_gs = get_trained_model('kernel', X_train, y_train)
# Cross-validation results, best mean test score first.
krm_gs_res = pd.DataFrame(krm_gs.cv_results_).sort_values("mean_test_score", ascending=False)
print_eval(X_val, y_val, krm_gs)
krm_gs_res

In [None]:
# Disabled cell: randomized-search variant of the kernel family, kept inside
# a string literal so that it does not run.
"""
krm_rs = get_trained_model('kernel', X_train, y_train, randomized=True)
krm_rs_res = pd.DataFrame(krm_rs.cv_results_).sort_values("mean_test_score", ascending=False)
print_eval(X_val, y_val, krm_rs)
krm_rs_res
"""

### Alberi di regressione

In [None]:
# Grid search over the tree family for the heart-rate target. The fitted
# search is reused further down (test prediction, feature importances and
# the pickled heart-rate model).
tree_gs = get_trained_model('tree', X_train, y_train)
# Cross-validation results, best mean test score first.
tree_gs_res = pd.DataFrame(tree_gs.cv_results_).sort_values("mean_test_score", ascending=False)
print_eval(X_val, y_val, tree_gs, tree=True)
tree_gs_res

In [None]:
# Disabled cell: randomized-search variant of the tree family, kept inside a
# string literal so that it does not run.
"""
tree_rs = get_trained_model('tree', X_train, y_train, randomized=True)
tree_rs_res = pd.DataFrame(tree_rs.cv_results_).sort_values("mean_test_score", ascending=False)
print_eval(X_val, y_val, tree_rs, tree=True)
tree_rs_res
"""

### Confronto fra modelli

In [None]:
# Scatter plot of mean fit time against mean cross-validated R2 for every
# configuration tried by the heart-rate searches, one colour per family.
fig, ax = plt.subplots(figsize=(24, 9))

# Each result table is plotted once. It used to be concatenated with itself
# (left over from when the randomized-search results were appended), which
# drew every point twice.
linear_models = linear_models_gs_res
scatter_linear = ax.scatter(linear_models['mean_fit_time'], linear_models['mean_test_score'], color='blue', label='Modelli lineari e polinomiali')

krm_models = krm_gs_res
scatter_kernel = ax.scatter(krm_models['mean_fit_time'], krm_models['mean_test_score'], color='red', label='Modelli Kernel')

tree_models = tree_gs_res
scatter_tree = ax.scatter(tree_models['mean_fit_time'], tree_models['mean_test_score'], color='green', label='Modelli Albero')

ax.set_xlabel('Tempo di addestramento (s)')
ax.set_ylabel('Coefficiente R2')
ax.set_title('Confronto tra i modelli')
ax.legend()
# Configurations with a negative R2 fall outside this range and are not shown.
ax.set_ylim(0, 1.1)
plt.show()


### Test predizione

In [None]:
# List the feature columns (and their dtypes) the heart-rate models were fed.
X_hr.info()

In [None]:
# Manual prediction on a single hand-written sample.
# NOTE(review): only two values are passed here, while the search was fitted
# on the columns of X_hr - confirm the sample has one value per X_hr column.
tree_gs.predict(np.array([90, 100]).reshape(1, -1))

In [None]:
# Pair each feature column with the importance reported by the regressor step
# of the best pipeline found by the tree search.
pd.DataFrame({'Feature': X_hr.columns, 'Weight': tree_gs.best_estimator_.named_steps['regressor'].feature_importances_})

### Reti neurali

In [None]:
from sklearn.neural_network import MLPRegressor

# Standardise the inputs, then fit a multi-layer perceptron regressor.
pipe = Pipeline(steps=[
    ("std", StandardScaler()),
    ("regressor", MLPRegressor()),
])

# Search space for the perceptron: hidden layer size, activation function,
# iteration cap and mini-batch size.
grid = dict(
    regressor__hidden_layer_sizes=[4, 5, 10],
    regressor__activation=["identity", "logistic", "tanh", "relu"],
    regressor__max_iter=[1000, 2000],
    regressor__batch_size=[20, 200],
)

# Same protocol as the other searches: shared splitter, R2 scoring, all cores.
model = GridSearchCV(estimator=pipe, param_grid=grid, cv=kf, scoring="r2", n_jobs=-1)
model.fit(X_train, y_train)
print_eval(X_val, y_val, model)

## Previsione Potenza

In [None]:
def get_pipe_grid(type):
    """Return the pipeline skeleton and search grids for one model family.

    This definition replaces the function of the same name defined in an
    earlier cell; the visible difference is that the linear family searches
    polynomial degrees 4 and 5.

    Parameters
    ----------
    type : str
        Family to build: ``"linear"``, ``"kernel"`` or ``"tree"``. Any other
        key raises ``KeyError``.

    Returns
    -------
    tuple
        ``(pipe, grids)``. ``pipe`` is an unfitted ``Pipeline`` whose ``std``
        and ``regressor`` steps are ``None`` placeholders. ``grids`` is a list
        of parameter dictionaries, one per candidate, each made of the
        family's shared settings followed by the candidate's own settings.
    """
    def candidate(estimator, **search_space):
        # {"regressor": [estimator], "regressor__<name>": values, ...}
        entry = {"regressor": [estimator]}
        for name, values in search_space.items():
            entry[f"regressor__{name}"] = values
        return entry

    def scaled_regressor_pipe():
        return Pipeline([("std", None), ("regressor", None)])

    # Polynomial expansion -> scaling -> plain or regularised linear model.
    linear_family = (
        Pipeline([
            ("poly", PolynomialFeatures(include_bias=False)),
            ("std", None),
            ("regressor", None),
        ]),
        {"poly__degree": [4, 5], "std": [StandardScaler()]},
        [
            candidate(LinearRegression()),
            candidate(Lasso(), alpha=[0.01, 0.1]),
            candidate(Ridge(), alpha=[0.01, 0.1]),
            candidate(ElasticNet(), alpha=[0.01, 0.1], l1_ratio=[0.1]),
        ],
    )

    # Kernel ridge regression: one regressor, only the kernel settings vary.
    kernel_family = (
        scaled_regressor_pipe(),
        {
            "std": [StandardScaler()],
            "regressor": [KernelRidge()],
            "regressor__alpha": [0.01, 0.1],
        },
        [
            {"regressor__kernel": ["poly"], "regressor__degree": [4, 5]},
            {"regressor__kernel": ["rbf"], "regressor__gamma": [0.01, 0.1]},
        ],
    )

    # Single tree and tree ensembles, all searched over the same depths.
    tree_family = (
        scaled_regressor_pipe(),
        {"std": [StandardScaler()], "regressor__max_depth": [5, 10]},
        [
            candidate(DecisionTreeRegressor()),
            candidate(RandomForestRegressor(), n_estimators=[100, 1000]),
            candidate(LGBMRegressor(), n_estimators=[100, 1000], learning_rate=[0.01, 0.1]),
            candidate(XGBRegressor(), n_estimators=[100, 1000], learning_rate=[0.01, 0.1]),
        ],
    )

    families = {"linear": linear_family, "kernel": kernel_family, "tree": tree_family}
    pipe, shared, per_candidate = families[type]
    return pipe, [{**shared, **specific} for specific in per_candidate]

In [None]:
def get_trained_model(type, X_train, y_train, randomized=False):
    """Fit a cross-validated hyper-parameter search for one model family.

    Redefined here so that it picks up the ``get_pipe_grid`` defined just
    above for the power target.

    Parameters
    ----------
    type : str
        Family name forwarded to ``get_pipe_grid``.
    X_train, y_train
        Training features and target handed to the search's ``fit``.
    randomized : bool, default False
        Use ``RandomizedSearchCV`` when true, ``GridSearchCV`` otherwise.

    Returns
    -------
    The fitted search object. Candidates are scored with R2 on the
    module-level ``kf`` splitter, using all available cores.
    """
    pipe, grid = get_pipe_grid(type)
    search_cls = RandomizedSearchCV if randomized else GridSearchCV
    model = search_cls(pipe, grid, cv=kf, scoring="r2", n_jobs=-1)
    model.fit(X_train, y_train)
    return model

In [None]:
# Power regression data: the target is the power column, the features are the
# columns left after removing the target and the columns listed below.
X_watt = details.drop(
    columns=[
        'power(watts)',
        'potenza_media',
        'temperature(C)',
        'hr_zone',
        'pwr_zone',
        'altitude_diff',
        'distance_diff',
        'left_pco(mm)',
        'right_pco(mm)',
        'power_left',
        'power_right',
        'accumulated_power(watts)',
    ]
)
y_watt = details['power(watts)']
# One third of the rows is held out for validation. This overwrites the
# X_train / X_val / y_train / y_val used for the heart-rate models.
X_train, X_val, y_train, y_val = train_test_split(
    X_watt, y_watt, test_size=1/3, random_state=42
)

### Regressione con modelli lineari e polinomiali

In [None]:
# Grid search over the linear/polynomial family for the power target.
linear_gs = get_trained_model('linear', X_train, y_train)
# print_eval comes from the project's `utilities` module; presumably it
# reports metrics on the validation split - confirm there.
print_eval(X_val, y_val, linear_gs)
# Cross-validation results, best mean test score first.
linear_gs_res=pd.DataFrame(linear_gs.cv_results_).sort_values("mean_test_score", ascending=False)
linear_gs_res

### Regressione con funzioni kernel

In [None]:
# Grid search over the kernel-ridge family for the power target.
kernel_gs = get_trained_model('kernel', X_train, y_train)
print_eval(X_val, y_val, kernel_gs)
# Cross-validation results, best mean test score first.
kernel_gs_res=pd.DataFrame(kernel_gs.cv_results_).sort_values("mean_test_score", ascending=False)
kernel_gs_res

### Regressione con alberi

In [None]:
# Grid search over the tree family for the power target. The fitted search is
# reused further down (test prediction and the pickled power model).
watt_tree_gs = get_trained_model('tree', X_train, y_train)
print_eval(X_val, y_val, watt_tree_gs, tree=True)
# Cross-validation results, best mean test score first.
watt_tree_gs_res=pd.DataFrame(watt_tree_gs.cv_results_).sort_values("mean_test_score", ascending=False)
watt_tree_gs_res

### Confronto fra modelli

In [None]:
# Scatter plot of mean fit time against mean cross-validated R2 for every
# configuration tried by the power searches, one colour per family.
fig, ax = plt.subplots(figsize=(24, 9))

# Each result table is plotted once. It used to be concatenated with itself,
# which drew every point twice.
linear_models = linear_gs_res
scatter_linear = ax.scatter(linear_models['mean_fit_time'], linear_models['mean_test_score'], color='blue', label='Modelli lineari e polinomiali')

krm_models = kernel_gs_res
scatter_kernel = ax.scatter(krm_models['mean_fit_time'], krm_models['mean_test_score'], color='red', label='Modelli Kernel')

tree_models = watt_tree_gs_res
scatter_tree = ax.scatter(tree_models['mean_fit_time'], tree_models['mean_test_score'], color='green', label='Modelli Albero')

ax.set_xlabel('Tempo di addestramento (s)')
ax.set_ylabel('Coefficiente R2')
ax.set_title('Confronto tra i modelli')
ax.legend()
# Configurations with a negative R2 fall outside this range and are not shown.
ax.set_ylim(0, 1.1)
plt.show()


### Test predizione

In [None]:
# List the feature columns (and their dtypes) the power models were fed.
X_watt.info()

In [None]:
# Manual prediction on a single hand-written sample of six values.
# NOTE(review): the values must follow the column order of X_watt - confirm
# against the X_watt.info() output.
watt_tree_gs.predict(np.array([43000,6,150,90,7200,0]).reshape(1, -1))

# Classificazione

## Classificazione della potenza

In [None]:
# Row counter and power as coordinates, power zone as the label.
X = details[["time_since_start", "power(watts)"]]
y = details['pwr_zone']

# Only the training part is plotted below. This overwrites the X_train and
# y_train left over from the regression section.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=1/3, random_state=42)

# One colour per zone number (1-7).
diagnosis_color_map = {1: "yellow", 2: "orange", 3: "red", 4: "purple", 5: "blue", 6: "green", 7: "black"}
X_train.plot.scatter("time_since_start", "power(watts)",c=y_train.map(diagnosis_color_map),figsize=(24, 6))

In [None]:
# Number of rows recorded in each power zone.
zone_counts = details['pwr_zone'].value_counts()

zone_counts.plot.barh(figsize=(24, 6), legend=None)

# The horizontal axis shows row counts; it is labelled as time on the
# assumption of one sample per second - TODO confirm.
plt.xlabel('tempo')
plt.ylabel('Zona')
plt.title('Conteggio del tempo a seconda delle zone')
plt.show()

In [None]:
# Predict the power zone from heart rate and cadence.
X = details[['heart_rate(bpm)', 'cadence(rpm)']]
y = details['pwr_zone']

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=1/3, random_state=42)

# Depth is capped at 3 so the tree below stays small enough to read.
model = DecisionTreeClassifier(max_depth=3)
model.fit(X_train, y_train)

# Show the fitted tree both as a diagram and as text rules. The validation
# split is created above but not scored in this cell.
plt.figure(figsize=(24, 9))
plot_tree(model, feature_names=X_train.columns.to_list())
print(export_text(model, feature_names=X_train.columns.to_list()))

## Classificazione dei battiti

In [None]:
# Row counter and heart rate as coordinates, heart-rate zone as the label.
X = details[["time_since_start", "heart_rate(bpm)"]]
y = details['hr_zone']

# Only the training part is plotted below.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=1/3, random_state=42)

# One colour per zone number (1-7).
diagnosis_color_map = {1: "yellow", 2: "orange", 3: "red", 4: "purple", 5: "blue", 6: "green", 7: "black"}
X_train.plot.scatter("time_since_start", "heart_rate(bpm)",c=y_train.map(diagnosis_color_map),figsize=(24, 6))

In [None]:
# Number of rows recorded in each heart-rate zone.
zone_counts = details['hr_zone'].value_counts()

zone_counts.plot.barh(figsize=(24, 6), legend=None)

# The horizontal axis shows row counts; it is labelled as time on the
# assumption of one sample per second - TODO confirm.
plt.xlabel('tempo')
plt.ylabel('Zona')
plt.title('Conteggio del tempo a seconda delle zone')
plt.show()

In [None]:
# Predict the heart-rate zone from power and cadence.
X = details[['power(watts)', 'cadence(rpm)']]
y = details['hr_zone']

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=1/3, random_state=42)

# Depth is capped at 3 so the tree below stays small enough to read.
model = DecisionTreeClassifier(max_depth=3)
model.fit(X_train, y_train)

# Show the fitted tree both as a diagram and as text rules. The validation
# split is created above but not scored in this cell.
plt.figure(figsize=(24, 9))
plot_tree(model, feature_names=X_train.columns.to_list())
print(export_text(model, feature_names=X_train.columns.to_list()))

# Flask

In [None]:
# Folders the following cells write into: the HTML template, the pickled
# models and the pickled feature frames.
# makedirs(exist_ok=True) creates a folder only when it is missing, without
# the window between "check" and "create" that the isdir/mkdir pair had.
for folder in ("templates", "models", "datasets"):
    os.makedirs(folder, exist_ok=True)

In [None]:
# Persist what the Flask app needs: the best pipeline of each tree search and
# the feature frames (the app reads the column names and dtypes from them).
artifacts = (
    ("models/hr_model.bin", tree_gs.best_estimator_),
    ("models/watt_model.bin", watt_tree_gs.best_estimator_),
    ("datasets/hr_dataset.pkl", X_hr),
    ("datasets/watt_dataset.pkl", X_watt),
)
for target_path, payload in artifacts:
    with open(target_path, "wb") as f:
        pickle.dump(payload, f)

In [None]:
%%writefile garmin_flask.py
import os.path
import pickle
from flask import Flask, request, render_template
import numpy as np

In [None]:
%%writefile -a garmin_flask.py

# The Flask application object for the prediction page.
app = Flask(__name__)
# NOTE(review): debug mode is hard-coded on. It is convenient while developing
# but must be switched off before the app is exposed to other machines.
app.debug = True

In [None]:
%%writefile -a garmin_flask.py

def _load_resource(relative_path):
    """Unpickle a file stored next to this application.

    Paths are resolved against the application's root folder, so loading does
    not depend on the directory the server was started from.

    NOTE: unpickling runs arbitrary code. These files must only ever be the
    artifacts written by the training notebook, never user-supplied data.
    """
    with app.open_resource(relative_path, "rb") as f:
        return pickle.load(f)


def _parse_form_inputs(form, features):
    """Convert submitted form fields into one numeric feature row.

    Values are read in the column order of ``features``. Integer columns are
    parsed with ``int``; every other column is parsed with ``float``.

    Raises
    ------
    KeyError
        A column has no matching form field.
    ValueError
        A field does not hold a valid number.
    """
    row = []
    for column_name, dtype in features.dtypes.items():
        is_integer = getattr(dtype, "kind", None) in ("i", "u")
        parser = int if is_integer else float
        row.append(parser(form[column_name]))
    return row


@app.route("/", methods=["GET", "POST"])
def index():
    """Render the prediction page and, on POST, run the requested model.

    The hidden ``card`` field selects the model: ``"hr"`` predicts heart rate
    and exposes the result to the template as ``hr_pred``; ``"watt"`` predicts
    power and exposes it as ``w_pred``. Any other request just renders the
    empty forms. Missing or non-numeric fields are answered with HTTP 400
    instead of an unhandled exception.
    """
    X_hr = _load_resource("datasets/hr_dataset.pkl")
    X_watt = _load_resource("datasets/watt_dataset.pkl")
    context = {"X_hr": X_hr, "X_watt": X_watt}

    if request.method == "POST":
        # card value -> (feature frame, model file, template variable)
        cards = {
            "hr": (X_hr, "models/hr_model.bin", "hr_pred"),
            "watt": (X_watt, "models/watt_model.bin", "w_pred"),
        }
        selected = cards.get(request.form.get("card"))
        if selected is not None:
            features, model_path, result_name = selected
            try:
                inputs = _parse_form_inputs(request.form, features)
            except (KeyError, ValueError):
                return "Every field must be filled in with a valid number.", 400
            model = _load_resource(model_path)
            context[result_name] = model.predict(np.array(inputs).reshape(1, -1))[0]

    return render_template("index.html", **context)

In [None]:
%%writefile -a garmin_flask.py

# Start Flask's built-in server only when the file is run as a script.
if __name__ == '__main__':
  app.run()

In [None]:
%%writefile templates/index.html

<!doctype html>
<html lang="en">

<head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>Bootstrap demo</title>
  <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css" rel="stylesheet"
    integrity="sha384-9ndCyUaIbzAi2FUVXJi0CjmCapSmO7SnpJef0486qhLnuZ2cdeRhO02iuK6FUUVM" crossorigin="anonymous">
</head>

<body class="d-flex flex-column vh-100" data-bs-theme="dark">
  {# Two cards, one per model. Each form posts back to this page; the hidden
     "card" field tells the view which model to run. Input ids carry a
     per-form prefix because both forms can contain the same column names and
     ids must be unique within the page. The "name" attributes stay equal to
     the plain column names, which is what the view reads. #}
  <header class="d-flex justify-content-center shadow py-3">
      <h1>data intensive</h1>
  </header>
  <div class="d-flex align-items-center justify-content-center h-100">
    <div class="card m-auto">
      <div class="card-body">
        <h5 class="card-title">Previsione battiti</h5>
        <form method="POST" action="">
          {% for column in X_hr.columns %}
          <div class="mb-3">
            <label for="hr-{{ column }}" class="form-label">{{ column }}</label>
            <input name="{{ column }}" class="form-control" id="hr-{{ column }}" required>
          </div>
          {% endfor %}
          <input type="hidden" name="card" value="hr">
          <button type="submit" class="btn btn-primary">Submit</button>
        </form>
      </div>
      <div class="card-footer">
        <p>Risultato: <b>{{ hr_pred }}</b></p>
      </div>
    </div>
    <div class="card m-auto">
      <div class="card-body">
        <h5 class="card-title">Previsione potenza</h5>
        <form method="POST" action="">
          {% for column in X_watt.columns %}
          <div class="mb-3">
            <label for="watt-{{ column }}" class="form-label">{{ column }}</label>
            <input name="{{ column }}" class="form-control" id="watt-{{ column }}" required>
          </div>
          {% endfor %}
          <input type="hidden" name="card" value="watt">
          <button type="submit" class="btn btn-primary">Submit</button>
        </form>
      </div>
      <div class="card-footer">
        <p>Risultato: <b>{{ w_pred }}</b></p>
      </div>
    </div>
  </div>
</body>

</html>
