In [None]:
# setup e test librerie
%pip install numpy
%pip install pandas
%pip install matplotlib
%pip install scikit-learn
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sklearn
from sklearn.linear_model import ElasticNet, Lasso, Ridge

# Loading Datasets

In [None]:
# Read every Fitabase CSV export used by this notebook in one pass.
# The target names on the left line up one-to-one, in order, with the paths below.
(
    dailyCalories,
    dailyIntensities,
    dailySteps,
    dailyDistances,
    dailySleep,
    heartrateSeconds,
    weightLog,
) = (
    pd.read_csv(csv_path)
    for csv_path in (
        "Fitabase Data 4.12.16-5.12.16/dailyCalories_merged.csv",
        "Fitabase Data 4.12.16-5.12.16/dailyIntensities_merged.csv",
        "Fitabase Data 4.12.16-5.12.16/dailySteps_merged.csv",
        "Fitabase Data 4.12.16-5.12.16/dailyDistances_merged.csv",
        "Fitabase Data 4.12.16-5.12.16/sleepDay_merged.csv",
        "Fitabase Data 4.12.16-5.12.16/heartrate_seconds_merged.csv",
        "Fitabase Data 4.12.16-5.12.16/weightLogInfo_merged.csv",
    )
)

In [None]:
from functools import reduce 

# Merge the per-day activity tables into a single frame.
# 'Day' is parsed to datetime in every table first, so the join keys share a dtype.
activity_dfs = [
    frame.assign(Day=pd.to_datetime(frame['Day']))
    for frame in (dailyCalories, dailyIntensities, dailySteps, dailyDistances)
]

# Outer join on (Id, Day): a row is kept even when only some tables contain it.
dailyActivities = reduce(
    lambda merged, table: merged.merge(table, on=['Id', 'Day'], how='outer'),
    activity_dfs,
)

# Data refinement: index the rows by Id and make sure 'Day' is a datetime column.
dailyActivities = dailyActivities.set_index(['Id'])
dailyActivities['Day'] = pd.to_datetime(dailyActivities['Day'])
dailyActivities.head(5)

## Data Visualization

In [None]:
# Scatter of total steps against calories, one point per row of dailyActivities.
dailyActivities.plot(kind='scatter', x='StepTotal', y='Calories')

In [None]:
# Scatter of minutes asleep against the 'Day' column of dailySleep.
# NOTE(review): 'Day' is not converted with pd.to_datetime for this frame in the
# visible cells - confirm the axis ordering is the intended one.
dailySleep.plot(kind='scatter', x='Day', y='TotalMinutesAsleep')

In [None]:
# Scatter of time in bed against minutes actually asleep.
dailySleep.plot(kind='scatter', x='TotalTimeInBed', y='TotalMinutesAsleep')

### Kcal consumption prediction

In [None]:
# MSE e R^2 sono incluse in scikit-learn
from sklearn.metrics import mean_squared_error, r2_score

# Relative error metric, defined here alongside the scikit-learn metrics.
def relative_error(y_true, y_pred):
    """Return the mean absolute relative error of ``y_pred`` w.r.t. ``y_true``.

    Each residual ``y_true - y_pred`` is divided by the matching ``y_true``
    value; the absolute values of those ratios are then averaged.
    No guard is applied for zero entries in ``y_true``.
    """
    scaled_residuals = (y_true - y_pred) / y_true
    return np.mean(np.abs(scaled_residuals))

def print_eval(X, y, model):
    """Print three quality metrics for ``model`` evaluated on ``(X, y)``.

    The model's predictions for ``X`` are compared with ``y`` using the mean
    squared error, the relative error (see ``relative_error``) and the R^2
    coefficient. Nothing is returned; the values are only printed.
    """
    predictions = model.predict(X)
    r2 = r2_score(y, predictions)
    re = relative_error(y, predictions)
    mse = mean_squared_error(y, predictions)
    print(f"   Mean squared error: {mse:.5}")
    print(f"       Relative error: {re:.5%}")
    print(f"R-squared coefficient: {r2:.5}")

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split

# Features: every column except the date, the target and two distance columns.
X = dailyActivities.drop(
    columns=["Day", "Calories", "TrackerDistance", "LoggedActivitiesDistance"]
)
# Target: calories.
y = dailyActivities["Calories"]

# Hold out 20% of the rows for validation; the seed keeps the split reproducible.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Baseline: plain linear regression on the raw features.
model = LinearRegression().fit(X_train, y_train)
print_eval(X_val, y_val, model)

In [None]:
# Degree-1 polynomial expansion followed by linear regression.
model_b = Pipeline(steps=[
    ("poly",   PolynomialFeatures(degree=1, include_bias=False)),
    ("linreg", LinearRegression()),
]).fit(X_train, y_train)
print_eval(X_val, y_val, model_b)

In [None]:
# Same as model_b, with the features standardised before the regression.
model_c = Pipeline(steps=[
    ("poly",   PolynomialFeatures(degree=1, include_bias=False)),
    ("scale",  StandardScaler()),
    ("linreg", LinearRegression()),
]).fit(X_train, y_train)
print_eval(X_val, y_val, model_c)

In [None]:
# Ridge regression (alpha=0.1) on standardised degree-1 polynomial features.
rrm = Pipeline(steps=[
    ("poly", PolynomialFeatures(degree=1, include_bias=False)),
    ("scale", StandardScaler()),
    ("ridge", Ridge(alpha=0.1)),
]).fit(X_train, y_train)
print_eval(X_val, y_val, rrm)

In [None]:
def test_regression(degree, alpha):
    """Fit a polynomial ridge pipeline and return its validation score.

    Args:
        degree: degree passed to ``PolynomialFeatures``.
        alpha: regularisation strength passed to ``Ridge``.

    Returns:
        The value of ``Pipeline.score`` on the notebook-level ``X_val``/``y_val``
        after fitting on ``X_train``/``y_train``.
    """
    steps = [
        ("poly", PolynomialFeatures(degree=degree, include_bias=False)),
        ("scale", StandardScaler()),
        ("ridge", Ridge(alpha=alpha)),
    ]
    fitted = Pipeline(steps).fit(X_train, y_train)
    return fitted.score(X_val, y_val)

In [None]:
# Polynomial degrees 1..9 to sweep, and the validation score of each with alpha=0.01.
res_degree = np.arange(1, 10)
res_low_reg = np.array(
    [test_regression(degree, alpha=0.01) for degree in res_degree]
)

In [None]:
# Same sweep over res_degree with a much stronger regularisation (alpha=10).
res_high_reg = np.array(
    [test_regression(degree, alpha=10) for degree in res_degree]
)

In [None]:
# Validation score as a function of polynomial degree, one curve per alpha.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(res_degree, res_low_reg, "ro-")
ax.plot(res_degree, res_high_reg, "bo-")
ax.grid()
ax.set_xlabel("Grado regr. polinomiale")
ax.set_ylabel("Score R²")
# Add a legend; entries follow the order in which the curves were drawn.
ax.legend(["α = 0.01", "α = 10"], loc="lower right");

In [None]:
# Fit three variants on the same training data to compare their coefficients.

# 1) plain linear regression on the raw features
simple = LinearRegression().fit(X_train, y_train)

# 2) linear regression on standardised features
scaled = Pipeline(steps=[
    ("scale", StandardScaler()),
    ("lr", LinearRegression()),
]).fit(X_train, y_train)

# 3) ridge regression (alpha=0.1) on standardised features
ridge = Pipeline(steps=[
    ("scale", StandardScaler()),
    ("ridge", Ridge(alpha=0.1)),
]).fit(X_train, y_train)

# One row per feature, one column per model.
coef_by_model = {
    "linear": simple.coef_,
    "ridge": ridge.named_steps["ridge"].coef_,
    "scaled": scaled.named_steps["lr"].coef_,
}
pd.DataFrame(coef_by_model, index=X_train.columns)

In [None]:
# Lasso regression (alpha=1) on standardised features.
lasso = Pipeline(steps=[
    ("scale", StandardScaler()),
    ("regr", Lasso(alpha=1)),
]).fit(X_train, y_train)
print_eval(X_val, y_val, lasso)
# Fitted coefficients labelled by feature name.
pd.Series(lasso.named_steps["regr"].coef_, index=X_train.columns)

In [None]:
# Lasso regression with a weaker penalty (alpha=0.2) on standardised features.
lasso2 = Pipeline(steps=[
    ("scale", StandardScaler()),
    ("regr", Lasso(alpha=0.2)),
]).fit(X_train, y_train)
print_eval(X_val, y_val, lasso2)
# Fitted coefficients labelled by feature name.
pd.Series(lasso2.named_steps["regr"].coef_, index=X_train.columns)

In [None]:
# Elastic net (alpha=0.2, l1_ratio=0.1) on standardised features.
elasticNet = Pipeline(steps=[
    ("scale",  StandardScaler()),
    ("regr", ElasticNet(alpha=0.2, l1_ratio=0.1)),
]).fit(X_train, y_train)
print_eval(X_val, y_val, elasticNet)

In [None]:
def elastic_net_with_alphas(alpha_l2, alpha_l1):
    """Build an ``ElasticNet`` from two separate penalty weights.

    The estimator is created with ``alpha = alpha_l1 + alpha_l2`` and
    ``l1_ratio = alpha_l1 / (alpha_l1 + alpha_l2)``. Note the argument order:
    the L2 weight comes first. The sum must be non-zero, otherwise the
    division fails.
    """
    total = alpha_l1 + alpha_l2
    return ElasticNet(alpha=total, l1_ratio=alpha_l1 / total)

In [None]:
# Elastic net built from separate weights: alpha_l2=1, alpha_l1=0.1.
elasticNetAlphas = Pipeline(steps=[
    ("scale", StandardScaler()),
    ("regr", elastic_net_with_alphas(1, 0.1)),
]).fit(X_train, y_train)
print_eval(X_val, y_val, elasticNetAlphas)

In [None]:
def multivariate_scaled_elasticnet(degree):
    """Return an unfitted pipeline: polynomial features -> scaling -> elastic net.

    Args:
        degree: degree passed to ``PolynomialFeatures`` (no bias column).

    The elastic net uses fixed hyper-parameters ``alpha=0.5`` and
    ``l1_ratio=0.2``.
    """
    steps = [
        ("poly", PolynomialFeatures(degree=degree, include_bias=False)),
        ("scale", StandardScaler()),
        ("regr", ElasticNet(alpha=0.5, l1_ratio=0.2)),
    ]
    return Pipeline(steps)

In [None]:
# Elastic net on degree-2 polynomial features.
# The builder defined in this notebook is `multivariate_scaled_elasticnet`;
# the previous call used a name that is never defined and raised NameError.
mven = multivariate_scaled_elasticnet(2)
mven.fit(X_train, y_train)
print_eval(X_val, y_val, mven)

In [None]:
# Elastic net on degree-4 polynomial features.
# The builder defined in this notebook is `multivariate_scaled_elasticnet`;
# the previous call used a name that is never defined and raised NameError.
mven = multivariate_scaled_elasticnet(4)
mven.fit(X_train, y_train)
print_eval(X_val, y_val, mven)

In [None]:
# Elastic net on degree-8 polynomial features.
# The builder defined in this notebook is `multivariate_scaled_elasticnet`;
# the previous call used a name that is never defined and raised NameError.
mven = multivariate_scaled_elasticnet(8)
mven.fit(X_train, y_train)
print_eval(X_val, y_val, mven)

In [None]:
from sklearn.kernel_ridge import KernelRidge
model = Pipeline([
    ("scale", StandardScaler()),
    ("regr",  KernelRidge(alpha=20, kernel="poly", degree=5))
])
%time model.fit(X_train, y_train)
print_eval(X_val, y_val, model)

In [None]:
rbf_kernel = Pipeline([
    ("scale", StandardScaler()),
    ("regr",  KernelRidge(alpha=20, kernel="rbf", gamma=0.01))
])
%time model.fit(X_train, y_train)
print_eval(X_val, y_val, model)

In [None]:
# KFold was used below without ever being imported (NameError on a fresh kernel).
from sklearn.model_selection import KFold, cross_validate

# 5-fold cross-validation with shuffling; the seed keeps the folds reproducible.
cv_folds = KFold(5, shuffle=True, random_state=42)
# cross_validate fits clones of the pipeline on each fold of the full (X, y)
# and also reports the training scores.
cv_result = cross_validate(rbf_kernel, X, y, cv=cv_folds, return_train_score=True)

In [None]:
# Summary statistics of the per-fold cross-validation results.
pd.DataFrame.from_dict(cv_result).describe()

# Sleep Analysis


In [None]:
# Minimum share of the time in bed that must be spent asleep for a night to be
# labelled "Buono". The earlier comment said 50%, but the rule has always used 0.8.
SLEEP_EFFICIENCY_THRESHOLD = 0.8

# Default every row to "Buono" ...
dailySleep["SleepClassification"] = "Buono"
# ... then mark as "Cattivo" the rows where TotalMinutesAsleep is below
# 80% of TotalTimeInBed.
poor_sleep = (
    dailySleep["TotalMinutesAsleep"]
    < SLEEP_EFFICIENCY_THRESHOLD * dailySleep["TotalTimeInBed"]
)
dailySleep.loc[poor_sleep, "SleepClassification"] = "Cattivo"

In [None]:
from sklearn.model_selection import train_test_split
# Select the input columns and the class label.
# Note: this rebinds X, y, X_train and y_train from the regression section above.
#X = daily_data[["TotalSleepRecords", "TotalMinutesAsleep", "TotalTimeInBed"]]
X = dailySleep.loc[:, ["TotalMinutesAsleep", "TotalTimeInBed"]]
y = dailySleep['SleepClassification']
# Split into a training set (2/3) and a test set (1/3) with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=1/3, random_state=42
)


In [None]:
# Colour used for each sleep class in the plots below.
diagnosis_color_map = {"Buono": "blue", "Cattivo": "red"}
# Training points in feature space, coloured by their class label.
X_train.plot.scatter(
    x="TotalMinutesAsleep",
    y="TotalTimeInBed",
    c=y_train.map(diagnosis_color_map),
    figsize=(8, 6),
);

In [None]:
from sklearn.preprocessing import StandardScaler
# Learn the scaling statistics on the training split only, then apply the same
# transformation to both splits.
scaler = StandardScaler().fit(X_train)
X2dn_train = scaler.transform(X_train)
X2dn_val = scaler.transform(X_test)

In [None]:
from sklearn.linear_model import LogisticRegression
# Logistic regression on the two standardised sleep features.
# Note: this rebinds the notebook-level name `model`.
model = LogisticRegression(solver="saga", random_state=42)
model.fit(X2dn_train, y_train)

# Evaluate the predicted probability on a 100x100 grid in standardised space.
grid_axis = np.linspace(-3.5, 4.5, 100)
mx1, mx2 = np.meshgrid(grid_axis, grid_axis)
# Column 1 of predict_proba is the second entry of model.classes_
# (presumably "Cattivo", since class labels are sorted - confirm via model.classes_).
my = model.predict_proba(np.c_[mx1.ravel(), mx2.ravel()])[:, 1].reshape(mx1.shape)

plt.figure(figsize=(9, 6))
prob_surface = plt.contourf(mx1, mx2, my, cmap="summer")
plt.scatter(*X2dn_train.T, c=y_train.map(diagnosis_color_map))
# Pass the contour set explicitly: a bare plt.colorbar() uses the *current*
# mappable, which after plt.scatter is the scatter collection rather than the
# probability surface, so the bar would not describe the filled contours.
plt.colorbar(prob_surface);

In [None]:
# Score of the logistic regression on the standardised held-out split.
model.score(X=X2dn_val, y=y_test)