In [1]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.utils import resample

In [2]:
# Generate synthetic data for a month (30 days)
days = 30
data = {
    "date": pd.date_range(start="2024-01-01", periods=days),
    "steps": np.random.randint(3000, 15000, days),
    "heart_rate": np.random.randint(60, 120, days),
    "sleep_hours": np.round(np.random.normal(7, 1.5, days), 2),
    "calories_consumed": np.random.randint(1800, 2800, days),
    "mood": np.random.choice(["happy", "neutral", "stressed"], days)
}
df = pd.DataFrame(data)

In [3]:
# One-hot encode mood
df = pd.get_dummies(df, columns=["mood"])

In [4]:
# Calculate a 7-day moving average for steps
df["steps_7d_avg"] = df["steps"].rolling(window=7).mean().fillna(df["steps"].mean())

In [5]:
# Create a binary target for "fatigue risk" based on heart rate and sleep hours
df["fatigue_risk"] = ((df["heart_rate"] > 100) & (df["sleep_hours"] < 6)).astype(int)

In [6]:
# Handle class imbalance by resampling
df_majority = df[df["fatigue_risk"] == 0]
df_minority = df[df["fatigue_risk"] == 1]

In [7]:
df_minority_upsampled = resample(df_minority, replace=True, n_samples=len(df_majority), random_state=42)
df_balanced = pd.concat([df_majority, df_minority_upsampled])

In [8]:
# Split data
X = df_balanced.drop(["date", "fatigue_risk"], axis=1)
y = df_balanced["fatigue_risk"]

In [9]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [10]:
# Grid search for hyperparameter tuning
param_grid = {
    "n_estimators": [100, 200, 300],
    "max_depth": [None, 10, 20, 30],
    "min_samples_split": [2, 5, 10]
}

In [11]:
grid_search = GridSearchCV(RandomForestClassifier(), param_grid, cv=5, scoring="accuracy")
grid_search.fit(X_train, y_train)
best_model = grid_search.best_estimator_

In [12]:
# Evaluate the model
y_pred = best_model.predict(X_test)
print("Best Parameters:", grid_search.best_params_)
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

Best Parameters: {'max_depth': None, 'min_samples_split': 2, 'n_estimators': 100}
Accuracy: 1.0
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         6
           1       1.00      1.00      1.00         5

    accuracy                           1.00        11
   macro avg       1.00      1.00      1.00        11
weighted avg       1.00      1.00      1.00        11

Confusion Matrix:
 [[6 0]
 [0 5]]


In [13]:
import pickle
with open("best_model.pkl", "wb") as f:
    pickle.dump(best_model, f)