In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier
import matplotlib.pyplot as plt
import seaborn as sns
from fastapi import FastAPI
from pydantic import BaseModel

In [2]:
# Load the processed features
df = pd.read_csv('processed_features.csv')

# Drop rows with missing stress levels
df = df.dropna(subset=['stress_level'])

# Select important features based on domain knowledge
feature_columns = [
    # EDA features
    'EDA_mean', 'EDA_std', 'EDA_kurtosis',
    # HR features
    'HR_mean', 'HR_std', 'HR_rms',
    # TEMP features
    'TEMP_mean', 'TEMP_std', 'TEMP_rms',
    # BVP features
    'BVP_mean', 'BVP_std', 'BVP_rms',
    # ACC features
    'ACC_mag_mean', 'ACC_mag_std',
    # IBI features
    'IBI_mean', 'IBI_std', 'IBI_rmssd'
]



In [3]:
# Handle missing values using SimpleImputer
imputer = SimpleImputer(strategy='mean')
X = pd.DataFrame(imputer.fit_transform(df[feature_columns]), columns=feature_columns)
y = df['stress_level']

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)


In [4]:
model = RandomForestClassifier(
       max_depth=10,
       min_samples_split=5,
       min_samples_leaf=2,
       n_estimators=100,
       random_state=42
   )
model.fit(X_train, y_train)
traindata_predicted = model.predict(X_train)

In [5]:
# Calculate training data accuracy score
train_accuracy = accuracy_score(y_train, traindata_predicted)
print(f"Training Accuracy: {train_accuracy:.4f}")


Training Accuracy: 0.8799


In [6]:
testdata_predicted=model.predict(X_test)
test_accuracy = accuracy_score(y_test,testdata_predicted)
print(f"testing Accuracy: {test_accuracy:.4f}")

testing Accuracy: 0.8652


In [7]:
cv_scores = cross_val_score(model, X_train, y_train, cv=5)
print(f"Cross-validation scores: {cv_scores}")
print(f"Average CV score: {cv_scores.mean():.4f} (+/- {cv_scores.std() * 2:.4f})")

Cross-validation scores: [0.85783238 0.86309785 0.86222027 0.85739359 0.86529179]
Average CV score: 0.8612 (+/- 0.0061)


In [8]:
# Save the trained model using joblib
import joblib

# Save the model
model_filename = 'stress_classifier_model.joblib'
joblib.dump(model, model_filename)

# Save the feature columns names for future reference
import json
feature_columns_file = 'feature_columns.json'
with open(feature_columns_file, 'w') as f:
    json.dump(list(feature_columns), f)

# Save the imputer for preprocessing new data
imputer_filename = 'imputer.joblib'
joblib.dump(imputer, imputer_filename)

print(f"Model saved as: {model_filename}")
print(f"Feature columns saved as: {feature_columns_file}")
print(f"Imputer saved as: {imputer_filename}")


Model saved as: stress_classifier_model.joblib
Feature columns saved as: feature_columns.json
Imputer saved as: imputer.joblib
