In [5]:
import pandas as pd
import numpy as np
import joblib
import warnings
# Silence every warning category for the rest of the kernel session.
# NOTE(review): this also hides library deprecation notices (e.g. renamed
# scikit-learn arguments); consider restricting it to specific categories.
warnings.filterwarnings("ignore")

from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Destination file for the fitted pipeline written at the end of the cell.
MODEL_OUT = "stress_rf_model.pkl"

# Read the raw table; the path is relative to the notebook's working directory.
df = pd.read_csv("stress_dataset.csv")

# Target: the "Stress Level" column, cast to strings.
# Features: every remaining column of the frame.
y = df["Stress Level"].astype(str)
X = df.drop(columns=["Stress Level"])

# Partition the feature columns by dtype so that each group can be routed to
# its own preprocessing branch.
cat_cols = list(X.select_dtypes(exclude=[np.number]).columns)
num_cols = list(X.select_dtypes(include=[np.number]).columns)

# Numeric branch: fill missing values with the column median, then standardise.
numeric_transformer = Pipeline([
    ("imputer", SimpleImputer(strategy="median")),
    ("scaler", StandardScaler())
])

# scikit-learn 1.2 renamed OneHotEncoder's `sparse` argument to `sparse_output`
# and 1.4 removed the old spelling, so `sparse=False` raises TypeError on
# current releases. Try the new name first and fall back to the legacy one so
# the notebook runs on both old and new installations.
try:
    onehot_encoder = OneHotEncoder(handle_unknown="ignore", sparse_output=False)
except TypeError:
    onehot_encoder = OneHotEncoder(handle_unknown="ignore", sparse=False)

# Categorical branch: fill missing values with the most frequent category, then
# one-hot encode. Categories unseen during fit are ignored at predict time
# instead of raising.
categorical_transformer = Pipeline([
    ("imputer", SimpleImputer(strategy="most_frequent")),
    ("ohe", onehot_encoder)
])

# Route each column group to its branch; columns in neither list are dropped.
preprocessor = ColumnTransformer(
    transformers=[
        ("num", numeric_transformer, num_cols),
        ("cat", categorical_transformer, cat_cols)
    ],
    remainder="drop"
)

# Random forest with a fixed seed for reproducible fits; class_weight="balanced"
# reweights classes inversely to their frequency in the training labels.
model = RandomForestClassifier(
    n_estimators=300,
    random_state=42,
    class_weight="balanced"
)

# Preprocessing and estimator are chained so that fitting, prediction and the
# persisted artifact all apply the same transformations.
pipeline = Pipeline([
    ("preprocessor", preprocessor),
    ("model", model)
])

# Hold out 20% of the rows for evaluation. The split is seeded and stratified
# on the target so both parts keep the same class proportions.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Fit the preprocessing steps and the classifier on the training part only.
pipeline.fit(X_train, y_train)

# Score the held-out rows.
y_pred = pipeline.predict(X_test)

# NOTE(review): the recorded run reports 1.0000 accuracy on the hold-out set.
# A perfect score may indicate duplicated rows or a feature that encodes the
# target; confirm before relying on this number.
acc = accuracy_score(y_test, y_pred)
print(f"Accuracy: {acc:.4f}")

print("\nClassification Report:")
report = classification_report(y_test, y_pred, digits=4)
print(report)

print("Confusion Matrix:")
matrix = confusion_matrix(y_test, y_pred)
print(matrix)

# Persist the whole fitted pipeline (preprocessing + model) as one artifact.
joblib.dump(pipeline, MODEL_OUT)
print(f"\nSaved model to {MODEL_OUT}")


Accuracy: 1.0000

Classification Report:
              precision    recall  f1-score   support

           0     1.0000    1.0000    1.0000       101
           1     1.0000    1.0000    1.0000       158
           2     1.0000    1.0000    1.0000       142

    accuracy                         1.0000       401
   macro avg     1.0000    1.0000    1.0000       401
weighted avg     1.0000    1.0000    1.0000       401

Confusion Matrix:
[[101   0   0]
 [  0 158   0]
 [  0   0 142]]

Saved model to stress_rf_model.pkl


In [3]:
# Show the column labels of the loaded frame. `df` is created in the training
# cell, so that cell must run first.
# NOTE(review): the saved execution counts are out of order (this cell is
# In [3], the training cell In [5]); re-run with Restart & Run All.
df.columns

Index(['Humidity', 'Temperature', 'Step count', 'Stress Level', 'Sleep_Hours',
       'Water_Intake_Liters', 'Screen_Time_Hours', 'Mood_Level'],
      dtype='object')