1. Load and Preprocess Data

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
import matplotlib.pyplot as plt

# --- Data loading ---------------------------------------------------------
# Read the spreadsheet and make sure 'Date' is a real datetime column before
# anything groups on it or uses the .dt accessor.
df = pd.read_excel("StressLevels_Weather_Data_ WithExams_Last (1).xlsx")
df = df.assign(Date=pd.to_datetime(df['Date']))

# --- Feature engineering --------------------------------------------------
# is_weekend            : True for Saturday/Sunday (dayofweek 5 or 6).
# StressLevelBefore     : the preceding row's 'Stress Level' inside the same
#                         'Date' group, in the order rows appear in the file.
# StressLevelDifference : current 'Stress Level' minus StressLevelBefore.
# The first row of every 'Date' group has no predecessor, so its lag is NaN;
# the trailing dropna() discards those rows along with any row that has a
# missing value in ANY column of the sheet.
# NOTE(review): if the sheet holds a single row per date, every lag is NaN and
# nothing survives dropna() - confirm the data has several rows per date.
# NOTE(review): is_weekend is created here but is not part of X in this cell.
df = df.assign(
    is_weekend=lambda frame: frame['Date'].dt.dayofweek >= 5,
    StressLevelBefore=lambda frame: frame.groupby('Date')['Stress Level'].shift(1),
    StressLevelDifference=lambda frame: frame['Stress Level'] - frame['StressLevelBefore'],
).dropna()

# --- Design matrix and target ---------------------------------------------
# One-hot encode the weather category; drop_first=True omits the first level
# so the remaining dummy columns are not perfectly collinear.
weather_encoded = pd.get_dummies(
    df['Weather Condition'],
    prefix='Weather',
    drop_first=True,
)

numeric_features = df[['Temperature (°C)', 'StressLevelBefore']]
X = pd.concat([numeric_features, weather_encoded], axis="columns")
y = df['StressLevelDifference']

# --- Hold-out split -------------------------------------------------------
# 80 % train / 20 % test, with a fixed seed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

2. Train Models

In [None]:
def _plot_actual_vs_predicted(label, actual, predicted, diagonal):
    """Show a scatter of predicted vs. actual values with a dashed y = x line.

    label     -- model name, used for the scatter legend entry and the title
    actual    -- true target values (x axis)
    predicted -- model predictions for the same rows (y axis)
    diagonal  -- [low, high] endpoints used for both axes of the reference line
    """
    plt.figure(figsize=(10, 6))
    plt.scatter(actual, predicted, alpha=0.6, label=label)
    plt.plot(diagonal, diagonal, color='red', linestyle='--', label="Ideal Fit")
    plt.title(f"{label}: Actual vs. Predicted")
    plt.xlabel("Actual Stress Level Difference")
    plt.ylabel("Predicted Stress Level Difference")
    plt.legend()
    plt.grid(True)
    plt.show()


# Candidate regressors, both seeded so repeated runs give the same fit.
models = {
    "Decision Tree": DecisionTreeRegressor(random_state=42),
    "Random Forest": RandomForestRegressor(n_estimators=100, random_state=42),
}

# The reference line only depends on the test targets, so its endpoints are
# computed once instead of once per model.
ideal_fit_range = [y_test.min(), y_test.max()]

# Per-model test metrics, keyed by model name.
results = {}
for name, model in models.items():
    # Fit on the training split, then score on the held-out split.
    y_pred = model.fit(X_train, y_train).predict(X_test)

    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    results[name] = {"MSE": mse, "R²": r2}
    print(f"{name} Results:\n - Mean Squared Error: {mse:.2f}\n - R² Score: {r2:.2f}")

    # Plot predictions
    _plot_actual_vs_predicted(name, y_test, y_pred, ideal_fit_range)

3. Model Evaluation - Identify Best Model