In [11]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.utils import resample
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler

In [15]:
# Load the dataset again
data = pd.read_csv('mental_health_screen_time_dataset.csv')

data['Mental_Health_Issues'] = ((data['Stress_Level'] >= 7) | (data['Well_Being_Score'] <= 4)).astype(int)

# Define features and target
features = [
    'Daily_Screen_Time',
    'App_Social_Media_Time',
    'App_Work_Time',
    'App_Entertainment_Time',
    'Phone_Unlocks',
    'Sleep_Duration',
    'Mood_Rating'
]
target = 'Mental_Health_Issues'

# Address class imbalance using oversampling
data_majority = data[data['Mental_Health_Issues'] == 1]
data_minority = data[data['Mental_Health_Issues'] == 0]

data_minority_upsampled = resample(
    data_minority,
    replace=True,
    n_samples=len(data_majority),
    random_state=42
)

data_balanced = pd.concat([data_majority, data_minority_upsampled])

In [25]:
# Splitting balanced data into features and target
X_balanced = data_balanced[features]
y_balanced = data_balanced[target]

# Feature scaling
scaler = StandardScaler()
X_balanced_scaled = scaler.fit_transform(X_balanced)

# Split the balanced dataset
X_train_balanced, X_test_balanced, y_train_balanced, y_test_balanced = train_test_split(
    X_balanced_scaled, y_balanced, test_size=0.2, random_state=42
)

# Train a Random Forest classifier
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X_train_balanced, y_train_balanced)

# Evaluate on the original test set (scaled)
X_test = data[features]
y_test = data[target]
X_test_scaled = scaler.transform(X_test)
y_pred_balanced = rf_model.predict(X_test_scaled)

# Calculate accuracy and generate classification report
accuracy_balanced = accuracy_score(y_test, y_pred_balanced)
report_balanced = classification_report(y_test, y_pred_balanced)

'              precision    recall  f1-score   support\n\n           0       0.92      0.79      0.85      1836\n           1       0.89      0.96      0.92      3164\n\n    accuracy                           0.90      5000\n   macro avg       0.90      0.88      0.89      5000\nweighted avg       0.90      0.90      0.90      5000\n'

In [27]:
accuracy_balanced

0.8982

In [29]:
report_balanced

'              precision    recall  f1-score   support\n\n           0       0.92      0.79      0.85      1836\n           1       0.89      0.96      0.92      3164\n\n    accuracy                           0.90      5000\n   macro avg       0.90      0.88      0.89      5000\nweighted avg       0.90      0.90      0.90      5000\n'