In [None]:
# STEP 1: Install required packages
!pip install -q pandas scikit-learn joblib

# STEP 2: Upload your dataset CSV
# files.upload() opens the Colab file picker; the returned mapping is keyed by
# file name and holds each file's raw bytes.
from google.colab import files
uploaded = files.upload()

# STEP 3: Load dataset
import io

import pandas as pd

# Fail with an actionable message instead of a bare IndexError when the
# picker was cancelled or no file was selected.
if not uploaded:
    raise ValueError("No file was uploaded. Re-run this cell and choose a CSV file.")

# Only the first uploaded file is used; any additional files are ignored.
filename = next(iter(uploaded))

# Parse the bytes that were just uploaded instead of re-opening a file by name:
# Colab renames duplicates on disk (e.g. "data (2).csv"), so a path lookup could
# miss the new file or pick up a stale copy from an earlier upload.
df = pd.read_csv(io.BytesIO(uploaded[filename]))

print("✅ Dataset loaded. Shape:", df.shape)
print("\n🔍 Columns:\n", df.columns.tolist())

# STEP 4: Define behavioral features and targets
# Input columns for both models. This must stay a list because it is later
# concatenated with the target names to build the dropna subset.
behavioral_features = [
    'sleep_hours', 'screen_time', 'activity_level',
    'social_interaction', 'stress_level',
]

# Names of the two regression target columns (one model is trained per target).
target_phq, target_gad = 'phq_total', 'gad_total'

# STEP 5: Map string values to numeric if necessary
# Ordinal encoding for the five-level text scale.
string_to_num_map = {
    'Very Low': 1,
    'Low': 2,
    'Moderate': 3,
    'High': 4,
    'Very High': 5
}

# Lookup keyed by normalised label so that "low", " Low " and "LOW" all encode
# the same way instead of silently turning into NaN.
_label_lookup = {label.strip().lower(): value for label, value in string_to_num_map.items()}

for col in behavioral_features:
    # Test for "not numeric" rather than dtype == 'object': text columns can
    # also arrive with a dedicated string or categorical dtype.
    if pd.api.types.is_numeric_dtype(df[col]):
        continue

    original = df[col]
    text = original.astype(str).str.strip()
    encoded = text.str.lower().map(_label_lookup)

    # A numeric column read as text (e.g. because of one stray entry) must not
    # be wiped out by the label map: keep every value that parses as a number.
    encoded = encoded.fillna(pd.to_numeric(text, errors="coerce"))

    # Anything still missing that was present in the input will be removed by
    # the later dropna step, so report it here instead of losing rows silently.
    unmapped = original[encoded.isna() & original.notna()]
    if not unmapped.empty:
        examples = sorted(unmapped.astype(str).unique())[:10]
        print(f"⚠️ Column '{col}': {len(unmapped)} value(s) could not be converted to a number and were set to NaN, e.g. {examples}")

    df[col] = encoded

# STEP 6: Drop missing values in selected columns
# Only the feature and target columns are checked; missing values in any other
# column do not cause a row to be dropped.
required_columns = behavioral_features + [target_phq, target_gad]
df_clean = df.dropna(subset=required_columns)

# Report how much data was discarded so a silent loss of most rows (for example
# from unmapped labels) is visible before training.
rows_dropped = len(df) - len(df_clean)
print(f"🧹 Dropped {rows_dropped} of {len(df)} rows with missing values in the model columns.")

# Stop here with a clear message rather than letting the train/test split fail
# later on an empty frame.
if df_clean.empty:
    raise ValueError(
        "No rows left after dropping missing values in: " + ", ".join(required_columns)
    )

# STEP 7: Train ML models
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
import numpy as np
import joblib


def _train_and_score(features, target, test_size=0.2, random_state=42):
    """Fit a random-forest regressor on a train split and score it on the rest.

    Both targets go through exactly the same steps, so the pipeline lives here
    once instead of being copy-pasted per target.

    Parameters
    ----------
    features : feature matrix passed to ``train_test_split``.
    target : target values aligned with ``features``.
    test_size : fraction of rows held out for evaluation.
    random_state : seed used for both the split and the forest.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test, model, predictions, rmse)`` where
        ``predictions`` are the model outputs for ``X_test`` and ``rmse`` is the
        square root of the mean squared error on the held-out rows.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=test_size, random_state=random_state
    )
    model = RandomForestRegressor(random_state=random_state)
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    rmse = np.sqrt(mean_squared_error(y_test, predictions))
    return X_train, X_test, y_train, y_test, model, predictions, rmse


X = df_clean[behavioral_features]
y_phq = df_clean[target_phq]
y_gad = df_clean[target_gad]

# Train PHQ model
# Every intermediate is unpacked under its original name so that later code
# relying on e.g. X_test_phq or phq_predictions keeps working.
(X_train_phq, X_test_phq, y_train_phq, y_test_phq,
 phq_model, phq_predictions, phq_rmse) = _train_and_score(X, y_phq)

# Train GAD model (same test_size and seed as the PHQ model)
(X_train_gad, X_test_gad, y_train_gad, y_test_gad,
 gad_model, gad_predictions, gad_rmse) = _train_and_score(X, y_gad)

# Save models
# Each fitted model is paired with the file name it is serialised to, so the
# names are written once and shared by the save and download steps.
model_artifacts = {
    "phq_model_behavioral.pkl": phq_model,
    "gad_model_behavioral.pkl": gad_model,
}
for artifact_path, fitted_model in model_artifacts.items():
    joblib.dump(fitted_model, artifact_path)

# Show results
print(f"\n✅ PHQ Model RMSE: {phq_rmse:.2f}")
print(f"✅ GAD Model RMSE: {gad_rmse:.2f}")
print("💾 Models saved successfully!")

# STEP 8: Download trained models
# Hand each saved file to the Colab download helper, PHQ first, then GAD.
for artifact_path in model_artifacts:
    files.download(artifact_path)


Saving mental_health_dataset_15000_noisy_corrected.csv to mental_health_dataset_15000_noisy_corrected (2).csv
✅ Dataset loaded. Shape: (15000, 31)

🔍 Columns:
 ['PHQ1', 'PHQ2', 'PHQ3', 'PHQ4', 'PHQ5', 'PHQ6', 'PHQ7', 'PHQ8', 'PHQ9', 'phq_total', 'GAD1', 'GAD2', 'GAD3', 'GAD4', 'GAD5', 'GAD6', 'GAD7', 'gad_total', 'CAGE1', 'CAGE2', 'CAGE3', 'CAGE4', 'cage_total', 'sleep_hours', 'screen_time', 'Depression_Level', 'Anxiety_Level', 'Alcohol_Risk', 'activity_level', 'social_interaction', 'stress_level']

✅ PHQ Model RMSE: 2.86
✅ GAD Model RMSE: 2.43
💾 Models saved successfully!


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>