In [2]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [9]:
import pandas as pd

# Load dataset
df = pd.read_csv("/content/drive/MyDrive/Psychological health /Psychological health dataset")

# Show first few rows
df.head()

Unnamed: 0,Timestamp,Choose your gender,Age,What is your course?,Your current year of Study,What is your CGPA?,Marital status,Do you have Depression?,Do you have Anxiety?,Do you have Panic attack?,Did you seek any specialist for a treatment?
0,8/7/2020 12:02,Female,18.0,Engineering,year 1,3.00 - 3.49,No,Yes,No,Yes,No
1,8/7/2020 12:04,Male,21.0,Islamic education,year 2,3.00 - 3.49,No,No,Yes,No,No
2,8/7/2020 12:05,Male,19.0,BIT,Year 1,3.00 - 3.49,No,Yes,Yes,Yes,No
3,8/7/2020 12:06,Female,22.0,Laws,year 3,3.00 - 3.49,Yes,Yes,No,No,No
4,8/7/2020 12:13,Male,23.0,Mathemathics,year 4,3.00 - 3.49,No,No,No,No,No


In [10]:
# Drop Timestamp (not useful)

df.drop(columns=["Timestamp"], inplace=True)

# Clean column names (remove spaces, make lowercase, etc.)

df.columns = df.columns.str.strip().str.lower().str.replace(' ', '_')
df.head()

Unnamed: 0,choose_your_gender,age,what_is_your_course?,your_current_year_of_study,what_is_your_cgpa?,marital_status,do_you_have_depression?,do_you_have_anxiety?,do_you_have_panic_attack?,did_you_seek_any_specialist_for_a_treatment?
0,Female,18.0,Engineering,year 1,3.00 - 3.49,No,Yes,No,Yes,No
1,Male,21.0,Islamic education,year 2,3.00 - 3.49,No,No,Yes,No,No
2,Male,19.0,BIT,Year 1,3.00 - 3.49,No,Yes,Yes,Yes,No
3,Female,22.0,Laws,year 3,3.00 - 3.49,Yes,Yes,No,No,No
4,Male,23.0,Mathemathics,year 4,3.00 - 3.49,No,No,No,No,No


In [11]:
#  Mental Health Label: If student has Depression, Anxiety, or Panic attack → "Has Issue"

df['has_issue'] = df[[
'do_you_have_depression?',
'do_you_have_anxiety?',
'do_you_have_panic_attack?'
]].apply(lambda row: 'Has Issue' if 'Yes' in row.values else 'No Issue', axis=1)

# 🎓 CGPA Label: Map CGPA ranges to performance classes

cgpa_map = {
'0 - 1.99': 'Low',
'2.00 - 2.49': 'Low',
'2.50 - 2.99': 'Average',
'3.00 - 3.49': 'Average',
'3.50 - 4.00': 'High',
'4': 'High',
'4.50': 'High',
'4.78': 'High'
}
df['cgpa_class'] = df['what_is_your_cgpa?'].map(cgpa_map)

# Drop original mental health symptom columns and CGPA range (we now have labels)

df.drop(columns=[
'do_you_have_depression?',
'do_you_have_anxiety?',
'do_you_have_panic_attack?',
'what_is_your_cgpa?'
], inplace=True)

# See cleaned dataset

df.head()


Unnamed: 0,choose_your_gender,age,what_is_your_course?,your_current_year_of_study,marital_status,did_you_seek_any_specialist_for_a_treatment?,has_issue,cgpa_class
0,Female,18.0,Engineering,year 1,No,No,Has Issue,Average
1,Male,21.0,Islamic education,year 2,No,No,Has Issue,Average
2,Male,19.0,BIT,Year 1,No,No,Has Issue,Average
3,Female,22.0,Laws,year 3,Yes,No,Has Issue,Average
4,Male,23.0,Mathemathics,year 4,No,No,No Issue,Average


In [13]:
#  Mental Health Model Dataset

df_mental = df.copy()
X_mental = df_mental.drop(columns=['has_issue', 'cgpa_class'])
y_mental = df_mental['has_issue']

# 🎓 CGPA Model Dataset

df_cgpa = df.copy()
X_cgpa = df_cgpa.drop(columns=['cgpa_class'])
y_cgpa = df_cgpa['cgpa_class']

# Display sample rows

print("Mental Health Dataset:")
display(pd.concat([X_mental, y_mental], axis=1).head())

print("\nCGPA Class Dataset:")
display(pd.concat([X_cgpa, y_cgpa], axis=1).head())

Mental Health Dataset:


Unnamed: 0,choose_your_gender,age,what_is_your_course?,your_current_year_of_study,marital_status,did_you_seek_any_specialist_for_a_treatment?,has_issue
0,Female,18.0,Engineering,year 1,No,No,Has Issue
1,Male,21.0,Islamic education,year 2,No,No,Has Issue
2,Male,19.0,BIT,Year 1,No,No,Has Issue
3,Female,22.0,Laws,year 3,Yes,No,Has Issue
4,Male,23.0,Mathemathics,year 4,No,No,No Issue



CGPA Class Dataset:


Unnamed: 0,choose_your_gender,age,what_is_your_course?,your_current_year_of_study,marital_status,did_you_seek_any_specialist_for_a_treatment?,has_issue,cgpa_class
0,Female,18.0,Engineering,year 1,No,No,Has Issue,Average
1,Male,21.0,Islamic education,year 2,No,No,Has Issue,Average
2,Male,19.0,BIT,Year 1,No,No,Has Issue,Average
3,Female,22.0,Laws,year 3,Yes,No,Has Issue,Average
4,Male,23.0,Mathemathics,year 4,No,No,No Issue,Average


In [16]:
from sklearn.preprocessing import OrdinalEncoder, LabelEncoder

# ------------------------------

# # Encoding for Mental Health Model

# ------------------------------

mental_encoder = OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1)
X_mental_encoded = pd.DataFrame(
mental_encoder.fit_transform(X_mental),
columns=X_mental.columns
)

label_encoder_mental = LabelEncoder()
y_mental_encoded = label_encoder_mental.fit_transform(y_mental)

# ------------------------------

# Encoding for CGPA Model

# ------------------------------

cgpa_encoder = OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1)
X_cgpa_encoded = pd.DataFrame(
cgpa_encoder.fit_transform(X_cgpa),
columns=X_cgpa.columns
)

label_encoder_cgpa = LabelEncoder()
y_cgpa_encoded = label_encoder_cgpa.fit_transform(y_cgpa)

print("✅ Encoding done for both models.")

# ------------------------------

# # Check and Fill Missing Values

# ------------------------------

print("🔍 Missing values in X_mental_encoded:\n", X_mental_encoded.isnull().sum())
print("🔍 Missing values in X_cgpa_encoded:\n", X_cgpa_encoded.isnull().sum())

# Use .loc to avoid pandas chained assignment warning

for col in X_mental_encoded.columns:
    if X_mental_encoded[col].isnull().sum() > 0:
        mode_val = X_mental_encoded[col].mode()[0]
        X_mental_encoded.loc[:, col] = X_mental_encoded[col].fillna(mode_val)

for col in X_cgpa_encoded.columns:
    if X_cgpa_encoded[col].isnull().sum() > 0:
        mode_val = X_cgpa_encoded[col].mode()[0]
        X_cgpa_encoded.loc[:, col] = X_cgpa_encoded[col].fillna(mode_val)

✅ Encoding done for both models.
🔍 Missing values in X_mental_encoded:
 choose_your_gender                              0
age                                             1
what_is_your_course?                            0
your_current_year_of_study                      0
marital_status                                  0
did_you_seek_any_specialist_for_a_treatment?    0
dtype: int64
🔍 Missing values in X_cgpa_encoded:
 choose_your_gender                              0
age                                             1
what_is_your_course?                            0
your_current_year_of_study                      0
marital_status                                  0
did_you_seek_any_specialist_for_a_treatment?    0
has_issue                                       0
dtype: int64


In [18]:
from imblearn.over_sampling import SMOTE
from collections import Counter
import numpy as np

# 1. Check class distribution

original_counts = Counter(y_cgpa_encoded)
print("🎯 Original CGPA class distribution:", original_counts)

# 2. Filter out classes with less than 2 samples

valid_classes = [cls for cls, count in original_counts.items() if count >= 2]
mask = np.isin(y_cgpa_encoded, valid_classes)

X_cgpa_filtered = X_cgpa_encoded[mask]
y_cgpa_filtered = y_cgpa_encoded[mask]

# 3. Apply SMOTE

smote_cgpa = SMOTE(random_state=42, k_neighbors=1)
X_cgpa_sm, y_cgpa_sm = smote_cgpa.fit_resample(X_cgpa_filtered, y_cgpa_filtered)

# 4. Show balanced class distribution

print("✅ 🎓 Balanced CGPA classes after filtering:", Counter(y_cgpa_sm))

🎯 Original CGPA class distribution: Counter({np.int64(0): 47, np.int64(1): 47, np.int64(2): 6, np.int64(3): 1})
✅ 🎓 Balanced CGPA classes after filtering: Counter({np.int64(0): 47, np.int64(1): 47, np.int64(2): 47})


In [21]:
from sklearn.model_selection import train_test_split

# --- Split Mental Health Dataset ---

X_mental_train, X_mental_test, y_mental_train, y_mental_test = train_test_split(
X_mental_encoded, y_mental_encoded, test_size=0.2, random_state=42, stratify=y_mental_encoded
)

# --- Split CGPA Dataset ---

X_cgpa_train, X_cgpa_test, y_cgpa_train, y_cgpa_test = train_test_split(
X_cgpa_sm, y_cgpa_sm, test_size=0.2, random_state=42, stratify=y_cgpa_sm
)

In [24]:
import xgboost as xgb
from sklearn.metrics import accuracy_score, classification_report

# Train Mental Health Model

model_mental = xgb.XGBClassifier(
random_state=42,
use_label_encoder=False,
eval_metric='logloss'
)
model_mental.fit(X_mental_train, y_mental_train)

# Predict and Evaluate

y_mental_pred = model_mental.predict(X_mental_test)
mental_acc = accuracy_score(y_mental_test, y_mental_pred)

print(f"🧠 Mental Health Accuracy: {round(mental_acc * 100, 2)}%")
print("📋 Mental Health Classification Report:")
print(classification_report(y_mental_test, y_mental_pred))


🧠 Mental Health Accuracy: 52.38%
📋 Mental Health Classification Report:
              precision    recall  f1-score   support

           0       0.64      0.54      0.58        13
           1       0.40      0.50      0.44         8

    accuracy                           0.52        21
   macro avg       0.52      0.52      0.51        21
weighted avg       0.55      0.52      0.53        21



Parameters: { "use_label_encoder" } are not used.



In [25]:
# Train CGPA Model

model_cgpa = xgb.XGBClassifier(
random_state=42,
use_label_encoder=False,
eval_metric='mlogloss'
)
model_cgpa.fit(X_cgpa_train, y_cgpa_train)

# Predict and Evaluate

y_cgpa_pred = model_cgpa.predict(X_cgpa_test)
cgpa_acc = accuracy_score(y_cgpa_test, y_cgpa_pred)

print(f"🎓 CGPA Prediction Accuracy: {round(cgpa_acc * 100, 2)}%")
print("📋 CGPA Classification Report:")
print(classification_report(y_cgpa_test, y_cgpa_pred))

🎓 CGPA Prediction Accuracy: 51.72%
📋 CGPA Classification Report:
              precision    recall  f1-score   support

           0       0.30      0.33      0.32         9
           1       0.38      0.30      0.33        10
           2       0.82      0.90      0.86        10

    accuracy                           0.52        29
   macro avg       0.50      0.51      0.50        29
weighted avg       0.50      0.52      0.51        29



## Tuning for Mental Health Model

In [26]:
from sklearn.model_selection import RandomizedSearchCV
import xgboost as xgb

# Define parameter grid for Mental Health model
param_grid_mental = {
    'n_estimators': [50, 100, 150],
    'max_depth': [3, 4, 5, 6],
    'learning_rate': [0.01, 0.05, 0.1, 0.2],
    'subsample': [0.6, 0.8, 1.0],
    'colsample_bytree': [0.6, 0.8, 1.0]
}

# Initialize base model
xgb_mental = xgb.XGBClassifier(
    random_state=42,
    use_label_encoder=False,
    eval_metric='logloss'
)

# Randomized search
search_mental = RandomizedSearchCV(
    xgb_mental,
    param_distributions=param_grid_mental,
    n_iter=20,
    cv=3,
    scoring='accuracy',
    verbose=1,
    n_jobs=-1
)

# Fit on full SMOTE-balanced dataset
search_mental.fit(X_mental_encoded, y_mental_encoded)

# Best model
best_model_mental = search_mental.best_estimator_
print("✅ Best Parameters (Mental Health):", search_mental.best_params_)


Fitting 3 folds for each of 20 candidates, totalling 60 fits
✅ Best Parameters (Mental Health): {'subsample': 1.0, 'n_estimators': 150, 'max_depth': 4, 'learning_rate': 0.01, 'colsample_bytree': 0.6}


Parameters: { "use_label_encoder" } are not used.



# Tuning for CGPA Model

In [29]:
# Define parameter grid for CGPA model
param_grid_cgpa = {
    'n_estimators': [50, 100, 150],
    'max_depth': [3, 4, 5, 6],
    'learning_rate': [0.01, 0.05, 0.1, 0.2],
    'subsample': [0.6, 0.8, 1.0],
    'colsample_bytree': [0.6, 0.8, 1.0]
}

# Initialize base model
xgb_cgpa = xgb.XGBClassifier(
    random_state=42,
    use_label_encoder=False,
    eval_metric='mlogloss'
)

# Randomized search
search_cgpa = RandomizedSearchCV(
    xgb_cgpa,
    param_distributions=param_grid_cgpa,
    n_iter=20,
    cv=3,
    scoring='accuracy',
    verbose=1,
    n_jobs=-1
)

# Fit on SMOTE-balanced CGPA dataset
search_cgpa.fit(X_cgpa_sm, y_cgpa_sm)

# Best model
best_model_cgpa = search_cgpa.best_estimator_
print("✅ Best Parameters (CGPA):", search_cgpa.best_params_)


Fitting 3 folds for each of 20 candidates, totalling 60 fits
✅ Best Parameters (CGPA): {'subsample': 0.6, 'n_estimators': 50, 'max_depth': 3, 'learning_rate': 0.1, 'colsample_bytree': 0.8}


Parameters: { "use_label_encoder" } are not used.



# Evaluate the Tuned Models

In [30]:
from sklearn.metrics import accuracy_score, classification_report

# MENTAL HEALTH
y_mental_test_pred = best_model_mental.predict(X_mental_test)
print("🧠 Tuned Mental Health Accuracy:", round(accuracy_score(y_mental_test, y_mental_test_pred) * 100, 2), "%")
print(classification_report(y_mental_test, y_mental_test_pred))

# CGPA
y_cgpa_test_pred = best_model_cgpa.predict(X_cgpa_test)
print("🎓 Tuned CGPA Accuracy:", round(accuracy_score(y_cgpa_test, y_cgpa_test_pred) * 100, 2), "%")
print(classification_report(y_cgpa_test, y_cgpa_test_pred))


🧠 Tuned Mental Health Accuracy: 71.43 %
              precision    recall  f1-score   support

           0       0.68      1.00      0.81        13
           1       1.00      0.25      0.40         8

    accuracy                           0.71        21
   macro avg       0.84      0.62      0.61        21
weighted avg       0.80      0.71      0.66        21

🎓 Tuned CGPA Accuracy: 86.21 %
              precision    recall  f1-score   support

           0       0.80      0.89      0.84         9
           1       0.80      0.80      0.80        10
           2       1.00      0.90      0.95        10

    accuracy                           0.86        29
   macro avg       0.87      0.86      0.86        29
weighted avg       0.87      0.86      0.86        29



# Create SHAP Explainers

In [31]:
import shap

# Create SHAP Explainers
explainer_mental = shap.Explainer(best_model_mental, X_mental_test)
shap_values_mental = explainer_mental(X_mental_test)

explainer_cgpa = shap.Explainer(best_model_cgpa, X_cgpa_test)
shap_values_cgpa = explainer_cgpa(X_cgpa_test)


# Mental Health Prediction Explanation Function

In [32]:
def explain_mental_health_prediction(index):
    print(f"\n🧠 Prediction Explanation for Student {index}:")

    row = X_mental_test.iloc[index:index+1]
    decoded_row = {
        col: mental_encoder.categories_[i][int(row.iloc[0, i])] if row.iloc[0, i] >= 0 else "Unknown"
        for i, col in enumerate(X_mental_test.columns)
    }

    actual = label_encoder_mental.inverse_transform([y_mental_test[index]])[0]
    predicted = label_encoder_mental.inverse_transform([best_model_mental.predict(row)[0]])[0]

    print(f"✅ Actual: {actual}")
    print(f"🤖 Predicted: {predicted}")

    shap_row = shap_values_mental[index].values
    top_features = abs(shap_row).argsort()[::-1][:5]

    print("\n📌 Top Factors Influencing This Prediction:")
    for i in top_features:
        feat = X_mental_test.columns[i]
        val = decoded_row[feat]
        direction = "increased" if shap_row[i] > 0 else "decreased"
        print(f"- {feat} = {val} → {direction} the likelihood of mental health issue")

    shap.plots.bar(shap_values_mental[index], max_display=5)


# CGPA Class Prediction Explanation Function

In [33]:
def explain_cgpa_prediction(index):
    print(f"\n🎓 Prediction Explanation for Student {index}:")

    row = X_cgpa_test.iloc[index:index+1]
    decoded_row = {
        col: cgpa_encoder.categories_[i][int(row.iloc[0, i])] if row.iloc[0, i] >= 0 else "Unknown"
        for i, col in enumerate(X_cgpa_test.columns)
    }

    actual = label_encoder_cgpa.inverse_transform([y_cgpa_test[index]])[0]
    predicted = label_encoder_cgpa.inverse_transform([best_model_cgpa.predict(row)[0]])[0]

    print(f"✅ Actual: {actual}")
    print(f"🤖 Predicted: {predicted}")

    shap_row = shap_values_cgpa[index].values
    top_features = abs(shap_row).argsort()[::-1][:5]

    print("\n📌 Top Factors Influencing This CGPA Prediction:")
    for i in top_features:
        feat = X_cgpa_test.columns[i]
        val = decoded_row[feat]
        direction = "increased" if shap_row[i] > 0 else "decreased"
        print(f"- {feat} = {val} → {direction} the likelihood of predicted CGPA class")

    shap.plots.bar(shap_values_cgpa[index], max_display=5)


In [35]:
import shap
import numpy as np
import pandas as pd
import xgboost as xgb
from sklearn.preprocessing import OrdinalEncoder

# ✅ These should already be defined in your notebook:
# best_model_mental, best_model_cgpa
# mental_encoder, cgpa_encoder
# label_encoder_mental, label_encoder_cgpa

mental_input_features = ['choose_your_gender', 'age', 'what_is_your_course?',
                         'your_current_year_of_study', 'marital_status',
                         'did_you_seek_any_specialist_for_a_treatment?']

# CGPA predicted, not input, but required for encoding consistency
cgpa_input_features = ['choose_your_gender', 'age', 'what_is_your_course?',
                         'your_current_year_of_study', 'marital_status',
                         'did_you_seek_any_specialist_for_a_treatment?', 'has_issue']


# 🧾 Collect input
def get_user_input():
    print("\n📋 Please fill in the following information:\n")
    name = input("👤 Name: ")
    gender = input("⚧ Gender (Male/Female): ")
    age = int(input("🎂 Age (e.g., 20): "))
    course = input("📚 Course of Study: ")
    year = input("📅 Year (e.g., year 1, year 2): ")
    marital = input("💍 Marital Status (Single/Married): ")
    treatment = input("🩺 Sought Treatment? (Yes/No): ")
    actual_cgpa = float(input("📊 What is your actual CGPA? (e.g., 3.5): "))
    depression = input("😔 Do you experience depression? (Yes/No): ")
    anxiety = input("😟 Do you experience anxiety? (Yes/No): ")
    panic = input("😨 Do you experience panic attacks? (Yes/No): ")

    # Determine 'has_issue' based on user input for mental health
    has_issue_val = 'Has Issue' if depression.lower() == 'yes' or anxiety.lower() == 'yes' or panic.lower() == 'yes' else 'No Issue'


    return {
        "choose_your_gender": gender,
        "age": age,
        "what_is_your_course?": course,
        "your_current_year_of_study": year,
        "marital_status": marital,
        "did_you_seek_any_specialist_for_a_treatment?": treatment,
        "actual_cgpa": actual_cgpa,
        "name": name,
        "depression": depression,
        "anxiety": anxiety,
        "panic": panic,
        "has_issue": has_issue_val # Include 'has_issue' in the dictionary
    }

# 🤖 Predict & explain results
def predict_and_explain(user_input):
    name = user_input['name']

    # Mental health prediction
    df_mental = pd.DataFrame([{k: user_input[k] for k in mental_input_features}])
    df_mental_encoded = pd.DataFrame(mental_encoder.transform(df_mental), columns=mental_input_features)
    mental_pred = best_model_mental.predict(df_mental_encoded)
    mental_label = label_encoder_mental.inverse_transform(mental_pred)[0]

    explainer_mental = shap.Explainer(best_model_mental, df_mental_encoded)
    shap_values_mental = explainer_mental(df_mental_encoded)
    top_mental = shap_values_mental.values[0]
    mental_names = df_mental.columns

    # CGPA prediction
    # Use cgpa_input_features which now includes 'has_issue'
    df_cgpa = pd.DataFrame([{k: user_input[k] for k in cgpa_input_features}])
    df_cgpa_encoded = pd.DataFrame(cgpa_encoder.transform(df_cgpa), columns=cgpa_input_features)
    cgpa_pred = best_model_cgpa.predict(df_cgpa_encoded)
    cgpa_label = label_encoder_cgpa.inverse_transform(cgpa_pred)[0]

    explainer_cgpa = shap.Explainer(best_model_cgpa, df_cgpa_encoded)
    shap_values_cgpa = explainer_cgpa(df_cgpa_encoded)
    predicted_class_idx = cgpa_pred[0]
    shap_values_for_class = shap_values_cgpa.values[0][:, predicted_class_idx]
    cgpa_names = df_cgpa.columns

    # 🧠 Display results
    print(f"\n🧠 Mental Health Prediction for {name}: {mental_label}")
    if mental_label == "Healthy":
        print("✅ You seem to be doing okay mentally. Keep it up! 😊")
    else:
        print("⚠️ It looks like you're going through a tough time. You're not alone 💛")

    print(f"\n🎓 Predicted CGPA Class: {cgpa_label}")
    print(f"📊 Self-reported CGPA: {user_input['actual_cgpa']}")

    # SHAP Explanation: Mental
    print("\n📌 Why this Mental Health prediction was made:")
    for i in np.argsort(np.abs(top_mental))[::-1][:3]:
        feature = mental_names[i]
        val = df_mental.iloc[0][feature]
        direction = "helped reduce" if top_mental[i] < 0 else "may have contributed to"
        print(f"- Your answer '{val}' for '{feature}' {direction} mental health issues.")

    # SHAP Explanation: CGPA
    print("\n📘 Why this CGPA prediction was made:")
    for i in np.argsort(np.abs(shap_values_for_class))[::-1][:3]:
        feature = cgpa_names[i]
        val = df_cgpa.iloc[0][feature]
        direction = "helped improve" if shap_values_for_class[i] > 0 else "may have reduced"
        print(f"- Your answer '{val}' for '{feature}' {direction} your CGPA class prediction.")

    # Mental Health Question Summary
    print("\n📝 Your Responses to Key Mental Health Questions:")
    print(f"- 😔 Depression: {user_input['depression']}")
    print(f"- 😟 Anxiety: {user_input['anxiety']}")
    print(f"- 😨 Panic Attacks: {user_input['panic']}")

    # Final Advice
    print("\n💡 Suggestions & Support:")
    if mental_label == "Healthy":
        print("👍 Keep taking care of yourself. Stay consistent with good habits like proper sleep, study breaks, and talking to friends.")
    else:
        print("🧘‍♀️ Consider speaking with a counselor or trusted adult. You're strong, and getting help is a sign of strength, not weakness.")
        print("📍 You may also benefit from support groups, campus resources, or even mindfulness apps for stress relief.")

# ✅ Run interface
user_data = get_user_input()
predict_and_explain(user_data)


📋 Please fill in the following information:

👤 Name: bee
⚧ Gender (Male/Female): female
🎂 Age (e.g., 20): 21
📚 Course of Study: computer
📅 Year (e.g., year 1, year 2): year 1
💍 Marital Status (Single/Married): single
🩺 Sought Treatment? (Yes/No): yes
📊 What is your actual CGPA? (e.g., 3.5): 4
😔 Do you experience depression? (Yes/No): no
😟 Do you experience anxiety? (Yes/No): no
😨 Do you experience panic attacks? (Yes/No): no

🧠 Mental Health Prediction for bee: **Has Issue**
⚠️ It looks like you're going through a tough time. You're not alone 💛

🎓 Predicted CGPA Class: **High**
📊 Self-reported CGPA: 4.0

📌 Why this Mental Health prediction was made:
- Your answer 'yes' for 'did_you_seek_any_specialist_for_a_treatment?' may have contributed to mental health issues.
- Your answer 'single' for 'marital_status' may have contributed to mental health issues.
- Your answer 'year 1' for 'your_current_year_of_study' may have contributed to mental health issues.

📘 Why this CGPA prediction was 

# Save All Models and Encoders

In [37]:
import joblib

# Save Mental Health model and encoders
joblib.dump(best_model_mental, "mental_model.pkl")
joblib.dump(mental_encoder, "mental_encoder.pkl")
joblib.dump(label_encoder_mental, "label_encoder_mental.pkl")

# Save CGPA model and encoders
joblib.dump(best_model_cgpa, "cgpa_model.pkl")
joblib.dump(cgpa_encoder, "cgpa_encoder.pkl")
joblib.dump(label_encoder_cgpa, "label_encoder_cgpa.pkl")

print("✅ All models and encoders saved successfully!")


✅ All models and encoders saved successfully!


Download All Files to Your Computer

In [38]:
from google.colab import files

# Download Mental Health files
files.download("mental_model.pkl")
files.download("mental_encoder.pkl")
files.download("label_encoder_mental.pkl")

# Download CGPA files
files.download("cgpa_model.pkl")
files.download("cgpa_encoder.pkl")
files.download("label_encoder_cgpa.pkl")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [40]:
# import streamlit as st
# import shap
# import numpy as np
# import pandas as pd
# import joblib

# # Load all saved models and encoders
# best_model_mental = joblib.load("best_model_mental.pkl")
# best_model_cgpa = joblib.load("best_model_cgpa.pkl")
# mental_encoder = joblib.load("mental_encoder.pkl")
# cgpa_encoder = joblib.load("cgpa_encoder.pkl")
# label_encoder_mental = joblib.load("label_encoder_mental.pkl")
# label_encoder_cgpa = joblib.load("label_encoder_cgpa.pkl")

# mental_input_features = ['choose_your_gender', 'age', 'what_is_your_course?',
#                          'your_current_year_of_study', 'marital_status',
#                          'did_you_seek_any_specialist_for_a_treatment?']

# cgpa_input_features = mental_input_features + ['has_issue']

# # Streamlit app
# st.set_page_config(page_title="Mental Health & CGPA Predictor", layout="centered")
# st.title("🎓 Student Wellbeing & CGPA Predictor")

# with st.form("user_form"):
#     name = st.text_input("👤 Name")
#     gender = st.selectbox("⚧ Gender", ["Male", "Female"])
#     age = st.number_input("🎂 Age", min_value=15, max_value=50, step=1)
#     course = st.text_input("📚 Course of Study")
#     year = st.selectbox("📅 Year of Study", ["year 1", "year 2", "year 3", "year 4", "year 5"])
#     marital = st.selectbox("💍 Marital Status", ["Single", "Married"])
#     treatment = st.selectbox("🩺 Sought Treatment?", ["Yes", "No"])
#     actual_cgpa = st.number_input("📊 What is your actual CGPA?", min_value=0.0, max_value=5.0, step=0.01)

#     depression = st.selectbox("😔 Do you experience depression?", ["Yes", "No"])
#     anxiety = st.selectbox("😟 Do you experience anxiety?", ["Yes", "No"])
#     panic = st.selectbox("😨 Do you experience panic attacks?", ["Yes", "No"])

#     submit = st.form_submit_button("Predict Now")

# if submit:
#     has_issue_val = 'Has Issue' if "yes" in [depression.lower(), anxiety.lower(), panic.lower()] else 'No Issue'

#     user_input = {
#         "choose_your_gender": gender,
#         "age": age,
#         "what_is_your_course?": course,
#         "your_current_year_of_study": year,
#         "marital_status": marital,
#         "did_you_seek_any_specialist_for_a_treatment?": treatment,
#         "actual_cgpa": actual_cgpa,
#         "name": name,
#         "depression": depression,
#         "anxiety": anxiety,
#         "panic": panic,
#         "has_issue": has_issue_val
#     }

#     df_mental = pd.DataFrame([{k: user_input[k] for k in mental_input_features}])
#     df_mental_encoded = pd.DataFrame(mental_encoder.transform(df_mental), columns=mental_input_features)
#     mental_pred = best_model_mental.predict(df_mental_encoded)
#     mental_label = label_encoder_mental.inverse_transform(mental_pred)[0]

#     df_cgpa = pd.DataFrame([{k: user_input[k] for k in cgpa_input_features}])
#     df_cgpa_encoded = pd.DataFrame(cgpa_encoder.transform(df_cgpa), columns=cgpa_input_features)
#     cgpa_pred = best_model_cgpa.predict(df_cgpa_encoded)
#     cgpa_label = label_encoder_cgpa.inverse_transform(cgpa_pred)[0]

#     st.markdown("### 🧠 Mental Health Prediction:")
#     st.success(f"{mental_label}")
#     if mental_label == "Healthy":
#         st.info("✅ You seem to be doing okay mentally. Keep it up! 😊")
#     else:
#         st.warning("⚠️ It looks like you're going through a tough time. You're not alone 💛")

#     st.markdown("### 🎓 CGPA Prediction:")
#     st.success(f"Predicted CGPA Class: {cgpa_label}")
#     st.info(f"Self-reported CGPA: {actual_cgpa}")

#     # SHAP Explainability
#     explainer_mental = shap.Explainer(best_model_mental, df_mental_encoded)
#     shap_values_mental = explainer_mental(df_mental_encoded)
#     top_mental = shap_values_mental.values[0]

#     st.markdown("### 📌 Why this Mental Health prediction was made:")
#     for i in np.argsort(np.abs(top_mental))[::-1][:3]:
#         feat = mental_input_features[i]
#         val = df_mental.iloc[0][feat]
#         direction = "helped reduce" if top_mental[i] < 0 else "may have contributed to"
#         st.write(f"- '{val}' for '{feat}' {direction} mental health issues.")

#     explainer_cgpa = shap.Explainer(best_model_cgpa, df_cgpa_encoded)
#     shap_values_cgpa = explainer_cgpa(df_cgpa_encoded)
#     predicted_class_idx = cgpa_pred[0]
#     shap_values_for_class = shap_values_cgpa.values[0][:, predicted_class_idx]

#     st.markdown("### 📘 Why this CGPA prediction was made:")
#     for i in np.argsort(np.abs(shap_values_for_class))[::-1][:3]:
#         feat = cgpa_input_features[i]
#         val = df_cgpa.iloc[0][feat]
#         direction = "helped improve" if shap_values_for_class[i] > 0 else "may have reduced"
#         st.write(f"- '{val}' for '{feat}' {direction} your CGPA class.")

#     st.markdown("### 📝 Your Mental Health Responses:")
#     st.write(f"- 😔 Depression: {depression}")
#     st.write(f"- 😟 Anxiety: {anxiety}")
#     st.write(f"- 😨 Panic Attacks: {panic}")

#     st.markdown("### 💡 Suggestions & Support:")
#     if mental_label == "Healthy":
#         st.info("👍 Keep taking care of yourself. Stick to good habits like sleep, relaxation, and social connections.")
#     else:
#         st.info("🧘 Consider talking to a counselor or trusted adult. You're not alone. There are apps, groups, and people ready to help.")
