In [None]:
# === Cell 1: Imports & setup ===
import warnings, numpy as np, pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
import joblib, json
warnings.filterwarnings("ignore")

# For UI later
import gradio as gr


In [None]:
# If using Google Colab: upload the processed dataset through the file picker.
import io

from google.colab import files

uploaded = files.upload()  # dict mapping each uploaded file's name to its bytes
if not uploaded:
    raise ValueError("No file uploaded; please upload the processed dataset CSV.")

# Read the bytes that were just uploaded instead of a hard-coded path.
# Colab renames duplicate uploads (e.g. "processed_dataset (2).csv"), so a fixed
# filename can silently load a stale copy left over from an earlier upload.
uploaded_name = next(iter(uploaded))
df = pd.read_csv(io.BytesIO(uploaded[uploaded_name]))
print(f"Loaded {uploaded_name}: {df.shape[0]} rows x {df.shape[1]} columns")

print(df.head())
df.info()  # prints its own report and returns None, so it is not wrapped in print()

Saving processed_dataset.csv to processed_dataset (2).csv
    Age  Gender                                         University  \
0  20.0     1.0           Independent University, Bangladesh (IUB)   
1  20.0     0.0           Independent University, Bangladesh (IUB)   
2  20.0     0.0  American International University Bangladesh (...   
3  20.0     0.0  American International University Bangladesh (...   
4  20.0     0.0                       North South University (NSU)   

                                     Department  Current_CGPA  \
0  Engineering - CS / CSE / CSC / Similar to CS          2.75   
1  Engineering - CS / CSE / CSC / Similar to CS          3.20   
2  Engineering - CS / CSE / CSC / Similar to CS          3.20   
3  Engineering - CS / CSE / CSC / Similar to CS          3.20   
4  Engineering - CS / CSE / CSC / Similar to CS          2.75   

   waiver_or_scholarship  PSS1  PSS2  PSS3  PSS4  ...  PHQ6  PHQ7  PHQ8  PHQ9  \
0                      0     3     4     3     2 

In [None]:
# === Cell 3: Basic cleaning, Academic_Year + Department handling ===

# Keep only fully populated rows and renumber the index from 0.
df = df.dropna().reset_index(drop=True)

# --- Academic_Year ---
# When the numeric column is absent, rebuild it from Academic_Year_* one-hot
# columns: 1..4 for the named years, 5 for "Other" or anything unrecognised.
if "Academic_Year" not in df.columns:
    ay_cols = [c for c in df.columns if c.startswith("Academic_Year_")]
    if ay_cols:
        mapping = dict(zip(
            [
                "First Year or Equivalent",
                "Second Year or Equivalent",
                "Third Year or Equivalent",
                "Fourth Year or Equivalent",
                "Other",
            ],
            range(1, 6),
        ))

        def infer_year(row):
            """Return the numeric year for the first active one-hot column of `row` (5 if none)."""
            active = next((c for c in ay_cols if row[c] in (1, True, 'TRUE')), None)
            if active is None:
                return 5
            return mapping.get(active.replace("Academic_Year_", ""), 5)

        df["Academic_Year"] = df.apply(infer_year, axis=1)


# --- Department ---
# One-hot encode the raw Department column only when no Dept_* columns exist yet;
# otherwise the list gathered here already reflects the current frame.
dept_onehot_cols = [c for c in df.columns if c.startswith("Dept_")]
if not dept_onehot_cols and "Department" in df.columns:
    dummies = pd.get_dummies(df["Department"], prefix="Dept")
    df = pd.concat([df.drop(columns=["Department"]), dummies], axis=1)
    dept_onehot_cols = list(dummies.columns)

print("Academic_Year dtype:", df["Academic_Year"].dtype)
print("Department one-hot columns:", dept_onehot_cols[:10], " ... (#:", len(dept_onehot_cols), ")")


Academic_Year dtype: int64
Department one-hot columns: ['Dept_Biological Sciences', 'Dept_Business and Entrepreneurship Studies', 'Dept_Engineering - CS / CSE / CSC / Similar to CS', 'Dept_Engineering - Civil Engineering / Similar to CE', 'Dept_Engineering - EEE/ ECE / Similar to EEE', 'Dept_Engineering - Mechanical Engineering / Similar to ME', 'Dept_Engineering - Other', 'Dept_Environmental and Life Sciences', 'Dept_Law and Human Rights', 'Dept_Liberal Arts and Social Sciences']  ... (#: 12 )


In [None]:
# === Cell 4: Mappings consistent with training ===

# Binary demographics: UI label -> numeric code
gender_map = dict(Male=1, Female=0)
waiver_map = dict(Yes=1, No=0)

# Academic year: the frame already holds numbers, but the UI sends strings
academic_year_map = {**{str(year): year for year in range(1, 5)}, "Other": 5}

# Age & CGPA buckets: bucket label -> single numeric stand-in for that bucket
age_map = dict(zip(["18-20", "21-23", "24-26", "27+"], [19, 22, 25, 28]))
cgpa_map = dict(zip(
    ["2.0-2.5", "2.51-3.0", "3.01-3.5", "3.51-4.0"],
    [2.25, 2.75, 3.25, 3.75],
))

# Frequency labels -> 0..3 item score, used when an instrument answer arrives as text
likert_map = {
    label: score
    for score, label in enumerate([
        "Not at all",
        "Several days",
        "More than half the days",
        "Nearly every day",
    ])
}


In [None]:
# === Cell 5: Build features and splits ===

# Demographic columns shared by all three targets, followed by the department one-hots
base_demo = ["Age", "Gender", "Current_CGPA", "waiver_or_scholarship", "Academic_Year"] + dept_onehot_cols

# Questionnaire item columns
pss_cols = ["PSS" + str(i) for i in range(1, 11)]     # 10 items
gad_cols = ["GAD" + str(i) for i in range(1, 8)]      # 7 items
phq_cols = ["PHQ" + str(i) for i in range(1, 10)]     # 9 items

# name -> (target column, feature columns); each target column must exist in df
targets = {
    "Stress": ("Stress Value", base_demo + pss_cols),
    "Anxiety": ("Anxiety Value", base_demo + gad_cols),
    "Depression": ("Depression Value", base_demo + phq_cols),
}


def _features_and_target(name):
    """Return (X, y) taken from df for one entry of `targets`."""
    target_col, feature_cols = targets[name]
    return df[feature_cols], df[target_col]


def _split(X, y):
    """80/20 train/test split with the fixed seed used throughout the notebook."""
    return train_test_split(X, y, test_size=0.2, random_state=42)


X_stress, y_stress = _features_and_target("Stress")
X_anxiety, y_anxiety = _features_and_target("Anxiety")
X_depression, y_depression = _features_and_target("Depression")

# Split
X_train_s, X_test_s, y_train_s, y_test_s = _split(X_stress, y_stress)
X_train_a, X_test_a, y_train_a, y_test_a = _split(X_anxiety, y_anxiety)
X_train_d, X_test_d, y_train_d, y_test_d = _split(X_depression, y_depression)

# One scaler per target, fitted on that target's training rows only
scaler_s = StandardScaler().fit(X_train_s)
scaler_a = StandardScaler().fit(X_train_a)
scaler_d = StandardScaler().fit(X_train_d)

Xs_train, Xs_test = scaler_s.transform(X_train_s), scaler_s.transform(X_test_s)
Xa_train, Xa_test = scaler_a.transform(X_train_a), scaler_a.transform(X_test_a)
Xd_train, Xd_test = scaler_d.transform(X_train_d), scaler_d.transform(X_test_d)

print(X_train_s.shape, X_train_a.shape, X_train_d.shape)


(1474, 27) (1474, 24) (1474, 26)


In [None]:
# === Cell 6: Train 5 models per target & compare ===

def train_and_compare(Xtr, Xte, ytr, yte, label):
    """Fit five regressors and return the one with the lowest test MAE.

    Each candidate is fitted on (Xtr, ytr) and scored on (Xte, yte) with R2,
    MAE, MSE and RMSE. The comparison table, sorted by MAE, is printed under a
    heading that includes `label` and shown with display().

    Returns:
        (best_model, comparison_frame): the fitted estimator from the first
        row of the sorted table, and the table itself.

    NOTE(review): the recorded outputs show LinearRegression reaching
    R2 = 1.0 on every target, which suggests each target is an exact linear
    function of its item columns — confirm this is intended. The winner is
    also chosen on the same split whose metrics are reported.
    """
    candidates = {
        "LinearRegression": LinearRegression(),
        "Ridge": Ridge(alpha=1.0, random_state=42),
        "KNN": KNeighborsRegressor(n_neighbors=5),
        "RandomForest": RandomForestRegressor(n_estimators=300, max_depth=None, random_state=42, n_jobs=-1),
        "GradientBoosting": GradientBoostingRegressor(random_state=42)
    }

    def _evaluate(name, estimator):
        # Fit in place (the dict keeps the fitted object) and collect one table row.
        estimator.fit(Xtr, ytr)
        predicted = estimator.predict(Xte)
        mse = mean_squared_error(yte, predicted)
        return [
            name,
            r2_score(yte, predicted),
            mean_absolute_error(yte, predicted),
            mse,
            np.sqrt(mse),
        ]

    rows = [_evaluate(name, estimator) for name, estimator in candidates.items()]
    comp = pd.DataFrame(rows, columns=["Model", "R2", "MAE", "MSE", "RMSE"]).sort_values(by="MAE")

    print(f"\n=== {label} — Model Comparison (sorted by MAE) ===")
    display(comp)

    # Lowest MAE sits in the first row after sorting
    best_name = comp.iloc[0]["Model"]
    print(f"Best {label} model:", best_name)
    return candidates[best_name], comp

model_stress, comp_stress = train_and_compare(Xs_train, Xs_test, y_train_s, y_test_s, "Stress")
model_anxiety, comp_anxiety = train_and_compare(Xa_train, Xa_test, y_train_a, y_test_a, "Anxiety")
model_depression, comp_depression = train_and_compare(Xd_train, Xd_test, y_train_d, y_test_d, "Depression")



=== Stress — Model Comparison (sorted by MAE) ===


Unnamed: 0,Model,R2,MAE,MSE,RMSE
0,LinearRegression,1.0,6.407401e-15,6.640114e-29,8.148689e-15
1,Ridge,1.0,0.001151847,2.219343e-06,0.001489746
4,GradientBoosting,0.987802,0.5575896,0.5531239,0.743723
3,RandomForest,0.967246,0.8687715,1.48518,1.21868
2,KNN,0.908337,1.482927,4.156314,2.038704


Best Stress model: LinearRegression

=== Anxiety — Model Comparison (sorted by MAE) ===


Unnamed: 0,Model,R2,MAE,MSE,RMSE
0,LinearRegression,1.0,1.282924e-15,4.042645e-30,2.010633e-15
1,Ridge,1.0,0.0007991239,9.213956e-07,0.0009598936
4,GradientBoosting,0.998043,0.1695135,0.06317347,0.2513433
3,RandomForest,0.990559,0.3358266,0.3047844,0.5520728
2,KNN,0.951505,0.9062331,1.565528,1.251211


Best Anxiety model: LinearRegression

=== Depression — Model Comparison (sorted by MAE) ===


Unnamed: 0,Model,R2,MAE,MSE,RMSE
0,LinearRegression,1.0,7.235405e-15,8.033554e-29,8.96301e-15
1,Ridge,1.0,0.0009167237,1.250185e-06,0.001118117
4,GradientBoosting,0.995112,0.3232717,0.2341537,0.4838943
3,RandomForest,0.984336,0.5757904,0.7504058,0.8662597
2,KNN,0.945473,1.182656,2.612249,1.616245


Best Depression model: LinearRegression


In [None]:
# === Cell 7: Save artifacts

# filename -> object; everything the UI cell reloads is written with joblib
artifacts = {
    "stress_scaler.pkl": scaler_s,
    "anxiety_scaler.pkl": scaler_a,
    "depression_scaler.pkl": scaler_d,
    "stress_model.pkl": model_stress,
    "anxiety_model.pkl": model_anxiety,
    "depression_model.pkl": model_depression,
    "stress_features.pkl": list(X_stress.columns),
    "anxiety_features.pkl": list(X_anxiety.columns),
    "depression_features.pkl": list(X_depression.columns),
    "dept_columns.pkl": dept_onehot_cols,
}
for _artifact_path, _artifact in artifacts.items():
    joblib.dump(_artifact, _artifact_path)

print("Saved: scalers, models, feature lists, dept columns.")


Saved: scalers, models, feature lists, dept columns.


In [None]:
def categorize_scores(stress, anxiety, depression):
    """Translate three raw totals into their severity labels.

    Each score is matched against ascending inclusive upper bounds; a score
    above every bound receives the top label of its scale.

    Returns:
        (stress_cat, anxiety_cat, depression_cat) as strings.
    """
    def _label(score, bands, top_label):
        # bands: (inclusive upper bound, label) pairs in ascending order
        return next((label for upper, label in bands if score <= upper), top_label)

    # Stress (PSS-10)
    stress_cat = _label(
        stress,
        [(13, "Low Stress"), (26, "Moderate Stress")],
        "High Stress",
    )

    # Anxiety (GAD-7)
    anxiety_cat = _label(
        anxiety,
        [(4, "Minimal Anxiety"), (9, "Mild Anxiety"), (14, "Moderate Anxiety")],
        "Severe Anxiety",
    )

    # Depression (PHQ-9)
    depression_cat = _label(
        depression,
        [
            (4, "Minimal Depression"),
            (9, "Mild Depression"),
            (14, "Moderate Depression"),
            (19, "Moderately Severe Depression"),
        ],
        "Severe Depression",
    )

    return stress_cat, anxiety_cat, depression_cat

def compute_actual_scores(user_input_dict):
    """Add up the item answers of each instrument.

    String answers are converted through likert_map; any other value is
    added as-is. Reads keys PSS1..PSS10, GAD1..GAD7 and PHQ1..PHQ9.

    Returns:
        (pss_total, gad_total, phq_total)
    """
    def _item_score(key):
        answer = user_input_dict[key]
        return likert_map[str(answer)] if isinstance(answer, str) else answer

    def _total(prefix, n_items):
        return sum(_item_score(f"{prefix}{i}") for i in range(1, n_items + 1))

    pss = _total("PSS", 10)
    gad = _total("GAD", 7)
    phq = _total("PHQ", 9)
    return pss, gad, phq



def predict_all_mcq(user_input):
    """
    Encode one respondent's MCQ answers, run the three fitted models, and
    return questionnaire-based category labels (UI-friendly).

    `user_input` is a dict holding the demographic fields (Gender,
    waiver_or_scholarship, Academic_Year, Age, Current_CGPA, Department) as the
    keys of the corresponding *_map dicts, plus every PSS/GAD/PHQ item. The
    caller's dict is not modified.

    The returned labels come from the summed item answers; the model
    predictions are computed but are not part of the return value.

    Returns:
        {"Stress": ..., "Anxiety": ..., "Depression": ...}
    """
    encoded = user_input.copy()

    # Demographic labels -> numeric codes (KeyError on an unknown label)
    demographic_lookups = (
        ("Gender", gender_map),
        ("waiver_or_scholarship", waiver_map),
        ("Academic_Year", academic_year_map),
        ("Age", age_map),
        ("Current_CGPA", cgpa_map),
    )
    for field, lookup in demographic_lookups:
        encoded[field] = lookup[encoded[field]]

    # Text answers -> item scores; numeric answers pass through untouched
    for item in pss_cols + gad_cols + phq_cols:
        answer = encoded[item]
        if isinstance(answer, str):
            encoded[item] = likert_map[answer]

    # Department one-hot row; an unknown department leaves every column at 0
    dept_row = pd.DataFrame(0, index=[0], columns=dept_onehot_cols)
    selected = f"Dept_{encoded['Department']}"
    if selected in dept_row.columns:
        dept_row.at[0, selected] = 1
    encoded.pop("Department")

    frame = pd.concat([pd.DataFrame([encoded]), dept_row], axis=1)

    # Align to each model's training columns, then scale
    references = (X_stress, X_anxiety, X_depression)
    aligned = [frame.reindex(columns=list(ref.columns), fill_value=0) for ref in references]
    scaled = [
        scaler.transform(part)
        for scaler, part in zip((scaler_s, scaler_a, scaler_d), aligned)
    ]

    # Predictions (not shown to user, but we could keep for logs)
    p_stress, p_anx, p_depr = [
        float(model.predict(features)[0])
        for model, features in zip((model_stress, model_anxiety, model_depression), scaled)
    ]

    # Labels are derived from the actual questionnaire totals
    actual_pss, actual_gad, actual_phq = compute_actual_scores(encoded)
    stress_cat, anxiety_cat, depression_cat = categorize_scores(actual_pss, actual_gad, actual_phq)

    return dict(Stress=stress_cat, Anxiety=anxiety_cat, Depression=depression_cat)


In [None]:
# === Cell 9: Gradio UI with MCQ and Chatbot ===
import gradio as gr
import pandas as pd, joblib, os
import random
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch

# Demographic dropdowns: choices are the keys of the mapping dicts, so whatever
# the UI sends can always be encoded by predict_and_compare.
age_dd = gr.Dropdown(choices=list(age_map.keys()), label="Age")
gender_dd = gr.Dropdown(choices=list(gender_map.keys()), label="Gender")
cgpa_dd = gr.Dropdown(choices=list(cgpa_map.keys()), label="Current CGPA")
waiver_dd = gr.Dropdown(choices=list(waiver_map.keys()), label="Waiver/Scholarship")
dept_choices = [c.replace("Dept_", "") for c in dept_onehot_cols]
dept_dd = gr.Dropdown(choices=dept_choices, label="Department")
acad_dd = gr.Dropdown(choices=list(academic_year_map.keys()), label="Academic Year")

# Answer scales offered by the radio groups.
# PSS-10 items run 0-4: the dataset preview contains a PSS answer of 4 and the
# 13/26 category cut-offs assume a 0-40 total, so offering only 0-3 made part of
# the training range unreachable from the UI.
# NOTE: the intro text in the Blocks layout describes a 0-3 scale; that wording
# fits GAD-7/PHQ-9 only and should be adjusted alongside this range.
# NOTE(review): the published PSS-10 reverse-scores its positively worded items
# (4, 5, 7, 8) while predict_and_compare sums raw answers — confirm this matches
# how "Stress Value" was built in the dataset.
PSS_CHOICES = ["0", "1", "2", "3", "4"]
GAD_PHQ_CHOICES = ["0", "1", "2", "3"]

# PSS-10 questions (0-4)
pss_questions = [
    "Been upset because of something that happened unexpectedly?",
    "Felt that you were unable to control the important things in your life?",
    "Felt nervous and 'stressed'?",
    "Felt confident about your ability to handle your personal problems?",
    "Felt that things were going your way?",
    "Found that you could not cope with all the things that you had to do?",
    "Been able to control irritations in your life?",
    "Felt that you were on top of things?",
    "Been angered because of things outside your control?",
    "Felt difficulties were piling up so high that you could not overcome them?"
]
pss_blocks = [gr.Radio(choices=PSS_CHOICES, label=q) for q in pss_questions]

# GAD-7 questions (0-3)
gad_questions = [
    "Feeling nervous, anxious or on edge?",
    "Not being able to stop or control worrying?",
    "Worrying too much about different things?",
    "Trouble relaxing?",
    "Being so restless that it is hard to sit still?",
    "Becoming easily annoyed or irritable?",
    "Feeling afraid as if something awful might happen?"
]
gad_blocks = [gr.Radio(choices=GAD_PHQ_CHOICES, label=q) for q in gad_questions]

# PHQ-9 questions (0-3)
phq_questions = [
    "Little interest or pleasure in doing things?",
    "Feeling down, depressed, or hopeless?",
    "Trouble falling or staying asleep, or sleeping too much?",
    "Feeling tired or having little energy?",
    "Poor appetite or overeating?",
    "Feeling bad about yourself — or that you are a failure or have let yourself or your family down?",
    "Trouble concentrating on things, such as reading or watching television?",
    "Moving or speaking so slowly that other people could have noticed? Or being fidgety or restless?",
    "Thoughts that you would be better off dead or of hurting yourself in some way?"
]
phq_blocks = [gr.Radio(choices=GAD_PHQ_CHOICES, label=q) for q in phq_questions]

# --- Utility functions to load artifacts (features, scalers, models) ---
def load_feature_list(name):
    """Return the feature-name list stored in '<name>_features.pkl'.

    Raises:
        FileNotFoundError: the pickle does not exist in the working directory.

    joblib files are pickles, so only artifacts written by this notebook
    should be loaded here.
    """
    path = f"{name}_features.pkl"
    if not os.path.exists(path):
        raise FileNotFoundError(f"{path} not found. Please make sure training artifacts were saved.")
    return list(joblib.load(path))

def load_model(name):
    """Return the fitted model for `name` ("stress", "anxiety" or "depression").

    The saved '<name>_model.pkl' is preferred. When that file is missing, the
    matching in-session global (model_stress / model_anxiety /
    model_depression) is returned if it exists.

    Raises:
        FileNotFoundError: neither the file nor an in-memory model is available.
    """
    path = f"{name}_model.pkl"
    if os.path.exists(path):
        return joblib.load(path)

    # fallback to the variable trained earlier in this session, if present
    session_models = (
        ("stress", "model_stress"),
        ("anxiety", "model_anxiety"),
        ("depression", "model_depression"),
    )
    for target, variable in session_models:
        if name == target and variable in globals():
            return globals()[variable]

    raise FileNotFoundError(f"{path} not found and {name} model not in memory.")

def load_scaler(name):
    """Return the fitted scaler for `name` ("stress", "anxiety" or "depression").

    The saved '<name>_scaler.pkl' is preferred. When that file is missing, the
    matching in-session global (scaler_s / scaler_a / scaler_d) is returned if
    it exists.

    Raises:
        FileNotFoundError: neither the file nor an in-memory scaler is available.
    """
    path = f"{name}_scaler.pkl"
    if os.path.exists(path):
        return joblib.load(path)

    # fallback to the scaler fitted earlier in this session, if present
    session_scalers = (
        ("stress", "scaler_s"),
        ("anxiety", "scaler_a"),
        ("depression", "scaler_d"),
    )
    for target, variable in session_scalers:
        if name == target and variable in globals():
            return globals()[variable]

    raise FileNotFoundError(f"{path} not found and scaler not in memory.")

# Load feature lists + models + scalers (these files are created by the 'save artifacts' cell).
# Feature lists must come from disk (load_feature_list has no in-memory fallback);
# models and scalers fall back to the objects fitted earlier in this session.
# These assignments rebind the module-level names that predict_and_compare reads.
stress_features = load_feature_list("stress")
anxiety_features = load_feature_list("anxiety")
depression_features = load_feature_list("depression")

model_stress = load_model("stress")
model_anxiety = load_model("anxiety")
model_depression = load_model("depression")

scaler_s = load_scaler("stress")
scaler_a = load_scaler("anxiety")
scaler_d = load_scaler("depression")

# Category helpers (same cut-offs as categorize_scores earlier in the notebook)
def cat_pss(v):
    """Label a PSS-10 total: up to 13 low, up to 26 moderate, above that high."""
    if v <= 13:
        return "Low Stress"
    if v <= 26:
        return "Moderate Stress"
    return "High Stress"

def cat_gad(v):
    """Label a GAD-7 total: up to 4 minimal, 9 mild, 14 moderate, above that severe."""
    if v <= 4:
        return "Minimal Anxiety"
    if v <= 9:
        return "Mild Anxiety"
    if v <= 14:
        return "Moderate Anxiety"
    return "Severe Anxiety"

def cat_phq(v):
    """Label a PHQ-9 total: up to 4 minimal, 9 mild, 14 moderate, 19 moderately severe, above that severe."""
    if v <= 4:
        return "Minimal Depression"
    if v <= 9:
        return "Mild Depression"
    if v <= 14:
        return "Moderate Depression"
    if v <= 19:
        return "Moderately Severe Depression"
    return "Severe Depression"

# --- RAG Database for Mental Health Tips ---
# Static tip catalogue keyed by condition name ("Stress", "Anxiety", "Depression").
# The chatbot helpers fetch a list with rag_database.get(<condition>) and pick one
# entry with random.choice; there is no retrieval or ranking step beyond that lookup.
# Keys must match the keys of last_assessment_results["labels"] and the
# str.capitalize() form of the keywords chat_with_bot searches for.
# "Depression" currently has 9 tips; the other two lists have 10.
rag_database = {
    "Stress": [
        "Practice deep breathing exercises for 5 minutes daily to reduce stress.",
        "Try progressive muscle relaxation: tense and relax each muscle group from toes to head.",
        "Take short breaks during work to stretch and walk around.",
        "Limit caffeine intake as it can increase anxiety and stress levels.",
        "Establish a consistent sleep schedule to improve stress resilience.",
        "Practice mindfulness meditation for 10 minutes each day.",
        "Engage in regular physical activity, even a short walk can help.",
        "Use a journal to write down your thoughts and concerns.",
        "Listen to calming music or nature sounds when feeling stressed.",
        "Learn to say no to additional responsibilities when you're already overwhelmed."
    ],
    "Anxiety": [
        "Practice the 5-4-3-2-1 grounding technique: identify 5 things you can see, 4 you can touch, 3 you can hear, 2 you can smell, and 1 you can taste.",
        "Challenge anxious thoughts by asking 'What's the evidence for this thought?'",
        "Limit exposure to news and social media if it increases your anxiety.",
        "Try aromatherapy with lavender or chamomile essential oils.",
        "Practice box breathing: inhale for 4 counts, hold for 4, exhale for 4, hold for 4.",
        "Create a worry period: set aside 15 minutes each day to process your worries.",
        "Stay connected with supportive friends and family members.",
        "Reduce sugar intake as it can contribute to anxiety symptoms.",
        "Try yoga or tai chi for mind-body connection and relaxation.",
        "Use positive affirmations to counter negative self-talk."
    ],
    "Depression": [
        "Establish a daily routine to provide structure and purpose.",
        "Set small, achievable goals to build a sense of accomplishment.",
        "Expose yourself to sunlight for at least 15 minutes daily.",
        "Practice gratitude by listing three things you're thankful for each day.",
        "Engage in activities you used to enjoy, even if you don't feel like it initially.",
        "Reach out to supportive people instead of isolating yourself.",
        "Consider volunteering to shift focus from your own challenges.",
        "Limit time spent in bed during the day to maintain sleep quality at night.",
        "Try cognitive behavioral therapy techniques to reframe negative thoughts."
    ]
}

# --- Lightweight Chatbot Setup ---
def load_chatbot():
    """Build a text-generation pipeline around the microsoft/DialoGPT-small checkpoint.

    Returns:
        The transformers pipeline, or None when the tokenizer or model cannot
        be loaded (the error is printed so the rest of the cell can still run).

    NOTE(review): an earlier description called this "around 1B parameters";
    the "small" DialoGPT checkpoint is presumably far smaller — check the model
    card if size matters.
    """
    checkpoint = "microsoft/DialoGPT-small"
    try:
        return pipeline(
            "text-generation",
            tokenizer=AutoTokenizer.from_pretrained(checkpoint),
            model=AutoModelForCausalLM.from_pretrained(checkpoint),
        )
    except Exception as err:
        print(f"Error loading chatbot model: {err}")
        return None

# Initialize chatbot
# Module-level text-generation pipeline, or None when loading failed.
# chat_with_bot reads this name, so it must stay bound to the value returned by
# load_chatbot() and must not be reused for anything else (e.g. a UI component).
chatbot = load_chatbot()

# Global variable to store assessment results for chatbot context
# Written by predict_and_compare; stays None until an assessment has been scored.
last_assessment_results = None

# --- Core predict_and_compare function ---
def predict_and_compare(age, gender, cgpa, waiver, dept, acad, *answers):
    """
    Score one submitted assessment and run the three fitted models on it.

    Inputs order: age, gender, cgpa, waiver, dept, acad, then 10 PSS answers,
    7 GAD answers, 9 PHQ answers (strings or ints, as sent by the radio groups).

    Returns:
        (results_dict, summary_string). On invalid input the pair is
        ({"Error": <message>}, "Error") and nothing else is changed.

    Side-effect: on success, sets the global last_assessment_results that the
    chatbot helpers read.

    Demographic values may be either a key of the matching *_map dict or an
    already-numeric value. A department that matches no Dept_* column (or an
    unselected one) leaves every department column at 0.
    """
    global last_assessment_results

    # --- questionnaire answers: validate before any model work ---
    # An unanswered radio arrives as None; int(None) used to raise outside the
    # try block and surface as an unhandled error in the UI.
    n_pss, n_gad, n_phq = 10, 7, 9
    expected = n_pss + n_gad + n_phq
    if len(answers) != expected:
        return {"Error": f"Expected {expected} questionnaire answers, got {len(answers)}."}, "Error"
    if any(a is None or str(a).strip() == "" for a in answers):
        return {"Error": "Please answer every questionnaire item before submitting."}, "Error"
    try:
        scores = [int(a) for a in answers]
    except (TypeError, ValueError):
        return {"Error": "Questionnaire answers must be whole numbers."}, "Error"

    # --- demographics: mapping key or raw numeric value ---
    try:
        Age = age_map[age] if age in age_map else float(age)
        Gender = gender_map[gender] if gender in gender_map else int(gender)
        Current_CGPA = cgpa_map[cgpa] if cgpa in cgpa_map else float(cgpa)
        waiver_or_scholarship = waiver_map[waiver] if waiver in waiver_map else int(waiver)
        Academic_Year = academic_year_map[acad] if acad in academic_year_map else int(acad)
    except Exception:
        # Also covers the mapping dicts not being defined yet (cells not run).
        return {"Error": "Invalid demographic mapping. Run mapping cells."}, "Error"

    pss_vals = scores[:n_pss]
    gad_vals = scores[n_pss:n_pss + n_gad]
    phq_vals = scores[n_pss + n_gad:]

    phq_total = sum(phq_vals)
    gad_total = sum(gad_vals)
    pss_total = sum(pss_vals)

    # build a single input row with demographics + dept one-hots + instrument items
    row = {
        "Age": Age,
        "Gender": Gender,
        "Current_CGPA": Current_CGPA,
        "waiver_or_scholarship": waiver_or_scholarship,
        "Academic_Year": Academic_Year,
    }
    # Comparing names (rather than building "Dept_" + dept) also tolerates dept=None.
    for col in dept_onehot_cols:
        row[col] = 1 if col.replace("Dept_", "") == dept else 0
    row.update({f"PSS{i}": v for i, v in enumerate(pss_vals, start=1)})
    row.update({f"GAD{i}": v for i, v in enumerate(gad_vals, start=1)})
    row.update({f"PHQ{i}": v for i, v in enumerate(phq_vals, start=1)})

    # Reindex into training features (fills missing with 0)
    row_frame = pd.DataFrame([row])
    X_s = row_frame.reindex(columns=stress_features).fillna(0)
    X_a = row_frame.reindex(columns=anxiety_features).fillna(0)
    X_d = row_frame.reindex(columns=depression_features).fillna(0)

    # scale and predict
    pred_stress = float(model_stress.predict(scaler_s.transform(X_s))[0])
    pred_anxiety = float(model_anxiety.predict(scaler_a.transform(X_a))[0])
    pred_depression = float(model_depression.predict(scaler_d.transform(X_d))[0])

    # derive questionnaire categories (for context/labels) from the raw totals
    stress_label = cat_pss(pss_total)
    anxiety_label = cat_gad(gad_total)
    depression_label = cat_phq(phq_total)

    # Store results for chatbot context
    last_assessment_results = {
        "raw_totals": {"PHQ9": phq_total, "GAD7": gad_total, "PSS10": pss_total},
        "predicted_scores": {"Depression": pred_depression, "Anxiety": pred_anxiety, "Stress": pred_stress},
        "labels": {"Depression": depression_label, "Anxiety": anxiety_label, "Stress": stress_label}
    }

    summary = f"Raw totals — PHQ9: {phq_total}, GAD7: {gad_total}, PSS10: {pss_total} | Labels — Dep: {depression_label}, Anx: {anxiety_label}, Str: {stress_label}"
    return last_assessment_results, summary

# --- Chatbot Functions ---
def get_initial_chatbot_message():
    """Compose the assistant's opening line from the stored assessment.

    Without stored results a generic prompt is returned. Otherwise the
    condition whose label ranks highest in severity_order is chosen (labels
    not listed rank 1; on ties the first condition in the labels dict wins)
    and a random tip for it is worked into the message.
    """
    global last_assessment_results

    if not last_assessment_results:
        return "How else can I help you today?"

    labels = last_assessment_results["labels"]

    # Rank of each label; anything missing here counts as 1
    severity_order = {
        "Severe Depression": 5, "Moderately Severe Depression": 4, "Moderate Depression": 3,
        "Severe Anxiety": 5, "Moderate Anxiety": 3, "High Stress": 4, "Moderate Stress": 2
    }

    # max() keeps the first of several equally ranked entries
    top_entry = max(
        labels.items(),
        key=lambda entry: severity_order.get(entry[1], 1),
        default=None,
    )
    primary_condition = top_entry[0] if top_entry is not None else None

    tips = rag_database.get(primary_condition, []) if primary_condition else []
    if tips:
        tip = random.choice(tips)
        return f"I see you're experiencing {labels[primary_condition].lower()}. {tip} Would you like to talk more about this or ask for additional support?"

    return "Thank you for completing the assessment. I'm here to provide support and resources. How are you feeling today?"

def chat_with_bot(message, chat_history):
    """Answer one chat message and return the updated conversation.

    Reply selection:
      1. If the message mentions "stress", "anxiety" or "depression", a random
         tip for one of the mentioned conditions is returned.
      2. Otherwise, if an assessment is stored, a random tip for its
         highest-ranked condition is returned.
      3. Otherwise the text-generation pipeline bound to `chatbot` produces a
         reply; if it is unavailable, fails, or yields nothing, a fixed
         supportive line is used.

    Args:
        message: the user's text; None or blank input adds no turn.
        chat_history: list of (user, bot) pairs, or None for an empty chat.

    Returns:
        ("", history): an empty string to clear the input box and a new list
        holding the prior turns plus this one. The caller's list is not mutated.
    """
    global last_assessment_results

    history = list(chat_history or [])
    if not message or not message.strip():
        # Nothing to answer; avoid posting an empty user turn with a random tip.
        return "", history

    fallback = "I'm here to listen and provide support. How are you feeling today?"

    # Check if user is asking for help with a specific condition
    lowered = message.lower()
    mentioned_conditions = [cond for cond in ("stress", "anxiety", "depression") if cond in lowered]

    # Collect tips for every mentioned condition
    relevant_tips = []
    for condition in mentioned_conditions:
        relevant_tips.extend(rag_database.get(condition.capitalize(), []))

    # If no specific condition mentioned but we have assessment results:
    if not mentioned_conditions and last_assessment_results:
        labels = last_assessment_results["labels"]
        severity_order = {
            "Severe Depression": 5, "Moderately Severe Depression": 4, "Moderate Depression": 3,
            "Severe Anxiety": 5, "Moderate Anxiety": 3, "High Stress": 4, "Moderate Stress": 2
        }
        # Unlisted labels rank 1; max() keeps the first condition on ties.
        primary_condition = max(
            labels,
            key=lambda cond: severity_order.get(labels[cond], 1),
            default=None,
        )
        if primary_condition:
            relevant_tips.extend(rag_database.get(primary_condition, []))

    if relevant_tips:
        response = f"Here's a suggestion: {random.choice(relevant_tips)}"
    else:
        # Only use `chatbot` when it really is a pipeline: it may be None (load
        # failed) or rebound to an object without a tokenizer/model.
        generator = chatbot if hasattr(chatbot, "tokenizer") and hasattr(chatbot, "model") else None
        if generator is None:
            response = fallback
        else:
            try:
                tokenizer = generator.tokenizer
                input_ids = tokenizer.encode(message + tokenizer.eos_token, return_tensors='pt')
                output_ids = generator.model.generate(
                    input_ids, max_length=1000,
                    pad_token_id=tokenizer.eos_token_id,
                    no_repeat_ngram_size=3,
                    do_sample=True,
                    top_k=100,
                    top_p=0.7,
                    temperature=0.8
                )
                # Decode only the tokens generated after the prompt.
                generated = tokenizer.decode(output_ids[:, input_ids.shape[-1]:][0], skip_special_tokens=True)
                response = generated.strip() or fallback
            except Exception as exc:
                # Keep the chat alive, but surface the cause instead of hiding it.
                print(f"Chatbot generation failed: {exc}")
                response = fallback

    history.append((message, response))
    return "", history

def launch_chatbot():
    """Start the chat with the assistant's greeting and assessment-based opener.

    Returns:
        ("", history): an empty string for the message box and a history in
        which both opening lines are assistant turns. A pair whose user side
        is None shows only the assistant bubble.

    Previously the assessment-based message was returned as the *user* side of
    the first turn and also pre-filled into the input box, so the assistant's
    own words appeared as if the user had typed them.
    """
    greeting = "Hello! I'm here to provide support based on your mental health assessment. How can I help you today?"
    initial_message = get_initial_chatbot_message()
    return "", [(None, greeting), (None, initial_message)]

# --- Build inputs list and launch Interface ---
# Order matters: predict_and_compare receives these values positionally
# (six demographics, then the PSS, GAD and PHQ answers).
inputs = [age_dd, gender_dd, cgpa_dd, waiver_dd, dept_dd, acad_dd] + pss_blocks + gad_blocks + phq_blocks

with gr.Blocks(title="Mental Health Assessment with Support Chat") as demo:
    gr.Markdown("# Mental Health Assessment")
    gr.Markdown("Answer MCQs to get predicted Stress/Anxiety/Depression and see your instrument totals. Scale of 0-3 with 3 being most severe.")

    with gr.Row():
        with gr.Column():
            # Assessment inputs (components were created above; render() places them here)
            gr.Markdown("## Demographic Information")
            for demographic_input in (age_dd, gender_dd, cgpa_dd, waiver_dd, dept_dd, acad_dd):
                demographic_input.render()

            gr.Markdown("## PSS-10 Questions (Stress)")
            for pss in pss_blocks:
                pss.render()

            gr.Markdown("## GAD-7 Questions (Anxiety)")
            for gad in gad_blocks:
                gad.render()

            gr.Markdown("## PHQ-9 Questions (Depression)")
            for phq in phq_blocks:
                phq.render()

            submit_btn = gr.Button("Submit Assessment", variant="primary")

        with gr.Column():
            # Results display
            gr.Markdown("## Assessment Results")
            results_json = gr.JSON(label="Results")
            summary_text = gr.Textbox(label="Summary", lines=4)

            # Chatbot section
            gr.Markdown("## Mental Health Support Chat")
            chatbot_btn = gr.Button("Start Chat with Support Assistant", variant="secondary")
            # Deliberately NOT named `chatbot`: that global holds the text-generation
            # pipeline used by chat_with_bot. Rebinding it to this component made
            # every generation attempt fail and fall back to the canned reply.
            chat_display = gr.Chatbot(label="Chat with Support Assistant")
            msg = gr.Textbox(label="Your Message", placeholder="Type your message here...")
            send_btn = gr.Button("Send", variant="primary")
            clear_btn = gr.Button("Clear Chat")

    # Event handlers
    submit_btn.click(
        predict_and_compare,
        inputs=inputs,
        outputs=[results_json, summary_text]
    )

    chatbot_btn.click(
        launch_chatbot,
        inputs=[],
        outputs=[msg, chat_display]
    )

    # Enter in the textbox and the Send button share one handler
    msg.submit(
        chat_with_bot,
        inputs=[msg, chat_display],
        outputs=[msg, chat_display]
    )

    send_btn.click(
        chat_with_bot,
        inputs=[msg, chat_display],
        outputs=[msg, chat_display]
    )

    clear_btn.click(lambda: None, None, chat_display, queue=False)

demo.launch(debug=True)

Device set to use cpu


It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://64973e893083dd7f3c.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://64973e893083dd7f3c.gradio.live




In [None]:
# ==========================
# Demo Cell: Numerical Scores without UI (No chatbot, purely for demo)
# ==========================

import numpy as np
import pandas as pd
import os, joblib

# -------- 1) Hardcoded realistic test case --------

# NOTE: `demo` is also the name the Gradio cell above launches (`demo.launch(...)`).
# After this assignment it refers to this dict until that cell is re-run.
demo = {
    "Age": 21,                         # years, numeric
    "Gender": 1,                       # coded: 1=Male, 0=Female
    "Current_CGPA": 3.2,               # numeric
    "waiver_or_scholarship": 0,        # coded: 1=Yes, 0=No
    "Academic_Year": 2,                # one of 1,2,3,4,5
    "Department": "Engineering - CS / CSE / CSC / Similar to CS",
}

# Item-level questionnaire answers, keyed PHQ1..PHQ9, GAD1..GAD7, PSS1..PSS10.
# With the cat_* thresholds used at the end of this cell, the totals below land in
# moderate depression (10), mild anxiety (6) and moderate stress (19).
PHQ = {f"PHQ{item}": score for item, score in enumerate((1, 1, 2, 1, 1, 0, 1, 2, 1), start=1)}        # 0-3 each
GAD = {f"GAD{item}": score for item, score in enumerate((1, 0, 2, 1, 1, 1, 0), start=1)}              # 0-3 each
# Values used here are 0-3. NOTE(review): the standard PSS-10 scores each item 0-4 and
# reverse-scores some items — confirm how the processed dataset codes them.
PSS = {f"PSS{item}": score for item, score in enumerate((2, 2, 1, 3, 2, 1, 2, 1, 2, 3), start=1)}

# Raw totals: plain sums of the item answers, one per instrument.
phq_total, gad_total, pss_total = (sum(answers.values()) for answers in (PHQ, GAD, PSS))

print("=== Raw Questionnaire Scores ===")
print(f"PHQ-9 Score (Depression): {phq_total}")
print(f"GAD-7 Score (Anxiety): {gad_total}")
print(f"PSS-10 Score (Stress): {pss_total}")

# -------- 2) Resolve feature lists exactly as used in training --------
def get_feature_list_fallback(name_in_memory, pkl_filename, default_builder):
    """
    Return the ordered feature names for one target, trying three sources in turn.

    1. If the global `name_in_memory` is bound to a DataFrame, use its columns.
    2. Otherwise, if `pkl_filename` exists on disk, load it with joblib.
    3. Otherwise, return whatever `default_builder()` produces (not copied or converted).

    Sources 1 and 2 are returned as a new list.
    """
    in_memory = globals().get(name_in_memory)
    if isinstance(in_memory, pd.DataFrame):
        return list(in_memory.columns)

    # joblib.load unpickles the file, so only point this at trusted artifacts.
    if os.path.exists(pkl_filename):
        return list(joblib.load(pkl_filename))

    # Neither an in-memory frame nor a saved list: rebuild from the known layout.
    return default_builder()

# We need dept_onehot_cols in the same order as training.
# Use in-memory if present; otherwise try to load saved; otherwise infer from dataset.
def resolve_dept_onehot_cols():
    """
    Return the department one-hot column names as a list.

    Lookup order: the global `dept_onehot_cols`, then the file 'dept_columns.pkl'
    (loaded with joblib), then the dummy columns derived from `df['Department']`
    using the 'Dept' prefix.

    Raises:
        RuntimeError: when none of the three sources is available.
    """
    namespace = globals()

    if 'dept_onehot_cols' in namespace:
        return list(namespace['dept_onehot_cols'])

    # joblib.load unpickles the file, so only use a trusted 'dept_columns.pkl'.
    if os.path.exists('dept_columns.pkl'):
        return list(joblib.load('dept_columns.pkl'))

    # Last resort: infer the columns from the dataset loaded earlier.
    # NOTE(review): the inferred column order may not match the order used in training — confirm.
    if 'df' in namespace and 'Department' in namespace['df'].columns:
        inferred = pd.get_dummies(namespace['df']['Department'], prefix='Dept')
        return list(inferred.columns)

    raise RuntimeError("Cannot resolve dept_onehot_cols. Run earlier cells first or ensure dept_columns.pkl exists.")

# Resolved once here; the default_features_* builders and build_row_for_target
# below read this global rather than calling the resolver again.
dept_onehot_cols_resolved = resolve_dept_onehot_cols()

# Demographic columns that lead every default feature list, in this order.
_BASE_DEMOGRAPHIC_FEATURES = ("Age", "Gender", "Current_CGPA", "waiver_or_scholarship", "Academic_Year")


def _default_feature_list(item_prefix, item_count):
    """Return demographics + department one-hots + `item_prefix`1..`item_prefix``item_count`.

    Reads the global `dept_onehot_cols_resolved` at call time; it may be any
    iterable of column names. A new list is returned on every call.
    """
    item_cols = [f"{item_prefix}{i}" for i in range(1, item_count + 1)]
    return [*_BASE_DEMOGRAPHIC_FEATURES, *dept_onehot_cols_resolved, *item_cols]


def default_features_stress():
    """Default stress feature order: demographics, department one-hots, PSS1..PSS10."""
    return _default_feature_list("PSS", 10)


def default_features_anxiety():
    """Default anxiety feature order: demographics, department one-hots, GAD1..GAD7."""
    return _default_feature_list("GAD", 7)


def default_features_depression():
    """Default depression feature order: demographics, department one-hots, PHQ1..PHQ9."""
    return _default_feature_list("PHQ", 9)

# One ordered feature list per target. Each call prefers the columns of the in-memory
# X_* DataFrame, then the saved .pkl, then the matching default_features_* builder.
stress_features     = get_feature_list_fallback("X_stress",     "stress_features.pkl",     default_features_stress)
anxiety_features    = get_feature_list_fallback("X_anxiety",    "anxiety_features.pkl",    default_features_anxiety)
depression_features = get_feature_list_fallback("X_depression", "depression_features.pkl", default_features_depression)

# -------- 3) Build one model-ready row per target --------
def build_row_for_target(features):
    """
    Build a single-row DataFrame for the hardcoded demo case, laid out as `features`.

    Reads these globals: `demo` (demographics and department name),
    `dept_onehot_cols_resolved` (department one-hot column names) and the
    `PSS` / `GAD` / `PHQ` answer dicts.

    Args:
        features: ordered column names the target's scaler/model expects.

    Returns:
        A one-row DataFrame whose columns are exactly `features`, in that order.
        Any feature this function has no value for is filled with 0.

    Side effects:
        Prints a notice when the chosen department has no matching one-hot column
        or when a requested feature had to be zero-filled, so a mismatch with the
        training layout is visible instead of silently skewing the prediction.
        (print is used because the notebook's setup cell silences warnings globally.)
    """
    # Base demographics (KeyError if the demo dict lacks one of them).
    row = {
        key: demo[key]
        for key in ("Age", "Gender", "Current_CGPA", "waiver_or_scholarship", "Academic_Year")
    }

    # Department one-hots: all zero except the chosen department, if it exists.
    # Column names look like: 'Dept_Engineering - CS / CSE / CSC / Similar to CS'
    chosen_dept_col = "Dept_" + demo["Department"]
    dept_matched = False
    for col in dept_onehot_cols_resolved:
        is_chosen = col == chosen_dept_col
        row[col] = 1 if is_chosen else 0
        dept_matched = dept_matched or is_chosen
    if not dept_matched:
        print(f"[build_row_for_target] Notice: '{chosen_dept_col}' is not among the department "
              f"one-hot columns; every department column is 0.")

    # Questionnaire items for all three instruments; unused ones are dropped below.
    row.update(PSS)
    row.update(GAD)
    row.update(PHQ)

    # Keep the best-effort zero-fill for unknown features, but report it.
    missing = [col for col in features if col not in row]
    if missing:
        print(f"[build_row_for_target] Notice: {len(missing)} expected feature(s) had no value "
              f"and were filled with 0: {missing}")

    final = {col: row.get(col, 0) for col in features}
    return pd.DataFrame([final], columns=features)

# Build one single-row DataFrame per target, each laid out in that target's feature order.
X_s_row = build_row_for_target(stress_features)
X_a_row = build_row_for_target(anxiety_features)
X_d_row = build_row_for_target(depression_features)

# Print the shapes for a visual comparison with what the models expect (nothing is asserted here).
print("\nShapes — Stress/Anxiety/Depression:", X_s_row.shape, X_a_row.shape, X_d_row.shape)

# -------- 4) Scale using the exact scalers from training --------
def resolve_scaler(varname, pkl_name):
    """
    Return the scaler bound to the global `varname`, else the one stored in `pkl_name`.

    Whatever object the global holds is returned as-is; its type is not checked.

    Raises:
        RuntimeError: when the global is not defined and the file does not exist.
    """
    known = globals()
    if varname not in known:
        if not os.path.exists(pkl_name):
            raise RuntimeError(f"Missing scaler: {varname} and {pkl_name} not found.")
        # joblib.load unpickles the file, so only load trusted artifacts.
        return joblib.load(pkl_name)
    return known[varname]

# Fetch the three scalers: a kernel global of the same name wins, otherwise the saved .pkl is loaded.
scaler_s = resolve_scaler("scaler_s", "stress_scaler.pkl")
scaler_a = resolve_scaler("scaler_a", "anxiety_scaler.pkl")
scaler_d = resolve_scaler("scaler_d", "depression_scaler.pkl")

# Transform each row with its own target's scaler; the *_demo results are what the models receive below.
Xs_demo = scaler_s.transform(X_s_row)
Xa_demo = scaler_a.transform(X_a_row)
Xd_demo = scaler_d.transform(X_d_row)

# -------- 5) Predict using the best models chosen earlier --------
def resolve_model(varname, pkl_name):
    """
    Return the model bound to the global `varname`, else the one stored in `pkl_name`.

    Whatever object the global holds is returned as-is; its type is not checked.

    Raises:
        RuntimeError: when the global is not defined and the file does not exist.
    """
    scope = globals()
    if varname in scope:
        found = scope[varname]
    elif os.path.exists(pkl_name):
        # joblib.load unpickles the file, so only load trusted artifacts.
        found = joblib.load(pkl_name)
    else:
        raise RuntimeError(f"Missing model: {varname} and {pkl_name} not found.")
    return found

# Fetch the three models: a kernel global of the same name wins, otherwise the saved .pkl is loaded.
model_stress     = resolve_model("model_stress",     "stress_model.pkl")
model_anxiety    = resolve_model("model_anxiety",    "anxiety_model.pkl")
model_depression = resolve_model("model_depression", "depression_model.pkl")

# Each model predicts on its single scaled row; [0] takes that row's prediction and
# float() turns it into a plain Python float for formatting.
pred_stress     = float(model_stress.predict(Xs_demo)[0])
pred_anxiety    = float(model_anxiety.predict(Xa_demo)[0])
pred_depression = float(model_depression.predict(Xd_demo)[0])

# NOTE(review): in the recorded run the anxiety and depression predictions equal the raw
# totals (6 and 10) while the stress prediction is 23.000 against a raw PSS sum of 19 —
# confirm how the stress target was scored in training (e.g. reverse-scored PSS items).
print("\n=== Predicted Scores (ML Models) ===")
print(f"Predicted Stress: {pred_stress:.3f}")
print(f"Predicted Anxiety: {pred_anxiety:.3f}")
print(f"Predicted Depression: {pred_depression:.3f}")

# -------- 6) Show quick categories from raw totals (for context only) --------
def cat_pss(v):
    """Label a PSS total: up to 13 is low, up to 26 is moderate, anything else is high."""
    if v <= 13:
        return "Low Stress"
    if v <= 26:
        return "Moderate Stress"
    return "High Stress"
def cat_gad(v):
    """Label a GAD total using inclusive upper bounds 4 / 9 / 14; anything else is severe."""
    bands = (
        (4, "Minimal Anxiety"),
        (9, "Mild Anxiety"),
        (14, "Moderate Anxiety"),
    )
    for upper_bound, label in bands:
        if v <= upper_bound:
            return label
    return "Severe Anxiety"
def cat_phq(v):
    """Label a PHQ total using inclusive upper bounds 4 / 9 / 14 / 19; anything else is severe."""
    bands = (
        (4, "Minimal Depression"),
        (9, "Mild Depression"),
        (14, "Moderate Depression"),
        (19, "Moderately Severe Depression"),
    )
    # First band whose upper bound covers v; falls through to the top category.
    return next((label for upper_bound, label in bands if v <= upper_bound), "Severe Depression")

# Categories come from the raw questionnaire totals computed at the top of this cell,
# not from the model predictions printed above.
print("\n=== Raw Score Categories (from questionnaire) ===")
print(f"Stress: {cat_pss(pss_total)}")
print(f"Anxiety: {cat_gad(gad_total)}")
print(f"Depression: {cat_phq(phq_total)}")


=== Raw Questionnaire Scores ===
PHQ-9 Score (Depression): 10
GAD-7 Score (Anxiety): 6
PSS-10 Score (Stress): 19

Shapes — Stress/Anxiety/Depression: (1, 27) (1, 24) (1, 26)

=== Predicted Scores (ML Models) ===
Predicted Stress: 23.000
Predicted Anxiety: 6.000
Predicted Depression: 10.000

=== Raw Score Categories (from questionnaire) ===
Stress: Moderate Stress
Anxiety: Mild Anxiety
Depression: Moderate Depression
