In [None]:
!pip install -q transformers torch pandas


In [None]:
import os

import pandas as pd
import torch
from transformers import DistilBertTokenizerFast, DistilBertForSequenceClassification




# Loading model

In [None]:
# Directory passed to from_pretrained() below for both the tokenizer and the model.
MODEL_PATH = "/content/drive/MyDrive/mental health bot/distilbert-mental-health-stratified"

# Fail fast with an actionable message. Without this check a missing or
# unmounted directory only surfaces as a long "Can't load tokenizer" OSError.
# FileNotFoundError is a subclass of OSError, so existing handlers still match.
if not os.path.isdir(MODEL_PATH):
    raise FileNotFoundError(
        f"Model directory not found: {MODEL_PATH!r}. "
        "Mount the drive that contains it (or update MODEL_PATH) before running this cell."
    )

tokenizer = DistilBertTokenizerFast.from_pretrained(MODEL_PATH)
model = DistilBertForSequenceClassification.from_pretrained(MODEL_PATH)

# Use the GPU when torch reports one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
# Put the model in eval mode; as the cell's last expression this also displays it.
model.eval()


OSError: Can't load tokenizer for '/content/drive/MyDrive/mental health bot/distilbert-mental-health-stratified'. If you were trying to load it from 'https://huggingface.co/models', make sure you don't have a local directory with the same name. Otherwise, make sure '/content/drive/MyDrive/mental health bot/distilbert-mental-health-stratified' is the correct path to a directory containing all relevant files for a DistilBertTokenizerFast tokenizer.

In [None]:
# Label names in the order predict() indexes the model's output scores.
# Position matters: entry i is paired with score i.
LABEL_NAMES = [
    "suicidal_ideation",    # index 0
    "depressive_language",  # index 1
    "anxiety_related",      # index 2
    "stress_related",       # index 3
]


# Predicting

In [None]:
# Read the test posts from the CSV file.
df = pd.read_csv("/content/drive/MyDrive/mental health bot/test.csv")

# Build the model input as "<title> <self_text>" (combined like during training).
# Missing titles or bodies are treated as empty strings.
df["text"] = df["title"].fillna("").add(" ").add(df["self_text"].fillna(""))


In [None]:
def predict(text):
    """Score `text` against every entry of LABEL_NAMES.

    The text is tokenized (truncated to at most 256 tokens), moved to
    `device`, and run through `model` with gradient tracking disabled.
    A sigmoid is applied to each logit independently, so the scores are
    per-label and are not normalised to sum to 1.

    Args:
        text: The input handed to the tokenizer.

    Returns:
        dict mapping each name in LABEL_NAMES to its score as a Python float.
    """
    encoded = tokenizer(
        text,
        max_length=256,
        truncation=True,
        padding=True,
        return_tensors="pt",
    )
    batch = {name: tensor.to(device) for name, tensor in encoded.items()}

    with torch.no_grad():
        logits = model(**batch).logits

    # Only row 0 of the batch is read.
    scores = torch.sigmoid(logits)[0]

    result = {}
    for idx, label in enumerate(LABEL_NAMES):
        result[label] = float(scores[idx])
    return result


In [None]:
# Score every post, one predict() call per text.
results = [predict(txt) for txt in df["text"]]

# Build the score frame on df's own index. pd.concat(axis=1) aligns rows by
# index label, so a fresh 0..n-1 index would misalign (and pad with NaN) if df
# were ever filtered or re-indexed before this cell.
pred_df = pd.DataFrame(results, index=df.index)
final_df = pd.concat([df, pred_df], axis=1)


In [None]:
def get_labels(row, thresh=0.5):
    """Return the LABEL_NAMES entries whose value in `row` is at least `thresh`.

    The result keeps LABEL_NAMES order and is empty when no score qualifies.
    """
    selected = []
    for label in LABEL_NAMES:
        if row[label] >= thresh:
            selected.append(label)
    return selected

# One list of qualifying labels per row.
final_df["predicted_labels"] = final_df.apply(get_labels, axis=1)


In [None]:
# Compact view: community, thresholded labels and the text.
# Note that this rebinds `df`, which previously held the loaded CSV.
df = final_df.loc[:, ['community', 'predicted_labels', 'text']]


In [None]:
# Spot-check one sentence; the dict returned by predict() is shown as the cell output.
text = "I don’t want to live anymore"
predict(text)


# Testing Accuracy

In [None]:
import numpy as np

# Label order used for the score columns selected below.
LABEL_NAMES = [
    "suicidal_ideation",
    "depressive_language",
    "anxiety_related",
    "stress_related"
]

# NOTE(review): the same final_df[LABEL_NAMES] columns feed BOTH arrays below.
# If those columns are the model scores appended from pred_df, `y_true` holds
# raw probabilities rather than ground truth — confirm before relying on it.
# Both names are reassigned from the community-derived labels in a later cell.
y_true = final_df[LABEL_NAMES].values

# The same columns, binarised at a 0.5 threshold.
y_pred = (final_df[LABEL_NAMES].values >= 0.5).astype(int)


In [None]:
# Lookup from a post's community name to the label it is treated as.
# All keys are lower-case.
community_to_label = dict(
    anxiety="anxiety_related",
    suicidewatch="suicidal_ideation",
    depressed="depressive_language",
    stressed="stress_related",
)


In [None]:
# Derive the ground-truth label from the source community. Names are trimmed and
# lower-cased first because every key in community_to_label is lower-case; an
# exact-match lookup would silently turn e.g. "SuicideWatch" or "anxiety " into
# NaN. Communities with no mapping still become NaN.
final_df["true_label"] = (
    final_df["community"].astype(str).str.strip().str.lower().map(community_to_label)
)

In [None]:
def pick_one(lst):
    """Return the first element of `lst`, or None.

    None is returned when `lst` is not a list or is an empty list.
    """
    if not isinstance(lst, list):
        return None
    if len(lst) == 0:
        return None
    return lst[0]

def _labels_by_score(row):
    """Return the row's thresholded labels ordered from highest to lowest score."""
    labels = row["predicted_labels"]
    if not isinstance(labels, list):
        return labels
    # sorted() is stable, so labels with equal scores keep their existing order.
    return sorted(labels, key=lambda name: row[name], reverse=True)


# `predicted_labels` is in LABEL_NAMES order, so taking its first element would
# favour whichever label is listed first over the one the model scored highest.
# Rank by score first, then keep the top entry (None when the list is empty).
final_df["pred_label"] = final_df.apply(
    lambda row: pick_one(_labels_by_score(row)), axis=1
)


In [None]:
# Keep only the rows that have both a predicted and a true label.
df_eval = final_df[final_df[["pred_label", "true_label"]].notna().all(axis=1)]


In [None]:
from sklearn.metrics import accuracy_score

LABEL_NAMES = [
    "suicidal_ideation",
    "depressive_language",
    "anxiety_related",
    "stress_related",
]

# Integer class id for each label name: its position in LABEL_NAMES.
label2id = dict(zip(LABEL_NAMES, range(len(LABEL_NAMES))))

# Encode the true and predicted label names as class ids.
y_true = df_eval["true_label"].map(label2id).to_numpy()
y_pred = df_eval["pred_label"].map(label2id).to_numpy()

acc = accuracy_score(y_true, y_pred)
print("Accuracy (community-based):", acc)


In [None]:
from sklearn.metrics import hamming_loss

# Hamming loss is a multi-label metric. On the 1-D class ids in y_true / y_pred
# it reduces to the plain error rate, so "1 - hamming_loss" would only repeat
# the accuracy. Compute it on per-label indicator matrices instead:
#   truth      -> one-hot encoding of the community-derived label
#   prediction -> every label whose score reaches the 0.5 threshold
y_true_multi = (
    pd.get_dummies(df_eval["true_label"])
    .reindex(columns=LABEL_NAMES, fill_value=0)
    .astype(int)
    .values
)
y_pred_multi = (df_eval[LABEL_NAMES].values >= 0.5).astype(int)

hamming_acc = 1 - hamming_loss(y_true_multi, y_pred_multi)
print("Hamming Accuracy:", hamming_acc)

In [None]:
from sklearn.metrics import f1_score

# Report F1 under each averaging scheme, in the order micro, macro, weighted.
for heading, averaging in (
    ("F1 Micro   :", "micro"),
    ("F1 Macro   :", "macro"),
    ("F1 Weighted:", "weighted"),
):
    print(heading, f1_score(y_true, y_pred, average=averaging))


# Heat maps

In [None]:

# Rebuild the class-id encoding used for the confusion matrix.
label2id = {name: position for position, name in enumerate(LABEL_NAMES)}

y_true = df_eval["true_label"].map(label2id).to_numpy()
y_pred = df_eval["pred_label"].map(label2id).to_numpy()


In [None]:
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import numpy as np

# Pin the class ids explicitly so the matrix always has one row and one column
# per entry of LABEL_NAMES. Without `labels`, a class that appears in neither
# y_true nor y_pred is dropped, which shrinks the matrix and breaks any code
# that indexes it by LABEL_NAMES position.
cm = confusion_matrix(y_true, y_pred, labels=list(range(len(LABEL_NAMES))))
cm


In [None]:
# Draw the confusion matrix as an annotated heat map (rows = actual, columns = predicted).
fig, ax = plt.subplots(figsize=(7, 6))
heatmap = ax.imshow(cm, cmap="Blues")
ax.set_title("Confusion Matrix (Community vs Model Prediction)")
ax.set_xlabel("Predicted")
ax.set_ylabel("Actual")

n_labels = len(LABEL_NAMES)
ax.set_xticks(range(n_labels))
ax.set_xticklabels(LABEL_NAMES, rotation=30)
ax.set_yticks(range(n_labels))
ax.set_yticklabels(LABEL_NAMES)

# Write each cell's count at its centre; x is the column and y is the row.
for row in range(n_labels):
    for col in range(n_labels):
        ax.text(col, row, cm[row, col], ha="center", va="center")

fig.colorbar(heatmap, ax=ax)
fig.tight_layout()
plt.show()


In [None]:
# Hand-written sentences for a quick sanity check of the classifier.
# Every entry needs its own trailing comma: adjacent string literals are
# concatenated by Python, which would silently merge two cases into one.
test_cases = [
    "I don’t want to live anymore",
    "I want to kill myself",
    "I feel empty every day",
    "work stress too much",
    "so tired from work",
    "Deadlines make me panic.",
    "Too much pressure makes me anxious.",
    "I feel overwhelmed and nervous.",
    "Work stress is making me panic.",
    "I am anxious all the time",
    "I wake up empty every day and go to bed the same way.",
    "Nothing excites me anymore.",
    "I feel hollow inside.",
]


In [None]:
# Print the highest-scoring label for each sanity-check sentence.
for text in test_cases:
    output = predict(text)

    # Best (label, score) pair; on a tie the earliest key in `output` wins.
    max_label, max_value = max(output.items(), key=lambda pair: pair[1])

    print("Text:", text)
    print("Predicted Label:", max_label, "->", round(max_value, 3))
    print("-" * 60)


## Suicidal

In [None]:
# Test sentences for the "Suicidal" section, set 1.
t1 = [
    "I don’t see myself growing old.",
    "I feel like my story should end soon.",
    "Some people just aren’t meant to stay long.",
    "I don’t belong in this world.",
    "I feel like I’ve already lived enough.",
]

In [None]:
# Show every score plus the top label for each sentence in t1.
for text in t1:
    output = predict(text)

    # Best (label, score) pair; on a tie the earliest key in `output` wins.
    max_label, max_value = max(output.items(), key=lambda pair: pair[1])

    print("Text:", text)
    print("Scores:", output)
    print("Predicted Label:", max_label, "->", round(max_value, 3))
    print("-" * 60)

In [None]:
# Test sentences for the "Suicidal" section, set 2.
t2 = [
    "I’m not suicidal but I wish I wouldn’t wake up.",
    "I don’t want to kill myself, I just don’t want to exist.",
    "I’m safe, but I hate being alive.",
    "I won’t hurt myself, but life feels useless.",
]

In [None]:
# Show every score plus the top label for each sentence in t2.
for text in t2:
    output = predict(text)

    # Best (label, score) pair; on a tie the earliest key in `output` wins.
    max_label, max_value = max(output.items(), key=lambda pair: pair[1])

    print("Text:", text)
    print("Scores:", output)
    print("Predicted Label:", max_label, "->", round(max_value, 3))
    print("-" * 60)

In [None]:
# Test sentences for the "Suicidal" section, set 3.
t3 = [
    "I’m tired of fighting life.",
    "I feel like giving up on everything.",
    "I’m done trying.",
    "I can’t do this anymore.",
]

In [None]:
# Show every score plus the top label for each sentence in t3.
for text in t3:
    output = predict(text)

    # Best (label, score) pair; on a tie the earliest key in `output` wins.
    max_label, max_value = max(output.items(), key=lambda pair: pair[1])

    print("Text:", text)
    print("Scores:", output)
    print("Predicted Label:", max_label, "->", round(max_value, 3))
    print("-" * 60)

In [None]:
# Test sentences, set 4.
t4 = [
    "I wake up empty every day and go to bed the same way.",
    "Nothing excites me anymore.",
    "I feel hollow inside.",
    "I exist, but I don’t live.",
    "I feel emotionally dead.",
]

# Print only the top label for each sentence (no per-label scores here).
for text in t4:
    output = predict(text)

    # Best (label, score) pair; on a tie the earliest key in `output` wins.
    max_label, max_value = max(output.items(), key=lambda pair: pair[1])

    print("Text:", text)
    print("Predicted Label:", max_label, "->", round(max_value, 3))
    print("-" * 60)

In [None]:
# A longer, multi-sentence input; the line breaks are part of the string.
t5="""I don’t even know why I’m writing this. Every day feels heavy.
I wake up tired, go through the motions, and come back feeling empty.
People say things will get better, but I don’t believe them anymore.
Sometimes I wonder what would happen if I just wasn’t here."""

In [None]:
# Score the multi-sentence text; the returned dict is shown as the cell output.
predict(t5)

## Depressive

In [None]:
# Test sentences for this section.
t_s = [
    "I keep overthinking everything and it never stops.",
    "My heart races even when nothing is wrong.",
    "I feel nervous all the time for no reason.",
    "I panic about small things.",
    "I can’t stop worrying.",
    "My mind won’t shut up.",
    "I feel scared but don’t know why.",
    "I get anxious in crowds.",
    "I feel tense inside.",
    "I overthink every conversation.",
    "I keep imagining bad things happening.",
    "I feel uneasy all day.",
]

for text in t_s:
    output = predict(text)

    # Label with the highest score; on a tie the earliest key wins.
    max_label = max(output, key=output.get)
    max_value = output[max_label]

    print("Text:", text)
    print("Scores:", output)
    # Whole-number percentage. Scale first, then round: int(round(v, 2) * 100)
    # truncates float artefacts (0.29 * 100 == 28.999999999999996 -> 28).
    print("Predicted Label:", max_label, "->", round(max_value * 100))
    print("-" * 60)

## Anxiety

In [None]:
# Test sentences for this section.
t_s = [
    "Deadlines make me panic.",
    "Too much pressure makes me anxious.",
    "I feel overwhelmed and nervous.",
    "Work stress is making me panic.",
    "Pressure makes my heart race.",
    "I’m stressed and overthinking.",
]

for text in t_s:
    output = predict(text)

    # Label with the highest score; on a tie the earliest key wins.
    max_label = max(output, key=output.get)
    max_value = output[max_label]

    print("Text:", text)
    print("Scores:", output)
    # Whole-number percentage. Scale first, then round: int(round(v, 2) * 100)
    # truncates float artefacts (0.57 * 100 == 56.99999999999999 -> 56).
    print("Predicted Label:", max_label, "->", round(max_value * 100))
    print("-" * 60)

## Stress related

In [None]:
# Short, informal test sentences for this section.
t_s = [
    "work stress too much",
    "so tired from work",
    "deadlines again man",
    "too much pressure fr",
    "cant handle workload",
    "work is killing me",
    "need a break badly",
]

for text in t_s:
    output = predict(text)

    # Label with the highest score; on a tie the earliest key wins.
    max_label = max(output, key=output.get)
    max_value = output[max_label]

    print("Text:", text)
    print("Scores:", output)
    # Whole-number percentage. Scale first, then round: int(round(v, 2) * 100)
    # truncates float artefacts (0.29 * 100 == 28.999999999999996 -> 28).
    print("Predicted Label:", max_label, "->", round(max_value * 100))
    print("-" * 60)

In [None]:
# Sentences summarised in the table below.
test_cases = [
    "I don’t want to live anymore",
    "I want to kill myself",
    "I feel empty every day",
    "work stress too much",
    "so tired from work",
    "deadlines again man",
    "Deadlines make me panic.",
    "Too much pressure makes me anxious.",
    "I feel overwhelmed and nervous.",
    "Work stress is making me panic.",
    "There is too much responsibility on me.",
    "I have too much work and no time to rest.",
    "My workload is getting heavier every week.",
    "I am anxious all the time",
    "I wake up empty every day and go to bed the same way.",
    "Nothing excites me anymore.",
    "I feel hollow inside.",
]

# Fixed-width header: 50 columns for the text, 20 for the label.
header = f"{'Text':50} | {'Label':20} | Confidence"
print(header)
print("-" * 90)

for text in test_cases:
    output = predict(text)

    # Best (label, score) pair; on a tie the earliest key in `output` wins.
    max_label, max_value = max(output.items(), key=lambda pair: pair[1])

    # Text is cut to 48 characters so it fits the 50-wide column.
    confidence_pct = round(max_value * 100, 2)
    print(f"{text[:48]:50} | {max_label:20} | {confidence_pct}%")

