In [None]:
!pip install transformers datasets scikit-learn torch langdetect imbalanced-learn duckduckgo-search openai

#imports

In [None]:
import pandas as pd
import re
from langdetect import detect
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from imblearn.over_sampling import RandomOverSampler
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from datasets import Dataset
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
import torch
from duckduckgo_search import DDGS
from openai import OpenAI

# load data and Preprocessing

In [None]:
# Load dataset

# Read the CSV, drop the 'Unnamed: 0' column (presumably a saved index -- confirm),
# then remove rows with missing values and exact duplicate rows.
df = (
    pd.read_csv("Combined Data.csv")
    .drop(columns=['Unnamed: 0'])
    .dropna()
    .drop_duplicates()
)

# Preprocessing function

def clean_text(text):
    """Normalise one statement; return "" when it is not detected as English.

    Steps, in order: lowercase, strip URLs, strip @mentions, strip punctuation,
    squeeze runs of three or more identical characters down to two, run
    language detection, then collapse whitespace.

    Args:
        text: Raw statement; coerced with ``str()``.

    Returns:
        The cleaned text, or "" when the detected language is not 'en' or
        language detection fails.
    """
    text = str(text).lower()
    text = re.sub(r"http\S+|www\S+", "", text)         # Remove URLs
    text = re.sub(r"@\w+", "", text)                   # Remove mentions
    text = re.sub(r"[^\w\s]", "", text)                # Remove punctuation
    text = re.sub(r'(.)\1{2,}', r'\1\1', text)         # Reduce elongated words
    # Keep only English
    try:
        if detect(text) != 'en':
            return ""
    except Exception:
        # Detection failed for this text (e.g. nothing left to analyse after
        # cleaning). `Exception` rather than a bare `except:` so that a kernel
        # interrupt (KeyboardInterrupt) still stops a long `.apply` run instead
        # of silently blanking one row and carrying on.
        return ""
    text = re.sub(r"\s+", " ", text).strip()           # Remove extra spaces
    return text

# Clean every statement, then keep only rows whose cleaned text is non-empty.
df['statement'] = df['statement'].map(clean_text)
df = df.loc[df['statement'].ne("")]  # Remove empty rows



# Encoding and data preparation

In [None]:
# Label encoding

lbl_enc = LabelEncoder()
df['label'] = lbl_enc.fit_transform(df['status'])


# Train-test split
# Split BEFORE oversampling: RandomOverSampler duplicates existing rows, so
# resampling the full frame first would place copies of test rows in the
# training set and inflate every evaluation metric.

train_df, test_df = train_test_split(
    df[['statement', 'label']],
    test_size=0.2,
    stratify=df['label'],
    random_state=101
)


# Oversample minority classes (training split only; the test split keeps the
# original class distribution)

ros = RandomOverSampler(random_state=101)
X_res, y_res = ros.fit_resample(train_df[['statement']], train_df['label'])
df_balanced = pd.DataFrame({'statement': X_res['statement'], 'label': y_res})
train_df = df_balanced


# Convert to Hugging Face dataset

train_dataset = Dataset.from_pandas(train_df)
test_dataset = Dataset.from_pandas(test_df)


# Tokenization

In [None]:
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

def tokenize_function(examples):
    """Tokenize the 'statement' column of a batch of examples.

    Every statement is coerced to ``str`` and padded or truncated to exactly
    128 tokens.
    """
    statements = list(map(str, examples['statement']))
    return tokenizer(statements, padding='max_length', truncation=True, max_length=128)

# Tokenize each split, then expose only the listed columns as torch tensors.
train_dataset = train_dataset.map(tokenize_function, batched=True)
train_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label'])

test_dataset = test_dataset.map(tokenize_function, batched=True)
test_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label'])



# Bert model

In [None]:
# Model

# One output class per label known to the fitted label encoder.
num_labels = len(lbl_enc.classes_)

model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=num_labels)


In [None]:

# Training arguments

training_args = TrainingArguments(
    # Optimisation
    num_train_epochs=2,
    learning_rate=2e-5,
    weight_decay=0.01,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    # Evaluate and checkpoint once per epoch; reload the best checkpoint at the end.
    # (`eval_strategy` is the current name of the former `evaluation_strategy`.)
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    # Output locations and logging cadence
    output_dir='./results',
    logging_dir='./logs',
    logging_steps=50,
)

In [None]:


# Metrics

def compute_metrics(pred):
    """Compute accuracy plus weighted F1, precision and recall.

    Args:
        pred: Object exposing ``label_ids`` (true labels) and ``predictions``
            (scores; the arg-max over the last axis is the predicted class).

    Returns:
        dict with keys 'accuracy', 'f1', 'precision' and 'recall'.
    """
    y_true = pred.label_ids
    y_pred = pred.predictions.argmax(-1)

    metrics = {'accuracy': accuracy_score(y_true, y_pred)}
    weighted_scorers = (
        ('f1', f1_score),
        ('precision', precision_score),
        ('recall', recall_score),
    )
    for metric_name, scorer in weighted_scorers:
        metrics[metric_name] = scorer(y_true, y_pred, average='weighted')
    return metrics


In [None]:


# Trainer

# The tokenizer is passed as `processing_class`; the older `tokenizer=` keyword
# of Trainer is deprecated. Passing it here also lets the Trainer save the
# tokenizer files together with the model.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    processing_class=tokenizer,
    compute_metrics=compute_metrics
)


  trainer = Trainer(


In [None]:


# Train & Evaluate

# Fine-tune the model. With eval_strategy="epoch" the evaluation metrics are
# reported after every epoch; the returned TrainOutput is shown as the cell output.
trainer.train()

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.2642,0.227976,0.915364,0.913614,0.916835,0.915364
2,0.1614,0.222881,0.925853,0.924752,0.92593,0.925853


TrainOutput(global_step=10392, training_loss=0.2528276628856938, metrics={'train_runtime': 4219.7796, 'train_samples_per_second': 39.401, 'train_steps_per_second': 2.463, 'total_flos': 1.09811697397248e+16, 'train_loss': 0.2528276628856938, 'epoch': 2.0})

In [None]:
# Final evaluation on the Trainer's eval_dataset (the test split).
results = trainer.evaluate()
print(results)


# save models

In [None]:
import joblib

# Save the label encoder
# Persisted so that the inference cells can rebuild the id -> class-name map
# (they reload this file with joblib.load).
joblib.dump(lbl_enc, "/content/label_encoder.pkl")

['/content/label_encoder.pkl']

In [None]:
# Build label map from it
label_map = dict(enumerate(lbl_enc.classes_))

def predict_mental_health(text: str) -> str:
    """Classify one text and return the predicted class name.

    Args:
        text: Input text; truncated to at most 128 tokens.

    Returns:
        The entry of ``label_map`` for the arg-max logit.
    """
    # No `device` variable exists before this cell, so take the device from the
    # model itself instead of relying on a name defined further down the notebook.
    device = model.device

    inputs = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=128
    ).to(device)

    model.eval()
    with torch.no_grad():
        outputs = model(**inputs)
        pred_id = torch.argmax(outputs.logits, dim=1).item()

    return label_map[pred_id]

In [None]:
# Save model
# NOTE(review): this path is relative to the working directory, while the
# inference cells load from "/content/my_fine_tuned_bert_model". The two only
# coincide when the notebook runs with /content as its working directory.
trainer.save_model("./my_fine_tuned_bert_model")



# Test function using Bert

In [None]:

# Prediction function

label_map = {class_id: class_name for class_id, class_name in enumerate(lbl_enc.classes_)}

def predict_mental_health(text):
    """Classify one text and return the predicted class name from ``label_map``."""
    # Use the GPU when one is available, otherwise the CPU.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    encoded = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=128)
    inputs = {name: tensor.to(device) for name, tensor in encoded.items()}

    # The model must live on the same device as its inputs.
    model.to(device)
    model.eval()
    with torch.no_grad():
        pred_id = torch.argmax(model(**inputs).logits, dim=1).item()
    return label_map[pred_id]

# Example: classify one line typed by the user
user_input = input("Enter your text: ")
print(f"Predicted: {predict_mental_health(user_input)}")

Enter your text: i want to die
Predicted: Suicidal


# Full model in notebook

In [None]:
!pip install -U ddgs
from ddgs import DDGS


In [None]:
#Install dependencies

!pip install ddgs transformers torch together openai -q

In [None]:
# Imports

from ddgs import DDGS
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
from openai import OpenAI
import requests
import joblib


# Load Fine-tuned BERT Model + Label Encoder

MODEL_PATH = "/content/my_fine_tuned_bert_model"  # Path in Colab
LABEL_ENCODER_PATH = "/content/label_encoder.pkl"

# Use the GPU when one is available, otherwise the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Tokenizer and classifier are read from local files only
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, local_files_only=True)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_PATH, local_files_only=True)

# Label encoder -> {class id: class name}
# NOTE: joblib.load unpickles the file; only load files you created yourself.
lbl_enc = joblib.load(LABEL_ENCODER_PATH)
label_map = {class_id: class_name for class_id, class_name in enumerate(lbl_enc.classes_)}

# Move the model to the selected device
model.to(device)

In [None]:
# Prediction function
def predict_mental_health(text: str) -> str:
    """Return the predicted class name for ``text``.

    The text is tokenized (truncated to at most 128 tokens), moved to
    ``device`` and scored by the model. The arg-max class id is looked up in
    ``label_map``; an id missing from the map yields "Unknown".
    """
    encoding = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=128)
    batch = {key: tensor.to(device) for key, tensor in encoding.items()}

    model.eval()
    with torch.no_grad():
        logits = model(**batch).logits

    pred_id = torch.argmax(logits, dim=1).item()
    return label_map.get(pred_id, "Unknown")


In [None]:
# Together AI Model Selection
# The API key is read from the TOGETHER_API_KEY environment variable, or asked
# for interactively, so that it is never stored in the notebook. The key that
# used to be hardcoded here must be treated as leaked and revoked.
import os
from getpass import getpass

TOGETHER_API_KEY = os.environ.get("TOGETHER_API_KEY") or getpass("Together AI API key: ")

def get_first_llama_model():
    """Fetches the first available LLaMA instruct model from Together AI.

    Returns:
        The id of the first listed model whose id contains both "llama" and
        "instruct" (case-insensitive).

    Raises:
        ValueError: If the request or response handling fails (the original
            error is attached as ``__cause__``), or no model id matches.
    """
    try:
        resp = requests.get(
            "https://api.together.xyz/v1/models",
            headers={"Authorization": f"Bearer {TOGETHER_API_KEY}"},
            timeout=30,  # without a timeout a stalled connection hangs the cell forever
        )
        resp.raise_for_status()
        data = resp.json()

        # The payload is handled both as a bare list and as {"data": [...]}.
        models_list = data if isinstance(data, list) else data.get("data", [])
        for m in models_list:
            model_id = m["id"].lower()
            if "llama" in model_id and "instruct" in model_id:
                return m["id"]

    except Exception as e:
        print(f"❌ Error fetching models: {e}")
        # Same exception type and message as before, but chained so the real
        # cause (network, auth, malformed payload) stays visible in the traceback.
        raise ValueError("No LLaMA instruct model found!") from e

    raise ValueError("No LLaMA instruct model found!")

# Resolve the model id once and report which one was picked
LLAMA_MODEL = get_first_llama_model()
print(f"✅ Using Together AI model: {LLAMA_MODEL}")


# Together AI Client (OpenAI client pointed at the Together AI base URL)

client = OpenAI(api_key=TOGETHER_API_KEY, base_url="https://api.together.xyz/v1")

✅ Using Together AI model: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo


In [None]:
# DuckDuckGo Search

def search_resources(condition: str):
    """Look up at most three .org help pages for the given condition.

    Returns:
        A list of ``{"title": ..., "link": ...}`` dicts. Any failure is printed
        and whatever was collected so far (possibly nothing) is returned.
    """
    found = []
    try:
        query = f"{condition} mental health help site:.org"
        with DDGS() as ddgs:
            for hit in ddgs.text(query, max_results=3):
                entry = {
                    "title": hit.get("title", "No title"),
                    "link": hit.get("href", "#"),
                }
                found.append(entry)
    except Exception as e:
        print(f"⚠️ Search failed: {e}")
    return found

In [None]:
# Generate Advice

def generate_advice(condition: str, user_text: str, resources: list):
    """Ask the LLaMA model for advice given the user's text and predicted condition.

    Args:
        condition: Predicted condition label.
        user_text: The user's original text.
        resources: List of dicts with 'title' and 'link' keys, appended to the prompt.

    Returns:
        The model's reply, or an error string starting with
        "⚠️ Error generating advice:" if the request fails.
    """
    resource_lines = [f"- {r['title']}: {r['link']}" for r in resources]
    resources_str = "\n".join(resource_lines)

    prompt = f"""
You are a compassionate mental health assistant.
User text: "{user_text}"
Predicted condition: {condition}

Provide safe, practical advice for the user.
Include these resources at the end:

{resources_str}
"""
    messages = [{"role": "user", "content": prompt}]

    try:
        completion = client.chat.completions.create(
            model=LLAMA_MODEL,
            messages=messages,
            max_tokens=300,
        )
        return completion.choices[0].message.content
    except Exception as e:
        return f"⚠️ Error generating advice: {e}"


In [None]:
# Full Pipeline Test

if __name__ == "__main__":
    # classify -> look up resources -> generate advice
    user_input = input("Enter your text: ").strip()
    pred_condition = predict_mental_health(user_input)
    links = search_resources(pred_condition)
    advice = generate_advice(pred_condition, user_input, links)

    print(f"\n🧠 Predicted Condition: {pred_condition}")
    print("\n💡 Advice:\n", advice)
    print("\n🔗 Resources:")
    for resource in links:
        print(f"- {resource['title']}: {resource['link']}")

Enter your text: iam happy

🧠 Predicted Condition: Normal

💡 Advice:
 It's wonderful to hear that you're feeling happy.  It's essential to acknowledge and appreciate these positive emotions when they arise. To maintain this feeling, consider the following tips:

1. **Practice gratitude**: Take a moment each day to reflect on the things that bring you joy and make you feel grateful. This can be as simple as a good cup of coffee, a beautiful sunset, or a supportive friend.
2. **Stay connected**: Reach out to loved ones, engage in social activities, or join a community group that aligns with your interests. Social connections can help you feel supported and happy.
3. **Take care of yourself**: Prioritize self-care by getting enough sleep, exercising regularly, and eating a balanced diet. Taking care of your physical health can have a positive impact on your mental well-being.
4. **Engage in activities you enjoy**: Make time for hobbies, passions, or creative pursuits that bring you happin

# Telegram Bot

In [None]:
# Install dependencies

!pip install --upgrade python-telegram-bot



In [None]:
# Step 2 — Imports

import logging
from ddgs import DDGS
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
from openai import OpenAI
import requests
from telegram import Update
from telegram.ext import ApplicationBuilder, CommandHandler, MessageHandler, filters, ContextTypes
import joblib

In [None]:
# Config
# Secrets are read from environment variables, or asked for interactively, so
# that they are never stored in the notebook. The bot token and API key that
# used to be hardcoded here must be treated as leaked and revoked.
import os
from getpass import getpass

TELEGRAM_TOKEN = os.environ.get("TELEGRAM_TOKEN") or getpass("Telegram bot token: ")
TOGETHER_API_KEY = os.environ.get("TOGETHER_API_KEY") or getpass("Together AI API key: ")

MODEL_PATH = "/content/my_fine_tuned_bert_model"  # Fine-tuned model
LABEL_ENCODER_PATH = "/content/label_encoder.pkl"  # Saved from training


In [None]:
# Load models & label map

# Use the GPU when one is available, otherwise the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, local_files_only=True)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_PATH, local_files_only=True)
model.to(device)

# {class id: class name}, rebuilt from the label encoder saved during training
lbl_enc = joblib.load(LABEL_ENCODER_PATH)
label_map = {class_id: class_name for class_id, class_name in enumerate(lbl_enc.classes_)}

def predict_mental_health(text):
    """Return the predicted class name for ``text`` ("Unknown" if the id is not in ``label_map``)."""
    encoded = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=128)
    inputs = encoded.to(device)

    model.eval()
    with torch.no_grad():
        logits = model(**inputs).logits

    pred_id = torch.argmax(logits, dim=1).item()
    return label_map.get(pred_id, "Unknown")

In [None]:
# Get first LLaMA model

def get_first_llama_model():
    """Return the id of the first Together AI model that looks like a LLaMA instruct model.

    Returns:
        The first listed model id containing both "llama" and "instruct"
        (case-insensitive).

    Raises:
        requests.RequestException: If the request fails, times out, or returns
            an error status.
        ValueError: If no model id matches.
    """
    url = "https://api.together.xyz/v1/models"
    headers = {"Authorization": f"Bearer {TOGETHER_API_KEY}"}
    # Without a timeout a stalled connection would block bot start-up forever.
    resp = requests.get(url, headers=headers, timeout=30)
    resp.raise_for_status()
    data = resp.json()
    # The payload is handled both as a bare list and as {"data": [...]}.
    models_list = data if isinstance(data, list) else data.get("data", [])
    for m in models_list:
        model_id = m["id"].lower()
        if "llama" in model_id and "instruct" in model_id:
            return m["id"]
    raise ValueError("No LLaMA instruct model found!")

# Resolve the model id once at start-up (this performs a network request).
LLAMA_MODEL = get_first_llama_model()
# Together AI is accessed through the OpenAI client by pointing base_url at its API.
client = OpenAI(api_key=TOGETHER_API_KEY, base_url="https://api.together.xyz/v1")

In [None]:
# Search resources

def search_resources(condition):
    """Look up at most three .org help pages for the given condition.

    A search failure must not abort the Telegram handler, so errors are logged
    and whatever was collected so far (possibly nothing) is returned. This
    matches the error handling of the notebook's earlier ``search_resources``.

    Returns:
        A list of ``{"title": ..., "link": ...}`` dicts.
    """
    results = []
    try:
        with DDGS() as ddgs:
            for r in ddgs.text(f"{condition} mental health help site:.org", max_results=3):
                results.append({"title": r.get("title", "No title"), "link": r.get("href", "#")})
    except Exception:
        logging.getLogger(__name__).exception("Resource search failed for %r", condition)
    return results

In [None]:
# Generate advice

def generate_advice(condition, user_text, resources):
    """Ask the LLaMA model for advice given the user's text and predicted condition.

    Args:
        condition: Predicted condition label.
        user_text: The user's original message.
        resources: List of dicts with 'title' and 'link' keys, appended to the prompt.

    Returns:
        The model's reply. If the request fails or the reply is empty, a
        generic apology is returned instead and the error is logged, so the
        bot can still answer the user and internal error details are not sent
        to the chat.
    """
    fallback = "⚠️ Sorry, I couldn't generate advice right now. Please try again later."
    resources_str = "\n".join([f"- {r['title']}: {r['link']}" for r in resources])
    prompt = f"""
You are a compassionate mental health assistant.
User text: "{user_text}"
Predicted condition: {condition}

Provide safe, practical advice for the user.
Include these resources at the end:

{resources_str}
"""
    try:
        resp = client.chat.completions.create(
            model=LLAMA_MODEL,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=300
        )
        return resp.choices[0].message.content or fallback
    except Exception:
        logging.getLogger(__name__).exception("Advice generation failed")
        return fallback

In [None]:
# Telegram bot handlers

async def start(update: Update, context: ContextTypes.DEFAULT_TYPE):
    """Reply to the /start command with a greeting."""
    greeting = "Hello 👋 I'm your Mental Health Advisor Bot. How are you feeling today?"
    await update.message.reply_text(greeting)

async def handle_message(update: Update, context: ContextTypes.DEFAULT_TYPE):
    """Classify an incoming text message and reply with advice and resource links.

    Pipeline: predict the condition, search for resources, generate advice,
    then send a single HTML-formatted reply.
    """
    import asyncio
    import html

    # `update.message` is None for updates such as edited messages;
    # `effective_message` covers those too. Ignore updates without text.
    message = update.effective_message
    if message is None or not message.text:
        return
    user_input = message.text

    # Model inference, web search and the LLM request are all blocking calls.
    # Running them in worker threads (asyncio.to_thread, Python 3.9+) keeps the
    # bot's event loop responsive while they run.
    pred_condition = await asyncio.to_thread(predict_mental_health, user_input)
    links = await asyncio.to_thread(search_resources, pred_condition)
    advice = await asyncio.to_thread(generate_advice, pred_condition, user_input, links)

    # HTML with escaped content instead of legacy Markdown: LLM text and search
    # titles routinely contain '*', '_' or '[' characters, which make Telegram
    # reject a Markdown message with an entity-parsing error.
    lines = [
        f"🧠 <b>Predicted Condition:</b> {html.escape(str(pred_condition))}",
        "",
        "💡 <b>Advice:</b>",
        html.escape(str(advice or "")),
        "",
        "🔗 <b>Resources:</b>",
    ]
    for resource in links:
        href = html.escape(str(resource["link"]), quote=True)
        title = html.escape(str(resource["title"]))
        lines.append(f'- <a href="{href}">{title}</a>')
    response = "\n".join(lines)

    await message.reply_text(response, parse_mode="HTML", disable_web_page_preview=True)

In [None]:
# Start bot

import nest_asyncio
import asyncio

logging.basicConfig(level=logging.INFO)
app = ApplicationBuilder().token(TELEGRAM_TOKEN).build()

app.add_handler(CommandHandler("start", start))
app.add_handler(MessageHandler(filters.TEXT & ~filters.COMMAND, handle_message))

print("🚀 Bot is running...")

# Allow nested event loops (needed for Colab/Jupyter, where a loop is already running)
nest_asyncio.apply()

# run_polling() is a regular blocking method, not a coroutine: it drives the
# event loop itself, so it must not be wrapped in run_until_complete(). The cell
# blocks until the bot is stopped (interrupt the kernel to stop it).
# close_loop=False keeps run_polling from closing the notebook's own event loop
# on shutdown.
app.run_polling(close_loop=False)