In [None]:
# Install necessary Python packages for Hugging Face Transformers, Datasets, Evaluate, and Accelerate.
# The '-q' flag makes the installation quiet, suppressing verbose output.
%pip install transformers datasets evaluate accelerate -q

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/84.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
import pandas as pd

# Read the labelled corpus from the CSV and preview its first rows.
df = pd.read_csv('/content/final_dataset.csv')
display(df.head())

# Class balance: number of rows per value of the 'label' column.
label_counts = df['label'].value_counts()
display(label_counts)

# Whitespace-delimited word count per text, stored next to the text itself.
df['word_count'] = [len(text.split()) for text in df['text']]

# Mean words per text, reported with two decimals.
average_word_count = df['word_count'].mean()
print(f"The average word count of each text is: {average_word_count:.2f}")

Unnamed: 0,text,label
0,my advisor asked me to drop by her office and ...,anxiety
1,i just cant sit still or focus and i keep chec...,stress
2,i let my email inbox pile up because the thoug...,burnout
3,everyone pities me and looks down at me for my...,depression
4,i fixed five complex hardware issues today but...,burnout


Unnamed: 0_level_0,count
label,Unnamed: 1_level_1
depression,7030
anxiety,5710
stress,5157
normal,4326
burnout,3511


The average word count of each text is: 91.27


In [None]:
# Extremes of the per-text word counts computed in the previous cell.
word_counts = df['word_count']
shortest_text_length = word_counts.min()
longest_text_length = word_counts.max()

# First text (in row order) that attains each extreme.
shortest_text = df.loc[word_counts == shortest_text_length, 'text'].iloc[0]
longest_text = df.loc[word_counts == longest_text_length, 'text'].iloc[0]

# Show both examples together with their lengths.
print(f"Shortest text (length {shortest_text_length}): '{shortest_text}'")
print(f"Longest text (length {longest_text_length}): '{longest_text}'")

Shortest text (length 30): 'i tried to watch a movie tonight but i couldnt focus i kept checking my phone and feeling antsy im completely wound up and cant seem to decompress after work'
Longest text (length 989): 'i live with two people in my family who have schizophrenia and there was a butcher knife placed on a picture of me im a 19 year old cc student and i live with my grandma mom uncle aunt and little sister my mom has a meth addiction and shes developed schizophrenia from it and its been getting worse as the years go on my uncle was hit on the head at a young age and im not exactly sure what he struggles since we never talk but i think its schizophrenia my mom doesnt take her meds i think and my grandma is constantly trying to help her with moving in and out of motels and rehabs and all this stuff for the past 8 months or so realistically shes been trying to help her for many many years now but my mom had moved out recently my mom moved back in like a week ago my uncle is very s

In [None]:
import torch
from transformers import AutoTokenizer
from sklearn.model_selection import train_test_split

# Give every distinct label string an integer id, in order of first appearance in df.
label_map = {label: i for i, label in enumerate(df['label'].unique())}
df['numerical_label'] = df['label'].map(label_map)

# Tokenizer matching the roberta-base checkpoint that is fine-tuned later.
tokenizer = AutoTokenizer.from_pretrained('roberta-base')

# Tokenize the whole corpus in one call: truncate at 512 tokens and pad every row
# to the longest encoded sequence so the result is one rectangular tensor.
encoded_data = tokenizer(df['text'].tolist(), padding=True, truncation=True, max_length=512, return_tensors='pt')

# Row i of every entry corresponds to row i of df *by position*.
processed_data = {
    'input_ids': encoded_data['input_ids'],
    'attention_mask': encoded_data['attention_mask'],
    'labels': df['numerical_label'].values
}

# Per-class 70/30 split. Rows are selected by position rather than by df.index
# labels, so the tensors are indexed correctly even when df no longer carries a
# default 0..n-1 index (e.g. after filtering rows).
train_positions = []
validation_positions = []
for label_id in sorted(label_map.values()):
    label_positions = (processed_data['labels'] == label_id).nonzero()[0].tolist()

    train_part, validation_part = train_test_split(
        label_positions,
        test_size=0.3,
        random_state=42,
        # The stratification target holds a single class here; it is kept so the
        # call matches the one that produced the recorded split.
        stratify=[label_id] * len(label_positions)
    )

    train_positions.extend(train_part)
    validation_positions.extend(validation_part)

# Sanity checks: the two splits are disjoint and together cover every row.
assert not set(train_positions) & set(validation_positions), "train/validation rows overlap"
assert len(train_positions) + len(validation_positions) == len(df), "some rows were not assigned to a split"

# Materialise the final tensors (rows are ordered class by class).
train_inputs = processed_data['input_ids'][train_positions]
validation_inputs = processed_data['input_ids'][validation_positions]
train_masks = processed_data['attention_mask'][train_positions]
validation_masks = processed_data['attention_mask'][validation_positions]
train_labels = torch.tensor(processed_data['labels'][train_positions])
validation_labels = torch.tensor(processed_data['labels'][validation_positions])

print("Data preprocessing and splitting complete.")
print(f"Training set size: {len(train_inputs)}")
print(f"Validation set size: {len(validation_inputs)}")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Data preprocessing and splitting complete.
Training set size: 18012
Validation set size: 7722


In [None]:
from transformers import AutoModelForSequenceClassification, pipeline
import torch

# Inverse of label_map: integer id -> original label string.
reverse_label_map = {index: label for label, index in label_map.items()}

# Size of the classification head.
num_labels = len(label_map)

# Use the GPU when one is available, otherwise fall back to CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# roberta-base with a freshly initialised classification head. Passing
# id2label/label2id stores the real class names in the model config, so the
# saved model reports them instead of generic LABEL_<n> names.
model = AutoModelForSequenceClassification.from_pretrained(
    'roberta-base',
    num_labels=num_labels,
    id2label=reverse_label_map,
    label2id=label_map,
)
model.to(device)

# Off-the-shelf sentiment classifier, placed on the same device as the classifier.
sentiment_analyzer = pipeline('sentiment-analysis', model='cardiffnlp/twitter-roberta-base-sentiment-latest', device=device)

# Off-the-shelf emotion classifier; top_k=None makes it return a score for every emotion.
emotion_analyzer = pipeline('text-classification', model='SamLowe/roberta-base-go_emotions', top_k=None, device=device)

print("All models and mappers loaded and configured.")

model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


config.json:   0%|          | 0.00/929 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/501M [00:00<?, ?B/s]

Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


vocab.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/501M [00:00<?, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

Device set to use cuda:0


config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/380 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/280 [00:00<?, ?B/s]

Device set to use cuda:0


All models and mappers loaded and configured.


In [None]:
def freeze_lower_layers(model, freeze_ratio=0.7):
    """Freeze the first part of the encoder stack and leave the rest trainable.

    The encoder layers are read from ``model.roberta.encoder.layer``. The first
    ``int(len(layers) * freeze_ratio)`` layers get ``requires_grad = False`` on
    all of their parameters; every remaining layer gets ``requires_grad = True``.
    Parameters outside the encoder layers are not touched.

    Args:
        model: Object expected to expose ``roberta.encoder.layer``.
        freeze_ratio: Fraction of encoder layers (counted from the first) to freeze.

    Returns:
        None. The model is modified in place and progress is printed.
    """
    if not hasattr(model, "roberta"):
        # Nothing to freeze: report both the missing attribute and the outcome.
        print("Warning: Model does not have a 'roberta' attribute for encoder layers. Freezing might not apply as expected.")
        print("No identifiable encoder layers found for freezing.")
        return

    encoder_layers = model.roberta.encoder.layer
    if not encoder_layers:
        print("No identifiable encoder layers found for freezing.")
        return

    # Index of the first layer that stays trainable.
    freeze_until = int(len(encoder_layers) * freeze_ratio)
    print(f"Freezing {freeze_until} out of {len(encoder_layers)} encoder layers.")

    for position, layer in enumerate(encoder_layers):
        trainable = position >= freeze_until
        for param in layer.parameters():
            param.requires_grad = trainable

    print("Model layers frozen according to freeze_ratio.")

# Freeze the lower encoder layers of the classifier (ratio 0.7).
freeze_lower_layers(model, freeze_ratio=0.7)

# List the requires_grad flag of every encoder-layer parameter as a check.
print("\nVerifying parameter requires_grad status:")
encoder_parameters = (
    (name, param) for name, param in model.named_parameters() if 'encoder.layer' in name
)
for name, param in encoder_parameters:
    print(f"{name}: requires_grad = {param.requires_grad}")

Freezing 8 out of 12 encoder layers.
Model layers frozen according to freeze_ratio.

Verifying parameter requires_grad status:
roberta.encoder.layer.0.attention.self.query.weight: requires_grad = False
roberta.encoder.layer.0.attention.self.query.bias: requires_grad = False
roberta.encoder.layer.0.attention.self.key.weight: requires_grad = False
roberta.encoder.layer.0.attention.self.key.bias: requires_grad = False
roberta.encoder.layer.0.attention.self.value.weight: requires_grad = False
roberta.encoder.layer.0.attention.self.value.bias: requires_grad = False
roberta.encoder.layer.0.attention.output.dense.weight: requires_grad = False
roberta.encoder.layer.0.attention.output.dense.bias: requires_grad = False
roberta.encoder.layer.0.attention.output.LayerNorm.weight: requires_grad = False
roberta.encoder.layer.0.attention.output.LayerNorm.bias: requires_grad = False
roberta.encoder.layer.0.intermediate.dense.weight: requires_grad = False
roberta.encoder.layer.0.intermediate.dense.bias:

In [None]:
from transformers import Trainer, TrainingArguments, EarlyStoppingCallback, DataCollatorWithPadding
from datasets import Dataset
import torch
import evaluate
import numpy as np
from torch.optim import AdamW

# Metric objects from the `evaluate` library, loaded once and reused on every evaluation.
accuracy_metric, precision_metric, recall_metric, f1_metric = (
    evaluate.load(metric_name) for metric_name in ("accuracy", "precision", "recall", "f1")
)

def compute_metrics(eval_pred):
    """Turn a ``(logits, labels)`` pair into accuracy, precision, recall and F1.

    The predicted class is the argmax over the last axis of the logits.
    Precision, recall and F1 use ``average='weighted'``.

    Returns:
        dict with the keys "accuracy", "precision", "recall" and "f1".
    """
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    shared = dict(predictions=predictions, references=labels)
    return {
        "accuracy": accuracy_metric.compute(**shared)["accuracy"],
        "precision": precision_metric.compute(average='weighted', **shared)["precision"],
        "recall": recall_metric.compute(average='weighted', **shared)["recall"],
        "f1": f1_metric.compute(average='weighted', **shared)["f1"],
    }

# Training configuration for the Trainer.
# - Evaluation and checkpointing both run once per epoch. The step-based
#   eval_steps/save_steps settings only apply to the "steps" strategies, so they
#   are not set here.
# - The best checkpoint by validation F1 is restored at the end of training;
#   save_total_limit bounds how many per-epoch checkpoints stay on disk.
# - Mixed precision is enabled only when a CUDA device is present, so the same
#   cell also runs on a CPU-only runtime.
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=64,
    warmup_ratio=0.1,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=10,
    eval_strategy="epoch",
    save_strategy="epoch",
    save_total_limit=2,
    load_best_model_at_end=True,
    metric_for_best_model="eval_f1",
    greater_is_better=True,  # a higher F1 is better
    report_to="none",
    fp16=torch.cuda.is_available(),
    learning_rate=5e-5,
    gradient_accumulation_steps=2,  # with batch size 16: 32 samples per optimizer step per device
    lr_scheduler_type="cosine"
)

# Wrap the pre-tokenized tensors in Hugging Face Dataset objects, one per split.
train_dataset = Dataset.from_dict(
    dict(input_ids=train_inputs, attention_mask=train_masks, labels=train_labels)
)

validation_dataset = Dataset.from_dict(
    dict(input_ids=validation_inputs, attention_mask=validation_masks, labels=validation_labels)
)

# A hand-built optimizer bypasses the Trainer's own optimizer setup, so the
# learning rate and weight decay are both taken from training_args explicitly.
# Changing either value there then really changes training. Only parameters that
# still require gradients (i.e. were not frozen earlier) are handed to AdamW.
trainable_parameters = [param for param in model.parameters() if param.requires_grad]
optimizer = AdamW(
    trainable_parameters,
    lr=training_args.learning_rate,
    weight_decay=training_args.weight_decay,
)

# Collator that pads every batch to a fixed length of 512 tokens
# (padding='max_length'); this is fixed-length padding, not dynamic padding.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer, max_length=512, padding='max_length')

# Trainer wiring: custom optimizer (the scheduler slot is left as None), metric
# computation, and early stopping after 2 evaluations without improvement.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=validation_dataset,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
    optimizers=(optimizer, None),
    callbacks=[EarlyStoppingCallback(early_stopping_patience=2)],
)

# Run fine-tuning.
trainer.train()
print("Model training complete.")

Downloading builder script: 0.00B [00:00, ?B/s]

Downloading builder script: 0.00B [00:00, ?B/s]

Downloading builder script: 0.00B [00:00, ?B/s]

Downloading builder script: 0.00B [00:00, ?B/s]

Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.3575,0.270141,0.901968,0.903216,0.901968,0.901697
2,0.2542,0.240985,0.914659,0.915985,0.914659,0.914398
3,0.0919,0.263182,0.916472,0.918805,0.916472,0.916461
4,0.0524,0.278955,0.925667,0.925981,0.925667,0.925667
5,0.0506,0.297269,0.923595,0.924134,0.923595,0.923518


Model training complete.


In [None]:
# Score the validation split with the model currently held by `trainer`.
validation_size = len(validation_dataset)
print(f"validation set size: {validation_size}")

eval_results = trainer.evaluate(eval_dataset=validation_dataset)

print("Evaluation results on original validation dataset:")
display(eval_results)

validation set size: 7722


Evaluation results on original validation dataset:


{'eval_loss': 0.27895477414131165,
 'eval_accuracy': 0.9256669256669257,
 'eval_precision': 0.9259811522997963,
 'eval_recall': 0.9256669256669257,
 'eval_f1': 0.9256671979480124,
 'eval_runtime': 54.2763,
 'eval_samples_per_second': 142.272,
 'eval_steps_per_second': 2.229,
 'epoch': 5.0}

In [None]:
def perform_all_analyses(input_text):
    """
    Run mental health classification, sentiment analysis and emotion analysis
    on one piece of text.

    Text longer than 512 tokens is truncated for all three models, so long
    inputs are analysed on their first 512 tokens instead of overflowing a
    model's maximum sequence length.

    Args:
        input_text (str): The text to be analyzed.

    Returns:
        dict: A dictionary with three keys:
            'mental_health_prediction': predicted label string,
            'sentiment_analysis': {'label': ..., 'score': ...},
            'emotion_analysis': the three highest-scoring emotion entries,
                each a dict with 'label' and 'score'.
    """
    # Tokenize for the fine-tuned classifier, capped at 512 tokens.
    encoded_input = tokenizer(input_text, padding=True, truncation=True, max_length=512, return_tensors='pt')

    # Move tokenized inputs to the same device as the model.
    input_ids = encoded_input['input_ids'].to(device)
    attention_mask = encoded_input['attention_mask'].to(device)

    # Inference only: evaluation mode and no gradient tracking.
    model.eval()
    with torch.no_grad():
        outputs = model(input_ids, attention_mask=attention_mask)
        logits = outputs.logits
        prediction = torch.argmax(logits, axis=-1).item()
    # Map the predicted class id back to its label string.
    mental_health_label = reverse_label_map[prediction]

    # The pipelines tokenize internally; truncation is requested explicitly with
    # the same 512-token cap as the classifier.
    sentiment_result = sentiment_analyzer(input_text, truncation=True, max_length=512)[0]

    # All emotion scores for the text, highest first; keep the top 3.
    emotion_raw_results = emotion_analyzer(input_text, truncation=True, max_length=512)[0]
    sorted_emotions = sorted(emotion_raw_results, key=lambda x: x['score'], reverse=True)
    top_emotions = sorted_emotions[:3]

    return {
        'mental_health_prediction': mental_health_label,
        'sentiment_analysis': {
            'label': sentiment_result['label'],
            'score': sentiment_result['score']
        },
        'emotion_analysis': top_emotions
    }

print("The 'perform_all_analyses' function has been defined.")

The 'perform_all_analyses' function has been defined.


In [None]:
# Run all three analyses on one hand-written example.
sample_user_input = "Public speaking makes me feel like I can't breathe, and my hands start shaking uncontrollably."
analysis_result = perform_all_analyses(sample_user_input)

# Report: predicted label, sentiment with score, then the top emotions.
sentiment = analysis_result['sentiment_analysis']
print(f"\nAnalysis for user input: \"{sample_user_input}\"\n")
print("---------------------------------------------------")
print(f"Mental Health Prediction: {analysis_result['mental_health_prediction']}")
print(f"Sentiment: {sentiment['label']} (Score: {sentiment['score']:.4f})")
print("Top Emotions:")
for emotion in analysis_result['emotion_analysis']:
    print(f"  - {emotion['label']}: {emotion['score']:.4f}")


Analysis for user input: "Public speaking makes me feel like I can't breathe, and my hands start shaking uncontrollably."

---------------------------------------------------
Mental Health Prediction: anxiety
Sentiment: negative (Score: 0.8663)
Top Emotions:
  - fear: 0.3250
  - sadness: 0.3177
  - nervousness: 0.2219


In [None]:
# Two hand-written probe sentences per class, keyed by the label each is meant to illustrate.
custom_sample_texts = {
    'depression': [
        "I've been feeling an overwhelming sense of sadness and hopelessness for weeks now.",
        "Nothing brings me joy anymore, and I struggle to even get out of bed."
    ],
    'burnout': [
        "I'm completely exhausted, both mentally and physically, and feel detached from my work.",
        "Every task feels monumental, and I have no energy left at the end of the day."
    ],
    'stress': [
        "I have so many deadlines approaching, and I can't seem to focus on anything.",
        "My heart is racing, and I can't stop worrying about all the things I need to do."
    ],
    'anxiety': [
        "I constantly feel on edge and can't shake off this feeling of impending doom.",
        "Social situations make me incredibly nervous, and I worry excessively about what others think."
    ],
    'normal': [
        "I had a pleasant day, accomplished my tasks, and enjoyed some leisure time.",
        "Feeling good and looking forward to the weekend with friends and family."
    ]
}

# Analyse every probe sentence and print the result next to its intended label.
print("--- Model Predictions on Custom Sample Texts ---")
for label, texts in custom_sample_texts.items():
    for sample_number, text in enumerate(texts, start=1):
        print(f"\nOriginal Label: {label} (Sample {sample_number})")
        print(f"Sample Text: '{text}'")
        analysis_result = perform_all_analyses(text)
        sentiment = analysis_result['sentiment_analysis']
        print(f"  Predicted Mental Health: {analysis_result['mental_health_prediction']}")
        print(f"  Sentiment: {sentiment['label']} (Score: {sentiment['score']:.4f})")
        print("  Top Emotions:")
        for emotion in analysis_result['emotion_analysis']:
            print(f"    - {emotion['label']}: {emotion['score']:.4f}")
        print("---------------------------------------------------")

--- Model Predictions on Custom Sample Texts ---

Original Label: depression (Sample 1)
Sample Text: 'I've been feeling an overwhelming sense of sadness and hopelessness for weeks now.'
  Predicted Mental Health: depression
  Sentiment: negative (Score: 0.9261)
  Top Emotions:
    - sadness: 0.8833
    - disappointment: 0.1102
    - grief: 0.0226
---------------------------------------------------

Original Label: depression (Sample 2)
Sample Text: 'Nothing brings me joy anymore, and I struggle to even get out of bed.'
  Predicted Mental Health: depression
  Sentiment: negative (Score: 0.9406)
  Top Emotions:
    - disappointment: 0.4891
    - sadness: 0.2590
    - annoyance: 0.1270
---------------------------------------------------

Original Label: burnout (Sample 1)
Sample Text: 'I'm completely exhausted, both mentally and physically, and feel detached from my work.'
  Predicted Mental Health: burnout
  Sentiment: negative (Score: 0.8925)
  Top Emotions:
    - sadness: 0.5949
    - 

You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


  Predicted Mental Health: anxiety
  Sentiment: negative (Score: 0.9067)
  Top Emotions:
    - nervousness: 0.4685
    - fear: 0.2067
    - sadness: 0.1913
---------------------------------------------------

Original Label: anxiety (Sample 2)
Sample Text: 'Social situations make me incredibly nervous, and I worry excessively about what others think.'
  Predicted Mental Health: anxiety
  Sentiment: negative (Score: 0.8411)
  Top Emotions:
    - nervousness: 0.5786
    - fear: 0.1672
    - caring: 0.1275
---------------------------------------------------

Original Label: normal (Sample 1)
Sample Text: 'I had a pleasant day, accomplished my tasks, and enjoyed some leisure time.'
  Predicted Mental Health: normal
  Sentiment: positive (Score: 0.9860)
  Top Emotions:
    - joy: 0.8545
    - approval: 0.0504
    - relief: 0.0353
---------------------------------------------------

Original Label: normal (Sample 2)
Sample Text: 'Feeling good and looking forward to the weekend with friends a

In [None]:
# Mount Google Drive at /content/drive; later cells save and load the model
# under '/content/drive/My Drive/'. Requires the google.colab package.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import json

# Destination folder on the mounted Google Drive.
output_dir = '/content/drive/My Drive/mental_health_model_roberta'

# Persist the trainer's model and the tokenizer side by side.
trainer.save_model(output_dir)
tokenizer.save_pretrained(output_dir)

# Store both label mappings as JSON next to the model.
# JSON object keys are always strings, so the integer keys of
# reverse_label_map are written as strings.
for filename, mapping in (
    ("label_map.json", label_map),
    ("reverse_label_map.json", reverse_label_map),
):
    with open(f"{output_dir}/{filename}", "w") as f:
        json.dump(mapping, f)

print(f"Model, tokenizer, and label maps saved to {output_dir}")

Model, tokenizer, and label maps saved to /content/drive/My Drive/mental_health_model_roberta


In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
import json

# Folder on Google Drive that holds the saved model, tokenizer and label maps.
output_dir = '/content/drive/My Drive/mental_health_model_roberta'

# Reload the tokenizer, requesting the slow (pure-Python) implementation.
tokenizer = AutoTokenizer.from_pretrained(output_dir, use_fast=False)

# Reload both label mappings.
with open(f"{output_dir}/label_map.json") as f:
    label_map = json.load(f)
with open(f"{output_dir}/reverse_label_map.json") as f:
    reverse_label_map_str_keys = json.load(f)

# JSON stored the integer ids as string keys; turn them back into ints.
reverse_label_map = {
    int(label_id): label_name for label_id, label_name in reverse_label_map_str_keys.items()
}

# Size of the classification head.
num_labels = len(label_map)

# Use the GPU when one is available, otherwise fall back to CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Reload the fine-tuned classifier and place it on that device.
model = AutoModelForSequenceClassification.from_pretrained(output_dir, num_labels=num_labels)
model.to(device)

# Off-the-shelf sentiment pipeline.
sentiment_analyzer = pipeline('sentiment-analysis', model='cardiffnlp/twitter-roberta-base-sentiment-latest')

# Off-the-shelf emotion pipeline; top_k=None returns a score for every emotion.
emotion_analyzer = pipeline('text-classification', model='SamLowe/roberta-base-go_emotions', top_k=None)

print("Saved model, tokenizer, label maps, and analysis pipelines loaded successfully.")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/929 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/501M [00:00<?, ?B/s]

Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/501M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

Device set to use cuda:0


config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/380 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/280 [00:00<?, ?B/s]

Device set to use cuda:0


Saved model, tokenizer, label maps, and analysis pipelines loaded successfully.


In [None]:
from transformers import Trainer, TrainingArguments, DataCollatorWithPadding
from datasets import Dataset
import evaluate
import numpy as np
import torch

# Metric objects from the `evaluate` library.
# NOTE(review): this cell repeats the metric loading and the compute_metrics
# definition from the training cell; the two copies are identical in behaviour.
accuracy_metric = evaluate.load("accuracy")
precision_metric = evaluate.load("precision")
recall_metric = evaluate.load("recall")
f1_metric = evaluate.load("f1")

def compute_metrics(eval_pred):
    """Compute accuracy plus weighted precision, recall and F1.

    ``eval_pred`` unpacks into ``(logits, labels)``; the predicted class is the
    argmax over the last axis of the logits.

    Returns:
        dict with the keys "accuracy", "precision", "recall" and "f1".
    """
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)

    scores = {
        "accuracy": accuracy_metric.compute(predictions=predictions, references=labels)["accuracy"],
    }
    weighted_metrics = (
        ("precision", precision_metric),
        ("recall", recall_metric),
        ("f1", f1_metric),
    )
    for key, metric in weighted_metrics:
        result = metric.compute(predictions=predictions, references=labels, average='weighted')
        scores[key] = result[key]
    return scores

# Rebuild the validation Dataset (only the validation split is needed here).
# NOTE(review): validation_inputs / validation_masks / validation_labels come
# from the preprocessing cell, so that cell must have run in this session.
validation_dataset = Dataset.from_dict(
    dict(input_ids=validation_inputs, attention_mask=validation_masks, labels=validation_labels)
)

# Minimal arguments: this Trainer is used for evaluation only.
training_args_eval = TrainingArguments(
    output_dir='./results_eval',
    per_device_eval_batch_size=64,
    report_to="none",
)

# Collator that pads every batch to a fixed length of 512 tokens.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer, max_length=512, padding='max_length')

# Evaluation-only Trainer around the model reloaded from disk.
trainer_eval_kwargs = dict(
    model=model,
    args=training_args_eval,
    eval_dataset=validation_dataset,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)
trainer_eval = Trainer(**trainer_eval_kwargs)

print(f"Validation set size: {len(validation_dataset)}")

eval_results = trainer_eval.evaluate()

print("Evaluation results on validation dataset with loaded model:")
display(eval_results)

Validation set size: 7722


Evaluation results on validation dataset with loaded model:


{'eval_loss': 0.27895477414131165,
 'eval_model_preparation_time': 0.0233,
 'eval_accuracy': 0.9256669256669257,
 'eval_precision': 0.9259811522997963,
 'eval_recall': 0.9256669256669257,
 'eval_f1': 0.9256671979480124,
 'eval_runtime': 63.7302,
 'eval_samples_per_second': 121.167,
 'eval_steps_per_second': 1.899}

In [None]:
# Run all three analyses on a longer, multi-sentence example.
sample_user_input = "Looking ahead feels like peering into a nothingness where nothing changes save the level of greyness. I feel like I'm failing everyone, and this realisation only adds to my sense of hopelessness and despair. There is no reprieve; only the same stale, heavy air every day."
analysis_result = perform_all_analyses(sample_user_input)

# Report: predicted label, sentiment with score, then the top emotions.
sentiment = analysis_result['sentiment_analysis']
print(f"\nAnalysis for user input: \"{sample_user_input}\"\n")
print("---------------------------------------------------")
print(f"Mental Health Prediction: {analysis_result['mental_health_prediction']}")
print(f"Sentiment: {sentiment['label']} (Score: {sentiment['score']:.4f})")
print("Top Emotions:")
for emotion in analysis_result['emotion_analysis']:
    print(f"  - {emotion['label']}: {emotion['score']:.4f}")


Analysis for user input: "Looking ahead feels like peering into a nothingness where nothing changes save the level of greyness. I feel like I'm failing everyone, and this realisation only adds to my sense of hopelessness and despair. There is no reprieve; only the same stale, heavy air every day."

---------------------------------------------------
Mental Health Prediction: depression
Sentiment: negative (Score: 0.9262)
Top Emotions:
  - disappointment: 0.4993
  - sadness: 0.4825
  - neutral: 0.0472


In [None]:
!pip install gradio huggingface_hub -q

In [None]:
import gradio as gr
from huggingface_hub import InferenceClient
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
import json

# Location that holds the fine-tuned classifier together with its tokenizer and label-map files
output_dir = '/content/drive/My Drive/mental_health_model_roberta'

# Tokenizer saved alongside the model; use_fast=False requests the non-fast implementation
tokenizer = AutoTokenizer.from_pretrained(output_dir, use_fast=False)

# Read both label maps. JSON object keys are always strings, so the reverse map's
# keys are cast back to int to allow lookups by predicted class index.
with open(f"{output_dir}/label_map.json", "r") as f:
    label_map = json.load(f)
with open(f"{output_dir}/reverse_label_map.json", "r") as f:
    raw_reverse_map = json.load(f)
reverse_label_map = {int(class_id): class_name for class_id, class_name in raw_reverse_map.items()}

# One output class per entry in the label map
num_labels = len(label_map)

# Pick GPU when one is visible, otherwise CPU, then load the classifier onto it
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = AutoModelForSequenceClassification.from_pretrained(output_dir, num_labels=num_labels)
model.to(device)

# The auxiliary pipelines take an integer device index: 0 when CUDA is available, -1 otherwise
pipeline_device = 0 if torch.cuda.is_available() else -1
sentiment_analyzer = pipeline(
    'sentiment-analysis',
    model='cardiffnlp/twitter-roberta-base-sentiment-latest',
    device=pipeline_device,
)
# top_k=None is set so the caller can rank every returned emotion score itself
emotion_analyzer = pipeline(
    'text-classification',
    model='SamLowe/roberta-base-go_emotions',
    top_k=None,
    device=pipeline_device,
)


def perform_all_analyses(input_text, max_length=512):
    """
    Run mental health classification, sentiment analysis, and emotion analysis
    on a single input text.

    Args:
        input_text: The raw text to analyse (one string).
        max_length: Maximum number of tokens passed to each model; longer inputs
            are truncated. Defaults to 512, the limit previously hard-coded for
            the classifier.

    Returns:
        dict with three keys:
            'mental_health_prediction': class name looked up in reverse_label_map
                for the classifier's highest-scoring class,
            'sentiment_analysis': {'label', 'score'} taken from sentiment_analyzer,
            'emotion_analysis': the three highest-scoring entries returned by
                emotion_analyzer, best first.
    """
    # Tokenize input for the mental health classification model
    encoded_input = tokenizer(
        input_text, padding=True, truncation=True, max_length=max_length, return_tensors='pt'
    )

    # Move inputs to the designated device
    input_ids = encoded_input['input_ids'].to(device)
    attention_mask = encoded_input['attention_mask'].to(device)

    # Perform mental health prediction (inference only: eval mode, no gradients)
    model.eval()
    with torch.no_grad():
        logits = model(input_ids, attention_mask=attention_mask).logits
        prediction = torch.argmax(logits, dim=-1).item()
    mental_health_label = reverse_label_map[prediction]

    # The auxiliary pipelines receive the raw text and do not truncate unless asked,
    # so a long message would exceed their position limit and raise instead of
    # returning a result. Apply the same truncation used for the classifier above.
    truncation_kwargs = {'truncation': True, 'max_length': max_length}

    # Perform sentiment analysis
    sentiment_result = sentiment_analyzer(input_text, **truncation_kwargs)[0]

    # Perform emotion analysis and keep the top 3 emotions by score
    emotion_raw_results = emotion_analyzer(input_text, **truncation_kwargs)[0]
    top_emotions = sorted(emotion_raw_results, key=lambda x: x['score'], reverse=True)[:3]

    # Return all analysis results
    return {
        'mental_health_prediction': mental_health_label,
        'sentiment_analysis': {
            'label': sentiment_result['label'],
            'score': sentiment_result['score']
        },
        'emotion_analysis': top_emotions
    }

# Generative model setup (the "voice" of the bot).
# The Hugging Face token is read from the Colab secret stored under the name 'HF_TOKEN'.
# NOTE(review): `userdata` is not imported in this cell; presumably
# `from google.colab import userdata` ran in an earlier cell. Confirm.
HUGGING_FACE_TOKEN = userdata.get('HF_TOKEN')
client = InferenceClient(model="HuggingFaceH4/zephyr-7b-beta", token=HUGGING_FACE_TOKEN)

def generate_smart_response(user_text, analysis_result):
    """
    Generate a short, empathetic reply to the user based on the analysis results.

    Args:
        user_text: The user's message.
        analysis_result: dict with the keys 'mental_health_prediction',
            'sentiment_analysis' (containing 'label') and 'emotion_analysis'
            (a list of {'label', 'score'} entries), i.e. the shape returned by
            perform_all_analyses.

    Returns:
        The reply as a single string, assembled from the streamed chunks. Chunks
        that carry no text are skipped, so the result may be an empty string.
    """
    status = analysis_result['mental_health_prediction']
    sentiment = analysis_result['sentiment_analysis']['label']
    top_emotions_str = ", ".join(
        f"{e['label']} (score: {e['score']:.2f})" for e in analysis_result['emotion_analysis'][:3]
    )

    # Extra guidance is only added for the non-'normal' predictions listed here
    severity_guidance = ""
    if status in ('depression', 'anxiety', 'burnout', 'stress'):
        severity_guidance = (
            "Given the nature of their feelings, gently suggest exploring ways to support their well-being, "
            "such as talking to someone they trust, engaging in self-care, or seeking professional guidance. "
            # Trailing space keeps this sentence from running into the next one in the prompt
            "Integrate this suggestion naturally into your response. "
            "Keep the response short (under 3 sentences) and supportive."
        )

    # Construct the prompt for the LLM
    system_prompt = (
        f"You are a compassionate mental health support assistant. "
        f"The user just said: '{user_text}'. "
        f"Based on an internal analysis, I've identified: "
        f"- Mental state: '{status}'\n"
        f"- Overall sentiment: '{sentiment}'\n"
        f"- Top emotions: {top_emotions_str}. "
        f"Please respond in a warm, validating, and human-like way. "
        f"First, empathetically acknowledge these insights (mental state, sentiment, and primary emotion) directly. "
        f"Then, offer supportive words and a practical suggestion relevant to their mental state. "
        f"Keep your response concise, around 2-4 sentences, combining acknowledgment and support naturally. "
        f"{severity_guidance}"
    )

    messages = [{"role": "system", "content": system_prompt},
                {"role": "user", "content": user_text}]

    # Stream the completion and stitch the text pieces together. A chunk may carry
    # no choices or a delta whose content is None; skip those instead of failing
    # on `str += None`.
    pieces = []
    for chunk in client.chat_completion(messages, max_tokens=150, stream=True):
        if not chunk.choices:
            continue
        token = chunk.choices[0].delta.content
        if token:
            pieces.append(token)

    return "".join(pieces)

def chat_logic(message, history):
    """
    Produce the bot's reply to one user message.

    The message is first run through perform_all_analyses; that analysis and the
    original text are then handed to generate_smart_response, whose return value
    is returned unchanged. `history` is accepted but not used here.
    """
    return generate_smart_response(message, perform_all_analyses(message))

with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# Mental Health Companion")
    gr.Markdown("I'm here to listen. Tell me how you are feeling today.")

    # Initialize the chatbot component with an initial bot message
    chatbot = gr.Chatbot(
        value=[{"role": "assistant", "content": "Hello. I'm here to support you. How are you feeling right now?"}],
        height=400,
        type="messages"
    )

    msg = gr.Textbox(placeholder="Type your feelings here...", show_label=False)
    clear = gr.ClearButton([msg, chatbot])

    def user_interaction(user_message, history):
        """Append the submitted text to the history and clear the input box.

        Blank or whitespace-only submissions are ignored: the history is returned
        unchanged so no empty user turn is recorded.
        """
        history = history or []  # tolerate a missing history, e.g. after the chat is cleared
        if not user_message or not user_message.strip():
            return "", history
        history.append({"role": "user", "content": user_message})
        return "", history

    def bot_interaction(history):
        """Answer the most recent user message and append the reply to the history.

        Does nothing when the last entry is not a user turn (for example after a
        blank submission), so the bot never replies to its own message.
        """
        history = history or []
        if not history or history[-1]['role'] != 'user':
            return history

        # Get the last user message from history
        user_message = history[-1]['content']

        # Get bot response
        response_text = chat_logic(user_message, history)

        # Append bot response to history
        history.append({"role": "assistant", "content": response_text})
        return history

    # Configure chat interaction flow: record the user turn first, then generate the reply
    msg.submit(user_interaction, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot_interaction, [chatbot], [chatbot]
    )

# Launch the Gradio application
demo.launch(share=True, debug=True)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/929 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/501M [00:00<?, ?B/s]

Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


model.safetensors:   0%|          | 0.00/501M [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

Device set to use cuda:0


config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/380 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/280 [00:00<?, ?B/s]

Device set to use cuda:0
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
  chatbot = gr.Chatbot(


Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://4f9582089b9ffb3dc8.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/huggingface_hub/utils/_http.py", line 402, in hf_raise_for_status
    response.raise_for_status()
  File "/usr/local/lib/python3.12/dist-packages/requests/models.py", line 1026, in raise_for_status
    raise HTTPError(http_error_msg, response=self)
requests.exceptions.HTTPError: 401 Client Error: Unauthorized for url: https://router.huggingface.co/featherless-ai/v1/chat/completions

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/gradio/queueing.py", line 759, in process_events
    response = await route_utils.call_process_api(
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/gradio/route_utils.py", line 354, in call_process_api
    output = await app.get_blocks().process_api(
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/pyth

Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://4f9582089b9ffb3dc8.gradio.live




In [None]:
import evaluate
import numpy as np
import json
import os
from transformers import Trainer, TrainingArguments, DataCollatorWithPadding
from datasets import Dataset

# Directory that holds the saved label maps
output_dir = '/content/drive/My Drive/mental_health_model_roberta'

# Rebuild the class-index -> class-name lookup; JSON object keys are strings,
# so they are cast back to int here.
with open(f"{output_dir}/reverse_label_map.json", "r") as f:
    raw_reverse_map = json.load(f)
reverse_label_map = {int(class_id): class_name for class_id, class_name in raw_reverse_map.items()}

# Metric objects from the `evaluate` library, loaded in the same order as they are named
accuracy_metric, precision_metric, recall_metric, f1_metric = (
    evaluate.load(metric_name) for metric_name in ("accuracy", "precision", "recall", "f1")
)

def compute_metrics_per_class(eval_pred):
    """
    Compute overall (weighted) and per-class precision/recall/F1 for an evaluation run.

    Args:
        eval_pred: A (logits, labels) pair. The predicted class of each row is the
            argmax of its logits along the last axis.

    Returns:
        dict containing 'overall_accuracy', 'overall_precision_weighted',
        'overall_recall_weighted' and 'overall_f1_weighted', plus
        '<name>_precision', '<name>_recall' and '<name>_f1' for every class name
        in reverse_label_map. A class that occurs in neither the references nor
        the predictions is reported as NaN.
    """
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)

    metrics = {}

    # Calculate overall metrics with weighted average
    metrics["overall_accuracy"] = accuracy_metric.compute(predictions=predictions, references=labels)["accuracy"]
    metrics["overall_precision_weighted"] = precision_metric.compute(predictions=predictions, references=labels, average='weighted', zero_division=0)["precision"]
    metrics["overall_recall_weighted"] = recall_metric.compute(predictions=predictions, references=labels, average='weighted', zero_division=0)["recall"]
    metrics["overall_f1_weighted"] = f1_metric.compute(predictions=predictions, references=labels, average='weighted')["f1"]

    # Request per-class scores for an explicit, fixed list of class ids. Without
    # `labels=`, the returned arrays cover only the ids present in the data, so a
    # missing class would shift every later score onto the wrong class name.
    class_ids = sorted(reverse_label_map)
    per_class_precision = precision_metric.compute(predictions=predictions, references=labels, labels=class_ids, average=None, zero_division=0)["precision"]
    per_class_recall = recall_metric.compute(predictions=predictions, references=labels, labels=class_ids, average=None, zero_division=0)["recall"]
    per_class_f1 = f1_metric.compute(predictions=predictions, references=labels, labels=class_ids, average=None)["f1"]

    # Class ids that actually occur; anything else has no data to score and stays NaN
    observed_ids = set(np.unique(labels).tolist()) | set(np.unique(predictions).tolist())
    position_of = {class_id: position for position, class_id in enumerate(class_ids)}

    # Map numerical labels to class names and add to metrics dictionary
    for class_id, class_name in reverse_label_map.items():
        if class_id in observed_ids:
            position = position_of[class_id]
            metrics[f"{class_name}_precision"] = per_class_precision[position]
            metrics[f"{class_name}_recall"] = per_class_recall[position]
            metrics[f"{class_name}_f1"] = per_class_f1[position]
        else:
            metrics[f"{class_name}_precision"] = np.nan
            metrics[f"{class_name}_recall"] = np.nan
            metrics[f"{class_name}_f1"] = np.nan

    return metrics

# Evaluation-only configuration: just an output folder, the eval batch size,
# and report_to="none"
training_args_eval = TrainingArguments(
    report_to="none",
    per_device_eval_batch_size=64,
    output_dir='./results_eval',
)

# Collator configured with padding='max_length' and max_length=512
data_collator = DataCollatorWithPadding(tokenizer=tokenizer, padding='max_length', max_length=512)

# Trainer used purely as an evaluation harness around the already-loaded model,
# reporting the per-class metrics defined in compute_metrics_per_class
trainer_eval = Trainer(
    compute_metrics=compute_metrics_per_class,
    data_collator=data_collator,
    eval_dataset=validation_dataset,
    args=training_args_eval,
    model=model,
)

# Run the evaluation pass over the validation split
print("Evaluating model performance with per-class metrics...")
per_class_eval_results = trainer_eval.evaluate(eval_dataset=validation_dataset)

# Show the full metrics dictionary
print("\nEvaluation results with per-class breakdown:")
display(per_class_eval_results)

Evaluating model performance with per-class metrics...



Evaluation results with per-class breakdown:


{'eval_loss': 0.27895477414131165,
 'eval_model_preparation_time': 0.0046,
 'eval_overall_accuracy': 0.9256669256669257,
 'eval_overall_precision_weighted': 0.9259811522997963,
 'eval_overall_recall_weighted': 0.9256669256669257,
 'eval_overall_f1_weighted': 0.9256671979480124,
 'eval_anxiety_precision': 0.9325779036827195,
 'eval_anxiety_recall': 0.960887332165791,
 'eval_anxiety_f1': 0.9465209890741806,
 'eval_stress_precision': 0.8756345177664975,
 'eval_stress_recall': 0.8914728682170543,
 'eval_stress_f1': 0.8834827144686299,
 'eval_burnout_precision': 0.9405940594059405,
 'eval_burnout_recall': 0.9013282732447818,
 'eval_burnout_f1': 0.9205426356589147,
 'eval_depression_precision': 0.9384761003312825,
 'eval_depression_recall': 0.9402560455192034,
 'eval_depression_f1': 0.9393652297489341,
 'eval_normal_precision': 0.9451510333863276,
 'eval_normal_recall': 0.9160246533127889,
 'eval_normal_f1': 0.9303599374021909,
 'eval_runtime': 67.8782,
 'eval_samples_per_second': 113.763,
 