In [None]:
import pandas as pd
import numpy as np
import torch
from datasets import Dataset
from sklearn.metrics import accuracy_score, f1_score, classification_report
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer

Load the data

In [2]:
train_df = pd.read_csv("train.csv")
val_df = pd.read_csv("validation.csv")
test_df = pd.read_csv("test.csv")

Tokenization of text

In [4]:
model_ckpt = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_ckpt)

# Create a mapping from status labels to integers
label_map = {status: i for i, status in enumerate(train_df['status'].unique())}
id_map = {i: status for i, status in enumerate(train_df['status'].unique())}
print(id_map)

def tokenize(batch):
    # Map the status to numerical labels and add to the batch
    batch["label"] = [label_map[status] for status in batch["status"]]
    return tokenizer(batch["statement"], padding=True, truncation=True)

# Convert pandas DataFrames to Dataset objects
train_dataset = Dataset.from_pandas(train_df)
val_dataset = Dataset.from_pandas(val_df)
test_dataset = Dataset.from_pandas(test_df)

# Apply tokenization
train_dataset_encoded = train_dataset.map(tokenize, batched=True)
val_dataset_encoded = val_dataset.map(tokenize, batched=True)
test_dataset_encoded = test_dataset.map(tokenize, batched=True)

{0: 'Personality disorder', 1: 'Suicidal', 2: 'Depression', 3: 'Anxiety', 4: 'Normal', 5: 'Stress', 6: 'Bipolar'}


Map:   0%|          | 0/12360 [00:00<?, ? examples/s]

Map:   0%|          | 0/1545 [00:00<?, ? examples/s]

Map:   0%|          | 0/1546 [00:00<?, ? examples/s]

Model building

In [5]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = AutoModelForSequenceClassification.from_pretrained(model_ckpt, num_labels=7).to(device) # for full-finetuning

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [6]:
batch_size = 64
model_name = "distilbert-finetuned-mental-health"

training_args = TrainingArguments(output_dir = model_name,
                                  num_train_epochs=3,
                                  learning_rate = 2e-5,
                                  per_device_train_batch_size= batch_size,
                                  per_device_eval_batch_size = batch_size,
                                  weight_decay=0.01,
                                  eval_strategy = 'epoch',
                                  disable_tqdm=False,
                                  report_to = "none")

Define metrics

In [7]:
def compute_metrics(pred):
  labels = pred.label_ids
  preds = pred.predictions.argmax(-1)
  f1 = f1_score(labels, preds, average='weighted')
  acc = accuracy_score(labels, preds)
  return {"accuracy": acc, "f1": f1}

Train the model

In [8]:
trainer = Trainer(model=model,
                  args=training_args,
                  compute_metrics=compute_metrics,
                  train_dataset=train_dataset_encoded,
                  eval_dataset=val_dataset_encoded,
                  tokenizer=tokenizer)
trainer.train()

  trainer = Trainer(model=model,


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,No log,0.703865,0.742395,0.727231
2,No log,0.583368,0.785113,0.782443
3,0.781600,0.554159,0.8,0.79957


TrainOutput(global_step=582, training_loss=0.74146665330605, metrics={'train_runtime': 1750.3523, 'train_samples_per_second': 21.184, 'train_steps_per_second': 0.333, 'total_flos': 4912329125191680.0, 'train_loss': 0.74146665330605, 'epoch': 3.0})

Evaluate performance on test data

In [9]:
preds_outputs = trainer.predict(test_dataset_encoded)
preds_outputs.metrics

{'test_loss': 0.5453797578811646,
 'test_accuracy': 0.8085381630012937,
 'test_f1': 0.8084788440200407,
 'test_runtime': 23.8395,
 'test_samples_per_second': 64.85,
 'test_steps_per_second': 1.049}

In [10]:
y_preds = np.argmax(preds_outputs.predictions, axis=1)
y_true = [label for label in test_dataset_encoded["label"]]
print(id_map)
print(classification_report(y_true, y_preds, zero_division=0))

{0: 'Personality disorder', 1: 'Suicidal', 2: 'Depression', 3: 'Anxiety', 4: 'Normal', 5: 'Stress', 6: 'Bipolar'}
              precision    recall  f1-score   support

           0       0.75      0.62      0.68        72
           1       0.72      0.81      0.76       302
           2       0.76      0.69      0.72       309
           3       0.86      0.86      0.86       239
           4       0.94      0.91      0.92       289
           5       0.74      0.81      0.77       182
           6       0.89      0.85      0.87       153

    accuracy                           0.81      1546
   macro avg       0.81      0.79      0.80      1546
weighted avg       0.81      0.81      0.81      1546



Testing for custom input

In [11]:
text = "I want to die."
inputs = tokenizer(text, return_tensors="pt")
inputs = {k: v.to(device) for k, v in inputs.items()}
outputs = model(**inputs)
predictions = torch.argmax(outputs.logits, dim=1)
predicted_label_id = predictions.item()
print(f"Predicted label ID: {predicted_label_id}")
print(f"Predicted label: {id_map[predicted_label_id]}")

Predicted label ID: 1
Predicted label: Suicidal
