# Importing libraries

In [None]:
import torch
import os
import warnings

import pandas as pd
import numpy as np

from bert_score import score
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments, DataCollatorForLanguageModeling, pipeline
from datasets import Dataset, load_metric

# Defining file paths

In [2]:
OUTPUT_DIR = "results"
LOG_DIR = "logs"
TRAIN_FILE = "dataset/train.txt"  # Path to your training data file
VAL_FILE = "dataset/val.txt"  # Path to your validation data file
TEST_FILE = "dataset/test.txt"  # Path to your test data file

# Loading Pre-trained Model

In [3]:
tokenizer = AutoTokenizer.from_pretrained("HuggingFaceTB/SmolLM2-360M-Instruct")
model = AutoModelForCausalLM.from_pretrained("HuggingFaceTB/SmolLM2-360M-Instruct")
model.gradient_checkpointing_enable()
model.resize_token_embeddings(len(tokenizer))

Embedding(49152, 960, padding_idx=2)

In [4]:
torch.cuda.empty_cache()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(49152, 960, padding_idx=2)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): Linear(in_features=960, out_features=960, bias=False)
          (k_proj): Linear(in_features=960, out_features=320, bias=False)
          (v_proj): Linear(in_features=960, out_features=320, bias=False)
          (o_proj): Linear(in_features=960, out_features=960, bias=False)
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=960, out_features=2560, bias=False)
          (up_proj): Linear(in_features=960, out_features=2560, bias=False)
          (down_proj): Linear(in_features=2560, out_features=960, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((960,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNorm((960,), eps=1e-05)
      )
    )
    (norm): LlamaRMSNorm((960,), eps=1e-05)
    (rotary_emb)

In [5]:
def count_tokens(file_path, tokenizer):
    with open(file_path, "r", encoding="utf-8") as f:
        text = f.read()
    tokens = tokenizer.tokenize(text)
    return len(tokens)

# Count tokens in training and validation files
train_token_count = count_tokens(TRAIN_FILE, tokenizer)
eval_token_count = count_tokens(VAL_FILE, tokenizer)
print(f"Train Tokens: {train_token_count}")
print(f"Validation Tokens: {eval_token_count}")

Train Tokens: 7144
Validation Tokens: 1580


# Training the model

## Setting up training arguments

In [None]:
training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    eval_strategy="epoch",
    logging_strategy="epoch",     # Logs loss at intervals
    learning_rate=1e-5,
    per_device_train_batch_size=1,  # Reduced batch size for limited GPU memory
    per_device_eval_batch_size=1,
    num_train_epochs=10,
    weight_decay=0.01,
    save_total_limit=2,
    logging_dir=LOG_DIR,
    fp16=True,  # Enable mixed precision training for GPU efficiency
    bf16=False,  # Disable bf16 as it's not supported on all GPUs
    gradient_accumulation_steps=4,  # Accumulate gradients to simulate larger batch sizes
)

## Loading dataset

In [7]:
def load_chatbot_data(file_path):
    """Load and preprocess chatbot data from the given text file."""
    conversations = []
    with open(file_path, "r", encoding="utf-8") as f:
        lines = f.readlines()
        user_input, bot_response = None, None
        for line in lines:
            if line.startswith("User:"):
                user_input = line.replace("User:", "").strip()
            elif line.startswith("Bot:"):
                bot_response = line.replace("Bot:", "").strip()
                if user_input and bot_response:
                    conversations.append({"input": user_input, "output": bot_response})
                    user_input, bot_response = None, None
    return pd.DataFrame(conversations)

In [8]:
df_train = load_chatbot_data(TRAIN_FILE)
df_val = load_chatbot_data(VAL_FILE)
dataset_train = Dataset.from_pandas(df_train)
dataset_val = Dataset.from_pandas(df_val)

print(f"Length of training dataset: {len(df_train)}")
print(f"Length of validation dataset: {len(df_val)}")

Length of training dataset: 96
Length of validation dataset: 24


## Tokenizing dataset

In [None]:
def tokenize_function(examples):
    inputs = [f"{inp} {out}" for inp, out in zip(examples["input"], examples["output"])]
    model_inputs = tokenizer(
        inputs,
        max_length=512,
        truncation=True,
        padding="max_length",
        return_tensors="pt",
    )
    model_inputs["labels"] = model_inputs["input_ids"].clone() 
    return model_inputs

# Apply tokenization
tokenized_train = dataset_train.map(tokenize_function, batched=True)
tokenized_val = dataset_val.map(tokenize_function, batched=True)

Map:   0%|          | 0/96 [00:00<?, ? examples/s]

Map:   0%|          | 0/24 [00:00<?, ? examples/s]

## Data collator 

In [10]:
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False,  # Masked language modeling is not used for causal LM
)

## Compute Metrics

In [None]:
esat_classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")

def compute_bertscore(references, candidates, model_type="roberta-large"):
    """Compute BERTScore for chatbot responses."""
    P, R, F1 = score(candidates, references, model_type=model_type, lang="en", rescale_with_baseline=True)
    return {
        "Precision": P.mean().item(),
        "Recall": R.mean().item(),
        "F1 Score": F1.mean().item()
    }

def compute_esat(response):
    """Compute ESAT empathy scores for chatbot responses."""
    categories = ["Acknowledgment", "Interpretation", "Exploration", "Lack of Empathy"]
    result = esat_classifier(response, candidate_labels=categories, multi_label=True)
    return {label: round(score * 100, 2) for label, score in zip(result["labels"], result["scores"])}

accuracy_metric = load_metric("accuracy")

def compute_metrics(eval_pred):
    """Compute accuracy, BERTScore, and ESAT for evaluation."""
    predictions, labels = eval_pred

    # Decode model outputs
    decoded_preds = [tokenizer.decode(pred, skip_special_tokens=True) for pred in predictions]
    decoded_labels = [tokenizer.decode(label, skip_special_tokens=True) for label in labels]

    # Token-level accuracy
    pred_ids = np.argmax(predictions, axis=-1)
    accuracy = accuracy_metric.compute(predictions=pred_ids, references=labels)

    # Compute BERTScore (similarity of response to ground truth)
    bert_scores = compute_bertscore(decoded_labels, decoded_preds)

    # Compute ESAT (Empathy Score)
    esat_scores = {"Acknowledgment": 0, "Interpretation": 0, "Exploration": 0, "Lack of Empathy": 0}
    num_samples = len(decoded_preds)

    for response in decoded_preds:
        esat_result = compute_esat(response)
        for key in esat_scores:
            esat_scores[key] += esat_result.get(key, 0)

    # Average ESAT scores
    esat_scores = {key: val / num_samples for key, val in esat_scores.items()}

    # Combine metrics
    final_metrics = {
        "Accuracy": accuracy["accuracy"],
        **bert_scores,
        **esat_scores,
    }

    print("\nEvaluation Metrics:", final_metrics)
    return final_metrics

## Initialising the model

In [None]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_val,
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

trainer.train()

  trainer = Trainer(
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.


Epoch,Training Loss,Validation Loss
1,No log,1.69058
2,No log,1.647575
3,No log,1.645658
4,No log,1.651312
5,No log,1.669912
6,No log,1.692904
7,No log,1.708447
8,No log,1.731985
9,No log,1.743749
10,No log,1.749091


TrainOutput(global_step=240, training_loss=1.0227577845255533, metrics={'train_runtime': 2454.4885, 'train_samples_per_second': 0.391, 'train_steps_per_second': 0.098, 'total_flos': 927896961024000.0, 'train_loss': 1.0227577845255533, 'epoch': 10.0})

## Saving Trained Model

In [12]:
model.save_pretrained("results")
tokenizer.save_pretrained("results")

('results\\tokenizer_config.json',
 'results\\special_tokens_map.json',
 'results\\vocab.json',
 'results\\merges.txt',
 'results\\added_tokens.json',
 'results\\tokenizer.json')

# Evaluating the model

## Load trained model

In [13]:
model = AutoModelForCausalLM.from_pretrained("results", device_map="auto")
tokenizer = AutoTokenizer.from_pretrained("results", device_map="auto")

model.to(device)



LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(49152, 960, padding_idx=2)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): Linear(in_features=960, out_features=960, bias=False)
          (k_proj): Linear(in_features=960, out_features=320, bias=False)
          (v_proj): Linear(in_features=960, out_features=320, bias=False)
          (o_proj): Linear(in_features=960, out_features=960, bias=False)
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=960, out_features=2560, bias=False)
          (up_proj): Linear(in_features=960, out_features=2560, bias=False)
          (down_proj): Linear(in_features=2560, out_features=960, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((960,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNorm((960,), eps=1e-05)
      )
    )
    (norm): LlamaRMSNorm((960,), eps=1e-05)
    (rotary_emb)

In [14]:
def chatbot_response(prompt, max_length=100):
    # Tokenize input prompt
    inputs = tokenizer(prompt, return_tensors="pt")
    inputs = {key: val.to(device) for key, val in inputs.items()}

    # Generate response
    outputs = model.generate(**inputs, max_length=max_length)

    # Decode the generated text
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response

In [15]:
if __name__ == "__main__":
    print("Chatbot is ready! Type 'exit' to stop.")
    while True:
        user_input = input("User: ")
        if user_input.lower() == "exit":
            break
        response = chatbot_response(user_input)
        print(f"User: {user_input}")
        print(f"Bot: {response}")

Chatbot is ready! Type 'exit' to stop.
User: what is depression?
Bot: what is depression? Depression is a serious mental health condition that can affect how you feel, think, and behave. It can cause feelings of sadness, hopelessness, and loss of interest in things you once enjoyed. Depression can also impact your energy, motivation, and ability to function. If you think you or someone you know might be experiencing depression, it's important to talk to a healthcare provider. They can help you understand what you're going through and develop a treatment plan. What do you think?
