# Importing libraries

In [2]:
import torch
import os
import psycopg2
import evaluate
import time 

import pandas as pd
import numpy as np

from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments, DataCollatorForLanguageModeling, pipeline, AutoModelForSequenceClassification
from sentence_transformers import SentenceTransformer
from datasets import Dataset
from dotenv import load_dotenv

# Defining file paths

In [3]:
OUTPUT_DIR = "results"
LOG_DIR = "logs"
TRAIN_FILE = "dataset/train.txt"  # Path to your training data file
VAL_FILE = "dataset/validation.txt"  # Path to your validation data file
TEST_FILE = "dataset/test.txt"  # Path to your test data file

# Setting the device

In [4]:
torch.cuda.empty_cache()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print(device)

cuda


# Loading Pre-trained Model

In [None]:
tokenizer = AutoTokenizer.from_pretrained("HuggingFaceTB/SmolLM2-360M-Instruct")
model = AutoModelForCausalLM.from_pretrained("HuggingFaceTB/SmolLM2-360M-Instruct")

model.to(device)

In [None]:
# tokenizer = AutoTokenizer.from_pretrained("HuggingFaceTB/SmolLM2-1.7B-Instruct")
# model = AutoModelForCausalLM.from_pretrained("HuggingFaceTB/SmolLM2-1.7B-Instruct")

In [None]:
original_vocab_size = model.config.vocab_size
print(f"Original vocab size: {original_vocab_size}")

In [None]:
print(f"Model max length: {model.config.max_position_embeddings}")

In [None]:
print(f"Max sequence length: {tokenizer.model_max_length}")

In [None]:
model.gradient_checkpointing_enable()
model.resize_token_embeddings(len(tokenizer))

In [None]:
# model.config.use_cache = False

In [None]:
def count_tokens(file_path, tokenizer):
    with open(file_path, "r", encoding="utf-8") as f:
        text = f.read()
    tokens = tokenizer.tokenize(text)
    return len(tokens)

# Count tokens in training and validation files
train_token_count = count_tokens(TRAIN_FILE, tokenizer)
eval_token_count = count_tokens(VAL_FILE, tokenizer)
print(f"Train Tokens: {train_token_count}")
print(f"Validation Tokens: {eval_token_count}")

# Training the model

## Setting up training arguments

In [None]:
training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    eval_strategy="epoch",
    logging_strategy="epoch",     # Logs loss at intervals
    learning_rate=5e-6,
    per_device_train_batch_size=2,  # Reduced batch size for limited GPU memory
    per_device_eval_batch_size=2,
    num_train_epochs=10,
    weight_decay=0.01,
    save_total_limit=2,
    logging_dir=LOG_DIR,
    gradient_accumulation_steps=4,  # Accumulate gradients to simulate larger batch sizes
    fp16=True,
    bf16=False,
    optim="adamw_torch",
)

## Loading dataset

In [None]:
def load_chatbot_data(file_path):
    """Load and preprocess chatbot data from the given text file."""
    conversations = []
    with open(file_path, "r", encoding="utf-8") as f:
        lines = f.readlines()
        user_input, bot_response = None, None
        for line in lines:
            if line.startswith("User:"):
                user_input = line.replace("user:", "").strip()
            elif line.startswith("Bot:"):
                bot_response = line.replace("bot:", "").strip()
                if user_input and bot_response:
                    conversations.append({"input": user_input, "output": bot_response})
                    user_input, bot_response = None, None
    return pd.DataFrame(conversations)

In [None]:
df_train = load_chatbot_data(TRAIN_FILE)
df_val = load_chatbot_data(VAL_FILE)
dataset_train = Dataset.from_pandas(df_train)
dataset_val = Dataset.from_pandas(df_val)

print(f"Length of training dataset: {len(df_train)}")
print(f"Length of validation dataset: {len(df_val)}")

## Tokenizing dataset

In [None]:
# def find_max_interaction_length(dataset):
#     length_distribution = []

#     for user_msg, bot_msg in zip(dataset["input"], dataset["output"]):
#         # Combine user message and bot response
#         text_pair = f"User: {user_msg} Bot: {bot_msg}"

#         # Tokenize
#         tokenized = tokenizer(text_pair, truncation=False, padding=False)

#         # Get the token length of this interaction
#         num_tokens = len(tokenized["input_ids"])
#         length_distribution.append(num_tokens)

#     # Find the maximum token length
#     max_length = max(length_distribution)

#     return max_length, length_distribution

# dataset = load_chatbot_data("dataset/dataset.txt")
# max_len, token_lengths = find_max_interaction_length(dataset)

# print(f"Max token length in dataset: {max_len}")
# print(f"Token length distribution (first 10 samples): {token_lengths[:10]}")

In [None]:
# def tokenize_function(examples):
#     inputs = [f"{inp} {out}" for inp, out in zip(examples["input"], examples["output"])]
#     model_inputs = tokenizer(
#         inputs,
#         max_length=1024,
#         truncation=True,
#         padding="max_length",
#         return_tensors="pt",
#     )
#     model_inputs["labels"] = model_inputs["input_ids"].clone()

#     num_tokens = len(model_inputs["input_ids"][0])
#     print(f"Number of tokens: {num_tokens}")
#     return model_inputs

# # Apply tokenization
# tokenized_train = dataset_train.map(tokenize_function, batched=True)
# tokenized_val = dataset_val.map(tokenize_function, batched=True) 

In [None]:
def tokenize_function(examples):
    model_inputs = {
        "input_ids": [],
        "attention_mask": [],
        "labels": [],
    }

    for user_msg, bot_msg in zip(examples["input"], examples["output"]):
        text_pair = f"User: {user_msg} Bot: {bot_msg}"
        tokenized = tokenizer(
            text_pair,
            max_length=200,
            truncation=True,
            padding="max_length",
            return_tensors="pt",
        )

        # Store tokenized outputs
        model_inputs["input_ids"].append(tokenized["input_ids"].squeeze(0))
        model_inputs["attention_mask"].append(tokenized["attention_mask"].squeeze(0))

        # Labels for training: Shift left for causal language modeling
        labels = tokenized["input_ids"].clone()
        labels[labels == tokenizer.pad_token_id] = -100  # Ignore padding tokens in loss
        model_inputs["labels"].append(labels.squeeze(0))

    return model_inputs

# Apply tokenization
tokenized_train = dataset_train.map(tokenize_function, batched=True)
tokenized_val = dataset_val.map(tokenize_function, batched=True)

In [None]:
print(f"Total training samples: {len(tokenized_train)}")
print(f"Total validation samples: {len(tokenized_val)}")

## Data collator 

In [None]:
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False,  # Masked language modeling is not used for causal LM
)

## Compute Metrics

## Initialising the model

In [None]:
# Trainer.processing_class = tokenizer.__class__

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_val,
    tokenizer=tokenizer,
    data_collator=data_collator,
)

trainer.train()

## Saving Trained Model

In [None]:
model.save_pretrained(OUTPUT_DIR)
tokenizer.save_pretrained(OUTPUT_DIR) 

# Evaluating the model

## Load Test Dataset

In [10]:
file_path = TEST_FILE
with open(file_path, "r", encoding="utf-8") as file:
    chat_data = file.readlines()

# Extract user inputs and reference responses
user_inputs = []
reference_responses = []

for i in range(len(chat_data) - 1):
    if chat_data[i].startswith("User:"):
        user_text = chat_data[i].replace("User:", "").strip()
        ref_text = chat_data[i + 1].replace("Bot:", "").strip() if chat_data[i + 1].startswith("Bot:") else None
        
        if ref_text:  # Ensure reference answer exists
            user_inputs.append(user_text)
            reference_responses.append(ref_text)

# Convert to DataFrame
df = pd.DataFrame({"input": user_inputs, "reference_response": reference_responses})

In [11]:
df

Unnamed: 0,input,reference_response
0,I'm worried that my mental health struggles wi...,Your awareness is a positive first step. Focus...
1,I'm worried I'll never find a job I truly enjoy.,Finding fulfilling work can take time. Explore...
2,How do I stop feeling like others always get t...,"Instead of focusing on others, acknowledge you..."
3,I feel like I'm not smart enough compared to m...,Intelligence comes in many forms. Focus on you...
4,Why do I feel like I’m not living in the present?,Feeling disconnected from the present often co...
...,...,...
95,Why does everyone else get opportunities that ...,"It may seem that way, but opportunities often ..."
96,Why do I feel like nothing will ever change?,"Change can feel impossible, but even small ste..."
97,How can I prioritize self-care during busy per...,Prioritizing self-care during busy times invol...
98,What can I do if I feel anxious before big pre...,Anxiety before presentations is normal. Practi...


## Load Custom Model, GPT2 and Llama

In [5]:
custom_model = AutoModelForCausalLM.from_pretrained("results")
custom_tokenizer = AutoTokenizer.from_pretrained("results")

In [6]:
gpt_tokenizer = AutoTokenizer.from_pretrained("openai-community/gpt2-large")
gpt_model = AutoModelForCausalLM.from_pretrained("openai-community/gpt2-large")

In [None]:
!huggingface-cli login

In [None]:
!huggingface-cli whoami

In [7]:
llama_tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-1B-Instruct", use_auth_token=True)
llama_model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.2-1B-Instruct", use_auth_token=True)



## Generate reponses for all 3 models based on each prompt

In [12]:
import warnings
warnings.filterwarnings("ignore")

import torch
import pandas as pd
import time

# Define models and their corresponding tokenizers in a list
models = [
    {"name": "llama_model", "model": llama_model.to(device), "tokenizer": llama_tokenizer},
    {"name": "gpt_model", "model": gpt_model.to(device), "tokenizer": gpt_tokenizer},
    {"name": "custom_model", "model": custom_model.to(device), "tokenizer": custom_tokenizer}
]

prompt_column = "input"  # Update if your column name is different

# Function to generate chatbot response using the corresponding tokenizer
def chatbot_response(prompt, model, tokenizer):
    system_prompt = "You are a helpful and supportive chatbot. Answer the user's question with empathy, and in a clear and concise way without repeating their words exactly."
    full_prompt = f"{system_prompt}\nUser: {prompt}\nBot:"

    inputs = tokenizer(full_prompt, return_tensors="pt")
    inputs = {key: val.to(device) for key, val in inputs.items()}
    model.config.eos_token_id = -1

    start_time = time.time()
    outputs = model.generate(
        **inputs, 
        max_length=350,
        repetition_penalty=1.3,
        no_repeat_ngram_size=3,  
        temperature=0.6,  
        top_p=0.9,  
        top_k=50  
    )
    end_time = time.time()

    response = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Clean up response
    response = response.replace(system_prompt, "").strip()
    if "Bot:" in response:
        response = response.split("Bot:")[-1].strip()

    time_taken = end_time - start_time

    return response, time_taken

for model_info in models:
    model_name = model_info["name"]
    model = model_info["model"]
    tokenizer = model_info["tokenizer"]
    
    responses = []
    time_taken_values = []
    for prompt in df[prompt_column]:
        response, time_taken = chatbot_response(prompt, model, tokenizer)
        responses.append(response) 
        time_taken_values.append(time_taken)

    df[f"{model_name}_response"] = responses
    df[f"{model_name}_time_taken"] = time_taken_values

df

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for

Unnamed: 0,input,reference_response,llama_model_response,llama_model_time_taken,gpt_model_response,gpt_model_time_taken,custom_model_response,custom_model_time_taken
0,I'm worried that my mental health struggles wi...,Your awareness is a positive first step. Focus...,It takes immense courage to acknowledge your c...,7.178031,You're not alone! We've all been there. It can...,56.804922,It can be really tough to deal with your own e...,9.899035
1,I'm worried I'll never find a job I truly enjoy.,Finding fulfilling work can take time. Explore...,That can be really tough to navigate when you'...,8.727993,You're not alone! There are many people who ha...,85.809733,It can be really tough to feel like you're not...,13.324115
2,How do I stop feeling like others always get t...,"Instead of focusing on others, acknowledge you...",It can be frustrating when it feels like you'r...,19.192012,You're not getting any recognition for your wo...,86.065798,Feeling undervalued can be really tough to dea...,10.004252
3,I feel like I'm not smart enough compared to m...,Intelligence comes in many forms. Focus on you...,That can be frustrating but try having an open...,5.717173,You're right! We have lots of people who can d...,19.155696,It can be really frustrating when you think ot...,7.776339
4,Why do I feel like I’m not living in the present?,Feeling disconnected from the present often co...,Feeling stuck is common! Here are some suggest...,8.072655,You're feeling stuck because you don't know wh...,23.956055,Feeling stuck or disconnected from your curren...,14.612745
...,...,...,...,...,...,...,...,...
95,Why does everyone else get opportunities that ...,"It may seem that way, but opportunities often ...","""This can lead them to believe they need to fo...",23.751215,Not yet. Humans still need language to express...,71.689826,It can be frustrating when others seem to have...,10.886436
96,Why do I feel like nothing will ever change?,"Change can feel impossible, but even small ste...",It can be really tough to see things that don'...,5.970684,Because you're not human! You don't have feeli...,60.309200,Feeling stuck can be really tough to deal with...,10.579578
97,How can I prioritize self-care during busy per...,Prioritizing self-care during busy times invol...,It sounds like you're feeling overwhelmed! Dur...,22.628602,You should try to avoid doing things that will...,26.061828,Remember to take breaks throughout your day; e...,12.604105
98,What can I do if I feel anxious before big pre...,Anxiety before presentations is normal. Practi...,Feeling anxious about upcoming meetings is com...,17.220079,You should relax! It is normal to be nervous w...,21.742721,Feeling nervous about upcoming events is commo...,12.210180


## Calculate BERTScore and empathy score for each model reponse

In [14]:
from bert_score import BERTScorer

empathy_tokenizer = AutoTokenizer.from_pretrained("vtiyyal1/empathy_model")
empathy_model = AutoModelForSequenceClassification.from_pretrained("vtiyyal1/empathy_model")

def evaluate_empathy(response):
    inputs = empathy_tokenizer.encode(response, return_tensors="pt")
    outputs = empathy_model(inputs)
    empathy_score = torch.nn.functional.softmax(outputs.logits, dim=1)
    
    # Convert to NumPy & detach
    empathy_score = empathy_score.detach().numpy()
    
    # Debugging: Print the shape to check if it's multi-class or single-class
    print(f"Empathy Score Shape: {empathy_score.shape}")  
    
    return empathy_score

# Initialize BERTScorer
scorer = BERTScorer(model_type="bert-base-uncased")

score_df = df 

# List of model response columns
model_response_columns = ["llama_model_response", "gpt_model_response", "custom_model_response"]

for model_col in model_response_columns:
    precision_scores = []
    recall_scores = []
    f1_scores = []
    empathy_scores = []

    for index, row in df.iterrows():
        reference = row["reference_response"]
        candidate = row[model_col]

        # Compute BERTScore
        P, R, F1 = scorer.score([candidate], [reference])
        empathy_score = evaluate_empathy(candidate)

        # Debugging: Print the full empathy_score array
        print(f"Index {index} - Empathy Score Output: {empathy_score}")

        # Handle single-class and multi-class cases
        if empathy_score.shape[1] > 1:  
            empathy_scores.append(empathy_score[0][1])  # Multi-class (index 1 is safe)
        else:
            empathy_scores.append(empathy_score[0][0])  # Single-class (use index 0)

        precision_scores.append(P.item())  
        recall_scores.append(R.item())
        f1_scores.append(F1.item())

    # Store scores in the DataFrame
    score_df[f"{model_col}_BERT_Precision"] = precision_scores
    score_df[f"{model_col}_BERT_Recall"] = recall_scores
    score_df[f"{model_col}_BERT_F1"] = f1_scores 
    score_df[f"{model_col}_Empathy_Score"] = empathy_scores

score_df

Empathy Score Shape: (1, 1)
Index 0 - Empathy Score Output: [[1.]]
Empathy Score Shape: (1, 1)
Index 1 - Empathy Score Output: [[1.]]
Empathy Score Shape: (1, 1)
Index 2 - Empathy Score Output: [[1.]]
Empathy Score Shape: (1, 1)
Index 3 - Empathy Score Output: [[1.]]
Empathy Score Shape: (1, 1)
Index 4 - Empathy Score Output: [[1.]]
Empathy Score Shape: (1, 1)
Index 5 - Empathy Score Output: [[1.]]
Empathy Score Shape: (1, 1)
Index 6 - Empathy Score Output: [[1.]]
Empathy Score Shape: (1, 1)
Index 7 - Empathy Score Output: [[1.]]
Empathy Score Shape: (1, 1)
Index 8 - Empathy Score Output: [[1.]]
Empathy Score Shape: (1, 1)
Index 9 - Empathy Score Output: [[1.]]
Empathy Score Shape: (1, 1)
Index 10 - Empathy Score Output: [[1.]]
Empathy Score Shape: (1, 1)
Index 11 - Empathy Score Output: [[1.]]
Empathy Score Shape: (1, 1)
Index 12 - Empathy Score Output: [[1.]]
Empathy Score Shape: (1, 1)
Index 13 - Empathy Score Output: [[1.]]
Empathy Score Shape: (1, 1)
Index 14 - Empathy Score Outpu

Unnamed: 0,input,reference_response,llama_model_response,llama_model_time_taken,gpt_model_response,gpt_model_time_taken,custom_model_response,custom_model_time_taken,llama_model_response_BERT_Precision,llama_model_response_BERT_Recall,llama_model_response_BERT_F1,llama_model_response_Empathy_Score,gpt_model_response_BERT_Precision,gpt_model_response_BERT_Recall,gpt_model_response_BERT_F1,gpt_model_response_Empathy_Score,custom_model_response_BERT_Precision,custom_model_response_BERT_Recall,custom_model_response_BERT_F1,custom_model_response_Empathy_Score
0,I'm worried that my mental health struggles wi...,Your awareness is a positive first step. Focus...,It takes immense courage to acknowledge your c...,7.178031,You're not alone! We've all been there. It can...,56.804922,It can be really tough to deal with your own e...,9.899035,0.475798,0.606598,0.533295,1.0,0.418724,0.577408,0.485426,1.0,0.402647,0.458811,0.428898,1.0
1,I'm worried I'll never find a job I truly enjoy.,Finding fulfilling work can take time. Explore...,That can be really tough to navigate when you'...,8.727993,You're not alone! There are many people who ha...,85.809733,It can be really tough to feel like you're not...,13.324115,0.458251,0.631658,0.531160,1.0,0.386464,0.528269,0.446375,1.0,0.453974,0.575235,0.507461,1.0
2,How do I stop feeling like others always get t...,"Instead of focusing on others, acknowledge you...",It can be frustrating when it feels like you'r...,19.192012,You're not getting any recognition for your wo...,86.065798,Feeling undervalued can be really tough to dea...,10.004252,0.448667,0.613510,0.518297,1.0,0.403619,0.526456,0.456926,1.0,0.491554,0.590614,0.536550,1.0
3,I feel like I'm not smart enough compared to m...,Intelligence comes in many forms. Focus on you...,That can be frustrating but try having an open...,5.717173,You're right! We have lots of people who can d...,19.155696,It can be really frustrating when you think ot...,7.776339,0.444200,0.496332,0.468821,1.0,0.408612,0.500665,0.449979,1.0,0.501890,0.589699,0.542262,1.0
4,Why do I feel like I’m not living in the present?,Feeling disconnected from the present often co...,Feeling stuck is common! Here are some suggest...,8.072655,You're feeling stuck because you don't know wh...,23.956055,Feeling stuck or disconnected from your curren...,14.612745,0.447163,0.530491,0.485276,1.0,0.430438,0.470008,0.449354,1.0,0.483122,0.566879,0.521660,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,Why does everyone else get opportunities that ...,"It may seem that way, but opportunities often ...","""This can lead them to believe they need to fo...",23.751215,Not yet. Humans still need language to express...,71.689826,It can be frustrating when others seem to have...,10.886436,0.353153,0.499097,0.413629,1.0,0.385865,0.475733,0.426112,1.0,0.460870,0.523212,0.490066,1.0
96,Why do I feel like nothing will ever change?,"Change can feel impossible, but even small ste...",It can be really tough to see things that don'...,5.970684,Because you're not human! You don't have feeli...,60.309200,Feeling stuck can be really tough to deal with...,10.579578,0.487460,0.565595,0.523629,1.0,0.386213,0.485401,0.430163,1.0,0.457551,0.564071,0.505258,1.0
97,How can I prioritize self-care during busy per...,Prioritizing self-care during busy times invol...,It sounds like you're feeling overwhelmed! Dur...,22.628602,You should try to avoid doing things that will...,26.061828,Remember to take breaks throughout your day; e...,12.604105,0.432504,0.618561,0.509065,1.0,0.447943,0.476662,0.461856,1.0,0.462998,0.482975,0.472775,1.0
98,What can I do if I feel anxious before big pre...,Anxiety before presentations is normal. Practi...,Feeling anxious about upcoming meetings is com...,17.220079,You should relax! It is normal to be nervous w...,21.742721,Feeling nervous about upcoming events is commo...,12.210180,0.437630,0.593978,0.503956,1.0,0.489538,0.573815,0.528337,1.0,0.526734,0.602382,0.562024,1.0


In [22]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import torch.nn.functional as F

# Load the model and tokenizer
empathy_tokenizer = AutoTokenizer.from_pretrained("vtiyyal1/empathy_model")
empathy_model = AutoModelForSequenceClassification.from_pretrained("vtiyyal1/empathy_model")

def get_empathy_score(response: str) -> float:
    """
    Compute an empathy score for the given response using the fine-tuned model.
    
    Args:
        response (str): The input response text.
        
    Returns:
        float: An empathy score between 0 and 1, with 1 being the highest empathy.
    """
    # Tokenize the input response
    inputs = empathy_tokenizer(response, return_tensors="pt", truncation=True, padding=True)
    
    # Get model output
    with torch.no_grad():
        logits = empathy_model(**inputs).logits
    
    # Apply softmax to get probabilities
    probs = F.softmax(logits, dim=-1)
    
    # Assuming higher empathy is associated with the higher index in classification (e.g., class 1)
    empathy_score = probs[:, -1].item()  # Take the probability of the last class (highest empathy)
    
    return round(empathy_score, 4)  # Return score rounded to 4 decimal places

# Example usage
response_text = "get a life. its only a small cut"
score = get_empathy_score(response_text)
print(f"Empathy Score: {score}")


Empathy Score: 1.0


## Calculate weighted score 

In [18]:
gpt_bert_f1score = df["gpt_model_response_BERT_F1"].mean()
gpt_bert_recall_score = df["gpt_model_response_BERT_Recall"].mean()
gpt_bert_precision_score = df["gpt_model_response_BERT_Precision"].mean()
gpt_time_taken = df["gpt_model_time_taken"].mean()
gpt_empathy_score = df["gpt_model_response_Empathy_Score"].mean()

luminai_bert_f1score = df["custom_model_response_BERT_F1"].mean()
luminai_bert_recall_score = df["custom_model_response_BERT_Recall"].mean()
luminai_bert_precision_score = df["custom_model_response_BERT_Precision"].mean()
luminai_time_taken = df["custom_model_time_taken"].mean()
luminai_empathy_score = df["custom_model_response_Empathy_Score"].mean()

llama_bert_f1_score = df["llama_model_response_BERT_F1"].mean()
llama_bert_recall_score = df["llama_model_response_BERT_Recall"].mean()
llama_bert_precision_score = df["llama_model_response_BERT_Precision"].mean()
llama_time_taken = df["llama_model_time_taken"].mean()
llama_empathy_score = df["llama_model_response_Empathy_Score"].mean()

metrics_df = pd.DataFrame({
    "Metric": ["BERTScore_F1", "BERTScore_Precision", "BERTScore_Recall", "Time Taken", "Empathy Score"],
    "GPT": [gpt_bert_f1score, gpt_bert_precision_score, gpt_bert_recall_score, gpt_time_taken, gpt_empathy_score],
    "LuminAI": [luminai_bert_f1score, luminai_bert_precision_score, luminai_bert_recall_score, luminai_time_taken, luminai_empathy_score],
    "Llama": [llama_bert_f1_score, llama_bert_precision_score, llama_bert_recall_score, llama_time_taken, llama_empathy_score],
})

metrics_df

Unnamed: 0,Metric,GPT,LuminAI,Llama
0,BERTScore_F1,0.452541,0.505347,0.486793
1,BERTScore_Precision,0.411308,0.468328,0.44057
2,BERTScore_Recall,0.50603,0.550094,0.547608
3,Time Taken,48.098783,12.520367,12.822732
4,Empathy Score,1.0,1.0,1.0


In [None]:
df.to_csv("metrics.csv", index=False)

# Testing the model

## Connection to Database

In [None]:
load_dotenv()

USER = os.getenv("user")
PASSWORD = os.getenv("password")
HOST = os.getenv("host")
PORT = os.getenv("port")
DBNAME = os.getenv("dbname")

try:
    connection = psycopg2.connect(
        user=USER,
        password=PASSWORD,
        host=HOST,
        port=PORT,
        dbname=DBNAME
    )
    print("Connection successful!")
    
    # Create a cursor to execute SQL queries
    cursor = connection.cursor()

except Exception as e:
    print(f"Failed to connect: {e}")

## Retrieval Augmented Generation

In [None]:
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

def store_conversation(user_message, bot_response, sentiment):
    """Stores a conversation in Supabase PostgreSQL."""
    embedding = embedding_model.encode([user_message])[0].tolist()
    try:
        cursor.execute(
            "INSERT INTO conversations (user_message, bot_response, sentiment, embedding) VALUES (%s, %s, %s, %s)",
            (user_message, bot_response, sentiment, embedding)
        )
        connection.commit()
        print("Stored conversation")
    except Exception as e:
        print(e)

In [None]:
def retrieve_past_conversations(query):
    query_embedding = embedding_model.encode([query])[0] 

    if isinstance(query_embedding, np.ndarray): 
        query_embedding = query_embedding.tolist()

    cursor.execute(
        "SELECT timestamp, user_message, bot_response FROM conversations "
        "ORDER BY embedding <-> %s::vector LIMIT 5",
        (query_embedding,)  # Ensure it's passed as a tuple
    )

    results = cursor.fetchall()

    if results:
        context = "\n".join([f"[{r[0]}] User: {r[1]}\nBot: {r[2]}" for r in results])
        return context
    else:
        return ""

## Load trained model

In [None]:
model = AutoModelForCausalLM.from_pretrained("results", device_map="auto")
tokenizer = AutoTokenizer.from_pretrained("results", device_map="auto")

model.to(device)

## Models for translation and sentiment analysis

In [None]:
pipe = pipeline("text2text-generation", model="Varine/opus-mt-zh-en-model")
sentiment_classifier = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")

In [None]:
def get_sentiment(text):
    """Improved sentiment analysis with keyword-based distress detection."""
    crisis_keywords = ["end my life", "suicide", "don't want to live", "kill myself", "worthless", "no reason to live"]

    # Check if crisis words are in the input
    if any(phrase in text.lower() for phrase in crisis_keywords):
        return "crisis"  # Override sentiment if crisis words are detected

    # Otherwise, use DistilBERT-based sentiment analysis
    result = sentiment_classifier(text)[0]
    label = result['label']

    # Convert to sentiment categories based on DistilBERT outputs
    if label == "NEGATIVE":
        return "negative"
    elif label == "POSITIVE":
        return "positive"
    else:
        return "neutral"

## Chatbot response

In [None]:
def chatbot_response(prompt):
    retrieved_context = retrieve_past_conversations(prompt)
    translated_prompt = pipe(prompt)[0].get("generated_text", "Translation failed")  

    print(translated_prompt)

    system_prompt = "You are a helpful and supportive chatbot. Answer the user's question in a clear and concise way without repeating their words exactly."
    full_prompt = f"{system_prompt}\n{retrieved_context}\nUser: {prompt}\nBot:"

    sentiment_results = get_sentiment(prompt)

    inputs = tokenizer(full_prompt, return_tensors="pt")
    inputs = {key: val.to(device) for key, val in inputs.items()}

    outputs = model.generate(
        **inputs, 
        max_new_tokens=650,
        repetition_penalty=1.3,
        no_repeat_ngram_size=3,  
        temperature=0.8,  
        top_p=0.9,  #
        top_k=50  
    )

    response = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Ensure the response does not include the system prompt
    response = response.replace(system_prompt, "").strip()
    
    # Remove any leftover system prompt instructions
    if "Bot:" in response:
        response = response.split("Bot:")[-1].strip()

    # Translate response to Chinese
    # translated = pipe(response)[0]['translation_text']
    translated_text = pipe(response)[0].get("generated_text", "Translation failed")  # Use .get() to avoid KeyError

    return response, translated_text, sentiment_results

In [None]:
if __name__ == "__main__":
    print("Chatbot is ready! Type 'exit' to stop.")
    while True:        
        user_input = input("User: ")
        if user_input.lower() == "exit":
            cursor.close()
            connection.close()
            print("DB connection ended")
            break
        response, translated, sentiment_results = chatbot_response(user_input)
        store_conversation(user_input, response, sentiment_results)
        print(f"User: {user_input}")
        print(f"Bot: {response}")
        print(f"Translated Text: {translated}")
        print(f"Sentiment Results: {sentiment_results}")