# Importing libraries

In [2]:
import torch
import os
import psycopg2
import evaluate
import time 

import pandas as pd
import numpy as np

from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments, DataCollatorForLanguageModeling, pipeline, AutoModelForSequenceClassification
from sentence_transformers import SentenceTransformer
from datasets import Dataset
from dotenv import load_dotenv

# Defining file paths

In [3]:
OUTPUT_DIR = "results"
LOG_DIR = "logs"
TRAIN_FILE = "dataset/train.txt"  # Path to your training data file
VAL_FILE = "dataset/validation.txt"  # Path to your validation data file
TEST_FILE = "dataset/test.txt"  # Path to your test data file

# Setting the device

In [None]:
torch.cuda.empty_cache()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print(device) 

cuda


# Loading Pre-trained Model

In [None]:
tokenizer = AutoTokenizer.from_pretrained("HuggingFaceTB/SmolLM2-360M-Instruct")
model = AutoModelForCausalLM.from_pretrained("HuggingFaceTB/SmolLM2-360M-Instruct")

model.to(device)

In [None]:
# tokenizer = AutoTokenizer.from_pretrained("HuggingFaceTB/SmolLM2-1.7B-Instruct")
# model = AutoModelForCausalLM.from_pretrained("HuggingFaceTB/SmolLM2-1.7B-Instruct")

In [None]:
original_vocab_size = model.config.vocab_size
print(f"Original vocab size: {original_vocab_size}")

In [None]:
print(f"Model max length: {model.config.max_position_embeddings}")

In [None]:
print(f"Max sequence length: {tokenizer.model_max_length}")

In [None]:
model.gradient_checkpointing_enable()
model.resize_token_embeddings(len(tokenizer))

In [None]:
# model.config.use_cache = False

In [None]:
def count_tokens(file_path, tokenizer):
    with open(file_path, "r", encoding="utf-8") as f:
        text = f.read()
    tokens = tokenizer.tokenize(text)
    return len(tokens)

# Count tokens in training and validation files
train_token_count = count_tokens(TRAIN_FILE, tokenizer)
eval_token_count = count_tokens(VAL_FILE, tokenizer)
print(f"Train Tokens: {train_token_count}")
print(f"Validation Tokens: {eval_token_count}")

# Training the model

## Setting up training arguments

In [None]:
training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    eval_strategy="epoch",
    logging_strategy="epoch",     # Logs loss at intervals
    learning_rate=5e-6,
    per_device_train_batch_size=2,  # Reduced batch size for limited GPU memory
    per_device_eval_batch_size=2,
    num_train_epochs=10,
    weight_decay=0.01,
    save_total_limit=2,
    logging_dir=LOG_DIR,
    gradient_accumulation_steps=4,  # Accumulate gradients to simulate larger batch sizes
    fp16=True,
    bf16=False,
    optim="adamw_torch",
)

## Loading dataset

In [None]:
def load_chatbot_data(file_path):
    """Load and preprocess chatbot data from the given text file."""
    conversations = []
    with open(file_path, "r", encoding="utf-8") as f:
        lines = f.readlines()
        user_input, bot_response = None, None
        for line in lines:
            if line.startswith("User:"):
                user_input = line.replace("user:", "").strip()
            elif line.startswith("Bot:"):
                bot_response = line.replace("bot:", "").strip()
                if user_input and bot_response:
                    conversations.append({"input": user_input, "output": bot_response})
                    user_input, bot_response = None, None
    return pd.DataFrame(conversations)

In [None]:
df_train = load_chatbot_data(TRAIN_FILE)
df_val = load_chatbot_data(VAL_FILE)
dataset_train = Dataset.from_pandas(df_train)
dataset_val = Dataset.from_pandas(df_val)

print(f"Length of training dataset: {len(df_train)}")
print(f"Length of validation dataset: {len(df_val)}")

## Tokenizing dataset

In [None]:
# def find_max_interaction_length(dataset):
#     length_distribution = []

#     for user_msg, bot_msg in zip(dataset["input"], dataset["output"]):
#         # Combine user message and bot response
#         text_pair = f"User: {user_msg} Bot: {bot_msg}"

#         # Tokenize
#         tokenized = tokenizer(text_pair, truncation=False, padding=False)

#         # Get the token length of this interaction
#         num_tokens = len(tokenized["input_ids"])
#         length_distribution.append(num_tokens)

#     # Find the maximum token length
#     max_length = max(length_distribution)

#     return max_length, length_distribution

# dataset = load_chatbot_data("dataset/dataset.txt")
# max_len, token_lengths = find_max_interaction_length(dataset)

# print(f"Max token length in dataset: {max_len}")
# print(f"Token length distribution (first 10 samples): {token_lengths[:10]}")

In [None]:
# def tokenize_function(examples):
#     inputs = [f"{inp} {out}" for inp, out in zip(examples["input"], examples["output"])]
#     model_inputs = tokenizer(
#         inputs,
#         max_length=1024,
#         truncation=True,
#         padding="max_length",
#         return_tensors="pt",
#     )
#     model_inputs["labels"] = model_inputs["input_ids"].clone()

#     num_tokens = len(model_inputs["input_ids"][0])
#     print(f"Number of tokens: {num_tokens}")
#     return model_inputs

# # Apply tokenization
# tokenized_train = dataset_train.map(tokenize_function, batched=True)
# tokenized_val = dataset_val.map(tokenize_function, batched=True) 

In [None]:
def tokenize_function(examples):
    model_inputs = {
        "input_ids": [],
        "attention_mask": [],
        "labels": [],
    }

    for user_msg, bot_msg in zip(examples["input"], examples["output"]):
        text_pair = f"User: {user_msg} Bot: {bot_msg}"
        tokenized = tokenizer(
            text_pair,
            max_length=200,
            truncation=True,
            padding="max_length",
            return_tensors="pt",
        )

        # Store tokenized outputs
        model_inputs["input_ids"].append(tokenized["input_ids"].squeeze(0))
        model_inputs["attention_mask"].append(tokenized["attention_mask"].squeeze(0))

        # Labels for training: Shift left for causal language modeling
        labels = tokenized["input_ids"].clone()
        labels[labels == tokenizer.pad_token_id] = -100  # Ignore padding tokens in loss
        model_inputs["labels"].append(labels.squeeze(0))

    return model_inputs

# Apply tokenization
tokenized_train = dataset_train.map(tokenize_function, batched=True)
tokenized_val = dataset_val.map(tokenize_function, batched=True)

In [None]:
print(f"Total training samples: {len(tokenized_train)}")
print(f"Total validation samples: {len(tokenized_val)}")

## Data collator 

In [None]:
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False,  # Masked language modeling is not used for causal LM
)

## Compute Metrics

## Initialising the model

In [None]:
# Trainer.processing_class = tokenizer.__class__

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_val,
    tokenizer=tokenizer,
    data_collator=data_collator,
)

trainer.train()

## Saving Trained Model

In [None]:
model.save_pretrained(OUTPUT_DIR)
tokenizer.save_pretrained(OUTPUT_DIR) 

# Evaluating the model

## Load Test Dataset

In [5]:
file_path = TEST_FILE
with open(file_path, "r", encoding="utf-8") as file:
    chat_data = file.readlines()

# Extract user inputs and reference responses
user_inputs = []
reference_responses = []

for i in range(len(chat_data) - 1):
    if chat_data[i].startswith("User:"):
        user_text = chat_data[i].replace("User:", "").strip()
        ref_text = chat_data[i + 1].replace("Bot:", "").strip() if chat_data[i + 1].startswith("Bot:") else None
        
        if ref_text:  # Ensure reference answer exists
            user_inputs.append(user_text)
            reference_responses.append(ref_text)

# Convert to DataFrame
df = pd.DataFrame({"input": user_inputs, "reference_response": reference_responses})

In [6]:
df

Unnamed: 0,input,reference_response
0,I'm worried that my mental health struggles wi...,Your awareness is a positive first step. Focus...
1,I'm worried I'll never find a job I truly enjoy.,Finding fulfilling work can take time. Explore...
2,How do I stop feeling like others always get t...,"Instead of focusing on others, acknowledge you..."
3,I feel like I'm not smart enough compared to m...,Intelligence comes in many forms. Focus on you...
4,Why do I feel like I’m not living in the present?,Feeling disconnected from the present often co...
...,...,...
95,Why does everyone else get opportunities that ...,"It may seem that way, but opportunities often ..."
96,Why do I feel like nothing will ever change?,"Change can feel impossible, but even small ste..."
97,How can I prioritize self-care during busy per...,Prioritizing self-care during busy times invol...
98,What can I do if I feel anxious before big pre...,Anxiety before presentations is normal. Practi...


## Load Custom Model, GPT2 and Llama

In [7]:
custom_model = AutoModelForCausalLM.from_pretrained("results")
custom_tokenizer = AutoTokenizer.from_pretrained("results")

In [8]:
gpt_tokenizer = AutoTokenizer.from_pretrained("openai-community/gpt2-large")
gpt_model = AutoModelForCausalLM.from_pretrained("openai-community/gpt2-large")

In [9]:
!huggingface-cli login

^C


In [10]:
!huggingface-cli whoami

alvinwongster


In [11]:
llama_tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-1B-Instruct", use_auth_token=True)
llama_model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.2-1B-Instruct", use_auth_token=True)



## Generate reponses for all 3 models based on each prompt

In [12]:
import warnings
warnings.filterwarnings("ignore")

import torch
import pandas as pd
import time

# Define models and their corresponding tokenizers in a list
models = [
    {"name": "llama_model", "model": llama_model.to(device), "tokenizer": llama_tokenizer},
    {"name": "gpt_model", "model": gpt_model.to(device), "tokenizer": gpt_tokenizer},
    {"name": "custom_model", "model": custom_model.to(device), "tokenizer": custom_tokenizer}
]

prompt_column = "input"  # Update if your column name is different

# Function to generate chatbot response using the corresponding tokenizer
def chatbot_response(prompt, model, tokenizer):
    system_prompt = "You are a helpful and supportive chatbot. Answer the user's question with empathy, and in a clear and concise way without repeating their words exactly."
    full_prompt = f"{system_prompt}\nUser: {prompt}\nBot:"

    inputs = tokenizer(full_prompt, return_tensors="pt")
    inputs = {key: val.to(device) for key, val in inputs.items()}
    model.config.eos_token_id = -1

    start_time = time.time()
    outputs = model.generate(
        **inputs, 
        max_length=350,
        repetition_penalty=1.3,
        no_repeat_ngram_size=3,  
        temperature=0.6,  
        top_p=0.9,  
        top_k=50  
    )
    end_time = time.time()

    response = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Clean up response
    response = response.replace(system_prompt, "").strip()
    if "Bot:" in response:
        response = response.split("Bot:")[-1].strip()

    time_taken = end_time - start_time

    return response, time_taken

for model_info in models:
    model_name = model_info["name"]
    model = model_info["model"]
    tokenizer = model_info["tokenizer"]
    
    responses = []
    time_taken_values = []
    for prompt in df[prompt_column]:
        response, time_taken = chatbot_response(prompt, model, tokenizer)
        responses.append(response) 
        time_taken_values.append(time_taken)

    df[f"{model_name}_response"] = responses
    df[f"{model_name}_time_taken"] = time_taken_values

df.to_csv("reponse_df.csv")
df

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for

Unnamed: 0,input,reference_response,llama_model_response,llama_model_time_taken,gpt_model_response,gpt_model_time_taken,custom_model_response,custom_model_time_taken
0,I'm worried that my mental health struggles wi...,Your awareness is a positive first step. Focus...,That must be incredibly tough for you to hear ...,6.050337,You're not alone! We've all been there. It can...,58.169342,It can be really tough to deal with your own e...,17.592414
1,I'm worried I'll never find a job I truly enjoy.,Finding fulfilling work can take time. Explore...,It takes time to discover what you're passiona...,6.759610,You're not alone! There are many people who ha...,88.296812,It can be really tough to feel like you're not...,24.922911
2,How do I stop feeling like others always get t...,"Instead of focusing on others, acknowledge you...",That must have been really disheartening for b...,16.784726,You're not getting any recognition for your wo...,88.176957,Feeling undervalued can be really tough to dea...,19.284474
3,I feel like I'm not smart enough compared to m...,Intelligence comes in many forms. Focus on you...,Sorry to hear that it felt overwhelming for yo...,13.961601,You're right! We have lots of people who can d...,19.575490,It can be really frustrating when you think ot...,15.057861
4,Why do I feel like I’m not living in the present?,Feeling disconnected from the present often co...,That must be incredibly difficult for you righ...,14.847151,You're feeling stuck because you don't know wh...,24.185220,Feeling stuck or disconnected from your curren...,27.804937
...,...,...,...,...,...,...,...,...
95,Why does everyone else get opportunities that ...,"It may seem that way, but opportunities often ...",It sounds like you're feeling frustrated about...,14.436337,Not yet. Humans still need language to express...,173.594728,It can be frustrating when others seem to have...,37.623534
96,Why do I feel like nothing will ever change?,"Change can feel impossible, but even small ste...",That can be really tough to deal with when we ...,13.037065,Because you're not human! You don't have feeli...,115.669803,Feeling stuck can be really tough to deal with...,40.339920
97,How can I prioritize self-care during busy per...,Prioritizing self-care during busy times invol...,It sounds like you're feeling overwhelmed! Pri...,19.115128,You should try to avoid doing things that will...,40.422752,Remember to take breaks throughout your day; e...,45.955082
98,What can I do if I feel anxious before big pre...,Anxiety before presentations is normal. Practi...,It sounds like you're feeling some pre-present...,9.684567,You should relax! It is normal to be nervous w...,33.463500,Feeling nervous about upcoming events is commo...,49.452191


## Calculate BERTScore and empathy score for each model reponse

In [15]:
from bert_score import BERTScorer
from sentence_transformers import SentenceTransformer, util

empathy_pipeline = pipeline("text-classification", model="vtiyyal1/empathy_model")
embedding_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

def evaluate_empathy(response):
    scores = empathy_pipeline(response)[0]
    empathetic_score = scores['score'] 

    return empathetic_score

def compute_human_likeness_score(chatbot_response, human_responses):
    chatbot_embedding = embedding_model.encode(chatbot_response, convert_to_tensor=True)

    # Encode human-written responses
    human_embeddings = embedding_model.encode(human_responses, convert_to_tensor=True)

    # Compute cosine similarity with each human response
    similarities = util.pytorch_cos_sim(chatbot_embedding, human_embeddings)

    # Return the highest similarity score
    return float(torch.max(similarities))

# Initialize BERTScorer
scorer = BERTScorer(model_type="bert-base-uncased")

score_df = df 

# List of model response columns
model_response_columns = ["llama_model_response", "gpt_model_response", "custom_model_response"]

for model_col in model_response_columns:
    precision_scores = []
    recall_scores = []
    f1_scores = []
    empathy_scores = []
    human_likeness_scores = []

    for index, row in df.iterrows():
        reference = row["reference_response"]
        candidate = row[model_col]

        # Compute BERTScore
        P, R, F1 = scorer.score([candidate], [reference])
        empathy_score = evaluate_empathy(candidate)
        human_likeness_score = compute_human_likeness_score(candidate, reference)

        precision_scores.append(P.item())  
        recall_scores.append(R.item())
        f1_scores.append(F1.item())
        empathy_scores.append(empathy_score)
        human_likeness_scores.append(human_likeness_score)

    # Store scores in the DataFrame
    score_df[f"{model_col}_BERT_Precision"] = precision_scores
    score_df[f"{model_col}_BERT_Recall"] = recall_scores
    score_df[f"{model_col}_BERT_F1"] = f1_scores 
    score_df[f"{model_col}_Empathy_Score"] = empathy_scores
    score_df[f"{model_col}_Human_Likeness_SCore"] = human_likeness_scores

score_df

Device set to use cuda:0
You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


Unnamed: 0,input,reference_response,llama_model_response,llama_model_time_taken,gpt_model_response,gpt_model_time_taken,custom_model_response,custom_model_time_taken,llama_model_response_BERT_Precision,llama_model_response_BERT_Recall,...,gpt_model_response_BERT_Precision,gpt_model_response_BERT_Recall,gpt_model_response_BERT_F1,gpt_model_response_Empathy_Score,gpt_model_response_Human_Likeness_SCore,custom_model_response_BERT_Precision,custom_model_response_BERT_Recall,custom_model_response_BERT_F1,custom_model_response_Empathy_Score,custom_model_response_Human_Likeness_SCore
0,I'm worried that my mental health struggles wi...,Your awareness is a positive first step. Focus...,That must be incredibly tough for you to hear ...,6.050337,You're not alone! We've all been there. It can...,58.169342,It can be really tough to deal with your own e...,17.592414,0.395579,0.445057,...,0.418724,0.577408,0.485426,0.816975,0.263383,0.402647,0.458811,0.428898,0.783792,0.420539
1,I'm worried I'll never find a job I truly enjoy.,Finding fulfilling work can take time. Explore...,It takes time to discover what you're passiona...,6.759610,You're not alone! There are many people who ha...,88.296812,It can be really tough to feel like you're not...,24.922911,0.462695,0.574290,...,0.386464,0.528269,0.446375,0.818917,0.244039,0.453974,0.575235,0.507461,0.785639,0.581078
2,How do I stop feeling like others always get t...,"Instead of focusing on others, acknowledge you...",That must have been really disheartening for b...,16.784726,You're not getting any recognition for your wo...,88.176957,Feeling undervalued can be really tough to dea...,19.284474,0.451992,0.546833,...,0.403619,0.526456,0.456926,0.813223,0.306921,0.491554,0.590614,0.536550,0.780746,0.505758
3,I feel like I'm not smart enough compared to m...,Intelligence comes in many forms. Focus on you...,Sorry to hear that it felt overwhelming for yo...,13.961601,You're right! We have lots of people who can d...,19.575490,It can be really frustrating when you think ot...,15.057861,0.413430,0.504697,...,0.408612,0.500665,0.449979,0.782813,0.297600,0.501890,0.589699,0.542262,0.771097,0.414571
4,Why do I feel like I’m not living in the present?,Feeling disconnected from the present often co...,That must be incredibly difficult for you righ...,14.847151,You're feeling stuck because you don't know wh...,24.185220,Feeling stuck or disconnected from your curren...,27.804937,0.417864,0.495598,...,0.430438,0.470008,0.449354,0.789842,0.144585,0.483122,0.566879,0.521660,0.795552,0.720362
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,Why does everyone else get opportunities that ...,"It may seem that way, but opportunities often ...",It sounds like you're feeling frustrated about...,14.436337,Not yet. Humans still need language to express...,173.594728,It can be frustrating when others seem to have...,37.623534,0.389727,0.498271,...,0.385865,0.475733,0.426112,0.802557,0.101498,0.460870,0.523212,0.490066,0.787361,0.514936
96,Why do I feel like nothing will ever change?,"Change can feel impossible, but even small ste...",That can be really tough to deal with when we ...,13.037065,Because you're not human! You don't have feeli...,115.669803,Feeling stuck can be really tough to deal with...,40.339920,0.447204,0.468491,...,0.386213,0.485401,0.430163,0.809288,-0.013680,0.457551,0.564071,0.505258,0.784659,0.169350
97,How can I prioritize self-care during busy per...,Prioritizing self-care during busy times invol...,It sounds like you're feeling overwhelmed! Pri...,19.115128,You should try to avoid doing things that will...,40.422752,Remember to take breaks throughout your day; e...,45.955082,0.461408,0.648822,...,0.447943,0.476662,0.461856,0.793066,0.353879,0.462998,0.482975,0.472775,0.780747,0.383009
98,What can I do if I feel anxious before big pre...,Anxiety before presentations is normal. Practi...,It sounds like you're feeling some pre-present...,9.684567,You should relax! It is normal to be nervous w...,33.463500,Feeling nervous about upcoming events is commo...,49.452191,0.472570,0.586898,...,0.489538,0.573815,0.528337,0.792133,0.666354,0.526734,0.602382,0.562024,0.798762,0.688468


In [16]:
score_df.to_csv("score_df.csv")

## Calculate weighted score 

In [19]:
import pandas as pd

# Assuming score_df is already loaded with the necessary columns

# Calculate means for each metric
gpt_metrics = {
    "BERTScore_F1": score_df["gpt_model_response_BERT_F1"].mean(),
    "BERTScore_Precision": score_df["gpt_model_response_BERT_Precision"].mean(),
    "BERTScore_Recall": score_df["gpt_model_response_BERT_Recall"].mean(),
    "Time Taken": score_df["gpt_model_time_taken"].mean(),
    "Empathy Score": score_df["gpt_model_response_Empathy_Score"].mean(),
    "Human Likeness Score": score_df["gpt_model_response_Human_Likeness_SCore"].mean()
}

luminai_metrics = {
    "BERTScore_F1": score_df["custom_model_response_BERT_F1"].mean(),
    "BERTScore_Precision": score_df["custom_model_response_BERT_Precision"].mean(),
    "BERTScore_Recall": score_df["custom_model_response_BERT_Recall"].mean(),
    "Time Taken": score_df["custom_model_time_taken"].mean(),
    "Empathy Score": score_df["custom_model_response_Empathy_Score"].mean(),
    "Human Likeness Score": score_df["custom_model_response_Human_Likeness_SCore"].mean()
}

llama_metrics = {
    "BERTScore_F1": score_df["llama_model_response_BERT_F1"].mean(),
    "BERTScore_Precision": score_df["llama_model_response_BERT_Precision"].mean(),
    "BERTScore_Recall": score_df["llama_model_response_BERT_Recall"].mean(),
    "Time Taken": score_df["llama_model_time_taken"].mean(),
    "Empathy Score": score_df["llama_model_response_Empathy_Score"].mean(),
    "Human Likeness Score": score_df["llama_model_response_Human_Likeness_SCore"].mean()
}

# Create DataFrame and round to 2 decimal places
metrics_score_df = pd.DataFrame({
    "Metric": list(gpt_metrics.keys()),
    "GPT": list(gpt_metrics.values()),
    "LuminAI": list(luminai_metrics.values()),
    "Llama": list(llama_metrics.values())
}).round(2)

# Calculate weighted scores
weights = {
    "Empathy Score": 0.4,
    "Human Likeness Score": 0.2,
    "BERTScore_F1": 0.1,
    "BERTScore_Precision": 0.1,
    "BERTScore_Recall": 0.1,
    "Time Taken": 0.1  # Handled separately
}

# Normalize time taken (shortest gets 1, longest gets 1/3)
time_values = [gpt_metrics["Time Taken"], luminai_metrics["Time Taken"], llama_metrics["Time Taken"]]
min_time = min(time_values)
max_time = max(time_values)

time_scores = [(1 if t == min_time else (1/3 if t == max_time else 2/3)) for t in time_values]

# Compute final weighted scores
def calculate_final_score(model_metrics, time_score):
    score = sum(model_metrics[m] * weights[m] for m in weights if m != "Time Taken")
    score += time_score * weights["Time Taken"]
    return round(score, 2)

final_scores = {
    "GPT": calculate_final_score(gpt_metrics, time_scores[0]),
    "LuminAI": calculate_final_score(luminai_metrics, time_scores[1]),
    "Llama": calculate_final_score(llama_metrics, time_scores[2]),
}

# Append final score to DataFrame
final_score_row = pd.DataFrame({
    "Metric": ["Final Score"],
    "GPT": [final_scores["GPT"]],
    "LuminAI": [final_scores["LuminAI"]],
    "Llama": [final_scores["Llama"]]
})

metrics_score_df = pd.concat([metrics_score_df, final_score_row], ignore_index=True)

metrics_score_df


Unnamed: 0,Metric,GPT,LuminAI,Llama
0,BERTScore_F1,0.45,0.51,0.48
1,BERTScore_Precision,0.41,0.47,0.44
2,BERTScore_Recall,0.51,0.55,0.53
3,Time Taken,89.65,39.42,15.85
4,Empathy Score,0.8,0.79,0.79
5,Human Likeness Score,0.27,0.5,0.45
6,Final Score,0.54,0.63,0.65


# Testing the model

## Connection to Database

In [None]:
load_dotenv()

USER = os.getenv("user")
PASSWORD = os.getenv("password")
HOST = os.getenv("host")
PORT = os.getenv("port")
DBNAME = os.getenv("dbname")

try:
    connection = psycopg2.connect(
        user=USER,
        password=PASSWORD,
        host=HOST,
        port=PORT,
        dbname=DBNAME
    )
    print("Connection successful!")
    
    # Create a cursor to execute SQL queries
    cursor = connection.cursor()

except Exception as e:
    print(f"Failed to connect: {e}")

## Retrieval Augmented Generation

In [None]:
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

def store_conversation(user_message, bot_response, sentiment):
    """Stores a conversation in Supabase PostgreSQL."""
    embedding = embedding_model.encode([user_message])[0].tolist()
    try:
        cursor.execute(
            "INSERT INTO conversations (user_message, bot_response, sentiment, embedding) VALUES (%s, %s, %s, %s)",
            (user_message, bot_response, sentiment, embedding)
        )
        connection.commit()
        print("Stored conversation")
    except Exception as e:
        print(e)

In [None]:
def retrieve_past_conversations(query):
    query_embedding = embedding_model.encode([query])[0] 

    if isinstance(query_embedding, np.ndarray): 
        query_embedding = query_embedding.tolist()

    cursor.execute(
        "SELECT timestamp, user_message, bot_response FROM conversations "
        "ORDER BY embedding <-> %s::vector LIMIT 5",
        (query_embedding,)  # Ensure it's passed as a tuple
    )

    results = cursor.fetchall()

    if results:
        context = "\n".join([f"[{r[0]}] User: {r[1]}\nBot: {r[2]}" for r in results])
        return context
    else:
        return ""

## Load trained model

In [None]:
model = AutoModelForCausalLM.from_pretrained("results", device_map="auto")
tokenizer = AutoTokenizer.from_pretrained("results", device_map="auto")

model.to(device)

## Models for translation and sentiment analysis

In [None]:
pipe = pipeline("text2text-generation", model="Varine/opus-mt-zh-en-model")
sentiment_classifier = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")

In [None]:
def get_sentiment(text):
    """Improved sentiment analysis with keyword-based distress detection."""
    crisis_keywords = ["end my life", "suicide", "don't want to live", "kill myself", "worthless", "no reason to live"]

    # Check if crisis words are in the input
    if any(phrase in text.lower() for phrase in crisis_keywords):
        return "crisis"  # Override sentiment if crisis words are detected

    # Otherwise, use DistilBERT-based sentiment analysis
    result = sentiment_classifier(text)[0]
    label = result['label']

    # Convert to sentiment categories based on DistilBERT outputs
    if label == "NEGATIVE":
        return "negative"
    elif label == "POSITIVE":
        return "positive"
    else:
        return "neutral"

## Chatbot response

In [None]:
def chatbot_response(prompt):
    retrieved_context = retrieve_past_conversations(prompt)
    translated_prompt = pipe(prompt)[0].get("generated_text", "Translation failed")  

    print(translated_prompt)

    system_prompt = "You are a helpful and supportive chatbot. Answer the user's question in a clear and concise way without repeating their words exactly."
    full_prompt = f"{system_prompt}\n{retrieved_context}\nUser: {prompt}\nBot:"

    sentiment_results = get_sentiment(prompt)

    inputs = tokenizer(full_prompt, return_tensors="pt")
    inputs = {key: val.to(device) for key, val in inputs.items()}

    outputs = model.generate(
        **inputs, 
        max_new_tokens=650,
        repetition_penalty=1.3,
        no_repeat_ngram_size=3,  
        temperature=0.8,  
        top_p=0.9,  #
        top_k=50  
    )

    response = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Ensure the response does not include the system prompt
    response = response.replace(system_prompt, "").strip()
    
    # Remove any leftover system prompt instructions
    if "Bot:" in response:
        response = response.split("Bot:")[-1].strip()

    # Translate response to Chinese
    # translated = pipe(response)[0]['translation_text']
    translated_text = pipe(response)[0].get("generated_text", "Translation failed")  # Use .get() to avoid KeyError

    return response, translated_text, sentiment_results

In [None]:
if __name__ == "__main__":
    print("Chatbot is ready! Type 'exit' to stop.")
    while True:        
        user_input = input("User: ")
        if user_input.lower() == "exit":
            cursor.close()
            connection.close()
            print("DB connection ended")
            break
        response, translated, sentiment_results = chatbot_response(user_input)
        store_conversation(user_input, response, sentiment_results)
        print(f"User: {user_input}")
        print(f"Bot: {response}")
        print(f"Translated Text: {translated}")
        print(f"Sentiment Results: {sentiment_results}")