This notebook was executed in Google Colab. To begin, connect to a GPU runtime (for example, a V100) and upload the necessary datasets (the evaluation cells below read `/content/testdata.txt`). Access to the Llama-2-7b model is controlled by Meta. To request access, follow this link: https://huggingface.co/meta-llama/Llama-2-7b. Once permission is granted, you will need to log in to Hugging Face with a valid access token.

In [None]:
# capture
%%capture
!pip install accelerate peft bitsandbytes transformers trl sacrebleu rouge

In [None]:
# Interactive Hugging Face login: renders a prompt for an access token.
# Per the introduction, this is required because access to Llama-2 is gated.
from huggingface_hub import login
login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
import os
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import LoraConfig, PeftModel
from trl import SFTTrainer
from typing import List, Tuple
import re
import json
import csv

In [None]:
# Fail fast when no CUDA device is available
if not torch.cuda.is_available():
    raise EnvironmentError("This script requires a GPU to run.")

# Constants
# Upper bound on prompt tokens; generate_response truncates the encoded conversation to this length.
MAX_INPUT_TOKEN_LENGTH = 4096
# NOTE(review): not referenced by any visible cell -- generate_response defaults to 75 new tokens.
DEFAULT_MAX_NEW_TOKENS = 50

# Load the model and tokenizer
model_id ="benschlagman/llama-2-7b-chat-esconv"
# Weights are loaded in float16; device_map="auto" leaves device placement to the library.
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(model_id)
# NOTE(review): presumably turns off the tokenizer's built-in system prompt, since
# generate_response assembles its own prompt by hand -- confirm against the tokenizer docs.
tokenizer.use_default_system_prompt = False

config.json:   0%|          | 0.00/614 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/188 [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/134M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.76k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/21.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/437 [00:00<?, ?B/s]

In [None]:
def clean_response(response: str) -> str:
    """Reduce raw generated text to the first assistant utterance.

    Steps:
      1. Remove clock-style timestamps such as ``10:30 AM``.
      2. Remove every remaining run of digits.
      3. Split on ``[USER]``/``[/USER]``/``[ASSISTANT]``/``[/ASSISTANT]`` tags and keep
         only the first non-empty segment, discarding anything the model generated
         after it (e.g. when it starts talking to itself).

    Args:
        response: Text produced by the model for the current turn.

    Returns:
        The first non-empty segment with surrounding whitespace stripped, or ``""``
        when nothing but tags/whitespace remains.
    """
    # Timestamps must go first: once the digits are stripped the timestamp pattern
    # can no longer match, and "10:30 AM" would be left behind as ": AM".
    response = re.sub(r"\d{1,2}:\d{2}\s?(AM|PM)", "", response)
    response = re.sub(r"\d+", "", response)

    # The first non-empty segment is the assistant's reply; everything after it is dropped.
    for segment in re.split(r"\[/?USER\]|\[/?ASSISTANT\]", response):
        segment = segment.strip()
        if segment:
            return segment
    return ""

def generate_response(
    message: str,
    chat_history: List[Tuple[str, str]],
    system_prompt: str = "You are a mental health supporter. Please provide adequate mental health support.",
    max_new_tokens: int = 75,
    temperature: float = 0.4,
    top_p: float = 0.7,
    top_k: int = 50,
    do_sample: bool = True,
    repetition_penalty: float = 1.2
) -> str:
    """Generate the assistant's next reply to ``message`` given the earlier turns.

    The prompt is the system prompt, followed by every past turn wrapped in
    ``[USER]``/``[ASSISTANT]`` tags, followed by the new user message.

    Args:
        message: Latest user utterance.
        chat_history: Earlier ``(user, assistant)`` turns, oldest first.
        system_prompt: Text placed at the very start of the prompt.
        max_new_tokens: Maximum number of tokens to generate for the reply.
        temperature: Sampling temperature (only forwarded when ``do_sample`` is true).
        top_p: Nucleus-sampling threshold (only forwarded when ``do_sample`` is true).
        top_k: Top-k sampling cutoff (only forwarded when ``do_sample`` is true).
        do_sample: Sample when true; otherwise sampling is switched off.
        repetition_penalty: Repetition penalty forwarded to ``model.generate``.

    Returns:
        The reply after ``clean_response`` and removal of any remaining ``[...]`` tags.
    """
    past_turns = "".join(
        f"[USER] {user} [/USER] [ASSISTANT] {assistant} [/ASSISTANT] "
        for user, assistant in chat_history
    )
    conversation = f"{system_prompt}{past_turns}[USER] {message} [/USER]"

    # Tokenize the conversation.
    # NOTE(review): with the tokenizer's default truncation side, an over-long prompt
    # would presumably lose its tail (the newest message) -- confirm before relying on
    # very long histories.
    input_ids = tokenizer.encode(
        conversation,
        return_tensors="pt",
        truncation=True,
        max_length=MAX_INPUT_TOKEN_LENGTH,
    )
    input_ids = input_ids.to(model.device)
    prompt_length = input_ids.shape[1]

    generation_kwargs = dict(
        input_ids=input_ids,
        # Bounds the reply length directly instead of computing prompt length + budget.
        max_new_tokens=max_new_tokens,
        pad_token_id=tokenizer.eos_token_id,
        # Honour the caller's choice (it was previously hard-coded to True).
        do_sample=do_sample,
        repetition_penalty=repetition_penalty,
    )
    if do_sample:
        # Sampling controls are only meaningful when sampling is enabled.
        generation_kwargs.update(top_p=top_p, top_k=top_k, temperature=temperature)

    output_ids = model.generate(**generation_kwargs)

    # Decode only the newly generated tokens. Slicing the decoded text by the length of
    # the decoded prompt breaks whenever decoding does not reproduce the prompt
    # character-for-character.
    new_token_ids = output_ids[0][prompt_length:]
    generated_response = tokenizer.decode(new_token_ids, skip_special_tokens=True).strip()

    cleaned_response = clean_response(generated_response)
    # Drop any bracketed tag that survived the cleaning step.
    return re.sub(r"\[.*?\]", "", cleaned_response)


In [None]:
def interactive_chat():
    """Run a console chat loop with the model until the user types 'exit' or 'quit'.

    Each user line is sent to ``generate_response`` together with all previous
    (user, reply) pairs of this session; the reply is printed and remembered.
    """
    exit_commands = ('exit', 'quit')
    history = []
    print("Llama-2-7b Chatbot. Type 'exit' or 'quit' to end the conversation.\n")

    user_input = input("You: ")
    while user_input.lower() not in exit_commands:
        reply = generate_response(user_input, history)
        print("Llama-2-7b:", reply)
        history.append((user_input, reply))
        user_input = input("You: ")

# Run the interactive chat
# NOTE: this call waits on input() until 'exit' or 'quit' is typed, so an unattended
# "Run All" pauses at this cell.
interactive_chat()


Llama-2-7b Chatbot. Type 'exit' or 'quit' to end the conversation.

You: I've recently retired, and while I thought I'd enjoy the free time, I'm actually feeling quite lost. It's a lot harder than I expected.
Llama-2-7b: That is understandable. Retirement can be difficult for some people because they have to adjust their routine and find new things to do with all of that extra time on your hands. Have you tried finding hobbies or activities that interest you?
You: I've considered volunteering but haven't taken the plunge yet. As for hobbies, I'm not sure where to start. I guess I'm worried I won't find anything that's as fulfilling as my work was.
Llama-2-7b: Well, there are plenty of online resources available if you want to try out different hobbies! There are also local groups in most areas that meet up regularly. Maybe you could join one of those?
You: That sounds like a good idea, do you think I could find a group that has the same hobbies as me?
Llama-2-7b: Yes, definitely! If yo

In [None]:
# Replace 'file_path' with the path to your text file
file_path = '/content/testdata.txt'

# The file is parsed line by line, one JSON document per line.
dataset_test = []
with open(file_path, 'r', encoding='utf-8') as file:  # explicit encoding: do not depend on the platform default
    for line in file:
        line = line.strip()
        if not line:
            # A blank line would make json.loads raise; skip it instead.
            continue
        dataset_test.append(json.loads(line))

In [None]:
# Two parallel lists of lists: for every conversation, the user inputs and the
# reference ("golden") system replies that answer them.
conversation_user = []
conversation_golden_responses = []

for record in dataset_test:
    user_turns = []
    reference_replies = []
    pending = ""  # user text collected since the last paired system reply

    for utterance in record['dialog']:
        speaker = utterance['speaker']

        if speaker == 'usr':
            # Consecutive user messages are merged into one space-separated input.
            pending += utterance['text'].strip() + " "

        elif speaker == 'sys':
            reply = utterance['text'].strip()
            if not pending:
                # A system turn with no preceding unpaired user text is not recorded.
                continue
            user_turns.append(pending)
            reference_replies.append(reply)
            pending = ""

    # User text left unanswered at the end is paired with an empty reference.
    if pending:
        user_turns.append(pending)
        reference_replies.append("")

    conversation_user.append(user_turns)
    conversation_golden_responses.append(reference_replies)

In [None]:
def generate_model_responses_with_golden_history(conversation_user, conversation_golden_responses):
    """Generate one model reply per user input of a single conversation.

    The history handed to the model is built from the reference replies rather than
    from the model's own earlier outputs.

    Args:
        conversation_user: User inputs of the conversation, in order.
        conversation_golden_responses: Reference reply for each user input.

    Returns:
        List with the model's reply for every user input, in order.
    """
    history = []
    outputs = []

    turns = zip(conversation_user, conversation_golden_responses)
    for prompt, reference in turns:
        # The model sees only the turns that came before the current one.
        outputs.append(generate_response(prompt, history))
        # Record the reference reply, not the generated one, for the following turns.
        history.append((prompt, reference))

    return outputs


In [None]:
# Generate replies conversation by conversation.
# NOTE(review): generate_response samples by default and no seed is set in the visible
# cells, so the generated text can differ between runs.
total_convos = len(conversation_user)  # derive the total instead of hard-coding it

model_responses = []
for convo_number, (user_turns, golden_turns) in enumerate(zip(conversation_user, conversation_golden_responses), 1):
    # '\r' rewrites the same output line to act as a progress counter.
    print(f'\rGenerating response for convo {convo_number}/{total_convos}', end='')
    model_responses.append(generate_model_responses_with_golden_history(user_turns, golden_turns))

    torch.cuda.empty_cache()


Generating response for convo 195/195

In [None]:
# Flatten the per-conversation lists into one list of replies each.
model_response_flattened = [reply for per_convo in model_responses for reply in per_convo]
conversation_golden_responses_flattened = [
    reference for per_convo in conversation_golden_responses for reference in per_convo
]

if len(model_response_flattened) == len(conversation_golden_responses_flattened):
    # Output file: one row per turn, model reply next to its reference reply.
    filename = "finetuned_outputs.csv"

    with open(filename, 'w', newline='', encoding='utf-8') as out_file:
        table = csv.writer(out_file)
        table.writerow(['model', 'golden'])  # column headers
        table.writerows(zip(model_response_flattened, conversation_golden_responses_flattened))

    print(f"CSV file '{filename}' created successfully.")
else:
    # Rows could not be paired up; nothing is written.
    print("Error: Lists are not of the same length.")



CSV file 'finetuned_output_3e.csv' created successfully.
