This notebook was executed in Google Colab. To begin, connect to a GPU runtime (for example, a V100) and upload the necessary datasets (the evaluation cells read `/content/testdata.txt`). Access to the Llama 2 models is gated by Meta; to request access, follow this link: https://huggingface.co/meta-llama/Llama-2-7b. The notebook loads the `meta-llama/Llama-2-7b-chat-hf` checkpoint. Once permission is granted, you will need to log in to Hugging Face with a valid access token.

In [None]:
%%capture
!pip install accelerate peft bitsandbytes transformers trl sacrebleu rouge

In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List, Tuple
from datasets import load_dataset
import json
import csv

from huggingface_hub import login
# Authenticate with the Hugging Face Hub before downloading the model.
# login() is called without arguments, so the token is entered interactively
# (it renders as a widget in the recorded output) instead of being stored in
# the notebook.
login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
# Check if a GPU is available
# Fail fast with a clear message instead of attempting to load the model without CUDA.
if not torch.cuda.is_available():
    raise EnvironmentError("This script requires a GPU to run.")

# Constants
# Prompts longer than this many tokens are trimmed by generate_response.
# NOTE(review): this appears to equal the model's full context size, which would
# leave no room for generated tokens when a prompt hits the limit - confirm.
MAX_INPUT_TOKEN_LENGTH = 4096
# Default value of generate_response's `max_new_tokens` parameter.
DEFAULT_MAX_NEW_TOKENS = 50

# Load the model and tokenizer
# Weights are requested in float16; device_map="auto" leaves device placement to the library.
model_id = "meta-llama/Llama-2-7b-chat-hf"
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(model_id)
# Presumably stops the tokenizer's chat template from injecting a built-in system
# prompt, so only an explicit `system_prompt` is used - confirm against the
# installed transformers version.
tokenizer.use_default_system_prompt = False


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/614 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/188 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.62k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

In [None]:
def generate_response(
    message: str,
    chat_history: List[Tuple[str, str]],
    system_prompt: str = "",
    max_new_tokens: int = DEFAULT_MAX_NEW_TOKENS,
    temperature: float = 0.4,
    top_p: float = 0.9,
    top_k: int = 50,
    repetition_penalty: float = 1.2
) -> str:
    """Generate the model's reply to ``message`` given the earlier turns.

    Args:
        message: The latest user message.
        chat_history: Earlier ``(user, assistant)`` turns, oldest first.
        system_prompt: Optional system message placed at the start of the
            conversation; omitted when empty.
        max_new_tokens: Upper bound on the number of tokens generated for the
            reply. This value is now forwarded to ``model.generate``; it was
            previously overridden by a hard-coded 200, so pass
            ``max_new_tokens=200`` to reproduce the earlier reply length.
        temperature: Sampling temperature forwarded to ``model.generate``.
        top_p: Nucleus-sampling threshold forwarded to ``model.generate``.
        top_k: Top-k sampling cutoff forwarded to ``model.generate``.
        repetition_penalty: Repetition penalty forwarded to ``model.generate``.

    Returns:
        The newly generated reply text, with any literal ``[INST]`` /
        ``[/INST]`` markers removed and surrounding whitespace stripped.
    """
    # Build the conversation history
    conversation = []
    if system_prompt:
        conversation.append({"role": "system", "content": system_prompt})
    for user, assistant in chat_history:
        conversation.extend([{"role": "user", "content": user}, {"role": "assistant", "content": assistant}])
    conversation.append({"role": "user", "content": message})

    # Tokenize the conversation
    input_ids = tokenizer.apply_chat_template(conversation, return_tensors="pt")
    if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
        # Keep the most recent tokens so the latest turns survive the trim.
        input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
        print(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
    input_ids = input_ids.to(model.device)

    # Generate a response
    output_ids = model.generate(
        input_ids=input_ids,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        top_p=top_p,
        top_k=top_k,
        temperature=temperature,
        num_beams=1,
        repetition_penalty=repetition_penalty
    )

    # The generated sequence starts with the prompt tokens, so decode only what
    # follows them. Splitting the decoded text on `message` is unreliable: it
    # fails for an empty message and picks the wrong span when the reply
    # repeats the user's words.
    prompt_length = input_ids.shape[1]
    new_token_ids = output_ids[0, prompt_length:]
    latest_response = tokenizer.decode(new_token_ids, skip_special_tokens=True)
    # Drop any instruction markers the model itself may have emitted.
    latest_response = latest_response.replace('[INST]', '').replace('[/INST]', '').strip()
    torch.cuda.empty_cache()

    return latest_response


In [None]:
def interactive_chat():
    """Run a console chat loop with the model until the user quits.

    Each line read from ``input`` is sent to ``generate_response`` together
    with the turns exchanged so far, and the reply is printed and appended to
    the history. Typing ``exit`` or ``quit`` (any letter case, surrounding
    whitespace ignored) ends the loop. Blank lines are ignored and the user is
    prompted again.
    """
    chat_history = []
    print("Llama-2-7b Chatbot. Type 'exit' or 'quit' to end the conversation.\n")

    while True:
        user_input = input("You: ")
        # Normalise only for the control checks; the model receives the text as typed.
        command = user_input.strip().lower()
        if command in ('exit', 'quit'):
            break
        if not command:
            # A blank line gives the model nothing to answer, so prompt again.
            continue

        response = generate_response(user_input, chat_history)
        print("Llama-2-7b:", response)
        chat_history.append((user_input, response))

# Run the interactive chat
# This call blocks on input() until 'exit' or 'quit' is typed, so it needs a
# person at the keyboard; skip this cell for an unattended run.
interactive_chat()


Llama-2-7b Chatbot. Type 'exit' or 'quit' to end the conversation.

You: I've recently retired, and while I thought I'd enjoy the free time, I'm actually feeling quite lost. It's a lot harder than I expected.
Llama-2-7b: Congratulations on your retirement! While it can be exciting to have more free time, it's common for people to feel a sense of loss or uncertainty after leaving their job behind. Here are some reasons why you might be feeling this way:

1. Loss of identity: For many years, your work defined who you were and gave you a sense of purpose. Without that structure, you may struggle with finding a new identity or sense of self.
2. Social isolation: If you spent most of your time at work, you may find yourself missing the social interactions and connections you had with colleagues and clients.
3. Routine disruption: Retirement can bring about significant changes in routine, which can take time to adjust to. You may miss the daily structure and predictability of working life.
4

In [None]:
# Replace 'file_path' with the path to your text file.
# The file is read as JSON Lines: one JSON document per line.
file_path = '/content/testdata.txt'

# Decode explicitly as UTF-8 so the result does not depend on the runtime's
# locale, and skip blank lines (e.g. trailing newlines), which json.loads rejects.
with open(file_path, 'r', encoding='utf-8') as file:
    dataset_test = [json.loads(line.strip()) for line in file if line.strip()]

In [None]:
def _pair_turns(dialog):
    """Split one dialog into parallel lists of user inputs and reference replies.

    Consecutive 'usr' turns are merged into a single input (each stripped text
    followed by one space). The first 'sys' turn after pending user text
    becomes that input's reference reply; 'sys' turns with no pending user
    text are ignored. User text still pending when the dialog ends is kept and
    paired with an empty reference.
    """
    user_inputs, references = [], []
    pending = []  # stripped 'usr' texts waiting for the next 'sys' reply

    for turn in dialog:
        speaker = turn['speaker']
        if speaker == 'usr':
            pending.append(turn['text'].strip())
        elif speaker == 'sys':
            reply = turn['text'].strip()
            if pending:
                user_inputs.append(" ".join(pending) + " ")
                references.append(reply)
                pending = []

    if pending:
        user_inputs.append(" ".join(pending) + " ")
        references.append("")

    return user_inputs, references


# One inner list per conversation, aligned index-for-index across both lists.
conversation_user = []
conversation_golden_responses = []

for conversation_data in dataset_test:
    inputs_for_convo, goldens_for_convo = _pair_turns(conversation_data['dialog'])
    conversation_user.append(inputs_for_convo)
    conversation_golden_responses.append(goldens_for_convo)


In [None]:
def generate_model_responses_with_golden_history(conversation_user, conversation_golden_responses):
    """Generate one model reply per user turn of a single conversation.

    The history given to the model for each turn contains the reference
    ("golden") replies of the earlier turns, not the model's own earlier
    outputs.

    Args:
        conversation_user: User inputs of one conversation, in order.
        conversation_golden_responses: Reference replies aligned with
            ``conversation_user``.

    Returns:
        A list with the model's reply for each paired turn, in order.
    """
    history = []
    predictions = []

    for turn_text, reference in zip(conversation_user, conversation_golden_responses):
        # Ask the model first, then record the reference reply for later turns.
        predictions.append(generate_response(turn_text, history))
        history.append((turn_text, reference))

    return predictions


# Generate replies for every conversation; model_responses[i] holds the
# per-turn replies for conversation_user[i].
model_responses = []
# Derive the progress total from the data instead of hard-coding the dataset size.
total_conversations = len(conversation_user)
for convo_number, (convo, golden_response) in enumerate(zip(conversation_user, conversation_golden_responses), 1):
    # '\r' with end='' rewrites one progress line instead of printing a line per conversation.
    print(f'\rGenerating response for convo {convo_number}/{total_conversations}', end='')
    model_response = generate_model_responses_with_golden_history(convo, golden_response)
    model_responses.append(model_response)

    torch.cuda.empty_cache()



Generating response for convo 195/195

In [None]:
# Collapse the per-conversation lists into two flat lists with one entry per turn.
model_response_flattened = [reply for convo_replies in model_responses for reply in convo_replies]
conversation_golden_responses_flattened = [
    golden for convo_goldens in conversation_golden_responses for golden in convo_goldens
]

if len(model_response_flattened) == len(conversation_golden_responses_flattened):
    # Output file: one row per turn, model reply next to its reference reply.
    filename = "base_outputs.csv"

    # newline='' leaves line-ending handling to the csv module.
    with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
        csvwriter = csv.writer(csvfile)

        # Header row, then every (model, golden) pair in order.
        csvwriter.writerow(['model', 'golden'])
        csvwriter.writerows(zip(model_response_flattened, conversation_golden_responses_flattened))

    print(f"CSV file '{filename}' created successfully.")
else:
    # The two lists cannot be paired row by row, so nothing is written.
    print("Error: Lists are not of the same length.")



CSV file 'test.csv' created successfully.
