# Setup

In [None]:
# Standard library imports
import pandas as pd
import os
import re
import math
import json
import random

# Third-party imports
import torch
import tenacity

# Hugging Face imports
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, AutoConfig, pipeline, AutoModel
from peft import PeftModel, PeftConfig
from langchain_huggingface import HuggingFacePipeline

In [None]:
# Authenticate with the Hugging Face Hub using a token kept in Kaggle Secrets
from huggingface_hub import login
from kaggle_secrets import UserSecretsClient

# Client used to read the secrets attached to this notebook
user_secrets = UserSecretsClient()

# Hugging Face access token, stored under the secret name "Hugging_Face_token"
Hugging_Face_token = user_secrets.get_secret("Hugging_Face_token")

# Log in so that subsequent Hub downloads are authenticated
login(token=Hugging_Face_token)

# Load Data

In [None]:
import pandas as pd

In [None]:
# Read the Theresa May pairs CSV from the Kaggle input directory into a dataframe
df_pairs_TheresaMay = pd.read_csv(
    '/kaggle/input/parlspeech-eda-ipynb/df_pairs_TheresaMay.csv',
)

# Load Model

In [None]:
# Report whether PyTorch can see a CUDA device; nothing is printed when none is found
has_gpu = torch.cuda.is_available()
if has_gpu:
    print("Yes, you have a GPU")

In [None]:
# Hugging Face Hub identifier of the model used throughout this notebook
BASE_MODEL_ID = "meta-llama/Llama-3.2-3B-Instruct"

# Fast tokenizer matching the model above
tokenizer = AutoTokenizer.from_pretrained(
    BASE_MODEL_ID,
    use_fast=True,
)

# Reuse the end-of-sequence token for padding so a pad token is always defined
tokenizer.pad_token = tokenizer.eos_token

In [None]:
# Quantization settings: 4-bit NF4 weights with double quantization,
# using bfloat16 as the compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# Load the causal LM with the quantization config above; device placement
# is delegated to the library via device_map="auto".
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL_ID,
    device_map="auto",
    quantization_config=bnb_config,
)

In [None]:
# Put the model in evaluation mode before running inference. Left as a bare
# trailing expression so the notebook cell output is unchanged.
base_model.eval()  # Set to evaluation mode

In [None]:
# Run Inferencing
# System message that sets the Theresa May persona for every generation.
SYSTEM_PROMPT = """
    You are Theresa May, a Member of Parliament responding in a debate session in the UK House of Commons.
    Act as Theresa May would, using your distinct voice and perspective.
    Respond to the statement.
    Ensure that your response is direct, fully in character, and reflects your established views and tone.
    Respond exactly as Theresa May would speak in this context.
"""


def generate_response(statement: str) -> str:
    """Generate the model's reply to ``statement`` under ``SYSTEM_PROMPT``.

    The statement is wrapped in a system/user chat, rendered with the
    tokenizer's chat template, and passed to ``base_model.generate``.
    Only the newly generated tokens are decoded, so the returned text can
    never contain any part of the prompt.

    Args:
        statement: The statement to respond to (sent as the user turn).

    Returns:
        The generated reply with newlines replaced by spaces and
        surrounding whitespace stripped.
    """
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": statement},
    ]

    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

    # The rendered template is expected to already carry its special tokens
    # (see the Transformers chat-templating guide), so do not let the
    # tokenizer prepend another set. Inputs follow the model's own device
    # instead of a hard-coded "cuda".
    inputs = tokenizer(
        prompt,
        return_tensors='pt',
        padding=True,
        truncation=True,
        add_special_tokens=False,
    ).to(base_model.device)

    outputs = base_model.generate(**inputs, max_new_tokens=300, num_return_sequences=1)

    # The output sequence starts with the prompt tokens; slice them off rather
    # than searching the decoded text for the word "assistant", which would
    # also match that word inside the statement itself.
    prompt_length = inputs["input_ids"].shape[-1]
    reply_tokens = outputs[0][prompt_length:]

    generated_text = tokenizer.decode(reply_tokens, skip_special_tokens=True)
    return generated_text.replace('\n', ' ').strip()

In [None]:
# Generate one reply per entry of the "prompt" column and store the
# results in a new "model_response" column.
prompts = df_pairs_TheresaMay["prompt"]
df_pairs_TheresaMay["model_response"] = prompts.apply(generate_response)

In [None]:
# Write the dataframe to the Kaggle working directory, omitting the index column
df_pairs_TheresaMay.to_csv(
    '/kaggle/working/df_pairs_TheresaMay_simulated_Llama3.2_base.csv',
    index=False,
)