In [None]:
!git clone https://github.com/brimmann/llm-recipes-2.git

In [None]:
# Work from the cloned repo's root directory.
# NOTE(review): presumably needed so `from prompt.prompt import ...` below resolves
# against the repo's own `prompt` package — confirm.
%cd /content/llm-recipes-2

In [None]:
!pip install "datasets==3.6" "transformers>=4.57.1" "sentencepiece==0.2.1" "wandb>=0.22.2" "peft==0.9"

In [None]:
from datasets import load_dataset

# Leave as None when the dataset repo exposes a single split; otherwise set it to the
# name of the split to evaluate.
DATASET_SPLIT = None

loaded = load_dataset("brimmann2/squad-v2-sampled2", split=DATASET_SPLIT)

# Without `split=`, load_dataset returns a DatasetDict keyed by split name. The cells
# below index rows by integer (DataLoader) and read columns by name (nnds['context']),
# which only works on a single Dataset, so unwrap to exactly one split here.
if isinstance(loaded, dict):
    if len(loaded) != 1:
        raise ValueError(
            f"Dataset has splits {list(loaded)}; set DATASET_SPLIT to the one to evaluate."
        )
    (ds,) = loaded.values()
else:
    ds = loaded

In [None]:
# Display the loaded dataset's summary as the cell output.
ds

In [None]:
from prompt.prompt import create_chat_prompt

In [None]:
def create_prompt_column(item, tokenizer):
    """Attach a chat-formatted QA prompt to a single dataset row.

    Args:
        item: Mapping for one row; its 'context' and 'question' fields are read.
        tokenizer: Object whose `apply_chat_template` method is handed to
            `create_chat_prompt` as the chat template.

    Returns:
        The same `item`, with the generated text stored under the 'prompt' key.
    """
    # NOTE(review): "qa" and 0 are presumably the task name and the number of
    # few-shot examples — confirm against prompt/prompt.py.
    prompt_text = create_chat_prompt(
        "qa",
        0,
        title="",
        context=item["context"],
        question=item["question"],
        sys_user=True,
        chat_template=tokenizer.apply_chat_template,
    )
    item["prompt"] = prompt_text
    return item

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf")

# Pad on the left so every prompt ends at the last position of its row, which is
# where generation continues from in a batched decoder-only model.
tokenizer.padding_side = 'left'

# Register the end-of-sequence token as the padding token so batches can be padded.
tokenizer.add_special_tokens({"pad_token": tokenizer.eos_token})

In [None]:
# Add a 'prompt' column to every row; the tokenizer is forwarded to the mapped
# function as a keyword argument.
nds = ds.map(create_prompt_column, fn_kwargs={"tokenizer": tokenizer})

In [None]:
# Display the dataset summary after the prompt-building map.
nds

In [None]:
def tokenization(items, tokenizer):
    """Tokenize a batch of prompts, padding them to the longest one in the batch.

    Args:
        items: Batch mapping whose 'prompt' entry holds the prompt texts.
        tokenizer: Callable invoked as `tokenizer(prompts, padding='longest')`.

    Returns:
        Whatever the tokenizer call returns for the batch.
    """
    # NOTE(review): if the chat template already inserts a BOS token into the prompt
    # text, this call may add a second one — confirm against create_chat_prompt.
    prompts = items["prompt"]
    encoded = tokenizer(prompts, padding='longest')
    return encoded

In [None]:
# Tokenize in batches, padding each batch to its own longest prompt.
# The batch size here must equal the DataLoader's batch_size (8): the DataLoader reads
# rows in order and its default collation stacks them into one tensor, which raises
# when rows that were padded in different map batches end up with different lengths.
nnds = nds.map(lambda items: tokenization(items, tokenizer=tokenizer), batched=True, batch_size=8)


In [None]:
# Return only the model inputs as torch tensors when rows are read.
tensor_columns = ["input_ids", "attention_mask"]
nnds.set_format(type="torch", columns=tensor_columns)

In [None]:
from torch.utils.data import DataLoader

# Sequential (unshuffled) batches, loaded in the main process.
# NOTE(review): default collation stacks the rows of a batch into one tensor, so all
# rows in a batch must share the same padded length — keep this batch_size consistent
# with the batch_size used when padding inside `map`.
dataloader = DataLoader(dataset=nnds, batch_size=8, num_workers=0)

In [None]:
# Collects one list of decoded answers per generated batch.
predictions = list()

In [None]:
import torch
from tqdm import tqdm
# Device the model and every input batch are moved to; a CUDA GPU is required.
device = "cuda"

In [None]:
# The Llama-2 checkpoint is published in float16; loading it at the float32 default
# doubles the memory needed for the 7B weights, so request half precision when the
# target device is a GPU and keep float32 otherwise.
model_dtype = torch.float16 if device == "cuda" else torch.float32
model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-chat-hf",
    dtype=model_dtype,
).to(device)

In [None]:
# Keep the embedding table in step with the tokenizer's vocabulary size.
tokenizer_vocab_size = len(tokenizer)
model.resize_token_embeddings(tokenizer_vocab_size)

# Tell the model which token id is used for padding.
model.config.pad_token_id = tokenizer.pad_token_id

# Switch to inference mode (the returned model is shown as the cell output).
model.eval()

In [None]:
# Start from an empty list so re-running this cell does not append a second copy of
# every prediction, which would break the row alignment with the dataset later on.
predictions.clear()

with torch.no_grad():
    for batch in tqdm(dataloader):
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)

        generated = model.generate(
            input_ids,
            attention_mask=attention_mask,
            max_new_tokens=150,
            do_sample=False,  # greedy decoding
            eos_token_id=tokenizer.eos_token_id,
            # Pass the padding id explicitly instead of relying on generate()'s
            # fallback to the EOS id (same value here, but without the warning).
            pad_token_id=tokenizer.pad_token_id,
        )

        # generate() returns prompt + continuation. Every prompt in the batch has the
        # same padded length, so dropping that many leading columns leaves only the
        # newly generated tokens.
        prompt_length = input_ids.shape[1]
        completions = tokenizer.batch_decode(
            generated[:, prompt_length:], skip_special_tokens=True
        )

        # Keep only the first line of each completion. Strip BEFORE splitting so a
        # completion that begins with a newline does not collapse to an empty answer.
        answers = [text.strip().split("\n")[0].strip() for text in completions]

        # One list per batch; the cell that builds the final dataset flattens them.
        predictions.append(answers)

In [None]:
from datasets import Dataset
from itertools import chain

In [None]:
# Flatten the per-batch answer lists into one list; batches were produced in dataset
# order, so the answers line up with the dataset rows.
generated_answers = list(chain.from_iterable(predictions))

# Pair every original row with the answer generated for it.
dataset_generated = Dataset.from_dict(
    {
        "context": nnds["context"],
        "question": nnds["question"],
        "answers": nnds["answers"],
        "answers_generated": generated_answers,
    }
)