# Hands-on: Setting a Closed-Book Baseline

## Installation

In [1]:
!pip install datasets evaluate transformers accelerate bitsandbytes sentencepiece

Collecting datasets
  Obtaining dependency information for datasets from https://files.pythonhosted.org/packages/e3/f5/668b3444a2f487b0052b908af631fe39eeb2bdb2359d9bbc2c3b80b71119/datasets-3.5.1-py3-none-any.whl.metadata
  Downloading datasets-3.5.1-py3-none-any.whl.metadata (19 kB)
Collecting evaluate
  Obtaining dependency information for evaluate from https://files.pythonhosted.org/packages/a2/e7/cbca9e2d2590eb9b5aa8f7ebabe1beb1498f9462d2ecede5c9fd9735faaf/evaluate-0.4.3-py3-none-any.whl.metadata
  Downloading evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)
Collecting accelerate
  Obtaining dependency information for accelerate from https://files.pythonhosted.org/packages/63/b1/8198e3cdd11a426b1df2912e3381018c4a4a55368f6d0857ba3ca418ef93/accelerate-1.6.0-py3-none-any.whl.metadata
  Downloading accelerate-1.6.0-py3-none-any.whl.metadata (19 kB)
Collecting bitsandbytes
  Obtaining dependency information for bitsandbytes from https://files.pythonhosted.org/packages/9b/63/489ef9cd7a33

## Imports

In [2]:
import random, torch, evaluate, json
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from huggingface_hub import login
import os

# SECURITY: never hardcode an access token in a notebook. The token that was
# previously committed on this line must be treated as leaked and revoked in
# the Hugging Face account settings.
# The token is read from the HF_TOKEN environment variable; when it is unset,
# `token=None` makes `login` fall back to its interactive prompt.
login(token=os.environ.get("HF_TOKEN"))

## Load Data

In [None]:
# Size of the downloaded training slice and of the evaluated subset; tune here
# instead of editing literals scattered through the cell.
N_LOADED = 200
N_EVAL = 25

ds = load_dataset("hotpot_qa", "distractor", split=f"train[:{N_LOADED}]")

# Slicing rows first returns a dict of column name -> list, so the questions
# and their gold answers are guaranteed to come from the same N_EVAL rows.
eval_rows = ds[:N_EVAL]
questions = eval_rows["question"]
gold_answers = eval_rows["answer"]

assert len(questions) == len(gold_answers) == N_EVAL, "question/answer lists are misaligned"

README.md:   0%|          | 0.00/9.19k [00:00<?, ?B/s]

hotpot_qa.py:   0%|          | 0.00/6.42k [00:00<?, ?B/s]

In [None]:
# `ds` was loaded with an explicit `split`, so it is a single `Dataset` (not a
# `DatasetDict`) and has no `.keys()`; `column_names` lists its fields.
ds.column_names

In [None]:
# Display the first selected question as a quick sanity check.
questions[0]

In [None]:
# Display the gold answer stored at the same index as the question above.
gold_answers[0]

## Load Model

In [None]:
# Checkpoint used for the closed-book baseline (gated: requires an
# authenticated Hugging Face session with access to the model).
model_name = "meta-llama/Meta-Llama-3-8B-Instruct"

tokenizer = AutoTokenizer.from_pretrained(model_name)
# device_map="auto" lets accelerate place the weights on the available
# device(s); float16 halves the memory footprint relative to float32.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    torch_dtype=torch.float16,
)

# `temperature` only has an effect when sampling is enabled, so `do_sample` is
# set explicitly instead of relying on whatever default the checkpoint's
# generation config happens to ship with.
generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    do_sample=True,
    temperature=0.1,
    max_new_tokens=128,
)

## Closed-book LLM Prediction

In [None]:
def _first_answer_line(completion: str) -> str:
    """Return the first non-empty line of ``completion``, stripped ("" if none).

    The model is free to keep generating after its answer (for example by
    inventing further "Question: ... Answer briefly:" pairs), so only the first
    line of the continuation is treated as the answer to the question asked.
    """
    for line in completion.splitlines():
        line = line.strip()
        if line:
            return line
    return ""


# The generator samples, so fix torch's RNG to make re-runs of this cell
# comparable.
torch.manual_seed(0)

predictions = []

for q in questions:
    prompt = ( "You are an expert question-answering system.\n"
               f"Question: {q}\n"
               "Answer briefly:\n" )
    # return_full_text=False yields only the newly generated text, so the
    # prompt does not have to be split off again. Splitting the full text on
    # the prompt marker and taking the last piece would pick up the answer to
    # a hallucinated follow-up question whenever the model repeats the marker.
    completion = generator(prompt, return_full_text=False)[0]["generated_text"]
    ans = _first_answer_line(completion)
    print(f"{q} -> {ans}")
    predictions.append(ans)


In [None]:
squad = evaluate.load("squad")

# The SQuAD metric does not accept plain strings: each prediction must be a
# dict with "id" and "prediction_text", and each reference a dict with a
# matching "id" and an "answers" record. Positional indices serve as ids so
# that prediction i is scored against gold answer i.
assert len(predictions) == len(gold_answers), "predictions and gold answers are misaligned"
squad_predictions = [
    {"id": str(i), "prediction_text": pred}
    for i, pred in enumerate(predictions)
]
squad_references = [
    # There is no context passage in the closed-book setting, so answer_start
    # is only a placeholder required by the metric's input schema.
    {"id": str(i), "answers": {"text": [gold], "answer_start": [0]}}
    for i, gold in enumerate(gold_answers)
]

results = squad.compute(predictions=squad_predictions, references=squad_references)
print(json.dumps(results, indent=2))