In [1]:
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    pipeline,
)

In [2]:
# Hub checkpoint to load; the same identifier is used for the tokenizer.
model_name = "microsoft/Phi-3-mini-4k-instruct"

# 4-bit NF4 weights with double quantization; compute dtype is bfloat16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
)

# trust_remote_code=True lets the checkpoint's own modeling code run locally.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,
    quantization_config=bnb_config,
)

# NOTE(review): use_cache=False is usually a training-time setting; confirm it
# is intended here, since the cells below only run generation.
model.config.use_cache = False

`flash-attention` package not found, consider installing for better performance: No module named 'flash_attn'.
Current `flash-attenton` does not support `window_size`. Either upgrade or use `attn_implementation='eager'`.
`low_cpu_mem_usage` was None, now set to True since model is quantized.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [3]:
# Import the factory under an alias each time this cell runs. The global name
# `pipeline` is rebound to the generator object below (later cells call it by
# that name), so a bare `pipeline(...)` call on a re-run of this cell would hit
# the generator instead of the transformers factory.
from transformers import pipeline as build_pipeline

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

# Text-generation pipeline wrapping the model loaded above and its tokenizer.
pipeline = build_pipeline("text-generation", model=model, tokenizer=tokenizer)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [4]:
# Multiple-choice probe. The prompt stops right after "(" and only one new
# token is requested, so the completion is expected to be the answer letter.
# NOTE: the indentation of the continuation lines is part of the prompt text.
prompt = """Answer the following multiple choice question with only a single letter.
        Who were Harry's parents?
        (A) Henry and Maggie Potter
        (B) James and Lily Potter
        (C) William and Elizabeth Potter
        Answer: ("""

result = pipeline(prompt, max_new_tokens=1)

# "generated_text" contains the prompt followed by the completion.
top_generation = result[0]
print(top_generation["generated_text"])

You are not running the flash-attention implementation, expect numerical differences.


Answer the following multiple choice question with only a single letter.
        Who were Harry's parents?
        (A) Henry and Maggie Potter
        (B) James and Lily Potter
        (C) William and Elizabeth Potter
        Answer: (B


In [9]:
# Free-form persona probe, capped at 100 newly generated tokens.
prompt = "You are Harry Potter. Who are your best friends? Answer in a single sentence."

result = pipeline(prompt, max_new_tokens=100)

# "generated_text" contains the prompt followed by the completion.
top_generation = result[0]
print(top_generation["generated_text"])

You are Harry Potter. Who are your best friends? Answer in a single sentence.


### Response

My best friends are Ron Weasley and Hermione Granger.


In [5]:
# Enable IPython's autoreload extension in mode 2, so edited modules are
# reloaded before each cell executes instead of requiring a kernel restart.
%load_ext autoreload
%autoreload 2

In [None]:
import json
import sys

# The `evaluation` package is imported through a relative path rather than an
# installed distribution, so ".." has to be on the import path. The guard keeps
# re-runs of this cell from appending duplicate entries.
if ".." not in sys.path:
    sys.path.append("..")

from evaluation.trivia import evaluate_trivia

# Context manager so the file handle is closed once the JSON is parsed.
with open(
    "../../data/quizzes/quiz_questions/hp/quiz-questions.json", encoding="utf-8"
) as quiz_file:
    questions = json.load(quiz_file)

evaluate_trivia(pipeline, questions)

In [37]:
from evaluation.sorting_hat import evaluate_by_house

# Context manager so the file handle is closed once the JSON is parsed.
with open(
    "../../data/quizzes/quiz_questions/hp/sorting_hat.json", encoding="utf-8"
) as hat_file:
    hat_questions = json.load(hat_file)

# Run the same evaluation once per target house, in the original order.
# The recorded output shows only printed lines, so no return value is echoed.
for house in ("gryffindor", "hufflepuff", "ravenclaw", "slytherin"):
    evaluate_by_house(pipeline, house, hat_questions)

Desired house: gryffindor Predicted House: ravenclaw ({'gryffindor': 1275, 'hufflepuff': 900, 'ravenclaw': 1500, 'slytherin': 300})
Desired house: hufflepuff Predicted House: ravenclaw ({'gryffindor': 925, 'hufflepuff': 1300, 'ravenclaw': 1500, 'slytherin': 300})
Desired house: ravenclaw Predicted House: ravenclaw ({'gryffindor': 925, 'hufflepuff': 800, 'ravenclaw': 1500, 'slytherin': 700})
Desired house: slytherin Predicted House: ravenclaw ({'gryffindor': 725, 'hufflepuff': 600, 'ravenclaw': 1300, 'slytherin': 1150})
