In [12]:
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    pipeline,
)

In [13]:
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

model_name = "microsoft/Phi-3-mini-4k-instruct"

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    trust_remote_code=True,
)
model.config.use_cache = False

`flash-attention` package not found, consider installing for better performance: No module named 'flash_attn'.
Current `flash-attenton` does not support `window_size`. Either upgrade or use `attn_implementation='eager'`.
`low_cpu_mem_usage` was None, now set to True since model is quantized.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [14]:
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

original_generator = pipeline("text-generation", model=model, tokenizer=tokenizer)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [15]:
prompt = """Answer the following multiple choice question with only a single letter.
        Who were Harry's parents?
        (A) Henry and Maggie Potter
        (B) James and Lily Potter
        (C) William and Elizabeth Potter
        Answer: ("""

result = original_generator(prompt, max_new_tokens=1)
print(result[0]["generated_text"])

Answer the following multiple choice question with only a single letter.
        Who were Harry's parents?
        (A) Henry and Maggie Potter
        (B) James and Lily Potter
        (C) William and Elizabeth Potter
        Answer: (B


In [16]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [17]:
import json
import sys

# Bad practice goes brrrr
sys.path.append("..")

from evaluation.trivia import evaluate_trivia

questions = json.load(open("../../data/quizzes/quiz_questions/hp/quiz-questions.json"))

evaluate_trivia(original_generator, questions)

  0%|          | 0/108 [00:00<?, ?it/s]

100%|██████████| 108/108 [00:09<00:00, 11.04it/s]

Correct: 78/108 (72.22%)





In [18]:
from evaluation.sorting_hat import evaluate_by_house

hat_questions = json.load(open("../../data/quizzes/quiz_questions/hp/sorting_hat.json"))

evaluate_by_house(original_generator, "gryffindor", hat_questions)
evaluate_by_house(original_generator, "hufflepuff", hat_questions)
evaluate_by_house(original_generator, "ravenclaw", hat_questions)
evaluate_by_house(original_generator, "slytherin", hat_questions)

Desired house: gryffindor Predicted House: ravenclaw ({'gryffindor': 1275, 'hufflepuff': 900, 'ravenclaw': 1500, 'slytherin': 300})
Desired house: hufflepuff Predicted House: ravenclaw ({'gryffindor': 925, 'hufflepuff': 1300, 'ravenclaw': 1500, 'slytherin': 300})
Desired house: ravenclaw Predicted House: ravenclaw ({'gryffindor': 925, 'hufflepuff': 800, 'ravenclaw': 1500, 'slytherin': 700})
Desired house: slytherin Predicted House: ravenclaw ({'gryffindor': 725, 'hufflepuff': 600, 'ravenclaw': 1300, 'slytherin': 1150})


In [19]:
from peft import LoraConfig, get_peft_model

lora_config = LoraConfig.from_pretrained("../outputs")
finetuned_model = get_peft_model(model, lora_config)
finetuned_generator = pipeline("text-generation", model=finetuned_model, tokenizer=tokenizer)

The model 'PeftModelForCausalLM' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'LlamaForCausalLM', 'CodeGenForCausalLM', 'CohereForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'DbrxForCausalLM', 'ElectraForCausalLM', 'ErnieForCausalLM', 'FalconForCausalLM', 'FuyuForCausalLM', 'GemmaForCausalLM', 'GitForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'JambaForCausalLM', 'LlamaForCausalLM', 'MambaForCausalLM', 'MarianForCausalLM', 'MBartForCausalLM', 'MegaForCausalLM', 'MegatronBertForCausalLM', 'MistralForCausalLM', 'MixtralForCausalLM', 'MptForCausalLM', 'MusicgenForCausalLM', 'MusicgenMelodyFo

In [20]:
evaluate_trivia(finetuned_generator, questions)

evaluate_by_house(finetuned_generator, "gryffindor", hat_questions)
evaluate_by_house(finetuned_generator, "hufflepuff", hat_questions)
evaluate_by_house(finetuned_generator, "ravenclaw", hat_questions)
evaluate_by_house(finetuned_generator, "slytherin", hat_questions)

  0%|          | 0/108 [00:00<?, ?it/s]

100%|██████████| 108/108 [00:11<00:00,  9.56it/s]


Correct: 78/108 (72.22%)
Desired house: gryffindor Predicted House: ravenclaw ({'gryffindor': 1275, 'hufflepuff': 900, 'ravenclaw': 1500, 'slytherin': 300})
Desired house: hufflepuff Predicted House: ravenclaw ({'gryffindor': 925, 'hufflepuff': 1300, 'ravenclaw': 1500, 'slytherin': 300})
Desired house: ravenclaw Predicted House: ravenclaw ({'gryffindor': 925, 'hufflepuff': 800, 'ravenclaw': 1500, 'slytherin': 700})
Desired house: slytherin Predicted House: ravenclaw ({'gryffindor': 725, 'hufflepuff': 600, 'ravenclaw': 1300, 'slytherin': 1150})
