In [1]:
from datasets import load_dataset
from dataset_generator import (
    generate_completion_dataset,
    generate_corrective_dataset,
    generate_kto_dataset,
    generate_copy_dataset,
)
from transformers import (
    pipeline,
    BitsAndBytesConfig,
    AutoModelForCausalLM,
    AutoTokenizer,
)
import re

quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype="bfloat16",
)
model_path = "microsoft/Phi-3-mini-4k-instruct"
model = AutoModelForCausalLM.from_pretrained(
    model_path, quantization_config=quantization_config
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
tokenizer.pad_token_id = tokenizer.eos_token_id
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

generate_kargs = {"max_new_tokens": 2048, "do_sample": True, "batch_size": 32}

dataset = load_dataset("openai/gsm8k", "main")
testing_set = dataset["test"]

# Write zero-shot CoT prompts with regex to format the output
prompt = "Here is a math problem: {problem}, please solve it step by step. And generate the answer with the following format: \\boxed{{answer}}."

def is_number(s):
    try:
        float(s)
        return True
    except ValueError:
        return False

def get_answer_from_output(text):
    match = re.search(r"\\boxed{(.*?)}", text)
    if match:
        answer = match.group(1)
        if is_number(answer):
            return float(answer)
    return None

`flash-attention` package not found, consider installing for better performance: No module named 'flash_attn'.
Current `flash-attention` does not support `window_size`. Either upgrade or use `attn_implementation='eager'`.
`low_cpu_mem_usage` was None, now set to True since model is quantized.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
completion_dataset = generate_completion_dataset(
    pipe,
    testing_set,
    prompt,
    get_answer_from_output,
    generate_kwargs=generate_kargs,
    generate_count_per_problem=1,
)
completion_dataset[:]

# Calculate the accuracy of the model
correct = 0
total = 0
for problem in completion_dataset:
    total += 1
    if problem["correct"]:
        correct += 1
        
print(f"Accuracy: {correct/total}")

  0%|          | 0/1319 [00:00<?, ?it/s]

ValueError: could not convert string to float: 'It takes 2/2=<<2/2=1>>1 bolt of white fiber\nSo the total amount of fabric is 2+1=<<2+1=3>>3 bolts of fabric\n#### 3'