In [1]:
import os

os.environ["CUDA_VISIBLE_DEVICES"] = "1"
os.environ["HF_ALLOW_CODE_EVAL"] = "1"
os.environ["TOKENIZERS_PARALLELISM"] = "false"

In [2]:
model_name = "meta-llama/Meta-Llama-3-8B-Instruct"
prompt_path = "prompts/python_cot.txt"
dataset_path = "data/MATH_DPO_COT"
checkpoint_path = "checkpoint_kto_dataset_python_cot.yaml"

prompt = open(prompt_path, "r").read()

In [3]:
from datasets import load_dataset, Dataset
import re
from tqdm.auto import tqdm
import os
import yaml
from trl import TextEnvironment, AutoModelForCausalLMWithValueHead
from transformers import (
    load_tool,
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
)

In [4]:
import numpy as np


def _exact_match_reward(responses, answers):
    """Reward if generated response contains correct answer."""
    rewards = []
    for response, answer in zip(responses, answers):
        reward = 0.0
        predicted_number = _get_answer(response)
        if predicted_number is not None:
            if np.abs(predicted_number - float(answer)) < 0.1:
                reward += 1.0
        else:
            reward = 0.0
        rewards.append(reward)
    return rewards


def _get_answer(response):
    try:
        pattern = r"Result\s*=\s*(-?\d+(?:\.\d+)?)\s*<submit>"
        match_pattern = re.findall(pattern, response)
        if match_pattern:
            return float(match_pattern[0])
        else:
            return None
    except Exception:
        return None

In [5]:
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype="bfloat16",
)

model = AutoModelForCausalLMWithValueHead.from_pretrained(
    model_name, quantization_config=quantization_config
)
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token

env = TextEnvironment(
    model,
    tokenizer,
    [load_tool("lvwerra/python-interpreter")],
    _exact_match_reward,
    prompt,
    generation_kwargs={"max_new_tokens": 512, "pad_token_id": tokenizer.eos_token_id},
)

`low_cpu_mem_usage` was None, now set to True since model is quantized.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
You're loading a tool from the Hub from None. Please make sure this is a source that you trust as the code within that tool will be executed on your machine. Always verify the code of the tools that you load. We recommend specifying a `revision` to ensure you're loading the code that you have checked.


[2024-06-18 16:52:30,592] [INFO] [real_accelerator.py:203:get_accelerator] Setting ds_accelerator to cuda (auto detect)


/usr/bin/ld: cannot find -laio: No such file or directory
collect2: error: ld returned 1 exit status


In [6]:
dataset = load_dataset("json", data_dir="data/MATH")


def is_real_number(text):
    try:
        float(text)
        return True
    except Exception:
        return False


def extract_answer(text):
    try:
        match = re.search(r"\\boxed{(.+?)}", text)
        return match.group(1)
    except Exception:
        return None


dataset_with_answer = dataset.map(
    lambda x: {"problem": x["problem"], "answer": extract_answer(x["solution"])}
)
dataset_with_answer = dataset_with_answer.filter(lambda x: is_real_number(x["answer"]))
dataset_with_answer = dataset_with_answer.filter(lambda x: len(x["problem"]) < 500)
dataset_with_answer = dataset_with_answer.rename_column("problem", "query")

Resolving data files:   0%|          | 0/7500 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/5000 [00:00<?, ?it/s]

In [7]:
prompts = []
completions = []
labels = []

batch_size = 8
epochs = 4


start_index = 0
start_epoch = 0
if os.path.exists(checkpoint_path):
    with open(checkpoint_path, "r") as f:
        checkpoint = yaml.safe_load(f)
        start_index = checkpoint.get("start_index", 0)
        start_epoch = checkpoint.get("epoch", 0)
        prompts = checkpoint.get("prompts", [])
        completions = checkpoint.get("completions", [])
        labels = checkpoint.get("labels", [])
for epoch in range(start_epoch, epochs):
    for i in tqdm(range(start_index, len(dataset_with_answer["train"]), batch_size)):
        batch_rows = dataset_with_answer["train"][i : i + batch_size]

        queries_tensor, responses_tensor, masks, rewards, histories = env.run(
            batch_rows["query"], answers=batch_rows["answer"]
        )
        responses = tokenizer.batch_decode(responses_tensor)
        for query, response, reward in zip(batch_rows["query"], responses, rewards):
            prompts.append(prompt + query)
            completions.append(response)
            labels.append(True if reward > 0 else False)
        checkpoint = {
            "start_index": i + batch_size,
            "epoch": epoch,
            "prompts": prompts,
            "completions": completions,
            "labels": labels,
        }
        with open(checkpoint_path, "w") as f:
            yaml.safe_dump(checkpoint, f)

new_dataset = Dataset.from_dict(
    {"prompt": prompts, "completion": completions, "label": labels}
)
os.makedirs(dataset_path, exist_ok=True)
new_dataset.save_to_disk(dataset_path)

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

Saving the dataset (0/1 shards):   0%|          | 0/4567 [00:00<?, ? examples/s]