Package, Model and Dataset instantiation

In [1]:
import torch
import pandas as pd
from datasets import Dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TrainingArguments
import random
import re
from trl import GRPOConfig, GRPOTrainer, get_peft_config, ModelConfig, TrlParser
from peft import LoraConfig, TaskType, get_peft_model, prepare_model_for_kbit_training, LoftQConfig
import types


# --- Configuration: local checkpoint and dataset locations -------------------
# NOTE(review): these are machine-specific absolute paths; consider deriving
# them from a configurable base directory before sharing the notebook.
model_path = "C:/Users/Antoi/documents/resumeProjects/minimathllm/fuckItWeBall/huginn-0125_checkpoint/huginn-0125"

training_set_path = "C:/Users/Antoi/documents/resumeProjects/minimathllm/data/openmath1/OpenMathInstruct-1_train.csv"
test_set_path = "C:/Users/Antoi/documents/resumeProjects/minimathllm/data/openmath1/OpenMathInstruct-1_validation.csv"

# Tokenizer is loaded from the same directory as the model checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Load both CSV splits and rename the "question" column to "prompt".
training_set_df = pd.read_csv(training_set_path)
test_set_df = pd.read_csv(test_set_path)

training_set_df = training_set_df.rename(columns={"question": "prompt"})
test_set_df = test_set_df.rename(columns={"question": "prompt"})

# Pick the GPU when one is available, otherwise fall back to the CPU.
cuda_available = torch.cuda.is_available()
print("cuda available? " + str(cuda_available))
device = torch.device("cuda" if cuda_available else "cpu")

training_set = Dataset.from_pandas(training_set_df)
test_set = Dataset.from_pandas(test_set_df)

# torch.cuda.get_device_name() raises when no CUDA device is present, so only
# query it when CUDA is actually available (keeps the CPU fallback usable).
if cuda_available:
    print("device name: " + str(torch.cuda.get_device_name()))
else:
    print("device name: cpu")

# model.to(device)

  from .autonotebook import tqdm as notebook_tqdm



cuda available? True
device name: NVIDIA GeForce RTX 4070 Ti SUPER


Definition of reward functions

In [2]:
def format_rewards(completions, **kwargs):
    """Reward completions that follow the required output format.

    A completion earns 1.0 when it consists of exactly one
    ``<think> ... </think>`` section (with no nested ``<think>`` or
    ``</think>`` tags inside it), followed by optional whitespace and
    ``Thus the answer is: \\boxed{...}`` at the end of the text.
    Anything else earns 0.0.

    Args:
        completions: Iterable of completion strings.
        **kwargs: Extra columns passed by the caller; ignored.

    Returns:
        A list of floats (1.0 or 0.0), one per completion, in input order.
    """
    # The backslash before "boxed" must itself be escaped: in a regex, a bare
    # "\b" is a word-boundary assertion, so "\boxed" would look for the text
    # "oxed" and never match a real "\boxed{...}". Braces are escaped for
    # clarity.
    pattern = re.compile(
        r"^<think>([^<]*(?:<(?!/?think>)[^<]*)*)<\/think>\s*"
        r"Thus the answer is: \\boxed\{([\s\S]*?)\}$"
    )

    rewards = []
    for completion in completions:
        try:
            rewards.append(1.0 if pattern.search(completion) else 0.0)
        except Exception:
            # Best-effort scoring: a non-string completion simply gets no reward.
            rewards.append(0.0)

    return rewards
            



In [3]:
def accuracy_rewards(completions,expected_answer, **kwargs):
    """Reward completions whose final boxed answer equals the expected answer.

    The completion must end with ``Thus the answer is: \\boxed{...}``. The
    text inside that final box is stripped of surrounding whitespace and
    compared, as a string, with the stripped string form of the expected
    answer.

    Args:
        completions: Iterable of completion strings.
        expected_answer: Iterable of reference answers, aligned with
            ``completions``.
        **kwargs: Extra columns passed by the caller; ignored.

    Returns:
        A list of floats (1.0 for an exact match, otherwise 0.0), one per
        (completion, answer) pair.
    """
    # "\\boxed" matches a literal backslash followed by "boxed"; a bare "\b"
    # would be a word-boundary assertion and the pattern could never match.
    # The lazy group combined with the "$" anchor captures everything up to
    # the last closing brace, so nested braces such as \frac{1}{2} survive.
    answer_pattern = re.compile(r"Thus the answer is: \\boxed\{([\s\S]*?)\}$")

    rewards = []

    for completion, answer in zip(completions, expected_answer):
        try:
            # Occasionally log the completion length as a lightweight progress probe.
            if (random.random()) <= 0.01:
                print("Completion Length: " + str(len(completion)))

            answer_match = answer_pattern.search(completion)
            if answer_match is None:
                rewards.append(0.0)
                continue

            # Use the box from the final answer line, not the first \boxed{}
            # that may appear earlier in the reasoning.
            predicted = answer_match.group(1).strip()
            rewards.append(1.0 if predicted == str(answer).strip() else 0.0)

        except Exception:
            # Best-effort scoring: malformed inputs simply get no reward.
            rewards.append(0.0)
    return rewards

Generate the chain-of-thought (CoT) prompt for each question

In [4]:
def generate_CoT_prompt(question):
    """Build the chat-formatted chain-of-thought prompt for one question.

    Args:
        question: Either the raw question text, or a dataset row (mapping) as
            passed by ``Dataset.map``. For a row, the raw text is read from
            the ``"question"`` column when present, otherwise from
            ``"prompt"``.

    Returns:
        A dict with two keys:
        ``"prompt"`` - the chat-templated prompt string, and
        ``"question"`` - the raw question text. Keeping the raw text means
        mapping an already-mapped dataset again rebuilds the prompt from the
        original question instead of nesting templates.
    """
    # Dataset.map hands over the whole row, not just the question text, so the
    # text has to be pulled out before it is interpolated into the prompt.
    if isinstance(question, str):
        raw_question = question
    elif "question" in question:
        raw_question = question["question"]
    else:
        raw_question = question["prompt"]

    messages = [
        {"role": "system",
         "content" : "You are a helpful mathematician. You first think about the reasoning process in your mind and then provide the user with the answer."},
        # A single space separates the question from the formatting instructions.
        {"role": "user", "content" : f"Showing full working out, answer the following question: {raw_question} " + r"Show your work and reasoning process in <think> </think> tags. And return the final equation in the form \boxed{answer}, for example <think> reasoning steps </think> Thus the answer is: \boxed{27}. Think step by step inside <think> tags."},
    ]
    chat_input = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

    # The templated text goes into the "prompt" column, which is the column the
    # datasets passed to the trainer carry after the rename in the first cell.
    return {
        "prompt": chat_input,
        "question": raw_question,
    }

training_set = training_set.map(generate_CoT_prompt)
test_set     = test_set.map(generate_CoT_prompt)

Map: 100%|██████████| 5553234/5553234 [04:14<00:00, 21835.78 examples/s]
Map: 100%|██████████| 847570/847570 [00:38<00:00, 22229.68 examples/s]


In [5]:
# # def convert_attention_mask(example):
# #     if "attention_mask" in example:
# #         # Assuming 1 indicates a valid token and 0 indicates a masked token:
# #         example["attention_mask"] = [float(x) for x in example["attention_mask"]]
# #     return example


# # def convert_attention_mask(example):
# #     if "attention_mask" in example:
# #         # Convert the list to a torch tensor with float type
# #         example["attention_mask"] = torch.tensor(example["attention_mask"], dtype=torch.float)
# #     return example

# def convert_attention_mask(example):
#     if "attention_mask" in example:
#         # Convert the list to a torch tensor of floats and unsqueeze to have shape [B, 1, 1, key_length]
#         example["attention_mask"] = torch.tensor(example["attention_mask"], dtype=torch.float).unsqueeze(0).unsqueeze(0)
#     return example

# # When mapping your dataset:
# training_set = training_set.map(convert_attention_mask)
# test_set = test_set.map(convert_attention_mask)

Helper methods that Huginn does not currently provide

In [6]:
# #Workaround since Huginn is currently not compatible with quantizing

# def get_input_embeddings(self):
#     return self.lm_head

# def set_input_embeddings(self, value):
#     self.lm_head = value




Prepare Model Quantization

In [7]:
# bitsandbytes settings: 4-bit loading, "nf4" quant type, double quantization
# enabled, bfloat16 compute dtype.
bnb_settings = {
    "load_in_4bit": True,
    "bnb_4bit_quant_type": "nf4",
    "bnb_4bit_use_double_quant": True,
    "bnb_4bit_compute_dtype": torch.bfloat16,
}
quantizationConfig = BitsAndBytesConfig(**bnb_settings)

# Load the checkpoint with the quantization config applied. The checkpoint
# ships custom modelling code, hence trust_remote_code=True.
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
    quantization_config=quantizationConfig,
)

# List every sub-module name; these names are what LoRA target_modules refer to.
for module_name, _ in model.named_modules():
    print(module_name)


# model.get_input_embeddings = types.MethodType(get_input_embeddings, model)
# model.set_input_embeddings = types.MethodType(set_input_embeddings, model)
loftq_config = LoftQConfig(loftq_bits = 4)

# Prepare the quantized model for k-bit training, with gradient checkpointing
# left off.
model = prepare_model_for_kbit_training(model, use_gradient_checkpointing=False)

`low_cpu_mem_usage` was None, now default to True since model is quantized.
Loading checkpoint shards: 100%|██████████| 4/4 [00:10<00:00,  2.55s/it]


transformer
transformer.wte
transformer.prelude
transformer.prelude.0
transformer.prelude.0.norm_1
transformer.prelude.0.attn
transformer.prelude.0.attn.Wqkv
transformer.prelude.0.attn.proj
transformer.prelude.0.norm_2
transformer.prelude.0.mlp
transformer.prelude.0.mlp.fc
transformer.prelude.0.mlp.proj
transformer.prelude.0.mlp.nonlin
transformer.prelude.0.norm_3
transformer.prelude.0.norm_4
transformer.prelude.1
transformer.prelude.1.norm_1
transformer.prelude.1.attn
transformer.prelude.1.attn.Wqkv
transformer.prelude.1.attn.proj
transformer.prelude.1.norm_2
transformer.prelude.1.mlp
transformer.prelude.1.mlp.fc
transformer.prelude.1.mlp.proj
transformer.prelude.1.mlp.nonlin
transformer.prelude.1.norm_3
transformer.prelude.1.norm_4
transformer.adapter
transformer.core_block
transformer.core_block.0
transformer.core_block.0.norm_1
transformer.core_block.0.attn
transformer.core_block.0.attn.Wqkv
transformer.core_block.0.attn.proj
transformer.core_block.0.norm_2
transformer.core_block.




In [8]:
# Module names that receive LoRA adapters: the attention and MLP projections.
target_modules = ["attn.Wqkv", "attn.proj", "mlp.fc", "mlp.proj"]

# LoRA hyper-parameters for causal-LM fine-tuning.
# NOTE(review): PEFT appears to honour loftq_config only together with
# init_lora_weights="loftq", and on a non-quantized base model - confirm
# whether passing it here has any effect.
peft_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,   # see peft.utils.peft_types for the options
    inference_mode=False,           # False while training, True for inference
    r=16,                           # rank of the low-rank update matrices
    lora_alpha=32,
    lora_dropout=0.1,               # dropout applied on the LoRA path
    target_modules=target_modules,
    loftq_config=loftq_config,
)

# Wrap the prepared base model with the adapters and move it to the chosen device.
qlora_model = get_peft_model(model, peft_config).to(device)



Prepare trainer and training loop

In [None]:

# GRPO hyper-parameters, collected in one mapping so they are easy to tweak.
grpo_settings = dict(
    output_dir="./temp_results",
    logging_dir="./logs",
    num_train_epochs=2,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    num_generations=4,
    learning_rate=1e-4,
    warmup_steps=500,
    weight_decay=0.01,
    logging_steps=10,
    save_strategy="no",
)
training_args = GRPOConfig(**grpo_settings)

# Both reward functions are handed to the trainer: format first, then accuracy.
reward_functions = [format_rewards, accuracy_rewards]

trainer = GRPOTrainer(
    model=qlora_model,
    reward_funcs=reward_functions,
    args=training_args,
    train_dataset=training_set,
    eval_dataset=test_set,
)

# Train, evaluate on the held-out split, then save the resulting model.
trainer.train()

trainer.evaluate()

trainer.save_model("./final_model")

Step,Training Loss
