# Setup

In [1]:
import os
import json
import gc
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from model.setup import setup_model, save_model, load_model, load_tokenizer
from utils.console import isYes, printc, inputc, print_section
from model.training import preprocess_for_training, train_model
from data.gsm8k import prepare_gsm8k_dataset
from data.svamp import prepare_svamp_dataset
from data.arithmetic import prepare_arithmetic_datasets
from evaluation.math_evaluation import evaluate_math_performance
from evaluation.eval_pipeline import eval_model
from constants import MODEL_NAME, INITIAL_SAVE_PATH, TOOL_FINETUNED_SAVE_PATH, DATASET, CHECKPOINTS, TOOL_TRAIN_DATASET_PATH, PURE_TRAIN_DATASET_PATH, EVAL_DATASET_PATH
from data.arithmetic import combine_and_tokenize
import wandb
from datasets import Dataset

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Constants
current_epochs_tool = 1
current_epochs_pure = 1

In [3]:
print_section("Loading Model")
gc.collect()
torch.cuda.empty_cache()
# Try to load from saved path first, if it fails, download from HF
try:
    model, metadata = load_model(os.path.join(CHECKPOINTS, "pretrained", INITIAL_SAVE_PATH))
    tokenizer = load_tokenizer(os.path.join(CHECKPOINTS, "pretrained", INITIAL_SAVE_PATH))
    print("Loaded model from saved path")
except FileNotFoundError:
    print(f"Initial model not found. Setting up from {MODEL_NAME}")
    tokenizer, model, metadata = setup_model(MODEL_NAME)
    save_model(model, tokenizer, os.path.join(CHECKPOINTS, "pretrained", INITIAL_SAVE_PATH))

print_section("Adding Tool Tokens")
tool_tokens = {
    "additional_special_tokens": [
        "<tool:calculator>",
        "</tool>",
    ]
}
num_added = tokenizer.add_special_tokens(tool_tokens)
print(f"Added {num_added} special tokens to the tokenizer")
model.resize_token_embeddings(len(tokenizer))
print(f"Resized model embeddings to {len(tokenizer)} tokens")
print("Special tokens:", tokenizer.all_special_tokens)


Sliding Window Attention is enabled but not implemented for `sdpa`; unexpected results may be encountered.


[032m
Loading Model
[0m
Loading model from local path: ./checkpoints\pretrained\qwen-initial
Loaded model from saved path
[032m
Adding Tool Tokens
[0m
Added 2 special tokens to the tokenizer
Resized model embeddings to 151648 tokens
Special tokens: ['<|endoftext|>', '<tool:calculator>', '</tool>']


In [4]:
# LOAD DATA
print_section("Loading Data")
# Prepare datasets
dataset = prepare_arithmetic_datasets()
train_data = dataset["train_dict"]
test_data = dataset["test_dict"]
train_transformed_data = dataset["train_transformed_dict"]
test_transformed_data = dataset["test_transformed_dict"]
print(train_transformed_data['arithmetic_2da'][3])
print(test_transformed_data['arithmetic_2da'][3])
print(train_transformed_data['arithmetic_2da'][-1])
print(train_transformed_data['arithmetic_2da'][-1])

[032m
Loading Data
[0m
Processing dataset configuration: arithmetic_1dc
Processing dataset configuration: arithmetic_2da
{'question': 'Question: What is 31 plus 72?\nAnswer:', 'final_answer': '<tool:calculator>31 + 72</tool>'}
{'question': 'Question: What is 12 plus 63?\nAnswer:', 'final_answer': '<tool:calculator>12 + 63</tool>'}
{'question': 'Question: What is 46 plus 53?\nAnswer:', 'final_answer': '<tool:calculator>46 + 53</tool>'}
{'question': 'Question: What is 46 plus 53?\nAnswer:', 'final_answer': '<tool:calculator>46 + 53</tool>'}


# Pretrained

In [6]:
# PRETAINED MODEL EVALUATION
print_section("Pretrained Model Evaluation")
eval_model(MODEL_NAME, DATASET, test_data, model, tokenizer, use_tool=False)

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


[032m
Pretrained Model Evaluation
[0m
Evaluating math performance on dataset arithmetic_1dc
Evaluating math performance...
------
Question: What is (4 - 2) + 7?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


Expected: 9
Model: 9
Correct: 1, Total: 1


------
Question: What is (1 + 8) - 8?
Answer:
------


KeyboardInterrupt: 

# Toolformer

In [None]:
# TOOLFORMER FINE TUNING TRAINING
print_section("Toolformer Fine Tuning Training")

# Prepare the training data based on dataset type
train_dataset = combine_and_tokenize(train_transformed_data, tokenizer, path=TOOL_TRAIN_DATASET_PATH)

# Create a small evaluation dataset directly instead of using combine_and_tokenize
eval_examples = []
for config_name, config_dataset in test_transformed_data.items():
    # Take at most 5 examples from each configuration
    sample_size = 1
    for i in range(sample_size):
        if isinstance(config_dataset[i], dict):
            eval_examples.append({
                "question": config_dataset[i]["question"],
                "final_answer": config_dataset[i]["final_answer"]
            })

# Create the evaluation dataset directly
eval_dataset = Dataset.from_list(eval_examples)
eval_dataset = eval_dataset.map(
    lambda examples: preprocess_for_training(examples, tokenizer),
    batched=True,
    remove_columns=eval_dataset.column_names
)

print(f"Created evaluation dataset with {len(eval_dataset)} examples for monitoring")

# Load previous model if it exists
try:
    previous_path = os.path.join(CHECKPOINTS, "finetuned", TOOL_FINETUNED_SAVE_PATH)
    tokenizer, model, metadata = load_model(previous_path)
    
    # Get total epochs from metadata
    total_epochs = metadata.get("total_epochs", 0) + current_epochs_tool
    print(f"Continuing training from {metadata.get('total_epochs', 0)} epochs to {total_epochs} epochs")
except FileNotFoundError:
    # Start fresh training
    total_epochs = current_epochs_tool
    print(f"Starting fresh training for {current_epochs_tool} epochs")

# Train the model
model, tokenizer, metadata = train_model(model, tokenizer, train_dataset, num_epochs=current_epochs_tool, eval_dataset=eval_dataset)

# Save with updated epoch count
saved_path = save_model(
    model, 
    tokenizer, 
    os.path.join(CHECKPOINTS, "finetuned", TOOL_FINETUNED_SAVE_PATH),
    epochs=current_epochs_tool,
    total_epochs=total_epochs
)
print(f"Saved fine-tuned model to {saved_path} (Total epochs: {total_epochs})")

In [None]:
print_section("Latest Checkpoint Evaluation")
model, metadata = load_model(os.path.join(os.curdir, "toolformer_model", "checkpoint-225"))
print_section("Most recent training Model Evaluation")
eval_model(MODEL_NAME, DATASET, test_data, model, tokenizer, use_tool=True)

[032m
Latest Checkpoint Evaluation
[0m
Loading model from local path: .\toolformer_model\checkpoint-225


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


[032m
Most recent training Model Evaluation
[0m
Evaluating math performance on dataset arithmetic_1dc
Evaluating math performance...
------
Question: What is (4 - 2) + 7?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (4 - 2) + 7?
Answer:

Answer: 11
--------------------
Expected: 9
Model: 11
Correct: 0, Total: 1


------
Question: What is (1 + 8) - 8?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (1 + 8) - 8?
Answer:

Answer: 1
--------------------
Expected: 1
Model: 1
Correct: 1, Total: 2


------
Question: What is (4 - 3) - 4?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (4 - 3) - 4?
Answer:

Answer: 1
--------------------
Expected: -3
Model: 1
Correct: 1, Total: 3


------
Question: What is (1 + 2) - 7?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (1 + 2) - 7?
Answer:

Answer: 1
--------------------
Expected: -4
Model: 1
Correct: 1, Total: 4


------
Question: What is (1 * 3) - 6?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (1 * 3) - 6?
Answer:

Answer: 3
--------------------
Expected: -3
Model: 3
Correct: 1, Total: 5


------
Question: What is (3 + 7) * 2?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (3 + 7) * 2?
Answer:

Answer: 17
--------------------
Expected: 20
Model: 17
Correct: 1, Total: 6


------
Question: What is (8 * 5) - 9?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (8 * 5) - 9?
Answer:

Answer: 1
--------------------
Expected: 31
Model: 1
Correct: 1, Total: 7


------
Question: What is (4 + 7) * 6?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (4 + 7) * 6?
Answer:

Answer: 54
--------------------
Expected: 66
Model: 54
Correct: 1, Total: 8


------
Question: What is (9 - 7) * 1?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (9 - 7) * 1?
Answer:

Answer: 1
--------------------
Expected: 2
Model: 1
Correct: 1, Total: 9


------
Question: What is (4 * 8) * 8?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (4 * 8) * 8?
Answer:

Answer: 12
--------------------
Expected: 256
Model: 12
Correct: 1, Total: 10


------
Question: What is (7 - 5) * 1?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (7 - 5) * 1?
Answer:

Answer: 2
--------------------
Expected: 2
Model: 2
Correct: 2, Total: 11


------
Question: What is (8 * 9) - 6?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (8 * 9) - 6?
Answer:

Answer: 13
--------------------
Expected: 66
Model: 13
Correct: 2, Total: 12


------
Question: What is (6 * 3) - 3?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (6 * 3) - 3?
Answer:

Answer: 1
--------------------
Expected: 15
Model: 1
Correct: 2, Total: 13


------
Question: What is (9 * 5) - 8?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (9 * 5) - 8?
Answer:

Answer: 1
--------------------
Expected: 37
Model: 1
Correct: 2, Total: 14


------
Question: What is (7 + 1) * 4?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (7 + 1) * 4?
Answer:

Answer: 21
--------------------
Expected: 32
Model: 21
Correct: 2, Total: 15


------
Question: What is (7 * 1) * 1?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (7 * 1) * 1?
Answer:

Answer: 7
--------------------
Expected: 7
Model: 7
Correct: 3, Total: 16


------
Question: What is (3 + 9) * 3?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (3 + 9) * 3?
Answer:

Answer: 30
--------------------
Expected: 36
Model: 30
Correct: 3, Total: 17


------
Question: What is (3 * 5) - 6?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (3 * 5) - 6?
Answer:

Answer: 1
--------------------
Expected: 9
Model: 1
Correct: 3, Total: 18


------
Question: What is (5 - 3) - 9?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (5 - 3) - 9?
Answer:

Answer: 1
--------------------
Expected: -7
Model: 1
Correct: 3, Total: 19


------
Question: What is (8 * 9) + 5?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (8 * 9) + 5?
Answer:

Answer: 34
--------------------
Expected: 77
Model: 34
Correct: 3, Total: 20


------
Question: What is (4 + 6) * 2?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (4 + 6) * 2?
Answer:

Answer: 22
--------------------
Expected: 20
Model: 22
Correct: 3, Total: 21


------
Question: What is (8 * 3) * 3?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (8 * 3) * 3?
Answer:

Answer: 42
--------------------
Expected: 72
Model: 42
Correct: 3, Total: 22


------
Question: What is (9 + 8) + 7?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (9 + 8) + 7?
Answer:

Answer: 24
--------------------
Expected: 24
Model: 24
Correct: 4, Total: 23


------
Question: What is (6 - 7) * 3?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (6 - 7) * 3?
Answer:

Answer: 1
--------------------
Expected: -3
Model: 1
Correct: 4, Total: 24


------
Question: What is (7 - 2) - 8?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (7 - 2) - 8?
Answer:

Answer: 1
--------------------
Expected: -3
Model: 1
Correct: 4, Total: 25


------
Question: What is (3 + 7) + 9?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (3 + 7) + 9?
Answer:

Answer: 21
--------------------
Expected: 19
Model: 21
Correct: 4, Total: 26


------
Question: What is (5 * 1) - 8?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (5 * 1) - 8?
Answer:

Answer: 1
--------------------
Expected: -3
Model: 1
Correct: 4, Total: 27


------
Question: What is (5 + 2) - 2?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (5 + 2) - 2?
Answer:

Answer: 1
--------------------
Expected: 5
Model: 1
Correct: 4, Total: 28


------
Question: What is (5 * 5) + 4?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (5 * 5) + 4?
Answer:

Answer: 19
--------------------
Expected: 29
Model: 19
Correct: 4, Total: 29


------
Question: What is (8 - 1) * 3?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (8 - 1) * 3?
Answer:

Answer: 11
--------------------
Expected: 21
Model: 11
Correct: 4, Total: 30


------
Question: What is (8 * 4) + 8?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (8 * 4) + 8?
Answer:

Answer: 32
--------------------
Expected: 40
Model: 32
Correct: 4, Total: 31


------
Question: What is (5 - 5) - 2?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (5 - 5) - 2?
Answer:

Answer: 0
--------------------
Expected: -2
Model: 0
Correct: 4, Total: 32


------
Question: What is (7 * 5) - 4?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (7 * 5) - 4?
Answer:

Answer: 1
--------------------
Expected: 31
Model: 1
Correct: 4, Total: 33


------
Question: What is (6 + 1) - 7?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (6 + 1) - 7?
Answer:

Answer: 0
--------------------
Expected: 0
Model: 0
Correct: 5, Total: 34


------
Question: What is (1 * 5) + 9?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (1 * 5) + 9?
Answer:

Answer: 14
--------------------
Expected: 14
Model: 14
Correct: 6, Total: 35


------
Question: What is (4 * 7) * 9?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (4 * 7) * 9?
Answer:

Answer: 102
--------------------
Expected: 252
Model: 102
Correct: 6, Total: 36


------
Question: What is (5 + 2) * 5?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (5 + 2) * 5?
Answer:

Answer: 35
--------------------
Expected: 35
Model: 35
Correct: 7, Total: 37


------
Question: What is (5 * 7) * 4?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (5 * 7) * 4?
Answer:

Answer: 40
--------------------
Expected: 140
Model: 40
Correct: 7, Total: 38


------
Question: What is (6 - 5) + 7?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (6 - 5) + 7?
Answer:

Answer: 14
--------------------
Expected: 8
Model: 14
Correct: 7, Total: 39


------
Question: What is (3 - 6) * 3?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (3 - 6) * 3?
Answer:

Answer: 3
--------------------
Expected: -9
Model: 3
Correct: 7, Total: 40


------
Question: What is (4 - 5) * 4?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (4 - 5) * 4?
Answer:

Answer: 12
--------------------
Expected: -4
Model: 12
Correct: 7, Total: 41


------
Question: What is (9 + 4) - 3?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (9 + 4) - 3?
Answer:

Answer: 1
--------------------
Expected: 10
Model: 1
Correct: 7, Total: 42


------
Question: What is (7 + 4) + 7?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (7 + 4) + 7?
Answer:

Answer: 20
--------------------
Expected: 18
Model: 20
Correct: 7, Total: 43


------
Question: What is (5 * 7) * 1?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (5 * 7) * 1?
Answer:

Answer: 35
--------------------
Expected: 35
Model: 35
Correct: 8, Total: 44


------
Question: What is (7 - 5) + 5?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (7 - 5) + 5?
Answer:

Answer: 10
--------------------
Expected: 7
Model: 10
Correct: 8, Total: 45


------
Question: What is (1 * 4) * 8?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (1 * 4) * 8?
Answer:

Answer: 32
--------------------
Expected: 32
Model: 32
Correct: 9, Total: 46


------
Question: What is (7 * 1) - 3?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (7 * 1) - 3?
Answer:

Answer: 0
--------------------
Expected: 4
Model: 0
Correct: 9, Total: 47


------
Question: What is (9 + 7) - 8?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (9 + 7) - 8?
Answer:

Answer: 1
--------------------
Expected: 8
Model: 1
Correct: 9, Total: 48


------
Question: What is (2 + 7) + 7?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (2 + 7) + 7?
Answer:

Answer: 20
--------------------
Expected: 16
Model: 20
Correct: 9, Total: 49


------
Question: What is (9 * 6) - 5?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (9 * 6) - 5?
Answer:

Answer: 19
--------------------
Expected: 49
Model: 19
Correct: 9, Total: 50


------
Question: What is (5 + 8) - 1?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (5 + 8) - 1?
Answer:

Answer: 1
--------------------
Expected: 12
Model: 1
Correct: 9, Total: 51


------
Question: What is (7 - 6) + 6?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (7 - 6) + 6?
Answer:

Answer: 13
--------------------
Expected: 7
Model: 13
Correct: 9, Total: 52


------
Question: What is (1 * 1) * 4?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (1 * 1) * 4?
Answer:

Answer: 4
--------------------
Expected: 4
Model: 4
Correct: 10, Total: 53


------
Question: What is (7 * 1) * 3?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (7 * 1) * 3?
Answer:

Answer: 21
--------------------
Expected: 21
Model: 21
Correct: 11, Total: 54


------
Question: What is (5 * 3) + 8?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (5 * 3) + 8?
Answer:

Answer: 23
--------------------
Expected: 23
Model: 23
Correct: 12, Total: 55


------
Question: What is (4 - 5) * 3?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (4 - 5) * 3?
Answer:

Answer: 1
--------------------
Expected: -3
Model: 1
Correct: 12, Total: 56


------
Question: What is (8 - 9) - 4?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (8 - 9) - 4?
Answer:

Answer: 1
--------------------
Expected: -5
Model: 1
Correct: 12, Total: 57


------
Question: What is (1 - 7) * 8?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (1 - 7) * 8?
Answer:

Answer: 1
--------------------
Expected: -48
Model: 1
Correct: 12, Total: 58


------
Question: What is (2 - 4) + 6?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (2 - 4) + 6?
Answer:

Answer: 1
--------------------
Expected: 4
Model: 1
Correct: 12, Total: 59


------
Question: What is (4 - 9) * 2?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (4 - 9) * 2?
Answer:

Answer: 1
--------------------
Expected: -10
Model: 1
Correct: 12, Total: 60


------
Question: What is (4 - 1) - 2?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (4 - 1) - 2?
Answer:

Answer: 0
--------------------
Expected: 1
Model: 0
Correct: 12, Total: 61


------
Question: What is (4 * 8) - 9?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (4 * 8) - 9?
Answer:

Answer: 1
--------------------
Expected: 23
Model: 1
Correct: 12, Total: 62


------
Question: What is (7 - 9) - 1?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (7 - 9) - 1?
Answer:

Answer: 0
--------------------
Expected: -3
Model: 0
Correct: 12, Total: 63


------
Question: What is (3 + 1) - 5?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (3 + 1) - 5?
Answer:

Answer: 0
--------------------
Expected: -1
Model: 0
Correct: 12, Total: 64


------
Question: What is (4 + 1) * 6?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (4 + 1) * 6?
Answer:

Answer: 30
--------------------
Expected: 30
Model: 30
Correct: 13, Total: 65


------
Question: What is (7 * 4) + 8?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (7 * 4) + 8?
Answer:

Answer: 31
--------------------
Expected: 36
Model: 31
Correct: 13, Total: 66


------
Question: What is (5 - 3) * 5?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (5 - 3) * 5?
Answer:

Answer: 10
--------------------
Expected: 10
Model: 10
Correct: 14, Total: 67


------
Question: What is (5 - 9) * 4?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (5 - 9) * 4?
Answer:

Answer: 14
--------------------
Expected: -16
Model: 14
Correct: 14, Total: 68


------
Question: What is (9 * 4) - 2?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (9 * 4) - 2?
Answer:

Answer: 11
--------------------
Expected: 34
Model: 11
Correct: 14, Total: 69


------
Question: What is (8 + 8) * 8?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (8 + 8) * 8?
Answer:

Answer: 14
--------------------
Expected: 128
Model: 14
Correct: 14, Total: 70


------
Question: What is (6 * 9) + 2?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (6 * 9) + 2?
Answer:

Answer: 21
--------------------
Expected: 56
Model: 21
Correct: 14, Total: 71


------
Question: What is (9 - 8) - 9?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (9 - 8) - 9?
Answer:

Answer: 0
--------------------
Expected: -8
Model: 0
Correct: 14, Total: 72


------
Question: What is (6 + 9) * 3?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (6 + 9) * 3?
Answer:

Answer: 45
--------------------
Expected: 45
Model: 45
Correct: 15, Total: 73


------
Question: What is (3 * 4) * 6?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (3 * 4) * 6?
Answer:

Answer: 48
--------------------
Expected: 72
Model: 48
Correct: 15, Total: 74


------
Question: What is (9 - 9) * 6?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (9 - 9) * 6?
Answer:

Answer: 3
--------------------
Expected: 0
Model: 3
Correct: 15, Total: 75


------
Question: What is (9 - 1) * 3?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (9 - 1) * 3?
Answer:

Answer: 12
--------------------
Expected: 24
Model: 12
Correct: 15, Total: 76


------
Question: What is (5 + 3) - 6?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (5 + 3) - 6?
Answer:

Answer: 0
--------------------
Expected: 2
Model: 0
Correct: 15, Total: 77


------
Question: What is (4 + 7) + 9?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (4 + 7) + 9?
Answer:

Answer: 22
--------------------
Expected: 20
Model: 22
Correct: 15, Total: 78


------
Question: What is (9 * 2) * 1?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (9 * 2) * 1?
Answer:

Answer: 21
--------------------
Expected: 18
Model: 21
Correct: 15, Total: 79


------
Question: What is (7 + 1) + 3?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (7 + 1) + 3?
Answer:

Answer: 10
--------------------
Expected: 11
Model: 10
Correct: 15, Total: 80


------
Question: What is (6 * 1) * 4?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (6 * 1) * 4?
Answer:

Answer: 32
--------------------
Expected: 24
Model: 32
Correct: 15, Total: 81


------
Question: What is (8 * 8) - 5?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (8 * 8) - 5?
Answer:

Answer: 13
--------------------
Expected: 59
Model: 13
Correct: 15, Total: 82


------
Question: What is (8 + 1) + 4?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (8 + 1) + 4?
Answer:

Answer: 13
--------------------
Expected: 13
Model: 13
Correct: 16, Total: 83


------
Question: What is (8 + 5) * 5?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (8 + 5) * 5?
Answer:

Answer: 50
--------------------
Expected: 65
Model: 50
Correct: 16, Total: 84


------
Question: What is (4 * 3) - 8?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (4 * 3) - 8?
Answer:

Answer: 1
--------------------
Expected: 4
Model: 1
Correct: 16, Total: 85


------
Question: What is (1 + 9) + 3?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (1 + 9) + 3?
Answer:

Answer: 14
--------------------
Expected: 13
Model: 14
Correct: 16, Total: 86


------
Question: What is (8 + 2) - 8?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (8 + 2) - 8?
Answer:

Answer: 1
--------------------
Expected: 2
Model: 1
Correct: 16, Total: 87


------
Question: What is (8 * 7) - 8?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (8 * 7) - 8?
Answer:

Answer: 1
--------------------
Expected: 48
Model: 1
Correct: 16, Total: 88


------
Question: What is (4 - 7) * 2?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (4 - 7) * 2?
Answer:

Answer: 1
--------------------
Expected: -6
Model: 1
Correct: 16, Total: 89


------
Question: What is (1 + 9) - 6?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (1 + 9) - 6?
Answer:

Answer: 2
--------------------
Expected: 4
Model: 2
Correct: 16, Total: 90


------
Question: What is (9 + 4) - 9?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (9 + 4) - 9?
Answer:

Answer: 1
--------------------
Expected: 4
Model: 1
Correct: 16, Total: 91


------
Question: What is (1 + 6) + 6?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (1 + 6) + 6?
Answer:

Answer: 13
--------------------
Expected: 13
Model: 13
Correct: 17, Total: 92


------
Question: What is (3 * 5) + 8?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (3 * 5) + 8?
Answer:

Answer: 23
--------------------
Expected: 23
Model: 23
Correct: 18, Total: 93


------
Question: What is (4 * 6) - 9?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (4 * 6) - 9?
Answer:

Answer: 1
--------------------
Expected: 15
Model: 1
Correct: 18, Total: 94


------
Question: What is (8 + 1) + 6?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (8 + 1) + 6?
Answer:

Answer: 17
--------------------
Expected: 15
Model: 17
Correct: 18, Total: 95


------
Question: What is (2 + 6) + 7?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (2 + 6) + 7?
Answer:

Answer: 17
--------------------
Expected: 15
Model: 17
Correct: 18, Total: 96


------
Question: What is (9 * 7) * 9?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (9 * 7) * 9?
Answer:

Answer: 105
--------------------
Expected: 567
Model: 105
Correct: 18, Total: 97


------
Question: What is (2 * 5) + 6?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (2 * 5) + 6?
Answer:

Answer: 17
--------------------
Expected: 16
Model: 17
Correct: 18, Total: 98


------
Question: What is (5 - 3) * 9?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (5 - 3) * 9?
Answer:

Answer: 27
--------------------
Expected: 18
Model: 27
Correct: 18, Total: 99


------
Question: What is (2 - 7) - 4?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (2 - 7) - 4?
Answer:

Answer: 1
--------------------
Expected: -9
Model: 1
Correct: 18, Total: 100


------
Question: What is (5 - 1) * 9?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (5 - 1) * 9?
Answer:

Answer: 33
--------------------
Expected: 36
Model: 33
Correct: 18, Total: 101


------
Question: What is (9 - 8) + 5?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (9 - 8) + 5?
Answer:

Answer: 10
--------------------
Expected: 6
Model: 10
Correct: 18, Total: 102


------
Question: What is (7 - 7) - 9?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (7 - 7) - 9?
Answer:

Answer: 0
--------------------
Expected: -9
Model: 0
Correct: 18, Total: 103


------
Question: What is (8 + 8) + 3?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (8 + 8) + 3?
Answer:

Answer: 21
--------------------
Expected: 19
Model: 21
Correct: 18, Total: 104


------
Question: What is (4 * 9) * 8?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (4 * 9) * 8?
Answer:

Answer: 104
--------------------
Expected: 288
Model: 104
Correct: 18, Total: 105


------
Question: What is (2 * 8) - 5?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (2 * 8) - 5?
Answer:

Answer: 1
--------------------
Expected: 11
Model: 1
Correct: 18, Total: 106


------
Question: What is (7 + 6) + 4?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (7 + 6) + 4?
Answer:

Answer: 19
--------------------
Expected: 17
Model: 19
Correct: 18, Total: 107


------
Question: What is (5 * 3) + 7?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (5 * 3) + 7?
Answer:

Answer: 20
--------------------
Expected: 22
Model: 20
Correct: 18, Total: 108


------
Question: What is (9 - 6) + 7?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (9 - 6) + 7?
Answer:

Answer: 16
--------------------
Expected: 10
Model: 16
Correct: 18, Total: 109


------
Question: What is (8 * 8) - 8?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (8 * 8) - 8?
Answer:

Answer: 16
--------------------
Expected: 56
Model: 16
Correct: 18, Total: 110


------
Question: What is (8 + 9) * 1?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (8 + 9) * 1?
Answer:

Answer: 23
--------------------
Expected: 17
Model: 23
Correct: 18, Total: 111


------
Question: What is (6 + 2) * 2?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (6 + 2) * 2?
Answer:

Answer: 14
--------------------
Expected: 16
Model: 14
Correct: 18, Total: 112


------
Question: What is (5 - 8) * 3?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (5 - 8) * 3?
Answer:

Answer: 1
--------------------
Expected: -9
Model: 1
Correct: 18, Total: 113


------
Question: What is (1 - 2) - 1?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (1 - 2) - 1?
Answer:

Answer: 0
--------------------
Expected: -2
Model: 0
Correct: 18, Total: 114


------
Question: What is (5 * 9) + 2?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (5 * 9) + 2?
Answer:

Answer: 23
--------------------
Expected: 47
Model: 23
Correct: 18, Total: 115


------
Question: What is (2 * 9) - 1?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (2 * 9) - 1?
Answer:

Answer: 1
--------------------
Expected: 17
Model: 1
Correct: 18, Total: 116


------
Question: What is (9 * 7) + 7?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (9 * 7) + 7?
Answer:

Answer: 40
--------------------
Expected: 70
Model: 40
Correct: 18, Total: 117


------
Question: What is (7 - 4) + 4?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (7 - 4) + 4?
Answer:

Answer: 11
--------------------
Expected: 7
Model: 11
Correct: 18, Total: 118


------
Question: What is (7 + 9) + 6?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (7 + 9) + 6?
Answer:

Answer: 24
--------------------
Expected: 22
Model: 24
Correct: 18, Total: 119


------
Question: What is (8 + 9) + 8?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (8 + 9) + 8?
Answer:

Answer: 25
--------------------
Expected: 25
Model: 25
Correct: 19, Total: 120


------
Question: What is (3 - 5) - 9?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (3 - 5) - 9?
Answer:

Answer: 1
--------------------
Expected: -11
Model: 1
Correct: 19, Total: 121


------
Question: What is (2 + 5) * 1?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (2 + 5) * 1?
Answer:

Answer: 7
--------------------
Expected: 7
Model: 7
Correct: 20, Total: 122


------
Question: What is (3 + 8) + 7?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (3 + 8) + 7?
Answer:

Answer: 20
--------------------
Expected: 18
Model: 20
Correct: 20, Total: 123


------
Question: What is (4 * 7) * 4?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (4 * 7) * 4?
Answer:

Answer: 40
--------------------
Expected: 112
Model: 40
Correct: 20, Total: 124


------
Question: What is (4 - 9) * 6?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (4 - 9) * 6?
Answer:

Answer: 24
--------------------
Expected: -30
Model: 24
Correct: 20, Total: 125


------
Question: What is (7 + 4) - 3?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (7 + 4) - 3?
Answer:

Answer: 1
--------------------
Expected: 8
Model: 1
Correct: 20, Total: 126


------
Question: What is (4 * 5) * 1?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (4 * 5) * 1?
Answer:

Answer: 10
--------------------
Expected: 20
Model: 10
Correct: 20, Total: 127


------
Question: What is (6 * 3) * 1?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (6 * 3) * 1?
Answer:

Answer: 18
--------------------
Expected: 18
Model: 18
Correct: 21, Total: 128


------
Question: What is (6 * 5) * 3?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (6 * 5) * 3?
Answer:

Answer: 45
--------------------
Expected: 90
Model: 45
Correct: 21, Total: 129


------
Question: What is (3 * 4) + 9?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (3 * 4) + 9?
Answer:

Answer: 23
--------------------
Expected: 21
Model: 23
Correct: 21, Total: 130


------
Question: What is (8 - 2) - 1?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (8 - 2) - 1?
Answer:

Answer: 1
--------------------
Expected: 5
Model: 1
Correct: 21, Total: 131


------
Question: What is (4 + 9) * 1?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (4 + 9) * 1?
Answer:

Answer: 13
--------------------
Expected: 13
Model: 13
Correct: 22, Total: 132


------
Question: What is (6 * 5) - 2?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (6 * 5) - 2?
Answer:

Answer: 1
--------------------
Expected: 28
Model: 1
Correct: 22, Total: 133


------
Question: What is (1 * 9) + 6?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (1 * 9) + 6?
Answer:

Answer: 15
--------------------
Expected: 15
Model: 15
Correct: 23, Total: 134


------
Question: What is (7 - 2) * 8?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (7 - 2) * 8?
Answer:

Answer: 32
--------------------
Expected: 40
Model: 32
Correct: 23, Total: 135


------
Question: What is (1 * 4) + 1?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (1 * 4) + 1?
Answer:

Answer: 5
--------------------
Expected: 5
Model: 5
Correct: 24, Total: 136


------
Question: What is (8 + 7) * 7?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (8 + 7) * 7?
Answer:

Answer: 70
--------------------
Expected: 105
Model: 70
Correct: 24, Total: 137


------
Question: What is (6 - 4) - 6?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (6 - 4) - 6?
Answer:

Answer: 1
--------------------
Expected: -4
Model: 1
Correct: 24, Total: 138


------
Question: What is (3 - 3) + 7?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (3 - 3) + 7?
Answer:

Answer: 10
--------------------
Expected: 7
Model: 10
Correct: 24, Total: 139


------
Question: What is (9 * 1) - 2?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (9 * 1) - 2?
Answer:

Answer: 5
--------------------
Expected: 7
Model: 5
Correct: 24, Total: 140


------
Question: What is (8 + 6) + 4?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (8 + 6) + 4?
Answer:

Answer: 21
--------------------
Expected: 18
Model: 21
Correct: 24, Total: 141


------
Question: What is (8 - 1) + 6?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (8 - 1) + 6?
Answer:

Answer: 13
--------------------
Expected: 13
Model: 13
Correct: 25, Total: 142


------
Question: What is (6 + 3) - 8?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (6 + 3) - 8?
Answer:

Answer: 1
--------------------
Expected: 1
Model: 1
Correct: 26, Total: 143


------
Question: What is (6 * 7) - 2?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (6 * 7) - 2?
Answer:

Answer: 1
What is (6 * 7) - 2?
Answer: 1
--------------------
Expected: 40
Model: 1
Correct: 26, Total: 144


------
Question: What is (2 - 1) + 4?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (2 - 1) + 4?
Answer:

Answer: 7
--------------------
Expected: 5
Model: 7
Correct: 26, Total: 145


------
Question: What is (7 * 5) + 6?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (7 * 5) + 6?
Answer:

Answer: 21
--------------------
Expected: 41
Model: 21
Correct: 26, Total: 146


------
Question: What is (4 * 7) * 7?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (4 * 7) * 7?
Answer:

Answer: 14
--------------------
Expected: 196
Model: 14
Correct: 26, Total: 147


------
Question: What is (1 * 5) * 7?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (1 * 5) * 7?
Answer:

Answer: 35
--------------------
Expected: 35
Model: 35
Correct: 27, Total: 148


------
Question: What is (9 + 9) * 2?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (9 + 9) * 2?
Answer:

Answer: 34
--------------------
Expected: 36
Model: 34
Correct: 27, Total: 149


------
Question: What is (7 + 1) + 5?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (7 + 1) + 5?
Answer:

Answer: 13
--------------------
Expected: 13
Model: 13
Correct: 28, Total: 150


------
Question: What is (3 - 1) - 9?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (3 - 1) - 9?
Answer:

Answer: 1
--------------------
Expected: -7
Model: 1
Correct: 28, Total: 151


------
Question: What is (5 - 6) - 2?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (5 - 6) - 2?
Answer:

Answer: 0
--------------------
Expected: -3
Model: 0
Correct: 28, Total: 152


------
Question: What is (8 - 1) * 4?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (8 - 1) * 4?
Answer:

Answer: 20
--------------------
Expected: 28
Model: 20
Correct: 28, Total: 153


------
Question: What is (3 - 8) + 6?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (3 - 8) + 6?
Answer:

Answer: 11
--------------------
Expected: 1
Model: 11
Correct: 28, Total: 154


------
Question: What is (9 - 3) + 7?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (9 - 3) + 7?
Answer:

Answer: 16
--------------------
Expected: 13
Model: 16
Correct: 28, Total: 155


------
Question: What is (8 - 9) - 1?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (8 - 9) - 1?
Answer:

Answer: 0
--------------------
Expected: -2
Model: 0
Correct: 28, Total: 156


------
Question: What is (7 - 6) * 7?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (7 - 6) * 7?
Answer:

Answer: 33
--------------------
Expected: 7
Model: 33
Correct: 28, Total: 157


------
Question: What is (5 * 2) * 7?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (5 * 2) * 7?
Answer:

Answer: 42
--------------------
Expected: 70
Model: 42
Correct: 28, Total: 158


------
Question: What is (4 * 1) - 8?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (4 * 1) - 8?
Answer:

Answer: 1
--------------------
Expected: -4
Model: 1
Correct: 28, Total: 159


------
Question: What is (8 + 8) + 8?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (8 + 8) + 8?
Answer:

Answer: 30
--------------------
Expected: 24
Model: 30
Correct: 28, Total: 160


------
Question: What is (7 * 8) - 1?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (7 * 8) - 1?
Answer:

Answer: 13
--------------------
Expected: 55
Model: 13
Correct: 28, Total: 161


------
Question: What is (1 * 6) - 5?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (1 * 6) - 5?
Answer:

Answer: 1
--------------------
Expected: 1
Model: 1
Correct: 29, Total: 162


------
Question: What is (6 - 4) * 8?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (6 - 4) * 8?
Answer:

Answer: 40
--------------------
Expected: 16
Model: 40
Correct: 29, Total: 163


------
Question: What is (1 * 6) * 3?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (1 * 6) * 3?
Answer:

Answer: 18
--------------------
Expected: 18
Model: 18
Correct: 30, Total: 164


------
Question: What is (3 - 2) - 9?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (3 - 2) - 9?
Answer:

Answer: 1
--------------------
Expected: -8
Model: 1
Correct: 30, Total: 165


------
Question: What is (3 + 6) - 1?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (3 + 6) - 1?
Answer:

Answer: 0
--------------------
Expected: 8
Model: 0
Correct: 30, Total: 166


------
Question: What is (9 * 3) * 9?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (9 * 3) * 9?
Answer:

Answer: 81
--------------------
Expected: 243
Model: 81
Correct: 30, Total: 167


------
Question: What is (6 - 7) * 5?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (6 - 7) * 5?
Answer:

Answer: 15
--------------------
Expected: -5
Model: 15
Correct: 30, Total: 168


------
Question: What is (6 - 5) + 9?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (6 - 5) + 9?
Answer:

Answer: 16
--------------------
Expected: 10
Model: 16
Correct: 30, Total: 169


------
Question: What is (6 * 5) + 7?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (6 * 5) + 7?
Answer:

Answer: 22
--------------------
Expected: 37
Model: 22
Correct: 30, Total: 170


------
Question: What is (3 * 8) - 1?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (3 * 8) - 1?
Answer:

Answer: 1
--------------------
Expected: 23
Model: 1
Correct: 30, Total: 171


------
Question: What is (9 * 9) - 3?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (9 * 9) - 3?
Answer:

Answer: 24
--------------------
Expected: 78
Model: 24
Correct: 30, Total: 172


------
Question: What is (3 * 6) + 2?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (3 * 6) + 2?
Answer:

Answer: 10
--------------------
Expected: 20
Model: 10
Correct: 30, Total: 173


------
Question: What is (2 - 2) - 9?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (2 - 2) - 9?
Answer:

Answer: 1
--------------------
Expected: -9
Model: 1
Correct: 30, Total: 174


------
Question: What is (9 + 9) * 4?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (9 + 9) * 4?
Answer:

Answer: 54
--------------------
Expected: 72
Model: 54
Correct: 30, Total: 175


------
Question: What is (4 * 4) * 4?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (4 * 4) * 4?
Answer:

Answer: 44
--------------------
Expected: 64
Model: 44
Correct: 30, Total: 176


------
Question: What is (9 - 2) * 9?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (9 - 2) * 9?
Answer:

Answer: 45
--------------------
Expected: 63
Model: 45
Correct: 30, Total: 177


------
Question: What is (2 * 2) * 7?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (2 * 2) * 7?
Answer:

Answer: 42
--------------------
Expected: 28
Model: 42
Correct: 30, Total: 178


------
Question: What is (1 * 6) - 7?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (1 * 6) - 7?
Answer:

Answer: 3
--------------------
Expected: -1
Model: 3
Correct: 30, Total: 179


------
Question: What is (5 - 4) * 8?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (5 - 4) * 8?
Answer:

Answer: 24
--------------------
Expected: 8
Model: 24
Correct: 30, Total: 180


------
Question: What is (5 * 2) - 1?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (5 * 2) - 1?
Answer:

Answer: 3
--------------------
Expected: 9
Model: 3
Correct: 30, Total: 181


------
Question: What is (1 + 6) + 8?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (1 + 6) + 8?
Answer:

Answer: 21
--------------------
Expected: 15
Model: 21
Correct: 30, Total: 182


------
Question: What is (9 - 4) + 9?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (9 - 4) + 9?
Answer:

Answer: 20
--------------------
Expected: 14
Model: 20
Correct: 30, Total: 183


------
Question: What is (9 + 5) - 6?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (9 + 5) - 6?
Answer:

Answer: 2
--------------------
Expected: 8
Model: 2
Correct: 30, Total: 184


------
Question: What is (5 - 4) * 3?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (5 - 4) * 3?
Answer:

Answer: 1
--------------------
Expected: 3
Model: 1
Correct: 30, Total: 185


------
Question: What is (7 * 5) + 3?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (7 * 5) + 3?
Answer:

Answer: 16
--------------------
Expected: 38
Model: 16
Correct: 30, Total: 186


------
Question: What is (7 * 2) * 5?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (7 * 2) * 5?
Answer:

Answer: 45
--------------------
Expected: 70
Model: 45
Correct: 30, Total: 187


------
Question: What is (5 * 1) * 8?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (5 * 1) * 8?
Answer:

Answer: 40
--------------------
Expected: 40
Model: 40
Correct: 31, Total: 188


------
Question: What is (7 * 2) * 4?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (7 * 2) * 4?
Answer:

Answer: 44
--------------------
Expected: 56
Model: 44
Correct: 31, Total: 189


------
Question: What is (1 + 7) + 4?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (1 + 7) + 4?
Answer:

Answer: 12
--------------------
Expected: 12
Model: 12
Correct: 32, Total: 190


------
Question: What is (2 + 6) + 4?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (2 + 6) + 4?
Answer:

Answer: 14
--------------------
Expected: 12
Model: 14
Correct: 32, Total: 191


------
Question: What is (4 + 8) + 2?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (4 + 8) + 2?
Answer:

Answer: 14
--------------------
Expected: 14
Model: 14
Correct: 33, Total: 192


------
Question: What is (8 * 9) * 9?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (8 * 9) * 9?
Answer:

Answer: 108
--------------------
Expected: 648
Model: 108
Correct: 33, Total: 193


------
Question: What is (7 + 1) + 8?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (7 + 1) + 8?
Answer:

Answer: 21
--------------------
Expected: 16
Model: 21
Correct: 33, Total: 194


------
Question: What is (4 + 2) + 2?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (4 + 2) + 2?
Answer:

Answer: 10
--------------------
Expected: 8
Model: 10
Correct: 33, Total: 195


------
Question: What is (7 - 4) * 2?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (7 - 4) * 2?
Answer:

Answer: 1
--------------------
Expected: 6
Model: 1
Correct: 33, Total: 196


------
Question: What is (4 - 2) + 8?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (4 - 2) + 8?
Answer:

Answer: 13
--------------------
Expected: 10
Model: 13
Correct: 33, Total: 197


------
Question: What is (3 * 5) - 8?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (3 * 5) - 8?
Answer:

Answer: 1
--------------------
Expected: 7
Model: 1
Correct: 33, Total: 198


------
Question: What is (1 - 5) - 3?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is (1 - 5) - 3?
Answer:

Answer: 0
--------------------
Expected: -7
Model: 0
Correct: 33, Total: 199


------
Question: What is (9 - 5) * 1?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is (9 - 5) * 1?
Answer:

Answer: 4
--------------------
Expected: 4
Model: 4
Correct: 34, Total: 200


Results saved to: results\Qwen_Qwen2.5-Math-1.5B\Qwen_Qwen2.5-Math-1.5B_arithmetic_arithmetic_1dc_20250512_150123.csv
Type statistics saved to: results\Qwen_Qwen2.5-Math-1.5B\Qwen_Qwen2.5-Math-1.5B_arithmetic_arithmetic_1dc_20250512_150123_stats.csv
Math Evaluation Results for arithmetic_1dc: {'accuracy': 0.17, 'correct': 34, 'total': 200}
Evaluating math performance on dataset arithmetic_2da
Evaluating math performance...
------
Question: What is 67 plus 88?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is 67 plus 88?
Answer:

Answer: 155
--------------------
Expected: 155
Model: 155
Correct: 1, Total: 1


------
Question: What is 38 plus 98?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is 38 plus 98?
Answer:

Answer: 196
--------------------
Expected: 136
Model: 196
Correct: 1, Total: 2


------
Question: What is 82 plus 64?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is 82 plus 64?
Answer:

Answer: 146
--------------------
Expected: 146
Model: 146
Correct: 2, Total: 3


------
Question: What is 12 plus 63?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is 12 plus 63?
Answer:

Answer: 75
--------------------
Expected: 75
Model: 75
Correct: 3, Total: 4


------
Question: What is 60 plus 44?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is 60 plus 44?
Answer:

Answer: 104
--------------------
Expected: 104
Model: 104
Correct: 4, Total: 5


------
Question: What is 17 plus 99?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is 17 plus 99?
Answer:

Answer: 116
--------------------
Expected: 116
Model: 116
Correct: 5, Total: 6


------
Question: What is 54 plus 57?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is 54 plus 57?
Answer:

Answer: 111
--------------------
Expected: 111
Model: 111
Correct: 6, Total: 7


------
Question: What is 3 plus 27?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is 3 plus 27?
Answer:

Answer: 30
--------------------
Expected: 30
Model: 30
Correct: 7, Total: 8


------
Question: What is 61 plus 94?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is 61 plus 94?
Answer:

Answer: 155
--------------------
Expected: 155
Model: 155
Correct: 8, Total: 9


------
Question: What is 14 plus 30?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is 14 plus 30?
Answer:

Answer: 44
--------------------
Expected: 44
Model: 44
Correct: 9, Total: 10


------
Question: What is 45 plus 40?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is 45 plus 40?
Answer:

Answer: 85
--------------------
Expected: 85
Model: 85
Correct: 10, Total: 11


------
Question: What is 6 plus 39?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is 6 plus 39?
Answer:

Answer: 45
--------------------
Expected: 45
Model: 45
Correct: 11, Total: 12


------
Question: What is 55 plus 7?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is 55 plus 7?
Answer:

Answer: 62
--------------------
Expected: 62
Model: 62
Correct: 12, Total: 13


------
Question: What is 21 plus 74?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is 21 plus 74?
Answer:

Answer: 95
--------------------
Expected: 95
Model: 95
Correct: 13, Total: 14


------
Question: What is 17 plus 48?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is 17 plus 48?
Answer:

Answer: 65
--------------------
Expected: 65
Model: 65
Correct: 14, Total: 15


------
Question: What is 64 plus 29?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is 64 plus 29?
Answer:

Answer: 93
--------------------
Expected: 93
Model: 93
Correct: 15, Total: 16


------
Question: What is 65 plus 63?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is 65 plus 63?
Answer:

Answer: 138
--------------------
Expected: 128
Model: 138
Correct: 15, Total: 17


------
Question: What is 50 plus 6?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is 50 plus 6?
Answer:

Answer: 56
--------------------
Expected: 56
Model: 56
Correct: 16, Total: 18


------
Question: What is 5 plus 34?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is 5 plus 34?
Answer:

Answer: 39
--------------------
Expected: 39
Model: 39
Correct: 17, Total: 19


------
Question: What is 31 plus 94?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is 31 plus 94?
Answer:

Answer: 125
--------------------
Expected: 125
Model: 125
Correct: 18, Total: 20


------
Question: What is 98 plus 50?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is 98 plus 50?
Answer:

Answer: 148
--------------------
Expected: 148
Model: 148
Correct: 19, Total: 21


------
Question: What is 17 plus 71?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is 17 plus 71?
Answer:

Answer: 88
--------------------
Expected: 88
Model: 88
Correct: 20, Total: 22


------
Question: What is 27 plus 94?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is 27 plus 94?
Answer:

Answer: 121
--------------------
Expected: 121
Model: 121
Correct: 21, Total: 23


------
Question: What is 66 plus 99?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is 66 plus 99?
Answer:

Answer: 165
--------------------
Expected: 165
Model: 165
Correct: 22, Total: 24


------
Question: What is 36 plus 25?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is 36 plus 25?
Answer:

Answer: 51
--------------------
Expected: 61
Model: 51
Correct: 22, Total: 25


------
Question: What is 45 plus 10?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is 45 plus 10?
Answer:

Answer: 55
--------------------
Expected: 55
Model: 55
Correct: 23, Total: 26


------
Question: What is 54 plus 77?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is 54 plus 77?
Answer:

Answer: 131
--------------------
Expected: 131
Model: 131
Correct: 24, Total: 27


------
Question: What is 86 plus 40?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is 86 plus 40?
Answer:

Answer: 126
--------------------
Expected: 126
Model: 126
Correct: 25, Total: 28


------
Question: What is 91 plus 31?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is 91 plus 31?
Answer:

Answer: 122
--------------------
Expected: 122
Model: 122
Correct: 26, Total: 29


------
Question: What is 71 plus 49?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is 71 plus 49?
Answer:

Answer: 120
--------------------
Expected: 120
Model: 120
Correct: 27, Total: 30


------
Question: What is 10 plus 95?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is 10 plus 95?
Answer:

Answer: 10 + 95 = 105
--------------------
Expected: 105
Model: 105
Correct: 28, Total: 31


------
Question: What is 31 plus 38?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is 31 plus 38?
Answer:

Answer: 59
--------------------
Expected: 69
Model: 59
Correct: 28, Total: 32


------
Question: What is 84 plus 98?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is 84 plus 98?
Answer:

Answer: 182
--------------------
Expected: 182
Model: 182
Correct: 29, Total: 33


------
Question: What is 79 plus 2?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is 79 plus 2?
Answer:

Answer: 81
--------------------
Expected: 81
Model: 81
Correct: 30, Total: 34


------
Question: What is 70 plus 33?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is 70 plus 33?
Answer:

Answer: 103
--------------------
Expected: 103
Model: 103
Correct: 31, Total: 35


------
Question: What is 4 plus 36?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is 4 plus 36?
Answer:

Answer: 43
--------------------
Expected: 40
Model: 43
Correct: 31, Total: 36


------
Question: What is 46 plus 14?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is 46 plus 14?
Answer:

Answer: 50
--------------------
Expected: 60
Model: 50
Correct: 31, Total: 37


------
Question: What is 10 plus 69?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is 10 plus 69?
Answer:

Answer: 79
--------------------
Expected: 79
Model: 79
Correct: 32, Total: 38


------
Question: What is 40 plus 91?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is 40 plus 91?
Answer:

Answer: 131
--------------------
Expected: 131
Model: 131
Correct: 33, Total: 39


------
Question: What is 65 plus 59?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is 65 plus 59?
Answer:

Answer: 124
--------------------
Expected: 124
Model: 124
Correct: 34, Total: 40


------
Question: What is 85 plus 80?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is 85 plus 80?
Answer:

Answer: 165
--------------------
Expected: 165
Model: 165
Correct: 35, Total: 41


------
Question: What is 21 plus 32?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is 21 plus 32?
Answer:

Answer: 53
--------------------
Expected: 53
Model: 53
Correct: 36, Total: 42


------
Question: What is 54 plus 23?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is 54 plus 23?
Answer:

Answer: 97
--------------------
Expected: 77
Model: 97
Correct: 36, Total: 43


------
Question: What is 47 plus 43?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is 47 plus 43?
Answer:

Answer: 70
--------------------
Expected: 90
Model: 70
Correct: 36, Total: 44


------
Question: What is 56 plus 90?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is 56 plus 90?
Answer:

Answer: 146
--------------------
Expected: 146
Model: 146
Correct: 37, Total: 45


------
Question: What is 9 plus 17?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is 9 plus 17?
Answer:

Answer: 26
--------------------
Expected: 26
Model: 26
Correct: 38, Total: 46


------
Question: What is 53 plus 56?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is 53 plus 56?
Answer:

Answer: 109
--------------------
Expected: 109
Model: 109
Correct: 39, Total: 47


------
Question: What is 47 plus 62?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is 47 plus 62?
Answer:

Answer: 119
--------------------
Expected: 109
Model: 119
Correct: 39, Total: 48


------
Question: What is 58 plus 23?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is 58 plus 23?
Answer:

Answer: 81
--------------------
Expected: 81
Model: 81
Correct: 40, Total: 49


------
Question: What is 70 plus 85?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is 70 plus 85?
Answer:

Answer: 155
--------------------
Expected: 155
Model: 155
Correct: 41, Total: 50


------
Question: What is 90 plus 2?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is 90 plus 2?
Answer:

Answer: 92
--------------------
Expected: 92
Model: 92
Correct: 42, Total: 51


------
Question: What is 55 plus 60?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is 55 plus 60?
Answer:

Answer: 115
--------------------
Expected: 115
Model: 115
Correct: 43, Total: 52


------
Question: What is 34 plus 36?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is 34 plus 36?
Answer:

Answer: 50
--------------------
Expected: 70
Model: 50
Correct: 43, Total: 53


------
Question: What is 66 plus 81?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is 66 plus 81?
Answer:

Answer: 147
--------------------
Expected: 147
Model: 147
Correct: 44, Total: 54


------
Question: What is 66 plus 85?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is 66 plus 85?
Answer:

Answer: 151
--------------------
Expected: 151
Model: 151
Correct: 45, Total: 55


------
Question: What is 97 plus 55?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is 97 plus 55?
Answer:

Answer: 152
--------------------
Expected: 152
Model: 152
Correct: 46, Total: 56


------
Question: What is 83 plus 60?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is 83 plus 60?
Answer:

Answer: 143
--------------------
Expected: 143
Model: 143
Correct: 47, Total: 57


------
Question: What is 2 plus 6?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is 2 plus 6?
Answer:

Answer: 8
--------------------
Expected: 8
Model: 8
Correct: 48, Total: 58


------
Question: What is 95 plus 96?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is 95 plus 96?
Answer:

Answer: 191
--------------------
Expected: 191
Model: 191
Correct: 49, Total: 59


------
Question: What is 41 plus 15?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is 41 plus 15?
Answer:

Answer: 41 + 15 = 56
--------------------
Expected: 56
Model: 56
Correct: 50, Total: 60


------
Question: What is 53 plus 65?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is 53 plus 65?
Answer:

Answer: 98
--------------------
Expected: 118
Model: 98
Correct: 50, Total: 61


------
Question: What is 36 plus 37?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is 36 plus 37?
Answer:

Answer: 53
--------------------
Expected: 73
Model: 53
Correct: 50, Total: 62


------
Question: What is 62 plus 44?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is 62 plus 44?
Answer:

Answer: 106
--------------------
Expected: 106
Model: 106
Correct: 51, Total: 63


------
Question: What is 35 plus 40?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is 35 plus 40?
Answer:

Answer: 75
--------------------
Expected: 75
Model: 75
Correct: 52, Total: 64


------
Question: What is 69 plus 12?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is 69 plus 12?
Answer:

Answer: 81
--------------------
Expected: 81
Model: 81
Correct: 53, Total: 65


------
Question: What is 81 plus 39?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is 81 plus 39?
Answer:

Answer: 120
--------------------
Expected: 120
Model: 120
Correct: 54, Total: 66


------
Question: What is 96 plus 30?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is 96 plus 30?
Answer:

Answer: 126
--------------------
Expected: 126
Model: 126
Correct: 55, Total: 67


------
Question: What is 57 plus 56?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is 57 plus 56?
Answer:

Answer: 113
--------------------
Expected: 113
Model: 113
Correct: 56, Total: 68


------
Question: What is 67 plus 75?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is 67 plus 75?
Answer:

Answer: 142
--------------------
Expected: 142
Model: 142
Correct: 57, Total: 69


------
Question: What is 44 plus 17?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is 44 plus 17?
Answer:

Answer: 44 + 17 = 61
--------------------
Expected: 61
Model: 61
Correct: 58, Total: 70


------
Question: What is 75 plus 37?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is 75 plus 37?
Answer:

Answer: 112
--------------------
Expected: 112
Model: 112
Correct: 59, Total: 71


------
Question: What is 23 plus 69?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is 23 plus 69?
Answer:

Answer: 92
--------------------
Expected: 92
Model: 92
Correct: 60, Total: 72


------
Question: What is 85 plus 16?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is 85 plus 16?
Answer:

Answer: 101
--------------------
Expected: 101
Model: 101
Correct: 61, Total: 73


------
Question: What is 7 plus 1?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is 7 plus 1?
Answer:

Answer: 8
--------------------
Expected: 8
Model: 8
Correct: 62, Total: 74


------
Question: What is 33 plus 85?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is 33 plus 85?
Answer:

Answer: 128
--------------------
Expected: 118
Model: 128
Correct: 62, Total: 75


------
Question: What is 53 plus 48?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is 53 plus 48?
Answer:

Answer: 101
--------------------
Expected: 101
Model: 101
Correct: 63, Total: 76


------
Question: What is 20 plus 0?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is 20 plus 0?
Answer:

Answer: 20
--------------------
Expected: 20
Model: 20
Correct: 64, Total: 77


------
Question: What is 0 plus 6?
Answer:
------


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


USING TOOLS, PRE-TOOL TEXT: Question: What is 0 plus 6?
Answer:

Answer: 6
--------------------
Expected: 6
Model: 6
Correct: 65, Total: 78


------
Question: What is 95 plus 53?
Answer:
------
USING TOOLS, PRE-TOOL TEXT: Question: What is 95 plus 53?
Answer:

Answer: 148
--------------------
Expected: 148
Model: 148
Correct: 66, Total: 79


------
Question: What is 93 plus 27?
Answer:
------


# Pure Fine Tuning

In [None]:
# PURE FINE TUNING TRAINING
print_section("Pure Fine Tuning Training")

# Prepare the training data based on dataset type
train_dataset = combine_and_tokenize(train_data, tokenizer, path=PURE_TRAIN_DATASET_PATH)

# Load previous model if it exists
try:
    previous_path = os.path.join(CHECKPOINTS, "finetuned", TOOL_FINETUNED_SAVE_PATH)
    tokenizer, model, metadata = load_model(previous_path)
    
    # Get total epochs from metadata
    total_epochs = metadata.get("total_epochs", 0) + current_epochs_pure
    print(f"Continuing training from {metadata.get('total_epochs', 0)} epochs to {total_epochs} epochs")
except FileNotFoundError:
    # Start fresh training
    total_epochs = current_epochs_pure
    print(f"Starting fresh training for {current_epochs_pure} epochs")

# Train the model
model, tokenizer, metadata = train_model(model, tokenizer, train_dataset, num_epochs=current_epochs_pure)

# Save with updated epoch count
saved_path = save_model(
    model, 
    tokenizer, 
    os.path.join(CHECKPOINTS, "finetuned", TOOL_FINETUNED_SAVE_PATH),
    epochs=current_epochs_pure,
    total_epochs=total_epochs
)
print(f"Saved fine-tuned model to {saved_path} (Total epochs: {total_epochs})")