In [1]:
import pandas as pd
from datasets import load_dataset
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
import torch

# Read the two AQuA files with the generic JSON builder. A bare `data_files`
# path is exposed under the split name 'train', which is why the held-out
# file is requested with split='train' as well.
train_dataset = load_dataset(
    path='json',
    data_files='dataset/AQuA-master/train.json',
    split='train',
)
test_dataset = load_dataset(
    path='json',
    data_files='dataset/AQuA-master/test.json',
    split='train',
)


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Convert correct answers to indices
def correct_to_idx(correct_option, num_options=5):
    """Map an answer letter to its zero-based class index ('A' -> 0, 'B' -> 1, ...).

    Args:
        correct_option: The answer label, normally a single letter. Surrounding
            whitespace and letter case are ignored. A value that is already an
            integer index is accepted unchanged, so applying the conversion a
            second time to an already-converted dataset does not fail.
        num_options: Number of answer choices; valid indices are
            ``0 .. num_options - 1``. Defaults to 5 (options 'A'-'E').

    Returns:
        int: The zero-based index of the option.

    Raises:
        ValueError: If the label is not a single letter, or the resulting
            index falls outside ``0 .. num_options - 1``.
    """
    if isinstance(correct_option, int):
        # Already converted (e.g. the cell was run twice): just validate it.
        index = correct_option
    else:
        letter = str(correct_option).strip().upper()
        if len(letter) != 1:
            raise ValueError(
                f"Expected a single answer letter, got {correct_option!r}"
            )
        index = ord(letter) - ord('A')

    # Fail here with a readable message rather than later inside the loss
    # computation, where an out-of-range class index is much harder to trace.
    if not 0 <= index < num_options:
        raise ValueError(
            f"Answer {correct_option!r} maps to index {index}, "
            f"outside the valid range 0..{num_options - 1}"
        )
    return index

# Overwrite the 'correct' column of both splits with the integer class index
# produced by correct_to_idx (training split first, then the test split).
train_dataset, test_dataset = (
    split.map(lambda row: {'correct': correct_to_idx(row['correct'])})
    for split in (train_dataset, test_dataset)
)


In [3]:
# Tokenizer and classifier are both loaded from the same pretrained
# checkpoint; the classification head is sized for five output classes.
_checkpoint = 'bert-base-uncased'
tokenizer = BertTokenizer.from_pretrained(_checkpoint)
model = BertForSequenceClassification.from_pretrained(_checkpoint, num_labels=5)

# Tokenize the dataset
def preprocess_function(examples):
    """Tokenize a batch of examples as (question, rationale) sentence pairs.

    Args:
        examples: A batch mapping with parallel lists under the keys
            "question" and "rationale" (the shape ``Dataset.map`` passes when
            called with ``batched=True``).

    Returns:
        The tokenizer's encoding for the batch, truncated to the tokenizer's
        maximum length and left un-padded.
    """
    # Passing the two texts as a pair lets the tokenizer insert the special
    # tokens itself and mark the rationale as the second segment; joining the
    # strings with a literal " [SEP] " leaves every token in segment 0.
    #
    # Padding is deliberately not applied here: padding=True would pad every
    # row to the longest row of the whole map batch and store that padding in
    # the dataset. The Trainer is constructed with this tokenizer, so batches
    # are expected to be padded dynamically at collation time instead.
    return tokenizer(
        examples["question"],
        examples["rationale"],
        truncation=True,
    )

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [36]:

# Run the tokenising step over each split, handing it batches of rows
# rather than one example at a time.
encoded_train_dataset = train_dataset.map(
    function=preprocess_function,
    batched=True,
)
encoded_test_dataset = test_dataset.map(
    function=preprocess_function,
    batched=True,
)

In [37]:
# The Trainer reads targets from a column called 'labels', so move the
# integer answers from 'correct' to that name in both splits.
encoded_train_dataset = encoded_train_dataset.rename_column(
    original_column_name="correct",
    new_column_name="labels",
)
encoded_test_dataset = encoded_test_dataset.rename_column(
    original_column_name="correct",
    new_column_name="labels",
)

In [38]:
# Hyper-parameters for the fine-tuning run.
training_args = TrainingArguments(
    # Optimisation settings
    learning_rate=2e-5,
    weight_decay=0.01,
    # Schedule: three passes over the data, two examples per device per step
    num_train_epochs=3,
    per_device_train_batch_size=2,
    # Evaluate once per epoch; run artefacts are written under ./results
    evaluation_strategy="epoch",
    output_dir='./results',
)

def compute_metrics(eval_pred):
    """Compute classification accuracy for the Trainer's evaluation loop.

    Args:
        eval_pred: The object the Trainer passes to ``compute_metrics``;
            ``predictions`` is assumed to be the array of per-class logits
            (one row per example) and ``label_ids`` the matching integer
            labels -- TODO confirm if the model is changed to return extra
            outputs.

    Returns:
        dict: ``{"accuracy": <fraction of examples predicted correctly>}``.
    """
    predicted = eval_pred.predictions.argmax(axis=-1)
    return {"accuracy": float((predicted == eval_pred.label_ids).mean())}


# Define the Trainer
# Without compute_metrics, evaluation only reports the loss, which says little
# about how often the classifier picks the right option; accuracy is added so
# the per-epoch and final evaluation results include it.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=encoded_train_dataset,
    eval_dataset=encoded_test_dataset,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

# Fine-tune the model, then write the result to the "trained_math_model" directory.
trainer.train()
trainer.save_model(output_dir="trained_math_model")

# Score the test split and show the metrics returned by evaluate().
results = trainer.evaluate(eval_dataset=encoded_test_dataset)
print(f"Test Results: {results}")


  0%|          | 74/146202 [30:05<990:16:43, 24.40s/it]
  0%|          | 438/146202 [23:28<179:14:30,  4.43s/it]

KeyboardInterrupt: 