In [193]:
import numpy as np
from scipy.special import softmax

from transformers import Trainer
from transformers import TrainingArguments
from transformers import AutoModelForSequenceClassification

from datasets import load_metric
from datasets import load_dataset
from datasets import load_from_disk

from sklearn.metrics import accuracy_score, recall_score, f1_score

from transformers import AutoTokenizer, DataCollatorWithPadding, BertForSequenceClassification

In [194]:
import gc

import torch

# Free GPU memory left over from earlier runs in this kernel.
# empty_cache() can only hand back cached blocks that no tensor references any
# more, so run the garbage collector first to release unreachable objects
# (e.g. reference cycles) that may still be holding CUDA tensors.
# Objects that are still bound to a name (an old `model` / `trainer`) are NOT
# freed by this; delete them or restart the kernel in that case.
gc.collect()
torch.cuda.empty_cache()

In [195]:
import os
# Show the kernel's working directory; the relative paths used in this
# notebook ("data", "test-trainer", "logs") are resolved against it.
print(os.getcwd())

/content


In [196]:
# !pip install datasets

In [197]:
# Load the dataset previously saved to the relative "data" directory and the
# tokenizer that matches the BERT checkpoint being fine-tuned.
raw_datasets = load_from_disk("data")
checkpoint = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)


def tokenize_function(example):
    """Tokenize the ``text`` column of a batch of examples.

    Sequences are truncated to the tokenizer's maximum length but deliberately
    NOT padded here. Padding inside a batched ``map`` would pad every example
    to the longest text of its map batch and store those pad tokens in the
    dataset, which defeats the dynamic padding done by ``data_collator`` and
    wastes memory and compute.

    Args:
        example: A batch from ``Dataset.map(batched=True)``; must provide a
            ``"text"`` entry.

    Returns:
        The tokenizer output for the batch (unpadded, truncated encodings).
    """
    return tokenizer(example["text"], truncation=True)


tokenized_datasets = raw_datasets.map(tokenize_function, batched=True)
# Dynamic padding: each training/eval batch is padded only to the length of
# its own longest example at collation time.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

In [198]:
# Inspect the splits and their columns after tokenization.
tokenized_datasets

DatasetDict({
    train: Dataset({
        features: ['text', 'label', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 35283
    })
    validation: Dataset({
        features: ['text', 'label', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 15122
    })
    test: Dataset({
        features: ['text', 'label', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 5791
    })
})

In [199]:
# Inspect the untokenized training split for comparison.
raw_datasets['train']

Dataset({
    features: ['text', 'label'],
    num_rows: 35283
})

In [200]:
def compute_metrics(eval_preds):
    """Compute accuracy and F1 for the Trainer's evaluation loop.

    Uses scikit-learn (already imported by this notebook) instead of the
    deprecated ``datasets.load_metric("glue", "mrpc")``, which also had to
    fetch a metric script at runtime. The result keeps the same two entries,
    ``"accuracy"`` and ``"f1"`` (binary F1 on the positive class, label 1).

    Args:
        eval_preds: A ``(logits, labels)`` pair as passed by ``Trainer``.

    Returns:
        dict: ``{"accuracy": float, "f1": float}``.
    """
    logits, labels = eval_preds
    # The predicted class is the index of the largest logit in each row.
    predictions = np.argmax(logits, axis=-1)
    return {
        "accuracy": float(accuracy_score(labels, predictions)),
        "f1": float(f1_score(labels, predictions)),
    }

In [201]:
# !pip install transformers[torch]
# !pip install accelerate -U

In [202]:
# !pip install accelerate==0.24.0

In [203]:
import accelerate
# Print the accelerate version the kernel actually imported.
print(accelerate.__version__)

0.24.0


In [204]:
# Fine-tuning configuration. Evaluation and checkpointing both happen once per
# epoch so that the best checkpoint can be reloaded when training finishes.
training_args = TrainingArguments(
    output_dir="test-trainer",
    # --- schedule / batch sizes ---
    num_train_epochs=10,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=256,
    # --- optimisation ---
    # The learning rate ramps up from 0 to its configured value over the first
    # 500 steps; this keeps early updates small and stabilises training.
    warmup_steps=500,
    weight_decay=0.01,
    # --- logging ---
    logging_dir='logs',
    logging_steps=10,  # write a log entry every 10 training steps
    # --- evaluation / checkpoint selection ---
    evaluation_strategy="epoch",
    save_strategy="epoch",  # kept identical to evaluation_strategy
    load_best_model_at_end=True,
    # Expects compute_metrics to report an "accuracy" entry.
    metric_for_best_model="accuracy",
    # Mixed precision (fp16=True) is intentionally left disabled.
)

# BERT encoder with a freshly initialised two-class classification head.
model = BertForSequenceClassification.from_pretrained(checkpoint, num_labels=2)

trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


OutOfMemoryError: ignored

In [None]:
# Run fine-tuning with the Trainer configured above.
trainer.train()

In [None]:
# Run inference on the test split, then print the shapes of the returned
# predictions array and label ids as a quick sanity check.
predictions = trainer.predict(tokenized_datasets["test"])
print(predictions.predictions.shape, predictions.label_ids.shape)

In [None]:
# Display the raw object returned by trainer.predict.
predictions

In [None]:
# Normalise each row of logits into class probabilities.
probabilities = softmax(predictions.predictions, axis=1)

# The predicted label is the class with the highest probability in each row.
predicted_labels = probabilities.argmax(axis=1)

print("Probabilities:\n", probabilities)
print("Predicted Labels:\n", predicted_labels)

In [None]:
# Ground-truth labels of the test split, compared against predicted_labels.
true_labels = tokenized_datasets["test"]["label"]

# Support-weighted averages across classes for recall and F1.
# NOTE(review): for single-label classification, weighted recall is
# numerically the same as accuracy, so the two printed values will coincide.
accuracy = accuracy_score(y_true=true_labels, y_pred=predicted_labels)
recall = recall_score(y_true=true_labels, y_pred=predicted_labels, average="weighted")
f1 = f1_score(y_true=true_labels, y_pred=predicted_labels, average="weighted")

print("Accuracy:", accuracy)
print("Recall:", recall)
print("F1 Score:", f1)
