<a href="https://colab.research.google.com/github/dietmarja/LLM-Elements/blob/main/GLUE/GLUE_01.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Using GLUE to assess the performance of an LLM

# Install necessary libraries if not already installed
!pip install -q transformers datasets

# Imports
from transformers import AutoModelForSequenceClassification, AutoTokenizer, Trainer, TrainingArguments
from datasets import load_dataset, load_metric

# Checkpoint name shared by the tokenizer and the classification model
model_name = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=2,  # classification head with two output labels
)

# Fetch the "sst2" configuration of the GLUE benchmark
dataset = load_dataset("glue", "sst2")

# Preprocess the dataset
def preprocess_function(examples):
    """Tokenize a batch of sentences for the sequence classifier.

    Args:
        examples: A batch of rows as passed by ``dataset.map(...,
            batched=True)``; only its ``'sentence'`` column is read.

    Returns:
        The tokenizer's output for the batch, truncated to at most 128
        tokens per sentence and left unpadded.
    """
    # Padding is deliberately not applied here. Inside a batched ``map`` it
    # would pad every row to the longest sentence of that map batch, storing
    # ragged, oversized rows. The Trainer receives the tokenizer, so its
    # default collator pads each training batch to its own longest sequence.
    return tokenizer(examples['sentence'], truncation=True, max_length=128)

# Run the tokenization over the dataset, one batch of rows per call
encoded_dataset = dataset.map(function=preprocess_function, batched=True)

# Define the evaluation metric
# NOTE(review): `datasets.load_metric` is deprecated in favour of the separate
# `evaluate` package and is absent from recent `datasets` releases; confirm
# the installed version still provides it.
metric = load_metric(path="glue", config_name="sst2")

def compute_metrics(p):
    """Score one evaluation pass with the module-level ``metric``.

    Args:
        p: A two-item pair that unpacks into ``(predictions, labels)``; the
            first item must support ``.argmax(axis=1)``.

    Returns:
        The result of ``metric.compute`` for the arg-max class of each row
        compared against the reference labels.
    """
    scores, gold_labels = p
    # Collapse the per-class scores to a single predicted class id per row.
    predicted_ids = scores.argmax(axis=1)
    return metric.compute(predictions=predicted_ids, references=gold_labels)

# Setup the training arguments and trainer
training_args = TrainingArguments(
    # Directory for checkpoints and logs. Given explicitly because the
    # transformers releases that accept `evaluation_strategy` treat
    # `output_dir` as a required argument and raise TypeError without it.
    output_dir="./results",
    evaluation_strategy="epoch",  # evaluate at the end of every epoch
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=3,
    weight_decay=0.01,
)

# Wire the model, the encoded splits and the metric callback into a Trainer
trainer = Trainer(
    args=training_args,
    model=model,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
    eval_dataset=encoded_dataset["validation"],
    train_dataset=encoded_dataset["train"],
)

# Fine-tune first, then collect the evaluation metrics
trainer.train()
eval_result = trainer.evaluate()

# Report the metrics returned by the evaluation run
print(f"Evaluation result: {eval_result}")
