<a href="https://colab.research.google.com/github/jacomijnprins/LoLa/blob/Jessica/baseline.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [42]:
from datasets import load_dataset
import random
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer
import numpy as np
from sklearn.metrics import accuracy_score, precision_recall_fscore_support


In [46]:
snli = load_dataset("snli")

In [47]:
# PREP DATA FOR HYPERPARAMETER TRAINING
# Filter to remove invalid rows (-1 labels)
for split in snli:
    snli[split] = snli[split].filter(lambda x: x["label"] >= 0)

# Specify the split and sample size
split = "train"
sample_size = 1000

# Ensure reproducibility
random.seed(42)

# Randomly sample three subsets
subset1 = snli[split].shuffle(seed=42).select(range(sample_size))
subset2 = snli[split].shuffle(seed=43).select(range(sample_size))
subset3 = snli[split].shuffle(seed=44).select(range(sample_size))
subset4 = snli[split].shuffle(seed=45).select(range(sample_size))
subset5 = snli[split].shuffle(seed=46).select(range(sample_size))



# Print a summary of the subsets
print("Subset 1:", subset1)
print("Subset 2:", subset2)
print("Subset 3:", subset3)
print("Subset 4:", subset4)
print("Subset 5:", subset5)

Subset 1: Dataset({
    features: ['premise', 'hypothesis', 'label'],
    num_rows: 1000
})
Subset 2: Dataset({
    features: ['premise', 'hypothesis', 'label'],
    num_rows: 1000
})
Subset 3: Dataset({
    features: ['premise', 'hypothesis', 'label'],
    num_rows: 1000
})
Subset 4: Dataset({
    features: ['premise', 'hypothesis', 'label'],
    num_rows: 1000
})
Subset 5: Dataset({
    features: ['premise', 'hypothesis', 'label'],
    num_rows: 1000
})


In [48]:
subsets = [subset1, subset2, subset3, subset4, subset5]

In [49]:
def train_val_split(dataset, train_ratio=0.8):
  train_size = int(train_ratio*len(dataset))

  train_dataset = dataset.select(range(train_size))
  val_dataset = dataset.select(range(train_size, len(dataset)))

  return train_dataset, val_dataset

#split subsets into training and validation and encode data
finetune_data = []
for subset in subsets:
  train_dataset, val_dataset = train_val_split(subset)
  train_dataset = train_dataset.map(tokenize_function, batched=True)
  val_dataset = val_dataset.map(tokenize_function, batched=True)
  finetune_data.append((train_dataset, val_dataset))


In [51]:
## PREP MODEL

#Load the tokenizer and model
model_checkpoint = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

# Tokenization function
def tokenize_function(examples):
    return tokenizer(examples["premise"], examples["hypothesis"], truncation=True)

# Tokenize the dataset
#encoded_snli = snli.map(tokenize_function, batched=True)

model = AutoModelForSequenceClassification.from_pretrained(model_checkpoint, num_labels=3)

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [52]:
def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, predictions, average="weighted")
    accuracy = accuracy_score(labels, predictions)
    return {"accuracy": accuracy, "f1": f1, "precision": precision, "recall": recall}

In [54]:
#Finetuning Distilbert learning_rate=2e-5, decay=00.1
train1, val1 = finetune_data[0]


training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=1,
    weight_decay=0.01,
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",)

trainer = Trainer(
      model=model,
      args=training_args,
      train_dataset=train1,
      eval_dataset=val1,
      tokenizer=tokenizer,
      compute_metrics=compute_metrics,
  )

trainer.train()

  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,1.091713,0.405,0.275375,0.355612,0.405


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


TrainOutput(global_step=50, training_loss=1.0999875640869141, metrics={'train_runtime': 276.1812, 'train_samples_per_second': 2.897, 'train_steps_per_second': 0.181, 'total_flos': 8672629667040.0, 'train_loss': 1.0999875640869141, 'epoch': 1.0})

In [56]:
#Finetuning Distilbert learning_rate=2e-5, decay=00.1
train2, val2 = finetune_data[1]


training_args2 = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    learning_rate=1e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=1,
    weight_decay=0.01,
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",)

trainer2 = Trainer(
      model=model,
      args=training_args2,
      train_dataset=train2,
      eval_dataset=val2,
      tokenizer=tokenizer,
      compute_metrics=compute_metrics,
  )

trainer2.train()

  trainer2 = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,1.047704,0.485,0.421677,0.467894,0.485


TrainOutput(global_step=50, training_loss=1.0414228057861328, metrics={'train_runtime': 271.3423, 'train_samples_per_second': 2.948, 'train_steps_per_second': 0.184, 'total_flos': 8747143907616.0, 'train_loss': 1.0414228057861328, 'epoch': 1.0})

In [58]:
#Finetuning Distilbert learning_rate=2e-5, decay=00.1
train3, val3 = finetune_data[2]


training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    learning_rate=1e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=1,
    weight_decay=0.001,
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",)

trainer3 = Trainer(
      model=model,
      args=training_args,
      train_dataset=train3,
      eval_dataset=val3,
      tokenizer=tokenizer,
      compute_metrics=compute_metrics,
  )

trainer3.train()

  trainer3 = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,1.005953,0.52,0.455162,0.593087,0.52


TrainOutput(global_step=50, training_loss=1.031500930786133, metrics={'train_runtime': 258.883, 'train_samples_per_second': 3.09, 'train_steps_per_second': 0.193, 'total_flos': 8668489987008.0, 'train_loss': 1.031500930786133, 'epoch': 1.0})

In [60]:
train4, val4 = finetune_data[3]


training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    learning_rate=1e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=1,
    weight_decay=0.00001,
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",)

trainer4 = Trainer(
      model=model,
      args=training_args,
      train_dataset=train4,
      eval_dataset=val4,
      tokenizer=tokenizer,
      compute_metrics=compute_metrics,
  )

trainer4.train()

  trainer4 = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.967971,0.535,0.49493,0.547454,0.535


TrainOutput(global_step=50, training_loss=0.9777275848388672, metrics={'train_runtime': 281.5979, 'train_samples_per_second': 2.841, 'train_steps_per_second': 0.178, 'total_flos': 8788540707936.0, 'train_loss': 0.9777275848388672, 'epoch': 1.0})

In [61]:
train5, val5 = finetune_data[4]


training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    learning_rate=1e-6,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=1,
    weight_decay=0.00001,
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",)

trainer5 = Trainer(
      model=model,
      args=training_args,
      train_dataset=train5,
      eval_dataset=val5,
      tokenizer=tokenizer,
      compute_metrics=compute_metrics,
  )

trainer5.train()

  trainer5 = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.922357,0.575,0.549677,0.573453,0.575


TrainOutput(global_step=50, training_loss=0.9356491088867187, metrics={'train_runtime': 262.9493, 'train_samples_per_second': 3.042, 'train_steps_per_second': 0.19, 'total_flos': 8883753348672.0, 'train_loss': 0.9356491088867187, 'epoch': 1.0})