<a href="https://colab.research.google.com/github/alanssitis/ROS-Intro-Project/blob/main/MobileBERT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install transformers[torch] datasets evaluate accelerate

Collecting transformers[torch]
  Downloading transformers-4.34.0-py3-none-any.whl (7.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.7/7.7 MB[0m [31m60.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting datasets
  Downloading datasets-2.14.5-py3-none-any.whl (519 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m519.6/519.6 kB[0m [31m50.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting evaluate
  Downloading evaluate-0.4.0-py3-none-any.whl (81 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m81.4/81.4 kB[0m [31m10.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting accelerate
  Downloading accelerate-0.23.0-py3-none-any.whl (258 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m258.1/258.1 kB[0m [31m29.3 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.16.4 (from transformers[torch])
  Downloading huggingface_hub-0.18.0-py3-none-any.whl (301 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [2]:
from typing import List, Optional, Tuple, Union

import evaluate
import numpy as np
import torch

from torch import nn
from datasets import load_dataset
from transformers import (
    MobileBertModel,
    MobileBertForSequenceClassification,
    MobileBertTokenizer,
    DataCollatorWithPadding,
    Trainer,
    TrainingArguments,
)

device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

## Test The Corpus of Linguistic Acceptability

In [3]:
tokenizer = MobileBertTokenizer.from_pretrained("google/mobilebert-uncased")
model = MobileBertForSequenceClassification.from_pretrained("google/mobilebert-uncased", num_labels=2)
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/847 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/147M [00:00<?, ?B/s]

Some weights of MobileBertForSequenceClassification were not initialized from the model checkpoint at google/mobilebert-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [4]:
raw_datasets = load_dataset("glue", "cola",)
def tokenize_raw_datasets(raw_datasets):
    return tokenizer(raw_datasets["sentence"], return_tensors="pt", max_length=128, padding="max_length", truncation=True)

tokenized_datasets = raw_datasets.map(tokenize_raw_datasets, batched=True)

Downloading builder script:   0%|          | 0.00/28.8k [00:00<?, ?B/s]

Downloading metadata:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/27.9k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/377k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/8551 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/1043 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1063 [00:00<?, ? examples/s]

Map:   0%|          | 0/8551 [00:00<?, ? examples/s]

Map:   0%|          | 0/1043 [00:00<?, ? examples/s]

Map:   0%|          | 0/1063 [00:00<?, ? examples/s]

In [5]:
metric = evaluate.load("matthews_correlation")

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels)

Downloading builder script:   0%|          | 0.00/6.60k [00:00<?, ?B/s]

In [6]:
training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    num_train_epochs=3,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

trainer.train()

Epoch,Training Loss,Validation Loss,Matthews Correlation
1,0.4974,0.4854,0.508885
2,0.3547,0.514453,0.544301
3,0.2382,0.799014,0.536891


TrainOutput(global_step=3207, training_loss=6206.4469615387925, metrics={'train_runtime': 567.9131, 'train_samples_per_second': 45.171, 'train_steps_per_second': 5.647, 'total_flos': 402165422046720.0, 'train_loss': 6206.4469615387925, 'epoch': 3.0})

## Test The Stanford Sentiment Treebank

In [7]:
tokenizer = MobileBertTokenizer.from_pretrained("google/mobilebert-uncased")
model = MobileBertForSequenceClassification.from_pretrained("google/mobilebert-uncased", num_labels=2)
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

Some weights of MobileBertForSequenceClassification were not initialized from the model checkpoint at google/mobilebert-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [8]:
raw_datasets = load_dataset("glue", "sst2",)
def tokenize_raw_datasets(raw_datasets):
    return tokenizer(raw_datasets["sentence"], return_tensors="pt", max_length=128, padding="max_length", truncation=True)

tokenized_datasets = raw_datasets.map(tokenize_raw_datasets, batched=True)


Downloading data:   0%|          | 0.00/7.44M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/67349 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/872 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1821 [00:00<?, ? examples/s]

Map:   0%|          | 0/67349 [00:00<?, ? examples/s]

Map:   0%|          | 0/872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1821 [00:00<?, ? examples/s]

In [9]:
training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    num_train_epochs=3,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

trainer.train()

Epoch,Training Loss,Validation Loss,Matthews Correlation
1,0.2358,0.393859,0.810913
2,0.1795,0.410118,0.821139
3,0.1106,0.439626,0.81716


TrainOutput(global_step=25257, training_loss=412.0922419551848, metrics={'train_runtime': 4371.8908, 'train_samples_per_second': 46.215, 'train_steps_per_second': 5.777, 'total_flos': 3167517133601280.0, 'train_loss': 412.0922419551848, 'epoch': 3.0})

## Test Microsoft Research Paraphrase Corpus

In [10]:
tokenizer = MobileBertTokenizer.from_pretrained("google/mobilebert-uncased")
model = MobileBertForSequenceClassification.from_pretrained("google/mobilebert-uncased", num_labels=2)
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

Some weights of MobileBertForSequenceClassification were not initialized from the model checkpoint at google/mobilebert-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [11]:
raw_datasets = load_dataset("glue", "mrpc",)
def tokenize_raw_datasets(raw_datasets):
    return tokenizer(raw_datasets["sentence1"], raw_datasets["sentence2"], return_tensors="pt", max_length=128, padding="max_length", truncation=True)

tokenized_datasets = raw_datasets.map(tokenize_raw_datasets, batched=True)

Downloading data files:   0%|          | 0/3 [00:00<?, ?it/s]

Downloading data: 0.00B [00:00, ?B/s]

Downloading data: 0.00B [00:00, ?B/s]

Downloading data: 0.00B [00:00, ?B/s]

Generating train split:   0%|          | 0/3668 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/408 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1725 [00:00<?, ? examples/s]

Map:   0%|          | 0/3668 [00:00<?, ? examples/s]

Map:   0%|          | 0/408 [00:00<?, ? examples/s]

Map:   0%|          | 0/1725 [00:00<?, ? examples/s]

In [12]:
metric = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels)

Downloading builder script:   0%|          | 0.00/4.20k [00:00<?, ?B/s]

In [13]:
training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch"
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

trainer.train()

Epoch,Training Loss,Validation Loss,Accuracy
1,No log,0.349117,0.85049
2,28246.074000,0.460115,0.818627
3,0.344900,0.47743,0.877451


TrainOutput(global_step=1377, training_loss=10256.57392471635, metrics={'train_runtime': 245.0398, 'train_samples_per_second': 44.907, 'train_steps_per_second': 5.619, 'total_flos': 172511141160960.0, 'train_loss': 10256.57392471635, 'epoch': 3.0})

## Test Semantic Textual Similarity Benchmark

In [18]:
tokenizer = MobileBertTokenizer.from_pretrained("google/mobilebert-uncased")
model = MobileBertForSequenceClassification.from_pretrained("google/mobilebert-uncased", num_labels=1)
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

Some weights of MobileBertForSequenceClassification were not initialized from the model checkpoint at google/mobilebert-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [19]:
raw_datasets = load_dataset("glue", "stsb",)
def tokenize_raw_datasets(raw_datasets):
    return tokenizer(raw_datasets["sentence1"], raw_datasets["sentence2"], return_tensors="pt", max_length=128, padding="max_length", truncation=True)

tokenized_datasets = raw_datasets.map(tokenize_raw_datasets, batched=True)

In [16]:
metric = evaluate.load("pearsonr")

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels)

Downloading builder script:   0%|          | 0.00/5.30k [00:00<?, ?B/s]

In [20]:
training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch"
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

trainer.train()

Epoch,Training Loss,Validation Loss,Pearsonr
1,866697154134.016,0.685251,
2,0.5776,0.576279,
3,0.3033,0.488535,




TrainOutput(global_step=2157, training_loss=200903373698.5261, metrics={'train_runtime': 397.0762, 'train_samples_per_second': 43.435, 'train_steps_per_second': 5.432, 'total_flos': 270376670207232.0, 'train_loss': 200903373698.5261, 'epoch': 3.0})

## Test Quora Question Pairs

In [119]:
tokenizer = MobileBertTokenizer.from_pretrained("google/mobilebert-uncased")
model = MobileBertForSequenceClassification.from_pretrained("google/mobilebert-uncased", num_labels=2)
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

Some weights of MobileBertForSequenceClassification were not initialized from the model checkpoint at google/mobilebert-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [120]:
raw_datasets = load_dataset("glue", "qqp",)
def tokenize_raw_datasets(raw_datasets):
    return tokenizer(raw_datasets["question1"], raw_datasets["question2"], return_tensors="pt", padding=True, truncation=True)

tokenized_datasets = raw_datasets.map(tokenize_raw_datasets, batched=True)

Map:   0%|          | 0/40430 [00:00<?, ? examples/s]

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


In [121]:
metric = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels)

In [122]:
training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch"
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"].select(range(5000)),
    eval_dataset=tokenized_datasets["validation"].select(range(1000)),
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

trainer.train()

Epoch,Training Loss,Validation Loss,Accuracy
1,13413.028,0.400634,0.807
2,0.4028,0.452888,0.809
3,0.701,0.612111,0.815


TrainOutput(global_step=1875, training_loss=3577.150262141927, metrics={'train_runtime': 379.2943, 'train_samples_per_second': 39.547, 'train_steps_per_second': 4.943, 'total_flos': 273549265737120.0, 'train_loss': 3577.150262141927, 'epoch': 3.0})

## Test MultiNLI

In [106]:
tokenizer = MobileBertTokenizer.from_pretrained("google/mobilebert-uncased")
model = MobileBertForSequenceClassification.from_pretrained("google/mobilebert-uncased", num_labels=3)
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

Some weights of MobileBertForSequenceClassification were not initialized from the model checkpoint at google/mobilebert-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [113]:
train_datasets = load_dataset("glue", "mnli", split="train").select(range(9000))
eval_matched_datasets, eval_mismatched_datasets = load_dataset("glue", "mnli", split=["validation_matched", "validation_mismatched"])

def tokenize_raw_datasets(raw_datasets):
    return tokenizer(raw_datasets["premise"], raw_datasets["hypothesis"], return_tensors="pt", padding=True, truncation=True)

tokenized_train = train_datasets.map(tokenize_raw_datasets, batched=True)
tokenized_eval_matched = eval_matched_datasets.map(tokenize_raw_datasets, batched=True)
tokenized_eval_mismatched = eval_mismatched_datasets.map(tokenize_raw_datasets, batched=True)


Map:   0%|          | 0/9815 [00:00<?, ? examples/s]

Map:   0%|          | 0/9832 [00:00<?, ? examples/s]

In [114]:
metric = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels)

In [118]:
training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch"
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_eval_matched,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

trainer.train()

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_eval_mismatched,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

trainer.train()

Epoch,Training Loss,Validation Loss,Accuracy
1,0.7332,0.717606,0.705553
2,0.5311,0.64539,0.745899
3,0.3392,1.069469,0.749669


Epoch,Training Loss,Validation Loss,Accuracy
1,0.2982,27.219135,0.743389
2,0.2025,4.966821,0.74288
3,0.1177,2.726878,0.747254


TrainOutput(global_step=3375, training_loss=0.3302736386899595, metrics={'train_runtime': 840.9044, 'train_samples_per_second': 32.108, 'train_steps_per_second': 4.014, 'total_flos': 772760846525184.0, 'train_loss': 0.3302736386899595, 'epoch': 3.0})

## Test Question NLI

In [88]:
tokenizer = MobileBertTokenizer.from_pretrained("google/mobilebert-uncased")
model = MobileBertForSequenceClassification.from_pretrained("google/mobilebert-uncased", num_labels=2)
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

Some weights of MobileBertForSequenceClassification were not initialized from the model checkpoint at google/mobilebert-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [92]:
train_datasets = load_dataset("glue", "qnli", split="train").select(range(5000))
eval_datasets = load_dataset("glue", "qnli", split="validation").select(range(5000))

def tokenize_raw_datasets(raw_datasets):
    return tokenizer(raw_datasets["question"], raw_datasets["sentence"], return_tensors="pt", max_length=128, padding="max_length", truncation=True)

tokenized_train_datasets = train_datasets.map(tokenize_raw_datasets, batched=True)
tokenized_eval_datasets = eval_datasets.map(tokenize_raw_datasets, batched=True)

Map:   0%|          | 0/5000 [00:00<?, ? examples/s]

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pai

Map:   0%|          | 0/5000 [00:00<?, ? examples/s]

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pai

In [94]:
metric = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels)

In [95]:
training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    num_train_epochs=3,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train_datasets,
    eval_dataset=tokenized_eval_datasets,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

trainer.train()

Epoch,Training Loss,Validation Loss,Accuracy
1,39070.668,0.388727,0.8392
2,3.3516,0.415466,0.835
3,0.3173,0.620195,0.8446


TrainOutput(global_step=1875, training_loss=10419.874475976563, metrics={'train_runtime': 452.3475, 'train_samples_per_second': 33.16, 'train_steps_per_second': 4.145, 'total_flos': 235156953600000.0, 'train_loss': 10419.874475976563, 'epoch': 3.0})

## Test Recognizing Textual Entailment

In [96]:
tokenizer = MobileBertTokenizer.from_pretrained("google/mobilebert-uncased")
model = MobileBertForSequenceClassification.from_pretrained("google/mobilebert-uncased", num_labels=2)
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

Some weights of MobileBertForSequenceClassification were not initialized from the model checkpoint at google/mobilebert-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [98]:
raw_datasets = load_dataset("glue", "rte",)
def tokenize_raw_datasets(raw_datasets):
    return tokenizer(raw_datasets["sentence1"], raw_datasets["sentence2"], return_tensors="pt", padding=True, truncation=True)

tokenized_datasets = raw_datasets.map(tokenize_raw_datasets, batched=True)

Map:   0%|          | 0/2490 [00:00<?, ? examples/s]

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Map:   0%|          | 0/277 [00:00<?, ? examples/s]

Map:   0%|          | 0/3000 [00:00<?, ? examples/s]

In [99]:
metric = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels)

In [100]:
training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    num_train_epochs=3,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

trainer.train()

Epoch,Training Loss,Validation Loss,Accuracy
1,No log,0.657082,0.602888
2,29089.734000,0.601782,0.649819
3,29089.734000,0.662739,0.67148


TrainOutput(global_step=936, training_loss=15539.621208451752, metrics={'train_runtime': 188.4762, 'train_samples_per_second': 39.634, 'train_steps_per_second': 4.966, 'total_flos': 263835078957000.0, 'train_loss': 15539.621208451752, 'epoch': 3.0})

## Test Winograd NLI

In [101]:
tokenizer = MobileBertTokenizer.from_pretrained("google/mobilebert-uncased")
model = MobileBertForSequenceClassification.from_pretrained("google/mobilebert-uncased", num_labels=2)
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

Some weights of MobileBertForSequenceClassification were not initialized from the model checkpoint at google/mobilebert-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [102]:
raw_datasets = load_dataset("glue", "wnli",)
def tokenize_raw_datasets(raw_datasets):
    return tokenizer(raw_datasets["sentence1"], raw_datasets["sentence2"], return_tensors="pt", padding=True, truncation=True)

tokenized_datasets = raw_datasets.map(tokenize_raw_datasets, batched=True)

Downloading data:   0%|          | 0.00/29.0k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/635 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/71 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/146 [00:00<?, ? examples/s]

Map:   0%|          | 0/635 [00:00<?, ? examples/s]

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Map:   0%|          | 0/71 [00:00<?, ? examples/s]

Map:   0%|          | 0/146 [00:00<?, ? examples/s]

In [103]:
metric = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels)

In [104]:
training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    num_train_epochs=3,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

trainer.train()

Epoch,Training Loss,Validation Loss,Accuracy
1,No log,0.894971,0.43662
2,No log,0.759654,0.56338
3,No log,0.735726,0.323944


TrainOutput(global_step=240, training_loss=42939.666666666664, metrics={'train_runtime': 46.3611, 'train_samples_per_second': 41.091, 'train_steps_per_second': 5.177, 'total_flos': 25198537309200.0, 'train_loss': 42939.666666666664, 'epoch': 3.0})