In [None]:
pip install datasets

In [None]:
pip install transformers[torch]

In [28]:
import datasets as data
import numpy as np
from transformers import AutoTokenizer, AutoModelForSequenceClassification, DataCollatorWithPadding, TrainingArguments, Trainer
from transformers import set_seed

# Tokenizer for the "gpt2" checkpoint; its end-of-sequence token is reused as
# the padding token (presumably because this tokenizer defines none -- confirm).
tokenizer = AutoTokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token

# First 1200 rows of the GLUE SST-2 training split, divided 80/20 into
# "train"/"test" without shuffling so the split is the same on every run.
glue = data.load_dataset(
    path='glue',
    name='sst2',
    split='train[:1200]',
).train_test_split(test_size=0.2, shuffle=False)

def tokenize_function(examples):
    """Tokenize the ``sentence`` field of a batch with the notebook-level tokenizer.

    Args:
        examples: Mapping-like batch; only ``examples['sentence']`` is read.

    Returns:
        The result of calling ``tokenizer`` on those sentences with
        ``truncation=True``.
    """
    sentences = examples['sentence']
    encoded = tokenizer(sentences, truncation=True)
    return encoded

# Batch collator built around the tokenizer (handles padding, per its name).
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

# Run `tokenize_function` over every split of `glue`, one batch of rows at a time.
tokenized_dataset = glue.map(function=tokenize_function, batched=True)

def compute_metrics(eval_pred):
    """Compute classification accuracy for the Trainer's evaluation loop.

    Accuracy is computed directly with NumPy rather than through
    ``datasets.load_metric("glue", "sst2")``: that loader is deprecated and is
    no longer available in recent ``datasets`` releases, which would make this
    cell fail on a fresh install. The returned dictionary keeps the same
    ``{"accuracy": ...}`` shape.

    Args:
        eval_pred: A ``(logits, labels)`` pair. ``logits`` is array-like with
            the per-class scores on the last axis (or a tuple whose first item
            is that array); ``labels`` is array-like with the integer class
            ids.

    Returns:
        dict: ``{"accuracy": float}`` -- the fraction of rows whose arg-max
        class equals the label.
    """
    logits, labels = eval_pred
    # Some models hand back several prediction arrays; the scores come first.
    if isinstance(logits, tuple):
        logits = logits[0]
    predictions = np.argmax(logits, axis=-1)
    accuracy = float(np.mean(predictions == np.asarray(labels)))
    return {"accuracy": accuracy}


Map:   0%|          | 0/960 [00:00<?, ? examples/s]

Map:   0%|          | 0/240 [00:00<?, ? examples/s]

In [54]:
# Seed the random number generators *before* the model is built: the
# classification head (`score.weight`) is newly initialised rather than loaded
# from the checkpoint, so without a seed every run starts from different weights.
SEED = 42
set_seed(SEED)

# GPT-2 backbone with a 2-class sequence-classification head.
model = AutoModelForSequenceClassification.from_pretrained("gpt2", num_labels=2)
# Reuse the end-of-sequence id as the padding id in the model config.
model.config.pad_token_id = model.config.eos_token_id

training_args = TrainingArguments(
    output_dir="test_trainer",
    per_device_train_batch_size=9,
    per_device_eval_batch_size=16,
    learning_rate=6e-4,
    max_steps=800,
    logging_steps=50,
    # NOTE(review): newer transformers releases spell this `eval_strategy`;
    # confirm against the installed version before renaming.
    evaluation_strategy="steps",
    # Same seed as above, stated explicitly so the whole run is reproducible.
    seed=SEED,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["test"],
    data_collator=data_collator,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [55]:
# Run fine-tuning with the notebook's `trainer`; the value returned by
# `train()` is the last expression, so it is displayed as the cell's output.
trainer.train()

Step,Training Loss,Validation Loss,Accuracy
50,0.9664,0.693495,0.529167
100,0.7415,0.648107,0.575
150,0.6202,0.709416,0.633333
200,0.4667,0.513669,0.733333
250,0.2514,1.146199,0.783333
300,0.2104,0.826858,0.808333
350,0.0926,1.20136,0.795833
400,0.053,1.257501,0.795833
450,0.0224,1.462587,0.783333
500,0.0079,1.591299,0.791667


TrainOutput(global_step=800, training_loss=0.21592655009124428, metrics={'train_runtime': 112.3522, 'train_samples_per_second': 64.084, 'train_steps_per_second': 7.12, 'total_flos': 107990068396032.0, 'train_loss': 0.21592655009124428, 'epoch': 7.48})