In [1]:
# Hugging Face Hub identifiers used throughout the notebook.
dataset_name = "rotten_tomatoes"
model_name = "distilbert-base-uncased"
tokenizer_name = model_name  # the tokenizer is loaded from the same checkpoint id as the model

In [2]:
from transformers import AutoModelForSequenceClassification
# Load the pretrained checkpoint with a sequence-classification head on top.
# The head weights are newly initialized (see the warning printed below),
# so the model has to be fine-tuned before it is useful for predictions.
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
)

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.weight', 'pre_classifier.weight', 'classifier.bias', 'pre_classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [3]:
from transformers import TrainingArguments
# Hyperparameters for the fine-tuning run.
training_args = TrainingArguments(
    # Where checkpoints and other run artifacts are written.
    output_dir="path/to/save/folder/",
    # Optimisation schedule.
    num_train_epochs=2,
    learning_rate=2e-5,
    # Batch sizes (per device) for training and evaluation.
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
)

In [4]:
from transformers import AutoTokenizer
# Instantiate the tokenizer that belongs to the checkpoint named in `tokenizer_name`.
tokenizer = AutoTokenizer.from_pretrained(
    tokenizer_name,
)

In [5]:
from datasets import load_dataset
# Load the dataset by name; the result is indexed by split ("train", "test")
# further down when the Trainer is built.
dataset = load_dataset(
    dataset_name,
)

In [6]:
def tokenize_dataset(dataset):
    """Tokenize the "text" field of a batch of examples.

    Intended to be passed to `Dataset.map(..., batched=True)`, as done in the
    next cell.

    Args:
        dataset: A mapping with a "text" entry holding the raw strings to
            tokenize (a batch of examples when used with `batched=True`).

    Returns:
        The tokenizer output for the given texts. No padding is applied here;
        the notebook pads per batch with `DataCollatorWithPadding`.
    """
    # truncation=True caps every sequence at the tokenizer's maximum model
    # length, so an unusually long text cannot overflow the model's input size.
    return tokenizer(dataset["text"], truncation=True)

In [7]:
# Apply the tokenization function to every split, one batch of examples at a time.
dataset = dataset.map(
    tokenize_dataset,
    batched=True,
)

In [8]:
from transformers import DataCollatorWithPadding
# Collator that pads the examples of each batch using the tokenizer's padding settings.
data_collator = DataCollatorWithPadding(
    tokenizer=tokenizer,
)

In [9]:
from transformers import Trainer
from transformers import TrainerCallback
import time

class MyCallback(TrainerCallback):
    """Print, at the end of every epoch, how the epoch's wall-clock time was spent.

    Uses the Hugging Face `TrainerCallback` hooks (`on_epoch_begin`,
    `on_step_begin`, `on_step_end`, `on_epoch_end`), each of which the Trainer
    calls as `hook(args, state, control, **kwargs)`. Keras-style hooks such as
    `on_train_batch_begin` are never invoked by the Trainer, so they are not
    defined here.

    What is measured:
      * Total: time between `on_epoch_begin` and `on_epoch_end`.
      * Steps: time accumulated between `on_step_begin` and `on_step_end`,
        i.e. whole training steps. A callback cannot separate the forward
        pass from backpropagation or the optimizer update.
      * Other: everything else in the epoch (Total - Steps).

    NOTE(review): on a GPU these numbers are presumably approximate because
    work may still be executing asynchronously when a hook fires - confirm
    with a real profiler if precise figures are required.
    """

    def __init__(self):
        # Initialise every attribute up front so that no hook can hit an
        # AttributeError, whatever order the hooks are fired in.
        self.start_time = 0.0
        self.forward_pass_time = 0.0  # name kept from the original; holds accumulated step time
        self.batch_start_time = 0.0
        self.batch_end_time = 0.0
        self.batch_time = 0.0

    def on_epoch_begin(self, args, state, control, **kwargs):
        """Reset the per-epoch accumulators and start the epoch timer."""
        self.forward_pass_time = 0.0
        self.batch_start_time = 0.0
        self.batch_end_time = 0.0
        self.batch_time = 0.0
        # perf_counter is a monotonic, high-resolution clock meant for measuring intervals.
        self.start_time = time.perf_counter()

    def on_step_begin(self, args, state, control, **kwargs):
        """Record the moment a training step starts."""
        self.batch_start_time = time.perf_counter()

    def on_step_end(self, args, state, control, **kwargs):
        """Add the duration of the step that just finished to the running total."""
        self.batch_end_time = time.perf_counter()
        self.batch_time = self.batch_end_time - self.batch_start_time
        self.forward_pass_time += self.batch_time

    def on_epoch_end(self, args, state, control, **kwargs):
        """Print the timing breakdown for the epoch that just finished."""
        epoch_time = time.perf_counter() - self.start_time
        step_time = self.forward_pass_time
        other_time = epoch_time - step_time

        # Guard against a zero-length epoch to avoid ZeroDivisionError.
        if epoch_time > 0:
            step_percent = round((step_time / epoch_time) * 100, 2)
            other_percent = round((other_time / epoch_time) * 100, 2)
        else:
            step_percent = other_percent = 0.0

        print(f"\n  - Total:\t {round(epoch_time,10)}s")
        print(f"  - Steps:\t {round(step_time,10)}s ({step_percent}%)")
        print(f"  - Other:\t {round(other_time,10)}s ({other_percent}%)")

# Assemble the Trainer from the objects prepared in the cells above.
trainer = Trainer(
    args=training_args,
    model=model,
    tokenizer=tokenizer,
    data_collator=data_collator,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
    callbacks=[MyCallback()],  # custom timing callback defined in this cell
)

In [None]:
# Note: for tasks that use a sequence-to-sequence model, such as translation or
# summarization, use the Seq2SeqTrainer and Seq2SeqTrainingArguments classes instead.
trainer.train()

You're using a DistilBertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss
