# BERT for classification

## Install the `datasets` library

In [None]:
!pip install datasets

Collecting datasets
  Downloading datasets-2.21.0-py3-none-any.whl.metadata (21 kB)
Collecting pyarrow>=15.0.0 (from datasets)
  Downloading pyarrow-17.0.0-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (3.3 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Downloading datasets-2.21.0-py3-none-any.whl (527 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m527.3/527.3 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyarrow-17.0.0-cp310-cp310-manylinux_2_28_x86_64.whl (39.9 MB)
[2K

## BERT + softmax

In [None]:
import torch
import time
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AdamW
from datasets import load_dataset
from tqdm import tqdm  # For progress bars

class SentimentClassifier:
    """Fine-tune a pretrained sequence-classification model on SetFit/sst5.

    The tokenizer and model are loaded with ``AutoTokenizer`` /
    ``AutoModelForSequenceClassification`` and the model is moved to the GPU
    when one is available.

    Args:
        model_name: Checkpoint name passed to ``from_pretrained``.
        num_labels: Number of output classes of the classification head.
        lr: Learning rate for the AdamW optimizer.
        batch_size: Number of examples per training and evaluation batch.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """

    def __init__(self, model_name='bert-base-uncased', num_labels=5, lr=2e-5, batch_size=16):
        if batch_size < 1:
            raise ValueError(f"batch_size must be >= 1, got {batch_size}")

        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=num_labels)
        self.batch_size = batch_size

        # Check if GPU is available and move the model there before the
        # optimizer is created.
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.model.to(self.device)

        # Use the optimizer that ships with PyTorch: the AdamW class exported by
        # `transformers` is deprecated. eps=1e-6 keeps that class's default so
        # the update rule stays numerically close to the previous setup.
        self.optimizer = torch.optim.AdamW(self.model.parameters(), lr=lr, eps=1e-6, weight_decay=0.1)

    def load_data(self):
        """Load SetFit/sst5 and return ``(train_split, dev_split)``.

        The dev split is the dataset's ``validation`` split, so the per-epoch
        accuracy printed by ``train`` is not computed on the held-out test data.
        """
        dataset = load_dataset("SetFit/sst5")
        return dataset['train'], dataset['validation']

    def preprocess_data(self, sentences, labels):
        """Tokenize ``sentences`` and convert ``labels`` to a tensor.

        Returns:
            ``(inputs, labels)`` where ``inputs`` is the tokenizer output
            (PyTorch tensors, truncated to at most 512 tokens and padded to a
            common length) and ``labels`` is a ``torch.Tensor``.
        """
        inputs = self.tokenizer(sentences, return_tensors='pt', max_length=512, truncation=True, padding=True)
        labels = torch.tensor(labels)
        return inputs, labels

    def train(self, epochs=3):
        """Fine-tune the model and report dev accuracy after every epoch.

        Args:
            epochs: Number of passes over the training split.
        """
        # Load and preprocess the dataset
        train_dataset, dev_dataset = self.load_data()
        train_inputs, train_labels = self.preprocess_data(train_dataset['text'], train_dataset['label'])

        # Move data to GPU if available
        train_inputs = {key: val.to(self.device) for key, val in train_inputs.items()}
        train_labels = train_labels.to(self.device)
        num_examples = len(train_labels)

        total_training_time = 0

        for epoch in range(epochs):
            start_time = time.time()
            self.model.train()
            total_loss = 0
            num_batches = 0

            # Visit the examples in a fresh random order each epoch so that the
            # batches are not identical from one epoch to the next.
            order = torch.randperm(num_examples, device=self.device)

            # Progress bar for batches
            progress_bar = tqdm(range(0, num_examples, self.batch_size), desc=f"Epoch {epoch + 1}/{epochs}")

            for i in progress_bar:
                batch_idx = order[i:i + self.batch_size]
                b_input_ids = train_inputs['input_ids'][batch_idx]
                b_attention_mask = train_inputs['attention_mask'][batch_idx]
                b_labels = train_labels[batch_idx]

                self.optimizer.zero_grad()
                outputs = self.model(b_input_ids, attention_mask=b_attention_mask, labels=b_labels)
                loss = outputs.loss
                loss.backward()
                self.optimizer.step()

                batch_loss = loss.item()
                total_loss += batch_loss
                num_batches += 1

                # Update progress bar with the current loss
                progress_bar.set_postfix(loss=batch_loss)

            # Average over the batches that actually ran; this also counts a
            # trailing partial batch and cannot divide by zero.
            avg_train_loss = total_loss / max(num_batches, 1)
            epoch_time = time.time() - start_time
            total_training_time += epoch_time
            print(f"Epoch {epoch + 1} completed in {epoch_time:.2f} seconds. Loss: {avg_train_loss:.4f}")

            # Estimate remaining time from the mean epoch duration so far
            remaining_time = (epochs - (epoch + 1)) * (total_training_time / (epoch + 1))
            print(f"Estimated remaining time: {remaining_time:.2f} seconds")

            # Evaluation on dev set
            avg_dev_accuracy = self.evaluate(dev_dataset['text'], dev_dataset['label'])
            print(f"Epoch {epoch + 1}, Dev Accuracy: {avg_dev_accuracy:.4f}")

        print(f"Total training time: {total_training_time:.2f} seconds")

    def evaluate(self, sentences, labels):
        """Return the classification accuracy on ``sentences`` / ``labels``.

        The model is put in eval mode and the data is scored in chunks of
        ``self.batch_size`` without gradient tracking, so the whole evaluation
        set never has to pass through the model in a single forward call.

        Returns:
            Fraction of correct predictions as a Python float, or ``nan`` when
            ``labels`` is empty.
        """
        if len(labels) == 0:
            return float('nan')

        self.model.eval()
        inputs, labels = self.preprocess_data(sentences, labels)
        num_examples = len(labels)
        num_correct = 0

        with torch.no_grad():
            for start in range(0, num_examples, self.batch_size):
                end = start + self.batch_size

                # Move only the current batch to the GPU if available
                b_input_ids = inputs['input_ids'][start:end].to(self.device)
                b_attention_mask = inputs['attention_mask'][start:end].to(self.device)
                b_labels = labels[start:end].to(self.device)

                outputs = self.model(input_ids=b_input_ids, attention_mask=b_attention_mask)
                preds = torch.argmax(outputs.logits, dim=1)
                num_correct += (preds == b_labels).sum().item()

        return num_correct / num_examples

In [None]:
def train_and_test(params, epochs=3):
    """Train one classifier per configuration and score it on the test split.

    Args:
        params: Iterable of keyword-argument dicts, each forwarded to
            ``SentimentClassifier(**param)``.
        epochs: Number of training epochs for every configuration.

    Returns:
        Dict mapping ``str(param)`` to the accuracy returned by
        ``SentimentClassifier.evaluate`` on the SetFit/sst5 test split.
    """
    import gc

    test_dataset = load_dataset("SetFit/sst5")['test']
    # Read the columns once instead of once per configuration.
    test_texts, test_labels = test_dataset['text'], test_dataset['label']

    accuracies = dict()
    for param in params:
        classifier = SentimentClassifier(**param)
        classifier.train(epochs=epochs)
        accuracies[str(param)] = classifier.evaluate(test_texts, test_labels)

        # Release this run's model and optimizer before the next configuration
        # is built, so two full models are never alive at the same time.
        del classifier
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
    return accuracies

In [None]:
# Hyperparameter grid: every learning rate is tried at each batch size,
# with the batch size varying slowest (all of 16 first, then all of 32).
params = [
    {'lr': learning_rate, 'batch_size': examples_per_batch}
    for examples_per_batch in (16, 32)
    for learning_rate in (1e-4, 1e-5, 1e-6)
]

In [None]:
# Train one classifier per configuration in `params` and print the
# {str(configuration): accuracy} mapping returned by train_and_test.
accs = train_and_test(params)
print(accs)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Downloading readme:   0%|          | 0.00/421 [00:00<?, ?B/s]

Repo card metadata block was not found. Setting CardData to empty.


Downloading data:   0%|          | 0.00/1.32M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/171k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/343k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/8544 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/1101 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/2210 [00:00<?, ? examples/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Repo card metadata block was not found. Setting CardData to empty.
Epoch 1/3: 100%|██████████| 534/534 [02:00<00:00,  4.42it/s, loss=1.42]


Epoch 1 completed in 120.95 seconds. Loss: 1.2785
Estimated remaining time: 241.91 seconds
Epoch 1, Dev Accuracy: 0.5081


Epoch 2/3: 100%|██████████| 534/534 [02:02<00:00,  4.37it/s, loss=1.11]


Epoch 2 completed in 122.29 seconds. Loss: 0.9932
Estimated remaining time: 121.62 seconds
Epoch 2, Dev Accuracy: 0.4710


Epoch 3/3: 100%|██████████| 534/534 [02:01<00:00,  4.41it/s, loss=0.847]


Epoch 3 completed in 121.14 seconds. Loss: 0.8178
Estimated remaining time: 0.00 seconds
Epoch 3, Dev Accuracy: 0.4158
Total training time: 364.38 seconds


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Repo card metadata block was not found. Setting CardData to empty.
Epoch 1/3: 100%|██████████| 534/534 [02:01<00:00,  4.41it/s, loss=1.16]


Epoch 1 completed in 121.18 seconds. Loss: 1.2681
Estimated remaining time: 242.36 seconds
Epoch 1, Dev Accuracy: 0.5140


Epoch 2/3: 100%|██████████| 534/534 [02:00<00:00,  4.42it/s, loss=1.1]


Epoch 2 completed in 120.75 seconds. Loss: 0.9877
Estimated remaining time: 120.96 seconds
Epoch 2, Dev Accuracy: 0.5249


Epoch 3/3: 100%|██████████| 534/534 [02:00<00:00,  4.41it/s, loss=1.06]


Epoch 3 completed in 120.97 seconds. Loss: 0.8113
Estimated remaining time: 0.00 seconds
Epoch 3, Dev Accuracy: 0.5253
Total training time: 362.90 seconds


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Repo card metadata block was not found. Setting CardData to empty.
Epoch 1/3: 100%|██████████| 534/534 [02:00<00:00,  4.42it/s, loss=1.53]


Epoch 1 completed in 120.91 seconds. Loss: 1.5665
Estimated remaining time: 241.82 seconds
Epoch 1, Dev Accuracy: 0.3493


Epoch 2/3: 100%|██████████| 534/534 [02:00<00:00,  4.42it/s, loss=1.29]


Epoch 2 completed in 120.88 seconds. Loss: 1.4585
Estimated remaining time: 120.89 seconds
Epoch 2, Dev Accuracy: 0.4335


Epoch 3/3: 100%|██████████| 534/534 [02:00<00:00,  4.42it/s, loss=1.16]


Epoch 3 completed in 120.77 seconds. Loss: 1.3130
Estimated remaining time: 0.00 seconds
Epoch 3, Dev Accuracy: 0.4593
Total training time: 362.56 seconds


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Repo card metadata block was not found. Setting CardData to empty.
Epoch 1/3: 100%|██████████| 267/267 [01:48<00:00,  2.47it/s, loss=1.14]


Epoch 1 completed in 108.29 seconds. Loss: 1.2475
Estimated remaining time: 216.58 seconds
Epoch 1, Dev Accuracy: 0.5140


Epoch 2/3: 100%|██████████| 267/267 [01:48<00:00,  2.47it/s, loss=0.801]


Epoch 2 completed in 108.17 seconds. Loss: 0.9596
Estimated remaining time: 108.23 seconds
Epoch 2, Dev Accuracy: 0.5222


Epoch 3/3: 100%|██████████| 267/267 [01:48<00:00,  2.47it/s, loss=0.784]


Epoch 3 completed in 108.16 seconds. Loss: 0.7907
Estimated remaining time: 0.00 seconds
Epoch 3, Dev Accuracy: 0.5195
Total training time: 324.62 seconds


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Repo card metadata block was not found. Setting CardData to empty.
Epoch 1/3: 100%|██████████| 267/267 [01:48<00:00,  2.46it/s, loss=1.38]


Epoch 1 completed in 108.33 seconds. Loss: 1.3180
Estimated remaining time: 216.67 seconds
Epoch 1, Dev Accuracy: 0.5077


Epoch 2/3: 100%|██████████| 267/267 [01:48<00:00,  2.47it/s, loss=1.03]


Epoch 2 completed in 108.18 seconds. Loss: 1.0768
Estimated remaining time: 108.26 seconds
Epoch 2, Dev Accuracy: 0.5443


Epoch 3/3: 100%|██████████| 267/267 [01:48<00:00,  2.47it/s, loss=0.94]


Epoch 3 completed in 108.11 seconds. Loss: 0.9274
Estimated remaining time: 0.00 seconds
Epoch 3, Dev Accuracy: 0.5267
Total training time: 324.62 seconds


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Repo card metadata block was not found. Setting CardData to empty.
Epoch 1/3: 100%|██████████| 267/267 [01:48<00:00,  2.47it/s, loss=1.53]


Epoch 1 completed in 108.25 seconds. Loss: 1.5889
Estimated remaining time: 216.51 seconds
Epoch 1, Dev Accuracy: 0.3018


Epoch 2/3: 100%|██████████| 267/267 [01:48<00:00,  2.47it/s, loss=1.44]


Epoch 2 completed in 108.14 seconds. Loss: 1.4854
Estimated remaining time: 108.20 seconds
Epoch 2, Dev Accuracy: 0.4371


Epoch 3/3: 100%|██████████| 267/267 [01:48<00:00,  2.47it/s, loss=1.36]


Epoch 3 completed in 108.09 seconds. Loss: 1.3589
Estimated remaining time: 0.00 seconds
Epoch 3, Dev Accuracy: 0.4697
Total training time: 324.49 seconds
{"{'lr': 0.0001, 'batch_size': 16}": 0.4158371090888977, "{'lr': 1e-05, 'batch_size': 16}": 0.5253393650054932, "{'lr': 1e-06, 'batch_size': 16}": 0.459276020526886, "{'lr': 0.0001, 'batch_size': 32}": 0.5194570422172546, "{'lr': 1e-05, 'batch_size': 32}": 0.5266968607902527, "{'lr': 1e-06, 'batch_size': 32}": 0.4696832597255707}


# RoBERTa for classification

In [None]:
# Build the RoBERTa grid under its own name instead of rebinding `params`, so
# re-running a cell that reads `params` is not affected by having run this one.
params_roberta = [{**config, 'model_name': 'roberta-base'} for config in params]

accs_roberta = train_and_test(params_roberta)
print(accs_roberta)

Repo card metadata block was not found. Setting CardData to empty.


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Repo card metadata block was not found. Setting CardData to empty.
Epoch 1/3: 100%|██████████| 534/534 [02:01<00:00,  4.38it/s, loss=1.56]


Epoch 1 completed in 121.96 seconds. Loss: 1.5807
Estimated remaining time: 243.93 seconds
Epoch 1, Dev Accuracy: 0.2864


Epoch 2/3: 100%|██████████| 534/534 [02:01<00:00,  4.39it/s, loss=1.57]


Epoch 2 completed in 121.63 seconds. Loss: 1.5763
Estimated remaining time: 121.80 seconds
Epoch 2, Dev Accuracy: 0.2308


Epoch 3/3: 100%|██████████| 534/534 [02:01<00:00,  4.40it/s, loss=1.55]


Epoch 3 completed in 121.40 seconds. Loss: 1.5749
Estimated remaining time: 0.00 seconds
Epoch 3, Dev Accuracy: 0.2308
Total training time: 364.99 seconds


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Repo card metadata block was not found. Setting CardData to empty.
Epoch 1/3: 100%|██████████| 534/534 [02:01<00:00,  4.38it/s, loss=1.12]


Epoch 1 completed in 121.99 seconds. Loss: 1.2181
Estimated remaining time: 243.98 seconds
Epoch 1, Dev Accuracy: 0.5688


Epoch 2/3: 100%|██████████| 534/534 [02:01<00:00,  4.38it/s, loss=1.11]


Epoch 2 completed in 121.86 seconds. Loss: 0.9774
Estimated remaining time: 121.92 seconds
Epoch 2, Dev Accuracy: 0.5710


Epoch 3/3: 100%|██████████| 534/534 [02:02<00:00,  4.38it/s, loss=0.993]


Epoch 3 completed in 122.04 seconds. Loss: 0.8555
Estimated remaining time: 0.00 seconds
Epoch 3, Dev Accuracy: 0.5674
Total training time: 365.88 seconds


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Repo card metadata block was not found. Setting CardData to empty.
Epoch 1/3: 100%|██████████| 534/534 [02:01<00:00,  4.38it/s, loss=1.56]


Epoch 1 completed in 121.85 seconds. Loss: 1.5785
Estimated remaining time: 243.71 seconds
Epoch 1, Dev Accuracy: 0.3507


Epoch 2/3: 100%|██████████| 534/534 [02:01<00:00,  4.39it/s, loss=1.08]


Epoch 2 completed in 121.71 seconds. Loss: 1.2837
Estimated remaining time: 121.78 seconds
Epoch 2, Dev Accuracy: 0.5158


Epoch 3/3: 100%|██████████| 534/534 [02:01<00:00,  4.38it/s, loss=1.04]


Epoch 3 completed in 121.86 seconds. Loss: 1.0996
Estimated remaining time: 0.00 seconds
Epoch 3, Dev Accuracy: 0.5457
Total training time: 365.42 seconds


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Repo card metadata block was not found. Setting CardData to empty.
Epoch 1/3: 100%|██████████| 267/267 [01:47<00:00,  2.48it/s, loss=1.38]


Epoch 1 completed in 107.71 seconds. Loss: 1.3137
Estimated remaining time: 215.43 seconds
Epoch 1, Dev Accuracy: 0.5158


Epoch 2/3: 100%|██████████| 267/267 [01:47<00:00,  2.48it/s, loss=1.26]


Epoch 2 completed in 107.69 seconds. Loss: 1.0990
Estimated remaining time: 107.70 seconds
Epoch 2, Dev Accuracy: 0.4774


Epoch 3/3: 100%|██████████| 267/267 [01:47<00:00,  2.48it/s, loss=0.869]


Epoch 3 completed in 107.65 seconds. Loss: 0.9819
Estimated remaining time: 0.00 seconds
Epoch 3, Dev Accuracy: 0.5176
Total training time: 323.05 seconds


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Repo card metadata block was not found. Setting CardData to empty.
Epoch 1/3: 100%|██████████| 267/267 [01:47<00:00,  2.48it/s, loss=1.18]


Epoch 1 completed in 107.73 seconds. Loss: 1.2512
Estimated remaining time: 215.46 seconds
Epoch 1, Dev Accuracy: 0.5516


Epoch 2/3: 100%|██████████| 267/267 [01:47<00:00,  2.48it/s, loss=0.942]


Epoch 2 completed in 107.77 seconds. Loss: 0.9960
Estimated remaining time: 107.75 seconds
Epoch 2, Dev Accuracy: 0.5701


Epoch 3/3: 100%|██████████| 267/267 [01:47<00:00,  2.48it/s, loss=0.969]


Epoch 3 completed in 107.63 seconds. Loss: 0.8876
Estimated remaining time: 0.00 seconds
Epoch 3, Dev Accuracy: 0.5733
Total training time: 323.14 seconds


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Repo card metadata block was not found. Setting CardData to empty.
Epoch 1/3: 100%|██████████| 267/267 [01:47<00:00,  2.48it/s, loss=1.54]


Epoch 1 completed in 107.73 seconds. Loss: 1.5915
Estimated remaining time: 215.46 seconds
Epoch 1, Dev Accuracy: 0.3575


Epoch 2/3: 100%|██████████| 267/267 [01:47<00:00,  2.48it/s, loss=1.19]


Epoch 2 completed in 107.82 seconds. Loss: 1.4257
Estimated remaining time: 107.77 seconds
Epoch 2, Dev Accuracy: 0.5014


Epoch 3/3: 100%|██████████| 267/267 [01:47<00:00,  2.48it/s, loss=1.07]


Epoch 3 completed in 107.82 seconds. Loss: 1.1485
Estimated remaining time: 0.00 seconds
Epoch 3, Dev Accuracy: 0.5299
Total training time: 323.37 seconds
{"{'lr': 0.0001, 'batch_size': 16, 'model_name': 'roberta-base'}": 0.23076923191547394, "{'lr': 1e-05, 'batch_size': 16, 'model_name': 'roberta-base'}": 0.5674208402633667, "{'lr': 1e-06, 'batch_size': 16, 'model_name': 'roberta-base'}": 0.5457013845443726, "{'lr': 0.0001, 'batch_size': 32, 'model_name': 'roberta-base'}": 0.5176470875740051, "{'lr': 1e-05, 'batch_size': 32, 'model_name': 'roberta-base'}": 0.5733031630516052, "{'lr': 1e-06, 'batch_size': 32, 'model_name': 'roberta-base'}": 0.5298642516136169}


# DistilBERT for classification

In [None]:
# Build the DistilBERT grid under its own name instead of rebinding `params`;
# any 'model_name' already present in a configuration is overridden here.
params_distilbert = [{**config, 'model_name': 'distilbert-base-uncased'} for config in params]

accs_distilbert = train_and_test(params_distilbert)
print(accs_distilbert)

Repo card metadata block was not found. Setting CardData to empty.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Repo card metadata block was not found. Setting CardData to empty.
Epoch 1/3: 100%|██████████| 534/534 [01:02<00:00,  8.58it/s, loss=1.45]


Epoch 1 completed in 62.24 seconds. Loss: 1.2727
Estimated remaining time: 124.48 seconds
Epoch 1, Dev Accuracy: 0.4964


Epoch 2/3: 100%|██████████| 534/534 [01:01<00:00,  8.62it/s, loss=0.947]


Epoch 2 completed in 61.98 seconds. Loss: 0.9747
Estimated remaining time: 62.11 seconds
Epoch 2, Dev Accuracy: 0.4606


Epoch 3/3: 100%|██████████| 534/534 [01:01<00:00,  8.63it/s, loss=1.02]


Epoch 3 completed in 61.88 seconds. Loss: 0.7711
Estimated remaining time: 0.00 seconds
Epoch 3, Dev Accuracy: 0.4430
Total training time: 186.10 seconds


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Repo card metadata block was not found. Setting CardData to empty.
Epoch 1/3: 100%|██████████| 534/534 [01:02<00:00,  8.58it/s, loss=1.21]


Epoch 1 completed in 62.21 seconds. Loss: 1.3023
Estimated remaining time: 124.43 seconds
Epoch 1, Dev Accuracy: 0.5077


Epoch 2/3: 100%|██████████| 534/534 [01:02<00:00,  8.59it/s, loss=0.91]


Epoch 2 completed in 62.15 seconds. Loss: 1.0509
Estimated remaining time: 62.18 seconds
Epoch 2, Dev Accuracy: 0.5172


Epoch 3/3: 100%|██████████| 534/534 [01:02<00:00,  8.61it/s, loss=0.841]


Epoch 3 completed in 62.05 seconds. Loss: 0.8993
Estimated remaining time: 0.00 seconds
Epoch 3, Dev Accuracy: 0.5204
Total training time: 186.41 seconds


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Repo card metadata block was not found. Setting CardData to empty.
Epoch 1/3: 100%|██████████| 534/534 [01:02<00:00,  8.60it/s, loss=1.45]


Epoch 1 completed in 62.10 seconds. Loss: 1.5641
Estimated remaining time: 124.21 seconds
Epoch 1, Dev Accuracy: 0.4163


Epoch 2/3: 100%|██████████| 534/534 [01:01<00:00,  8.62it/s, loss=1.28]


Epoch 2 completed in 61.98 seconds. Loss: 1.3803
Estimated remaining time: 62.04 seconds
Epoch 2, Dev Accuracy: 0.4561


Epoch 3/3: 100%|██████████| 534/534 [01:02<00:00,  8.61it/s, loss=1.18]


Epoch 3 completed in 62.05 seconds. Loss: 1.2532
Estimated remaining time: 0.00 seconds
Epoch 3, Dev Accuracy: 0.4792
Total training time: 186.13 seconds


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Repo card metadata block was not found. Setting CardData to empty.
Epoch 1/3: 100%|██████████| 267/267 [00:55<00:00,  4.85it/s, loss=1.21]


Epoch 1 completed in 55.08 seconds. Loss: 1.2536
Estimated remaining time: 110.16 seconds
Epoch 1, Dev Accuracy: 0.5326


Epoch 2/3: 100%|██████████| 267/267 [00:54<00:00,  4.88it/s, loss=1.1]


Epoch 2 completed in 54.70 seconds. Loss: 0.9604
Estimated remaining time: 54.89 seconds
Epoch 2, Dev Accuracy: 0.4973


Epoch 3/3: 100%|██████████| 267/267 [00:54<00:00,  4.86it/s, loss=0.87]


Epoch 3 completed in 54.89 seconds. Loss: 0.7574
Estimated remaining time: 0.00 seconds
Epoch 3, Dev Accuracy: 0.4837
Total training time: 164.67 seconds


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Repo card metadata block was not found. Setting CardData to empty.
Epoch 1/3: 100%|██████████| 267/267 [00:54<00:00,  4.86it/s, loss=1.23]


Epoch 1 completed in 54.99 seconds. Loss: 1.3364
Estimated remaining time: 109.98 seconds
Epoch 1, Dev Accuracy: 0.5036


Epoch 2/3: 100%|██████████| 267/267 [00:54<00:00,  4.87it/s, loss=1.02]


Epoch 2 completed in 54.82 seconds. Loss: 1.0887
Estimated remaining time: 54.91 seconds
Epoch 2, Dev Accuracy: 0.5217


Epoch 3/3: 100%|██████████| 267/267 [00:54<00:00,  4.87it/s, loss=0.951]


Epoch 3 completed in 54.78 seconds. Loss: 0.9642
Estimated remaining time: 0.00 seconds
Epoch 3, Dev Accuracy: 0.5140
Total training time: 164.59 seconds


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Repo card metadata block was not found. Setting CardData to empty.
Epoch 1/3: 100%|██████████| 267/267 [00:54<00:00,  4.86it/s, loss=1.56]


Epoch 1 completed in 54.92 seconds. Loss: 1.5756
Estimated remaining time: 109.83 seconds
Epoch 1, Dev Accuracy: 0.3593


Epoch 2/3: 100%|██████████| 267/267 [00:54<00:00,  4.88it/s, loss=1.47]


Epoch 2 completed in 54.69 seconds. Loss: 1.5153
Estimated remaining time: 54.81 seconds
Epoch 2, Dev Accuracy: 0.4249


Epoch 3/3: 100%|██████████| 267/267 [00:54<00:00,  4.86it/s, loss=1.39]


Epoch 3 completed in 54.96 seconds. Loss: 1.3692
Estimated remaining time: 0.00 seconds
Epoch 3, Dev Accuracy: 0.4552
Total training time: 164.57 seconds
{"{'lr': 0.0001, 'batch_size': 16, 'model_name': 'distilbert-base-uncased'}": 0.4429864287376404, "{'lr': 1e-05, 'batch_size': 16, 'model_name': 'distilbert-base-uncased'}": 0.5203620195388794, "{'lr': 1e-06, 'batch_size': 16, 'model_name': 'distilbert-base-uncased'}": 0.4791855216026306, "{'lr': 0.0001, 'batch_size': 32, 'model_name': 'distilbert-base-uncased'}": 0.4837104082107544, "{'lr': 1e-05, 'batch_size': 32, 'model_name': 'distilbert-base-uncased'}": 0.5140271782875061, "{'lr': 1e-06, 'batch_size': 32, 'model_name': 'distilbert-base-uncased'}": 0.4552036225795746}
