<!--<badge>--><a href="https://colab.research.google.com/github/ankur-98/BERT_GLUE/blob/main/single_task.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a><!--</badge>-->

# Colab setup (run this cell only on Google Colab)

In [None]:
# Switch to GPU runtime
! git clone https://github.com/ankur-98/BERT_GLUE.git
import os 
os.chdir("BERT_GLUE")
! pip install datasets transformers

# Imports

In [1]:
import torch
from tqdm.auto import tqdm
from dataloader import get_dataloader
from transformers import BertForSequenceClassification
from train import training_step
from util import *

# Configs
### Tasks: {"cola", "mnli", "mnli-mm", "mrpc", "qnli", "qqp", "rte", "sst2", "stsb", "wnli"}

In [2]:
# Run configuration: everything a reader is expected to tune lives in this cell.

# Seed PyTorch's global RNG so repeated runs start from the same random state.
# NOTE(review): whether the dataloader/util helpers draw from other RNGs
# (e.g. `random`, numpy) is not visible from this notebook -- confirm.
seed = 42
torch.manual_seed(seed)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model_checkpoint = "bert-base-uncased"  # pre-trained checkpoint to fine-tune
task = "sst2"                           # one of the task names listed above
batch_size = 96                         # batch size passed to the dataloader
steps = 2000                            # total number of training steps to run
lr = 2e-5                               # learning rate passed to the optimizer
lr_scheduler_type = "linear" # "linear", "cosine", "cosine_with_restarts", "polynomial", "constant", "constant_with_warmup"

# Load Dataloader and Pre-trained BERT Model

In [3]:
# Size of the classification head for the selected task:
# 3 outputs for the mnli variants, a single output for stsb, 2 for everything else.
if task.startswith("mnli"):
    num_labels = 3
elif task == "stsb":
    num_labels = 1
else:
    num_labels = 2

# Training-split dataloader for the chosen task and checkpoint.
train_epoch_iterator = get_dataloader(
    task,
    model_checkpoint,
    "train",
    batch_size=batch_size,
)

# Load the pre-trained checkpoint with a head of the right size, then move it to `device`.
model = BertForSequenceClassification.from_pretrained(
    model_checkpoint,
    num_labels=num_labels,
)
model = model.to(device)

Reusing dataset glue (C:\Users\ankur\.cache\huggingface\datasets\glue\sst2\1.0.0\dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)
100%|██████████| 68/68 [00:02<00:00, 23.35ba/s]
100%|██████████| 1/1 [00:00<00:00, 24.39ba/s]
100%|██████████| 2/2 [00:00<00:00, 23.53ba/s]


DatasetDict({
    train: Dataset({
        features: ['attention_mask', 'idx', 'input_ids', 'label', 'sentence', 'token_type_ids'],
        num_rows: 67349
    })
    validation: Dataset({
        features: ['attention_mask', 'idx', 'input_ids', 'label', 'sentence', 'token_type_ids'],
        num_rows: 872
    })
    test: Dataset({
        features: ['attention_mask', 'idx', 'input_ids', 'label', 'sentence', 'token_type_ids'],
        num_rows: 1821
    })
})


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

# Optimizer and LR Scheduler

In [4]:
# Optimizer over the model, configured with the learning rate from the config cell.
Optimizer = create_optimizer(model, learning_rate=lr)

# Learning-rate schedule of the configured type, attached to that optimizer.
LR_scheduler = create_scheduler(Optimizer, lr_scheduler_type)

# Metric objects for the task; the training loop treats Metric_1 as optional (it may be None).
Metric, Metric_1 = get_metrics(task)

# Per-step histories that the training loop appends to.
tr_loss, tr_metric, tr_metric_1 = [], [], []

# Training loop

In [6]:
# Run exactly `steps` training steps, cycling through the training dataloader as
# many times as needed and showing running averages (over at most the last
# epoch's worth of steps) on the progress bar.
global_steps = 0
steps_per_epoch = len(train_epoch_iterator)
trange = range(steps_per_epoch)

# Ceiling division: just enough passes over the data to reach `steps`.
# `steps // steps_per_epoch + 1` scheduled one extra full epoch whenever `steps`
# was an exact multiple of the epoch length (and a full epoch for steps == 0),
# because the inner `break` never stopped the outer loop.
num_epochs = (steps + steps_per_epoch - 1) // steps_per_epoch

metric_name = Metric.__class__.__name__
metric_1_name = Metric_1.__class__.__name__ if Metric_1 is not None else None

# The bar is advanced manually, so it is not given an iterable; the context
# manager closes it even if training is interrupted.
with tqdm(initial=global_steps, total=steps) as pbar:
    for e in range(num_epochs):
        iterator = iter(train_epoch_iterator)
        for step in trange:
            # Built-in next(): the `.next()` method is a Python 2-era alias that
            # is not part of the Python 3 iterator protocol.
            inputs = prepare_inputs(next(iterator), device)
            step_loss, step_metric, step_metric_1 = training_step(
                model, inputs, Optimizer, LR_scheduler, Metric, Metric_1
            )
            global_steps += 1

            # detach() keeps the history from holding on to an autograd graph.
            # NOTE(review): training_step may already return a detached loss,
            # in which case this is a no-op -- confirm.
            tr_loss.append(step_loss.detach())
            tr_metric.append(torch.tensor(list(step_metric.values())[0]))
            if Metric_1 is not None:
                tr_metric_1.append(torch.tensor(list(step_metric_1.values())[0]))

            # Slice the lists before stacking so each step only stacks the
            # trailing window instead of the whole history.
            step_evaluation = {}
            step_evaluation['loss'] = torch.stack(tr_loss[-steps_per_epoch:]).mean().item()
            step_evaluation[f"{metric_name}"] = torch.stack(tr_metric[-steps_per_epoch:]).mean().item()
            if Metric_1 is not None:
                step_evaluation[f"{metric_1_name}"] = torch.stack(tr_metric_1[-steps_per_epoch:]).mean().item()

            # Count the step only once it has actually completed.
            pbar.update()
            pbar.set_postfix(step_evaluation)

            if global_steps >= steps:
                break

100%|██████████| 2000/2000 [27:09<00:00,  1.39it/s, loss=0.635, Accuracy=0.658]