In [1]:
from torch.utils.data import DataLoader, Dataset
from transformers import AdamW, BertConfig, RobertaForSequenceClassification

from media_frame_transformer.dataset import PrimaryFrameDataset
from transformers import AdamW
from torch.nn import functional as F

# Issue name passed (inside a list) to PrimaryFrameDataset for both splits.
ISSUE = 'climate'
# Samples per batch for both the train and test DataLoader.
BATCH_SIZE = 5
# Value passed as `num_workers` to both DataLoaders.
NUM_DATALOADER_WORKER = 2

In [2]:
# Build the train/test splits for the configured issue and wrap each in a DataLoader.
train_set = PrimaryFrameDataset([ISSUE], "train")
test_set = PrimaryFrameDataset([ISSUE], "test")

# Training batches are reshuffled every epoch.
train_loader = DataLoader(
    train_set,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=NUM_DATALOADER_WORKER,
)
# Evaluation does not benefit from shuffling; a fixed order keeps per-batch
# results reproducible across runs.
test_loader = DataLoader(
    test_set,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=NUM_DATALOADER_WORKER,
)

PrimaryFrameDataset: 100%|██████████| 1/1 [00:00<00:00,  2.91it/s]
PrimaryFrameDataset: 100%|██████████| 1/1 [00:00<00:00,  2.68it/s]


In [3]:
# Number of batches each DataLoader yields (train, test).
len(train_loader), len(test_loader)

(754, 80)

In [4]:
# Number of examples in each split (train, test).
len(train_set), len(test_set)

(3770, 399)

In [5]:
# Load pretrained roberta-base with a sequence-classification head and move it to the GPU.
# NOTE(review): num_labels=15 is presumably the number of frame classes — confirm against the dataset.
model = RobertaForSequenceClassification.from_pretrained(
    "roberta-base",
    num_labels=15,
    # Only the logits are needed downstream, so skip the optional extras.
    output_attentions=False,
    output_hidden_states=False,
).cuda()

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifie

In [21]:
# Fine-tune the classifier for a single pass over train_loader.
optimizer = AdamW(model.parameters(), lr=1e-5)

# Training mode is loop-invariant, so switch it on once rather than per batch.
model.train()

for step, batch in enumerate(train_loader):
    optimizer.zero_grad()

    # NOTE(review): assumes batch['x'] holds token ids and batch['y'] class indices — confirm.
    x = batch['x'].cuda()
    y = batch['y'].cuda()

    # NOTE(review): no attention_mask is passed; if batch['x'] is padded, the
    # padding tokens are attended to — confirm against PrimaryFrameDataset.
    outputs = model(x)
    loss = F.cross_entropy(outputs.logits, y)
    loss.backward()
    optimizer.step()

    # Log a plain float instead of the full tensor repr (device, grad_fn).
    print(step, loss.item())


0 tensor(0.9658, device='cuda:0', grad_fn=<NllLossBackward>)
1 tensor(0.0222, device='cuda:0', grad_fn=<NllLossBackward>)
2 tensor(0.2631, device='cuda:0', grad_fn=<NllLossBackward>)
3 tensor(0.4020, device='cuda:0', grad_fn=<NllLossBackward>)
4 tensor(1.4566, device='cuda:0', grad_fn=<NllLossBackward>)
5 tensor(0.0347, device='cuda:0', grad_fn=<NllLossBackward>)
6 tensor(0.0725, device='cuda:0', grad_fn=<NllLossBackward>)
7 tensor(0.1395, device='cuda:0', grad_fn=<NllLossBackward>)
8 tensor(0.2675, device='cuda:0', grad_fn=<NllLossBackward>)
9 tensor(0.1560, device='cuda:0', grad_fn=<NllLossBackward>)
10 tensor(0.1382, device='cuda:0', grad_fn=<NllLossBackward>)
11 tensor(0.1191, device='cuda:0', grad_fn=<NllLossBackward>)
12 tensor(0.7737, device='cuda:0', grad_fn=<NllLossBackward>)
13 tensor(0.0172, device='cuda:0', grad_fn=<NllLossBackward>)
14 tensor(0.1483, device='cuda:0', grad_fn=<NllLossBackward>)
15 tensor(0.1991, device='cuda:0', grad_fn=<NllLossBackward>)
16 tensor(0.0190, 

In [22]:
from tqdm import tqdm
import torch

# Accuracy of the current model on the training split.
num_correct = 0

# Inference mode is loop-invariant, so set it once before iterating.
model.eval()

with torch.no_grad():
    for batch in tqdm(train_loader):
        x = batch['x'].cuda()
        y = batch['y'].cuda()
        preds = torch.argmax(model(x).logits, dim=-1)
        # .item() keeps the running count a Python int instead of a CUDA tensor.
        num_correct += (preds == y).sum().item()

# Prints a plain float (fraction of correctly classified training examples).
print(num_correct / len(train_set))

100%|██████████| 754/754 [01:01<00:00, 12.36it/s]
tensor(0.9719, device='cuda:0')


In [23]:
from tqdm import tqdm
import torch

# Accuracy of the current model on the held-out test split.
num_correct = 0

# Inference mode is loop-invariant, so set it once before iterating.
model.eval()

with torch.no_grad():
    for batch in tqdm(test_loader):
        x = batch['x'].cuda()
        y = batch['y'].cuda()
        preds = torch.argmax(model(x).logits, dim=-1)
        # .item() keeps the running count a Python int instead of a CUDA tensor.
        num_correct += (preds == y).sum().item()

# Prints a plain float (fraction of correctly classified test examples).
print(num_correct / len(test_set))

100%|██████████| 80/80 [00:06<00:00, 12.38it/s]
tensor(0.7519, device='cuda:0')
