In [1]:
from torch.utils.data import DataLoader, Dataset
from transformers import AdamW, BertConfig, RobertaForSequenceClassification

from media_frame_transformer.dataset import PrimaryFrameDataset
from transformers import AdamW
from torch.nn import functional as F
# from config import ISSUES
from tqdm import tqdm

ISSUES = [
    "climate",
    # "deathpenalty",
    # "guncontrol",
    # "immigration",
    # "samesex",
    # "tobacco",
]

BATCH_SIZE = 25
NUM_DATALOADER_WORKER = 4

In [2]:
train_set = PrimaryFrameDataset(ISSUES, "train")
test_set = PrimaryFrameDataset(["climate"], "test")
train_loader = DataLoader(
    train_set,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=NUM_DATALOADER_WORKER,
)
test_loader = DataLoader(
    test_set,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=NUM_DATALOADER_WORKER,
)

PrimaryFrameDataset: 100%|██████████| 1/1 [00:00<00:00,  2.71it/s]
PrimaryFrameDataset: 100%|██████████| 1/1 [00:00<00:00,  2.41it/s]


In [3]:
len(train_loader), len(test_loader)

(151, 16)

In [4]:
len(train_set), len(test_set)

(3770, 399)

In [5]:
model = RobertaForSequenceClassification.from_pretrained(
    "roberta-base",
    num_labels=15,
    output_attentions=False,  # Whether the model returns attentions weights.
    output_hidden_states=False,  # Whether the model returns all hidden-states.
)
model = model.cuda()

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifie

In [6]:
optimizer = AdamW(model.parameters(),  lr = 1e-5)

for i, batch in enumerate(tqdm(train_loader)):
    optimizer.zero_grad()
    model.train()

    x = batch['x'].cuda()
    y = batch['y'].cuda()
    print(x.shape, y.shape)

    outputs = model(x)
    loss = F.cross_entropy(outputs.logits, y)
    loss.backward()
    optimizer.step()
    
    if i%10 == 0:
        print(i, loss / y.shape[0])

  0%|          | 0/151 [00:00<?, ?it/s]torch.Size([25, 512]) torch.Size([25])
  1%|          | 1/151 [00:00<01:19,  1.88it/s]0 tensor(0.1078, device='cuda:0', grad_fn=<DivBackward0>)
torch.Size([25, 512]) torch.Size([25])
  1%|▏         | 2/151 [00:00<01:09,  2.15it/s]torch.Size([25, 512]) torch.Size([25])
  2%|▏         | 3/151 [00:01<01:05,  2.24it/s]torch.Size([25, 512]) torch.Size([25])
  3%|▎         | 4/151 [00:01<01:04,  2.29it/s]torch.Size([25, 512]) torch.Size([25])
  3%|▎         | 5/151 [00:02<01:02,  2.32it/s]torch.Size([25, 512]) torch.Size([25])
  4%|▍         | 6/151 [00:02<01:02,  2.34it/s]torch.Size([25, 512]) torch.Size([25])
  5%|▍         | 7/151 [00:03<01:01,  2.35it/s]torch.Size([25, 512]) torch.Size([25])
  5%|▌         | 8/151 [00:03<01:00,  2.35it/s]torch.Size([25, 512]) torch.Size([25])
  6%|▌         | 9/151 [00:03<01:00,  2.35it/s]torch.Size([25, 512]) torch.Size([25])
  7%|▋         | 10/151 [00:04<00:59,  2.36it/s]torch.Size([25, 512]) torch.Size([25])
  7

KeyboardInterrupt: 

In [10]:
from tqdm import tqdm
import torch  

num_correct = 0

with torch.no_grad():
    for i, batch in enumerate(tqdm(train_loader)):
        model.eval()
        x = batch['x'].cuda()
        y = batch['y'].cuda()
        outputs = model(x)
        preds = torch.argmax(outputs.logits, dim=-1)
        correct = (preds == y)
        num_correct += correct.sum()

print(num_correct / len(train_set))

100%|██████████| 151/151 [00:20<00:00,  7.21it/s]
tensor(0.6546, device='cuda:0')


In [11]:
from tqdm import tqdm
import torch  

num_correct = 0

with torch.no_grad():
    for i, batch in enumerate(tqdm(test_loader)):
        model.eval()
        x = batch['x'].cuda()
        y = batch['y'].cuda()
        outputs = model(x)
        preds = torch.argmax(outputs.logits, dim=-1)
        correct = (preds == y)
        num_correct += correct.sum()

print(num_correct / len(test_set))

100%|██████████| 16/16 [00:02<00:00,  7.21it/s]
tensor(0.6441, device='cuda:0')
