In [1]:
from torch.utils.data import DataLoader, Dataset
from transformers import AdamW, BertConfig, RobertaForSequenceClassification

from media_frame_transformer.dataset import PrimaryFrameDataset
from transformers import AdamW
from torch.nn import functional as F
# from config import ISSUES
from tqdm import tqdm

ISSUES = [
    "climate",
    "deathpenalty",
    "guncontrol",
    # "immigration",
    "samesex",
    "tobacco",
]

BATCH_SIZE = 25
NUM_DATALOADER_WORKER = 4

In [2]:
train_set = PrimaryFrameDataset(ISSUES, "train")
test_set = PrimaryFrameDataset(["immigration"], "test")
train_loader = DataLoader(
    train_set,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=NUM_DATALOADER_WORKER,
)
test_loader = DataLoader(
    test_set,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=NUM_DATALOADER_WORKER,
)

PrimaryFrameDataset: 100%|██████████| 5/5 [00:02<00:00,  1.70it/s]
PrimaryFrameDataset: 100%|██████████| 1/1 [00:00<00:00,  2.02it/s]


In [3]:
len(train_loader), len(test_loader)

(922, 16)

In [4]:
len(train_set), len(test_set)

(23034, 399)

In [5]:
model = RobertaForSequenceClassification.from_pretrained(
    "roberta-base",
    num_labels=15,
    output_attentions=False,  # Whether the model returns attentions weights.
    output_hidden_states=False,  # Whether the model returns all hidden-states.
)
model = model.cuda()

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifie

In [12]:
optimizer = AdamW(model.parameters(),  lr = 1e-5)

for i, batch in enumerate(tqdm(train_loader)):
    optimizer.zero_grad()
    model.train()

    x = batch['x'].cuda()
    y = batch['y'].cuda()
    # print(x.shape, y.shape)

    outputs = model(x)
    loss = F.cross_entropy(outputs.logits, y)
    loss.backward()
    optimizer.step()
    
    if i%100 == 0:
        print(i, loss)

  0%|          | 1/922 [00:00<08:31,  1.80it/s]0 tensor(0.8725, device='cuda:0', grad_fn=<NllLossBackward>)
 11%|█         | 101/922 [00:43<05:50,  2.34it/s]100 tensor(0.8663, device='cuda:0', grad_fn=<NllLossBackward>)
 22%|██▏       | 201/922 [01:25<05:05,  2.36it/s]200 tensor(0.7479, device='cuda:0', grad_fn=<NllLossBackward>)
 33%|███▎      | 301/922 [02:08<04:23,  2.36it/s]300 tensor(0.7900, device='cuda:0', grad_fn=<NllLossBackward>)
 43%|████▎     | 401/922 [02:51<03:42,  2.35it/s]400 tensor(0.7998, device='cuda:0', grad_fn=<NllLossBackward>)
 54%|█████▍    | 501/922 [03:33<03:00,  2.33it/s]500 tensor(1.5004, device='cuda:0', grad_fn=<NllLossBackward>)
 65%|██████▌   | 601/922 [04:16<02:17,  2.34it/s]600 tensor(1.0457, device='cuda:0', grad_fn=<NllLossBackward>)
 76%|███████▌  | 701/922 [04:59<01:34,  2.34it/s]700 tensor(1.2280, device='cuda:0', grad_fn=<NllLossBackward>)
 87%|████████▋ | 801/922 [05:42<00:51,  2.34it/s]800 tensor(0.8271, device='cuda:0', grad_fn=<NllLossBackwar

In [13]:
from tqdm import tqdm
import torch  

num_correct = 0

with torch.no_grad():
    for i, batch in enumerate(tqdm(train_loader)):
        model.eval()
        x = batch['x'].cuda()
        y = batch['y'].cuda()
        outputs = model(x)
        preds = torch.argmax(outputs.logits, dim=-1)
        correct = (preds == y)
        num_correct += correct.sum()

print(num_correct / len(train_set))

100%|██████████| 922/922 [02:08<00:00,  7.17it/s]tensor(0.7478, device='cuda:0')



In [14]:
from tqdm import tqdm
import torch  

num_correct = 0

with torch.no_grad():
    for i, batch in enumerate(tqdm(test_loader)):
        model.eval()
        x = batch['x'].cuda()
        y = batch['y'].cuda()
        outputs = model(x)
        preds = torch.argmax(outputs.logits, dim=-1)
        correct = (preds == y)
        num_correct += correct.sum()

print(num_correct / len(test_set))

100%|██████████| 16/16 [00:02<00:00,  7.12it/s]
tensor(0.5263, device='cuda:0')
