In [1]:
from transformers import RobertaModel, RobertaTokenizer
from torch.utils.data import DataLoader

In [2]:
# Load RoBERTa tokenizer and model
tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
roberta = RobertaModel.from_pretrained("roberta-base")

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [3]:
for param in roberta.parameters():
    param.requires_grad = False  # Freeze all layers

In [4]:
import pandas as pd
import torch
from torch.utils.data import Dataset

LABEL_MAPPING = {
    "pants-fire": 0,
    "false": 1,
    "barely-true": 2,
    "half-true": 3,
    "mostly-true": 4,
    "true": 5,
}


class LiarPlusStatementsDataset(Dataset):
    def __init__(self, filepath: str, tokenizer, max_length: int = 128):
        self.df = pd.read_csv(filepath, sep="\t")
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.df.index)

    def __getitem__(self, index: int):
        statement = self.df.iloc[index]["statement"]
        label_str = self.df.iloc[index]["label"]

        # Convert label to integer
        label = LABEL_MAPPING[label_str]

        # Tokenize the statement
        encoding = self.tokenizer(
            statement,
            truncation=True,
            padding="max_length",
            max_length=self.max_length,
            return_tensors="pt",
        )

        return {
            "input_ids": encoding["input_ids"].squeeze(0),  # Remove batch dim
            "attention_mask": encoding["attention_mask"].squeeze(0),
            "label": torch.tensor(label, dtype=torch.long),  # Ensure tensor
        }


In [5]:
training_data = LiarPlusStatementsDataset("data/train2.tsv", tokenizer)

In [6]:
batch_size = 64

train_dataloader = DataLoader(
    training_data, batch_size=batch_size, shuffle=True
)

In [7]:
import torch
import torch.nn as nn


class LiarPlusStatementsClassifier(nn.Module):
    def __init__(self, encoder_model, num_classes):
        super(LiarPlusStatementsClassifier, self).__init__()
        self.encoder = encoder_model  # Pretrained encoder
        self.fc = nn.Linear(
            self.encoder.config.hidden_size, num_classes
        )  # Custom classifier

    def forward(self, input_ids, attention_mask):
        with torch.no_grad():  # Ensure encoder remains frozen
            outputs = self.encoder(
                input_ids=input_ids, attention_mask=attention_mask
            )
        # Use [CLS] token output
        cls_output = outputs.last_hidden_state[:, 0, :]
        print(cls_output.shape)
        logits = self.fc(cls_output)  # Pass through trainable classifier
        return logits


In [8]:
num_classes = 6

In [9]:
model = LiarPlusStatementsClassifier(roberta, num_classes)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

LiarPlusStatementsClassifier(
  (encoder): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(50265, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0-11): 12 x RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
             

In [10]:
example = next(iter(train_dataloader))

In [11]:
example

{'input_ids': tensor([[    0, 28084,     7,  ...,     1,     1,     1],
         [    0,   104,  4113,  ...,     1,     1,     1],
         [    0,  3972,   582,  ...,     1,     1,     1],
         ...,
         [    0,  1779,     5,  ...,     1,     1,     1],
         [    0, 39254,  5141,  ...,     1,     1,     1],
         [    0, 34053,  8279,  ...,     1,     1,     1]]),
 'attention_mask': tensor([[1, 1, 1,  ..., 0, 0, 0],
         [1, 1, 1,  ..., 0, 0, 0],
         [1, 1, 1,  ..., 0, 0, 0],
         ...,
         [1, 1, 1,  ..., 0, 0, 0],
         [1, 1, 1,  ..., 0, 0, 0],
         [1, 1, 1,  ..., 0, 0, 0]]),
 'label': tensor([3, 2, 4, 1, 3, 1, 0, 0, 4, 1, 1, 4, 5, 1, 4, 2, 1, 1, 0, 2, 4, 3, 2, 3,
         3, 3, 3, 4, 2, 1, 3, 4, 4, 5, 4, 1, 1, 4, 1, 2, 3, 3, 1, 4, 2, 4, 2, 4,
         5, 2, 1, 1, 1, 3, 1, 5, 4, 5, 4, 3, 4, 5, 4, 1])}

In [12]:
example['input_ids'].shape

torch.Size([64, 128])

In [13]:
example['attention_mask'].shape

torch.Size([64, 128])

In [14]:
res = model(input_ids=example['input_ids'].to(device), attention_mask=example['attention_mask'].to(device))

  attn_output = torch.nn.functional.scaled_dot_product_attention(


torch.Size([64, 768])


In [15]:
res.shape

torch.Size([64, 6])

In [16]:
res

tensor([[ 0.0584,  0.3718,  0.2784,  0.1224, -0.2526, -0.0375],
        [ 0.0589,  0.3739,  0.2665,  0.1146, -0.2365, -0.0486],
        [ 0.0495,  0.3763,  0.2678,  0.1206, -0.2130, -0.0458],
        [ 0.0569,  0.3656,  0.2750,  0.1211, -0.2257, -0.0303],
        [ 0.0474,  0.3533,  0.3066,  0.1286, -0.2503, -0.0571],
        [ 0.0558,  0.3500,  0.2638,  0.1218, -0.2552, -0.0616],
        [ 0.0709,  0.3726,  0.2645,  0.0956, -0.2590, -0.0145],
        [ 0.0699,  0.3549,  0.2939,  0.1410, -0.2526, -0.0527],
        [ 0.0524,  0.3605,  0.2717,  0.1232, -0.2544, -0.0526],
        [ 0.0689,  0.3620,  0.2627,  0.1083, -0.2256, -0.0381],
        [ 0.0527,  0.3257,  0.2447,  0.1272, -0.2257, -0.0562],
        [ 0.0484,  0.3380,  0.2562,  0.1222, -0.2131, -0.0460],
        [ 0.0610,  0.3690,  0.2945,  0.1249, -0.2575, -0.0632],
        [ 0.0697,  0.3474,  0.2856,  0.1104, -0.2246, -0.0450],
        [ 0.0497,  0.3551,  0.2991,  0.1264, -0.2470, -0.0586],
        [ 0.0598,  0.3488,  0.2849,  0.1