# How accurate will RoBERTa, a transformer-based model, be without much fine-tuning in comparison to the other methods?

Compared to the previous methods we used on our data, RoBERTa has a much more complex architecture. Because of this, our expectation is that in the long run it will perform better than methods like multinomial Naive Bayes and clustering. However, the results of RoBERTa depend on the fine-tuning of various knobs in the model, so it might take time to find the right training environment.

In [1]:
import torch
import pandas as pd
from transformers import RobertaTokenizer, RobertaForSequenceClassification
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder

In [2]:
posts = pd.read_csv('/kaggle/input/all-data/AllDataFiltered.csv')

In [3]:
posts.shape

(10584, 2)

In [4]:
from sklearn.preprocessing import LabelEncoder
tokenizer = RobertaTokenizer.from_pretrained("roberta-base")

encoded_body = tokenizer(posts['combined_text'].tolist(), padding=True, truncation=True, return_tensors='pt')

label_encoder = LabelEncoder()
encoded_flairs = label_encoder.fit_transform(posts['link_flair_text'])

# Split the data
X_train, X_test, y_train, y_test = train_test_split(encoded_body['input_ids'], encoded_flairs, test_size=0.2, random_state=52)
X_test, X_val, y_test, y_val = train_test_split(X_test, y_test, test_size=0.2, random_state=52)

tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]

In [5]:
train_dataset = TensorDataset(X_train, torch.tensor(y_train))
test_dataset = TensorDataset(X_test, torch.tensor(y_test))
val_dataset = TensorDataset(X_val, torch.tensor(y_val))

In [6]:
model = RobertaForSequenceClassification.from_pretrained('roberta-base', num_labels=len(set(encoded_flairs)))
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.to(device)

train_loader = DataLoader(train_dataset, batch_size=10, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=10, shuffle=False)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-5)

model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [7]:
from tqdm import tqdm

model.train()
for epoch in range(3):
    print(f'Epoch {epoch}')
    for i, batch in enumerate(tqdm(train_loader)):
        batch = [item.to(device) for item in batch]
        inputs, labels = batch
        inputs = inputs.long()
        labels = labels.long()
        optimizer.zero_grad()
        outputs = model(inputs, labels=labels)
        loss = outputs.loss
        loss.backward()
        optimizer.step()
    
    model.eval()
    val_losses = []
    with torch.no_grad():
        for i, val_batch in enumerate(tqdm(val_loader)):
            val_batch = [item.to(device) for item in val_batch]
            val_inputs, val_labels = val_batch
            val_inputs = val_inputs.long()
            val_labels = val_labels.long()
            val_outputs = model(val_inputs, labels=val_labels)
            val_loss = val_outputs.loss
            val_losses.append(val_loss.item())
            
    print(f"end of Epoch {epoch}")


Epoch 0


  0%|          | 0/847 [00:00<?, ?it/s]We strongly recommend passing in an `attention_mask` since your input_ids may be padded. See https://huggingface.co/docs/transformers/troubleshooting#incorrect-output-when-padding-tokens-arent-masked.
100%|██████████| 847/847 [07:49<00:00,  1.81it/s]
100%|██████████| 43/43 [00:07<00:00,  5.38it/s]


end of Epoch 0
Epoch 1


100%|██████████| 847/847 [07:35<00:00,  1.86it/s]
100%|██████████| 43/43 [00:07<00:00,  5.39it/s]


end of Epoch 1
Epoch 2


100%|██████████| 847/847 [07:35<00:00,  1.86it/s]
100%|██████████| 43/43 [00:07<00:00,  5.39it/s]

end of Epoch 2





In [8]:
model.eval()
predictions = []
true_labels = []
test_loader = DataLoader(test_dataset, batch_size=10, shuffle=False)

In [17]:
threshold = 0.33

correct = 0
total = 0

with torch.no_grad():
    for i, batch in enumerate(tqdm(test_loader)):
        batch = [item.to(device) for item in batch]
        inputs, labels = batch
        outputs = model(inputs)
        probs = torch.softmax(outputs.logits, dim=-1)
        high_prob_indices = torch.where(probs > threshold)[1]

        correct += (labels.cpu().tolist()[0] in high_prob_indices)
        total += 1

accuracy = correct / total
print("Accuracy:", accuracy)

100%|██████████| 170/170 [00:30<00:00,  5.57it/s]

Accuracy: 0.8705882352941177





In [12]:
with torch.no_grad():
    for i, batch in enumerate(tqdm(test_loader)):
        batch = [item.to(device) for item in batch]
        inputs, labels = batch
        outputs = model(inputs)
        logits = outputs.logits
        predictions.extend(torch.argmax(logits, dim=1).cpu().tolist())
        true_labels.extend(labels.cpu().tolist())

100%|██████████| 170/170 [00:30<00:00,  5.56it/s]


In [13]:
accuracy = accuracy_score(true_labels, predictions)
print("Accuracy:", accuracy)

Accuracy: 0.5723567631423508


In [12]:
from sklearn.metrics import classification_report
print(classification_report(true_labels, predictions))

              precision    recall  f1-score   support

           0       0.66      0.80      0.72       387
           1       0.71      0.61      0.66       180
           2       0.54      0.50      0.52       369
           3       0.44      0.07      0.13       164
           4       0.50      0.39      0.44        28
           5       0.45      0.84      0.59        86
           6       0.80      0.96      0.87        72
           7       0.38      0.24      0.30        45
           8       0.54      0.62      0.58        61
           9       0.54      0.60      0.57       301

    accuracy                           0.59      1693
   macro avg       0.56      0.56      0.54      1693
weighted avg       0.58      0.59      0.56      1693



In [13]:
model.push_to_hub("Flair-It-RoBERTa", use_auth_token="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")



model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/aalexzhang/Flair-It-RoBERTa/commit/12e60be2b37aaa2438358cd3446f2e76c966bc93', commit_message='Upload RobertaForSequenceClassification', commit_description='', oid='12e60be2b37aaa2438358cd3446f2e76c966bc93', pr_url=None, pr_revision=None, pr_num=None)