# How accurate will RoBERTa, a transformer-based model, be without much fine-tuning in comparison to the other methods?

Compared to the previous methods we used on our data, RoBERTa has a much more complex architecture. Because of this, our expectation is that in the long run it will perform better than methods like multinomial Naive Bayes and clustering. However, the results of RoBERTa depend on the fine-tuning of various knobs in the model, so it might take time to find the right training environment.

In [1]:
import torch
import pandas as pd
from transformers import RobertaTokenizer, RobertaForSequenceClassification
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder

In [88]:
posts = pd.read_csv('/kaggle/input/all-data/AllDataFiltered.csv')

In [89]:
posts.shape

(10584, 2)

In [90]:
from sklearn.preprocessing import LabelEncoder
tokenizer = RobertaTokenizer.from_pretrained("roberta-base")

encoded_body = tokenizer(posts['combined_text'].tolist(), padding=True, truncation=True, return_tensors='pt')

label_encoder = LabelEncoder()
encoded_flairs = label_encoder.fit_transform(posts['link_flair_text'])

# Split the data
X_train, X_test, y_train, y_test = train_test_split(encoded_body['input_ids'], encoded_flairs, test_size=0.2, random_state=52)
X_test, X_val, y_test, y_val = train_test_split(X_test, y_test, test_size=0.2, random_state=52)

In [91]:
train_dataset = TensorDataset(X_train, torch.tensor(y_train))
test_dataset = TensorDataset(X_test, torch.tensor(y_test))
val_dataset = TensorDataset(X_val, torch.tensor(y_val))
model = RobertaForSequenceClassification.from_pretrained('roberta-base', num_labels=len(set(encoded_flairs)))

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [92]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.to(device)

train_loader = DataLoader(train_dataset, batch_size=10, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=10, shuffle=False)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-5)

In [93]:
from tqdm import tqdm

model.train()
for epoch in range(3):
    print(f'Epoch {epoch}')
    for i, batch in enumerate(tqdm(train_loader)):
        batch = [item.to(device) for item in batch]
        inputs, labels = batch
        inputs = inputs.long()
        labels = labels.long()
        optimizer.zero_grad()
        outputs = model(inputs, labels=labels)
        loss = outputs.loss
        loss.backward()
        optimizer.step()
    
    model.eval()
    val_losses = []
    with torch.no_grad():
        for i, val_batch in enumerate(tqdm(val_loader)):
            val_batch = [item.to(device) for item in val_batch]
            val_inputs, val_labels = val_batch
            val_inputs = val_inputs.long()
            val_labels = val_labels.long()
            val_outputs = model(val_inputs, labels=val_labels)
            val_loss = val_outputs.loss
            val_losses.append(val_loss.item())
            
    print(f"end of Epoch {epoch}")


Epoch 0


100%|██████████| 847/847 [07:37<00:00,  1.85it/s]
100%|██████████| 43/43 [00:07<00:00,  5.43it/s]


end of Epoch 0
Epoch 1


100%|██████████| 847/847 [07:23<00:00,  1.91it/s]
100%|██████████| 43/43 [00:07<00:00,  5.43it/s]


end of Epoch 1
Epoch 2


100%|██████████| 847/847 [07:23<00:00,  1.91it/s]
100%|██████████| 43/43 [00:07<00:00,  5.43it/s]

end of Epoch 2





In [94]:
model.eval()
predictions = []
true_labels = []
test_loader = DataLoader(test_dataset, batch_size=10, shuffle=False)

In [95]:
with torch.no_grad():
    for i, batch in enumerate(tqdm(test_loader)):
        batch = [item.to(device) for item in batch]
        inputs, labels = batch
        outputs = model(inputs)
        logits = outputs.logits
        predictions.extend(torch.argmax(logits, dim=1).cpu().tolist())
        true_labels.extend(labels.cpu().tolist())

100%|██████████| 170/170 [00:30<00:00,  5.60it/s]


In [96]:
accuracy = accuracy_score(true_labels, predictions)
print("Accuracy:", accuracy)

Accuracy: 0.5871234494979327


In [97]:
from sklearn.metrics import classification_report
print(classification_report(true_labels, predictions))

              precision    recall  f1-score   support

           0       0.68      0.72      0.70       387
           1       0.69      0.66      0.67       180
           2       0.53      0.53      0.53       369
           3       0.43      0.16      0.24       164
           4       0.47      0.50      0.48        28
           5       0.54      0.72      0.62        86
           6       0.74      0.97      0.84        72
           7       0.35      0.13      0.19        45
           8       0.63      0.48      0.54        61
           9       0.51      0.64      0.57       301

    accuracy                           0.59      1693
   macro avg       0.56      0.55      0.54      1693
weighted avg       0.58      0.59      0.57      1693



             precision    recall  f1-score   support

   academics       0.62      0.70      0.66       436
  admissions       0.72      0.60      0.66       195
      advice       0.44      0.55      0.49       468
  discussion       0.35      0.04      0.07       161
       event       1.00      0.09      0.16        23
        meme       0.00      0.00      0.00        16
        poll       0.82      0.89      0.85        94
         psa       0.00      0.00      0.00        27
        rant       0.64      0.14      0.23        65
student life       0.53      0.71      0.60       322

    accuracy                           0.56      1807
   macro avg       0.51      0.37      0.37      1807
weighted avg       0.54      0.56      0.52      1807