# Roberta-base Model
## 27 Emotions + Neutral

### Import Libraries

In [1]:
import numpy as np
import pandas as pd
from sklearn import metrics
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import AutoTokenizer, AutoModel, AdamW
import warnings
warnings.filterwarnings('ignore')

pd.set_option("display.max_columns", None)

device = 'cuda' if torch.cuda.is_available() else 'cpu'
torch.cuda.empty_cache()

### Import Datasets

In [2]:
df_train = pd.read_csv('train27.csv')
df_test = pd.read_csv('test27.csv')

### Model Parameters

In [3]:
MAX_LEN = 200
TRAIN_BATCH_SIZE = 64
VALID_BATCH_SIZE = 64
EPOCHS = 20
LEARNING_RATE = 2e-5
model_id = 'roberta-base'
tokenizer = AutoTokenizer.from_pretrained(model_id)
target_cols = [col for col in df_train.columns if col not in ['text']]

### Load Datasets and Model

In [4]:
class MyDataset(Dataset):
    def __init__(self, df, tokenizer, max_len):
        self.df = df
        self.max_len = max_len
        self.text = df.text
        self.tokenizer = tokenizer
        self.targets = df[target_cols].values

    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, index):
        text = self.text[index]
        inputs = self.tokenizer.encode_plus(
            str(text),
            truncation = True,
            add_special_tokens = True,
            max_length = self.max_len,
            padding = 'max_length',
            return_token_type_ids = True
        )
        ids = inputs['input_ids']
        mask = inputs['attention_mask']
        token_type_ids = inputs['token_type_ids']

        return {
            'ids' : torch.tensor(ids, dtype = torch.long),
            'mask' : torch.tensor(mask, dtype = torch.long),
            'token_type_ids' : torch.tensor(token_type_ids, dtype = torch.long),
            'targets' : torch.tensor(self.targets[index], dtype = torch.float)
        }

In [5]:
train_dataset = MyDataset(df_train, tokenizer, MAX_LEN)
test_dataset = MyDataset(df_test, tokenizer, MAX_LEN)

In [6]:
train_loader = DataLoader(train_dataset, batch_size = TRAIN_BATCH_SIZE,
                          num_workers = 4, shuffle = True, pin_memory = True)
test_loader = DataLoader(test_dataset, batch_size = VALID_BATCH_SIZE,
                          num_workers = 4, shuffle = False, pin_memory = True)

In [7]:
class ModelClass(torch.nn.Module):
    def __init__(self):
        super(ModelClass, self).__init__()
        self.roberta = AutoModel.from_pretrained(model_id)
        self.fc = torch.nn.Linear(768, 28)

    def forward(self, ids, mask, token_type_ids):
        _, features = self.roberta(ids, attention_mask = mask, token_type_ids = token_type_ids, return_dict = False)
        output = self.fc(features)
        return output
    
model = ModelClass()
model.to(device)

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


ModelClass(
  (roberta): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(50265, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0-11): 12 x RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNor

In [8]:
def loss_fn(outputs, targets):
    return torch.nn.BCEWithLogitsLoss()(outputs, targets)

optimizer = AdamW(params = model.parameters(), lr = LEARNING_RATE, weight_decay = 1e-6)

### Model Training

In [9]:
def train(epoch):
    model.train()
    for _,data in enumerate(train_loader, 0):
        ids = data['ids'].to(device, dtype = torch.long)
        mask = data['mask'].to(device, dtype = torch.long)
        token_type_ids = data['token_type_ids'].to(device, dtype = torch.long)
        targets = data['targets'].to(device, dtype = torch.float)

        outputs = model(ids, mask, token_type_ids)

        loss = loss_fn(outputs, targets)
        if _%910 == 0:
            print(f'Epoch: {epoch}, Loss:  {loss.item()}')
        
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

In [10]:
for epoch in range(EPOCHS):
    train(epoch)

Epoch: 0, Loss:  0.6890712976455688
Epoch: 1, Loss:  0.19513031840324402
Epoch: 2, Loss:  0.17381398379802704
Epoch: 3, Loss:  0.17170092463493347
Epoch: 4, Loss:  0.16829028725624084
Epoch: 5, Loss:  0.16998213529586792
Epoch: 6, Loss:  0.13944374024868011
Epoch: 7, Loss:  0.18443332612514496
Epoch: 8, Loss:  0.1524791121482849
Epoch: 9, Loss:  0.14123478531837463
Epoch: 10, Loss:  0.11442680656909943
Epoch: 11, Loss:  0.12706799805164337
Epoch: 12, Loss:  0.0996953472495079
Epoch: 13, Loss:  0.0862596407532692
Epoch: 14, Loss:  0.08948088437318802
Epoch: 15, Loss:  0.08550835400819778
Epoch: 16, Loss:  0.08034571260213852
Epoch: 17, Loss:  0.05979788675904274
Epoch: 18, Loss:  0.0634041428565979
Epoch: 19, Loss:  0.057867903262376785


### Model Testing

In [11]:
def validation():
    model.eval()
    fin_targets=[]
    fin_outputs=[]
    with torch.no_grad():
        for _, data in enumerate(test_loader, 0):
            ids = data['ids'].to(device, dtype = torch.long)
            mask = data['mask'].to(device, dtype = torch.long)
            token_type_ids = data['token_type_ids'].to(device, dtype = torch.long)
            targets = data['targets'].to(device, dtype = torch.float)
            outputs = model(ids, mask, token_type_ids)
            fin_targets.extend(targets.cpu().detach().numpy().tolist())
            fin_outputs.extend(torch.sigmoid(outputs).cpu().detach().numpy().tolist())
    return fin_outputs, fin_targets

In [12]:
outputs, targets = validation()
outputs = np.array(outputs) >= 0.5
accuracy = metrics.accuracy_score(targets, outputs)
f1_score_micro = metrics.f1_score(targets, outputs, average='micro')
f1_score_macro = metrics.f1_score(targets, outputs, average='macro')
precision_micro = metrics.precision_score(targets, outputs, average = 'micro')
precision_macro = metrics.precision_score(targets, outputs, average = 'macro')
recall_micro = metrics.recall_score(targets, outputs, average = 'micro')
recall_macro = metrics.recall_score(targets, outputs, average = 'macro')
print(f"Accuracy = {accuracy}")
print(f"F1 Score (Micro) = {f1_score_micro}")
print(f"F1 Score (Macro) = {f1_score_macro}")
print(f"Precision (Micro) = {precision_micro}")
print(f"Precision (Macro) = {precision_macro}")
print(f"Recall (Micro) = {recall_micro}")
print(f"Recall (Macro) = {recall_macro}")

Accuracy = 0.1
F1 Score (Micro) = 0.5298013245033112
F1 Score (Macro) = 0.34049356301660444
Precision (Micro) = 0.547945205479452
Precision (Macro) = 0.3628205128205128
Recall (Micro) = 0.5128205128205128
Recall (Macro) = 0.3551020408163265
