In [None]:
# !pip install numpy
# !pip install pandas
# !pip install scikit-learn
# !pip install torch
# !pip install transformers


from transformers import RobertaModel, RobertaTokenizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, classification_report, hamming_loss, roc_auc_score, average_precision_score
from collections import defaultdict
from torch.amp import autocast, GradScaler
import torch.nn.functional as F
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import time

## Hyperparameters
MAX_LEN = 128  # token length each sample is padded/truncated to (passed to CustomDataset)
TRAIN_BATCH_SIZE = 32  # batch size of the training DataLoader
VALID_BATCH_SIZE = 32  # batch size of the validation DataLoader
TEST_BATCH_SIZE = 32  # batch size of the test DataLoader
EPOCHS = 12  # number of train + validate passes in the main loop
LEARNING_RATE = 1e-05  # initial AdamW learning rate; adjusted once per epoch by the LR scheduler
THRESHOLD = 0.5 # sigmoid decision threshold; NOTE(review): not referenced by any code visible in this file


## Dataset Class
class CustomDataset(torch.utils.data.Dataset):
    """Map-style dataset that tokenizes one text row per ``__getitem__`` call.

    Args:
        df: DataFrame with a ``'text'`` column and the ``target_column`` column.
        tokenizer: Callable Hugging Face tokenizer (called as ``tokenizer(text, ...)``).
        max_len: Length every encoded sequence is padded/truncated to.
        target_column: Name of the column holding the integer class id.
    """

    def __init__(self, df, tokenizer, max_len, target_column):
        self.tokenizer = tokenizer
        self.texts = df['text'].tolist()    # raw input strings
        self.labels = df[target_column].tolist()  # class ids from the caller-chosen column
        self.max_len = max_len

    def __len__(self):
        """Return the number of rows in the dataset."""
        return len(self.texts)

    def __getitem__(self, index):
        """Tokenize row ``index`` and return a dict of 1-D tensors.

        Returns:
            Dict with keys ``'input_ids'``, ``'attention_mask'``,
            ``'token_type_ids'`` (each flattened to 1-D) and ``'targets'``
            (a ``torch.long`` scalar).
        """
        # str() tolerates non-string cells; split/join collapses every run of
        # whitespace (including newlines) into a single space.
        text = " ".join(str(self.texts[index]).split())

        # Call the tokenizer directly: `encode_plus` is deprecated in favour of
        # `__call__`, which takes the same keyword arguments.
        encoded = self.tokenizer(
            text,
            add_special_tokens=True,
            max_length=self.max_len,
            padding='max_length',
            truncation=True,
            # Kept so the batch schema ('token_type_ids' key) stays unchanged
            # for consumers that read it.
            return_token_type_ids=True,
            return_attention_mask=True,
            return_tensors='pt',
        )

        # return_tensors='pt' yields a leading batch dimension of 1; flatten()
        # drops it so the DataLoader can stack samples itself.
        return {
            'input_ids': encoded['input_ids'].flatten(),
            'attention_mask': encoded['attention_mask'].flatten(),
            'token_type_ids': encoded['token_type_ids'].flatten(),
            'targets': torch.tensor(self.labels[index], dtype=torch.long),
        }

## Data
# Kaggle input locations of the three pre-split CSV files.
train_file_path = '/kaggle/input/emotion/train.csv'
val_file_path = '/kaggle/input/emotion/val.csv'
test_file_path = '/kaggle/input/emotion/test.csv'

# Read every split in one pass; the tuple order matches the unpacking targets.
train_df, val_df, test_df = (
    pd.read_csv(csv_path)
    for csv_path in (train_file_path, val_file_path, test_file_path)
)

# Column of each CSV that holds the class id.
target_column = 'label'

# Display names of the classes; the list length also sets the number of model outputs.
target_names = ["0", "1", "2", "3"]

## Tokenizer
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')


# One CustomDataset per split, all sharing the tokenizer, MAX_LEN and target column.
train_dataset, valid_dataset, test_dataset = (
    CustomDataset(split_df, tokenizer, MAX_LEN, target_column)
    for split_df in (train_df, val_df, test_df)
)

#print(train_dataset[0])

## Data Loader
# pin_memory is enabled only when CUDA is available: the training/eval loops
# move batches with non_blocking=True, and a host-to-device copy can only be
# asynchronous when the source tensor lives in pinned (page-locked) memory.
# Only the training split is shuffled; validation/test keep file order.
train_data_loader = torch.utils.data.DataLoader(train_dataset,
    batch_size=TRAIN_BATCH_SIZE,
    shuffle=True,
    num_workers=0,
    pin_memory=torch.cuda.is_available()
)

val_data_loader = torch.utils.data.DataLoader(valid_dataset,
    batch_size=VALID_BATCH_SIZE,
    shuffle=False,
    num_workers=0,
    pin_memory=torch.cuda.is_available()
)

test_data_loader = torch.utils.data.DataLoader(test_dataset,
    batch_size=TEST_BATCH_SIZE,
    shuffle=False,
    num_workers=0,
    pin_memory=torch.cuda.is_available()
)
## Device
# Prefer the GPU whenever CUDA is usable; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

## Model

class RobertaBase(nn.Module):
    """Pretrained ``roberta-base`` encoder with a dropout + linear classification head.

    Args:
        num_classes: Number of output logits produced per sample.
    """

    def __init__(self, num_classes):
        super().__init__()
        self.roberta = RobertaModel.from_pretrained('roberta-base')

        # Dropout layer
        self.drop = nn.Dropout(0.3)

        # Fully connected layer for classification. The input width is read
        # from the encoder config (768 for roberta-base) instead of being
        # hard-coded, so the head always matches the loaded encoder.
        self.fc = nn.Linear(self.roberta.config.hidden_size, num_classes)

    def forward(self, input_ids, attention_mask, token_type_ids=None):
        """Return raw (unnormalised) logits of shape (batch_size, num_classes).

        Args:
            input_ids: Token ids, (batch_size, seq_length).
            attention_mask: Padding mask, (batch_size, seq_length).
            token_type_ids: Accepted for call-site compatibility only; it is
                not forwarded to the encoder.
        """
        # RoBERTa features
        outputs = self.roberta(
            input_ids=input_ids,
            attention_mask=attention_mask
        )

        # Representation of the first token (<s>, RoBERTa's counterpart of
        # BERT's [CLS]) is used as the sentence embedding.
        cls_token = outputs.last_hidden_state[:, 0, :]  # (batch_size, hidden_size)

        # Dropout is applied after the slice: only the first-token vector
        # reaches the classifier, so masking the other seq_length - 1
        # positions would be wasted work.
        cls_token = self.drop(cls_token)

        # Final classification
        return self.fc(cls_token)  # (batch_size, num_classes)

## Setting the model
# One output logit per entry in target_names. nn.Module.to() returns the
# module itself, so construction and device placement can be chained.
model = RobertaBase(num_classes=len(target_names)).to(device)

## Loss & Optimizer
def loss_fn(outputs, targets):
    """Return the mean cross-entropy between raw logits and integer class ids.

    Args:
        outputs: Logits of shape (batch_size, num_classes).
        targets: Class indices of shape (batch_size,); cast to int64 here.
    """
    class_ids = targets.long()
    return F.cross_entropy(outputs, class_ids)

# define the optimizer
# AdamW over every model parameter (encoder and head alike) with a single
# learning rate and a uniform weight decay of 1e-3 (no per-group exclusions).
optimizer = optim.AdamW(model.parameters(), lr=LEARNING_RATE, weight_decay=1e-3)

def train_model(training_loader, model, optimizer):
    """Run one training epoch and report accuracy and mean loss.

    Args:
        training_loader: Iterable of batches; each batch is a dict holding
            ``'input_ids'``, ``'attention_mask'`` and ``'targets'`` tensors.
        model: Module called as ``model(input_ids, attention_mask)`` that
            returns per-class logits.
        optimizer: Optimizer stepping ``model``'s parameters.

    Returns:
        Tuple ``(model, train_accuracy, average_loss)`` where accuracy is the
        fraction of correctly classified samples seen during the epoch and
        ``average_loss`` is the mean of the per-batch losses.

    Raises:
        ValueError: If ``training_loader`` yields no samples.
    """
    losses = []
    correct_predictions = 0
    num_samples = 0

    # Set model to training mode (activate dropout, batch norm)
    model.train()

    for data in training_loader:
        # Only the tensors the model actually consumes are moved to the
        # device; 'token_type_ids' is not used by the forward pass.
        ids = data['input_ids'].to(device, dtype=torch.long, non_blocking=True)
        mask = data['attention_mask'].to(device, dtype=torch.long, non_blocking=True)
        targets = data['targets'].to(device, dtype=torch.long, non_blocking=True)

        # Forward pass
        outputs = model(ids, mask)
        loss = loss_fn(outputs, targets)
        losses.append(loss.item())

        # Calculate training accuracy (counted as plain Python ints)
        preds = outputs.argmax(dim=1)
        correct_predictions += (preds == targets).sum().item()
        num_samples += targets.size(0)

        # Backward pass and optimizer step; gradients are clipped to a global
        # L2 norm of 1.0 and cleared after the step for the next batch.
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()
        optimizer.zero_grad()

    if num_samples == 0:
        # Fail with an explicit message instead of a bare ZeroDivisionError.
        raise ValueError("training_loader yielded no samples")

    train_accuracy = correct_predictions / num_samples
    average_loss = np.mean(losses)
    print(f"Training Accuracy: {train_accuracy:.4f} | Training Loss: {average_loss:.4f}")

    return model, train_accuracy, average_loss


def eval_model(validation_loader, model):
    """Evaluate ``model`` on a labelled loader and print a metric summary.

    Args:
        validation_loader: Iterable of batches; each batch is a dict holding
            ``'input_ids'``, ``'attention_mask'`` and ``'targets'`` tensors.
        model: Module called as ``model(input_ids, attention_mask)`` that
            returns per-class logits.

    Returns:
        Tuple ``(acc, average_loss)``: overall accuracy and the mean of the
        per-batch losses.
    """
    model.eval()
    final_targets = []
    final_outputs = []
    losses = []

    with torch.no_grad():
        for data in validation_loader:
            # 'token_type_ids' is not consumed by the model, so it is not
            # copied to the device.
            ids = data['input_ids'].to(device, dtype=torch.long, non_blocking=True)
            mask = data['attention_mask'].to(device, dtype=torch.long, non_blocking=True)
            targets = data['targets'].to(device, dtype=torch.long, non_blocking=True)

            outputs = model(ids, mask)
            loss = loss_fn(outputs, targets)
            losses.append(loss.item())

            preds = outputs.argmax(dim=1)
            final_outputs.extend(preds.cpu().tolist())
            final_targets.extend(targets.cpu().tolist())

            # No torch.cuda.empty_cache() here: it does not give PyTorch more
            # usable memory and only forces the caching allocator to
            # re-request blocks on the next batch.

    # Multi-class metrics; the averaged ones are weighted by class support.
    acc = accuracy_score(final_targets, final_outputs)
    f1 = f1_score(final_targets, final_outputs, average='weighted')
    precision = precision_score(final_targets, final_outputs, average='weighted')
    recall = recall_score(final_targets, final_outputs, average='weighted')
    hamming = hamming_loss(final_targets, final_outputs)

    average_loss = np.mean(losses)

    print(f"Validation Accuracy: {acc:.4f}")
    print(f"F1 Score: {f1}")
    print(f"Precision: {precision}")
    print(f"Recall: {recall}")
    print(f"Hamming Loss: {hamming}")
    print(f"Average Loss: {average_loss}")
    # target_names is the module-level list of class display names.
    print("\nClassification Report:\n", classification_report(final_targets, final_outputs, target_names=target_names))

    return acc, average_loss


#Learning Rate Scheduler
# scheduler.step() runs once per epoch, so the cosine half-period must span
# the whole run. A T_max shorter than EPOCHS anneals the learning rate to 0
# before training ends (an epoch with no effective updates) and then ramps it
# back up for the remaining epochs.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS)

# Training & Evaluation Loop
# recording starting time
start = time.time()

history = defaultdict(list)  # per-epoch metric curves keyed by metric name
best_acc = 0.0  # Initialize best accuracy

for epoch in range(1, EPOCHS + 1):
    print(f'Epoch {epoch}/{EPOCHS}')
    model, train_acc, train_loss = train_model(train_data_loader, model, optimizer)
    val_acc, val_loss = eval_model(val_data_loader, model)

    history['train_acc'].append(train_acc)
    history['train_loss'].append(train_loss)
    history['val_acc'].append(val_acc)
    history['val_loss'].append(val_loss)

    # Advance the schedule after the epoch's optimizer steps.
    scheduler.step()

    # Save the best model based on accuracy (strict improvement only, so the
    # earliest epoch wins a tie).
    if val_acc > best_acc:
        torch.save(model.state_dict(), "emotion_RoBERTa_32.bin")
        best_acc = val_acc

# recording end time
end = time.time()
print(f"Total training and evaluation time: {end - start} seconds")


## Testing
# Loading pretrained model (best model)
print("\n\nTesting\n\n")
model = RobertaBase(num_classes=len(target_names))
# map_location='cpu' lets a checkpoint saved on a GPU be restored on a
# CPU-only host (the model is moved to `device` right after);
# weights_only=True restricts unpickling to tensors/plain containers, which is
# all a state_dict contains, and avoids torch.load's unsafe-pickle warning.
model.load_state_dict(
    torch.load("emotion_RoBERTa_32.bin", map_location='cpu', weights_only=True)
)
model = model.to(device)

# recording starting time
start = time.time()
# Evaluate the model using the test data
eval_model(test_data_loader, model)
# recording end time
end = time.time()
print(f"Total test-set evaluation time: {end - start} seconds")

tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]



model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/12
Training Accuracy: 0.5533 | Training Loss: 1.0586
Validation Accuracy: 0.7460
F1 Score: 0.738690236087453
Precision: 0.7377585699002909
Recall: 0.7459893048128342
Hamming Loss: 0.2540106951871658
Average Loss: 0.7372631058096886

Classification Report:
               precision    recall  f1-score   support

           0       0.78      0.86      0.82       160
           1       0.74      0.75      0.74        97
           2       0.53      0.32      0.40        28
           3       0.73      0.66      0.69        89

    accuracy                           0.75       374
   macro avg       0.69      0.65      0.66       374
weighted avg       0.74      0.75      0.74       374

Epoch 2/12
Training Accuracy: 0.7897 | Training Loss: 0.5993
Validation Accuracy: 0.7888
F1 Score: 0.7880808524718327
Precision: 0.7930798536334868
Recall: 0.7887700534759359
Hamming Loss: 0.21122994652406418
Average Loss: 0.6341638118028641

Classification Report:
               precision    recall

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  model.load_state_dict(torch.load("emotion_RoBERTa_32_best_NEW.bin"))


Validation Accuracy: 0.8177
F1 Score: 0.8173648903381142
Precision: 0.8196498743639026
Recall: 0.8177339901477833
Hamming Loss: 0.18226600985221675
Average Loss: 0.6631614198287328

Classification Report:
               precision    recall  f1-score   support

           0       0.88      0.84      0.86       558
           1       0.77      0.88      0.82       358
           2       0.74      0.66      0.70       123
           3       0.81      0.79      0.80       382

    accuracy                           0.82      1421
   macro avg       0.80      0.79      0.79      1421
weighted avg       0.82      0.82      0.82      1421

Total test-set evaluation time: 5.6134889125823975 seconds
