In [None]:
# !pip install numpy
# !pip install pandas
# !pip install scikit-learn
# !pip install torch
# !pip install transformers
!pip install emoji


from transformers import AutoModel, AutoTokenizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, classification_report, hamming_loss, roc_auc_score, average_precision_score
from collections import defaultdict
from torch.amp import autocast, GradScaler
import torch.nn.functional as F
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import time

## Hyperparameters
# Fixed token length: the dataset pads/truncates every example to this size.
MAX_LEN = 128

# All three splits use the same batch size.
TRAIN_BATCH_SIZE = VALID_BATCH_SIZE = TEST_BATCH_SIZE = 32

# Number of passes over the training loader.
EPOCHS = 12

# Step size handed to the optimizer.
LEARNING_RATE = 1e-5

# Sigmoid decision threshold. NOTE(review): nothing in the visible code reads
# this value -- confirm whether it is still needed.
THRESHOLD = 0.5


## Dataset Class
class CustomDataset(torch.utils.data.Dataset):
    """Map-style dataset that tokenizes one text row per lookup.

    Args:
        df: pandas DataFrame containing a text column and a label column.
        tokenizer: Hugging Face-style tokenizer, called with a single string.
        max_len: Length every example is padded/truncated to.
        target_column: Name of the column holding the integer class labels.
        text_column: Name of the column holding the raw text. Defaults to
            ``"text"``. NOTE(review): the column name is an assumption --
            the CSV schema is not visible here; confirm or pass it explicitly.

    Raises:
        KeyError: If ``text_column`` or ``target_column`` is not in ``df``.
    """

    def __init__(self, df, tokenizer, max_len, target_column, text_column="text"):
        missing = [col for col in (text_column, target_column) if col not in df.columns]
        if missing:
            raise KeyError(
                f"column(s) {missing} not found in dataframe; "
                f"available columns: {list(df.columns)}"
            )
        self.tokenizer = tokenizer
        # __len__ and __getitem__ both read self.texts, so it must be set here.
        self.texts = df[text_column].tolist()
        self.labels = df[target_column].tolist()
        self.max_len = max_len

    def __len__(self):
        """Return the number of rows (one example per row)."""
        return len(self.texts)

    def __getitem__(self, index):
        """Tokenize row ``index`` and return 1-D tensors plus its label.

        Returns:
            dict with 'input_ids', 'attention_mask', 'token_type_ids'
            (each flattened to 1-D) and 'targets' (a long scalar tensor).
        """
        # Collapse any run of whitespace (including newlines) to one space.
        text = " ".join(str(self.texts[index]).split())

        # Calling the tokenizer directly replaces the deprecated encode_plus.
        inputs = self.tokenizer(
            text,
            add_special_tokens=True,
            max_length=self.max_len,
            padding='max_length',
            truncation=True,
            return_token_type_ids=True,
            return_attention_mask=True,
            return_tensors='pt',
        )

        # return_tensors='pt' adds a leading batch axis; flatten() drops it so
        # the DataLoader can stack the examples itself.
        return {
            'input_ids': inputs['input_ids'].flatten(),
            'attention_mask': inputs['attention_mask'].flatten(),
            'token_type_ids': inputs["token_type_ids"].flatten(),
            'targets': torch.tensor(self.labels[index], dtype=torch.long)
        }

## Data
# CSV locations of the three splits.
train_file_path = '/content/train.csv'
val_file_path = '/content/val.csv'
test_file_path = '/content/test.csv'

# Read the splits in train / val / test order.
train_df, val_df, test_df = (
    pd.read_csv(csv_path)
    for csv_path in (train_file_path, val_file_path, test_file_path)
)

# Label column, and the class names used in the classification report.
target_column = 'label'
target_names = ["0", "1", "2", "3"]

## Tokenizer
tokenizer = AutoTokenizer.from_pretrained("vinai/bertweet-base", use_fast=True)

# Every split shares the same tokenizer, sequence length and label column.
train_dataset, valid_dataset, test_dataset = (
    CustomDataset(frame, tokenizer, MAX_LEN, target_column)
    for frame in (train_df, val_df, test_df)
)

## Data Loader
# Only the training loader shuffles; all three load in the main process.
train_data_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=TRAIN_BATCH_SIZE, shuffle=True, num_workers=0
)
val_data_loader = torch.utils.data.DataLoader(
    valid_dataset, batch_size=VALID_BATCH_SIZE, shuffle=False, num_workers=0
)
test_data_loader = torch.utils.data.DataLoader(
    test_dataset, batch_size=TEST_BATCH_SIZE, shuffle=False, num_workers=0
)
## Device
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

## Model

class BERTweetBase(nn.Module):
    """Pretrained encoder followed by a dropout + linear classification head.

    Attribute names (``bertweet``, ``drop``, ``fc``) are part of the
    state-dict layout and must stay stable so saved checkpoints keep loading.

    Args:
        num_classes: Number of output scores per example.
        model_name: Identifier handed to ``AutoModel.from_pretrained``.
        dropout: Dropout probability applied before the linear head.
    """

    def __init__(self, num_classes, model_name='vinai/bertweet-base', dropout=0.3):
        super().__init__()
        self.bertweet = AutoModel.from_pretrained(model_name)

        # Dropout layer
        self.drop = nn.Dropout(dropout)

        # Size the head from the encoder's own config rather than a hard-coded
        # 768, so a checkpoint with a different hidden width still works.
        self.fc = nn.Linear(self.bertweet.config.hidden_size, num_classes)

    def forward(self, input_ids, attention_mask, token_type_ids=None):
        """Return unnormalized class scores of shape (batch_size, num_classes).

        Args:
            input_ids: Token ids, (batch_size, seq_length).
            attention_mask: Mask matching ``input_ids``.
            token_type_ids: Optional segment ids, forwarded to the encoder.
        """
        outputs = self.bertweet(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
        )

        # Only the first position feeds the classifier, so slice before
        # dropout instead of running dropout over the whole sequence.
        first_token = outputs.last_hidden_state[:, 0, :]  # (batch_size, hidden)

        return self.fc(self.drop(first_token))


## Setting the model
# One output score per class name. nn.Module.to() returns the module itself,
# so chaining leaves `model` bound to the same object after the move.
model = BERTweetBase(num_classes=len(target_names)).to(device)

## Loss & Optimizer
def loss_fn(outputs, targets):
    """Return the mean cross-entropy between class scores and integer labels.

    ``outputs`` holds one row of unnormalized class scores per example;
    ``targets`` holds the class indices and is cast to int64 first.
    """
    labels = targets.long()
    return F.cross_entropy(outputs, labels)

# AdamW over every model parameter, with weight decay 1e-3.
optimizer = optim.AdamW(
    model.parameters(),
    lr=LEARNING_RATE,
    weight_decay=1e-3,
)

## Training function
def train_model(training_loader, model, optimizer):
    """Run one training epoch and report accuracy and mean batch loss.

    Relies on the module-level ``device`` and ``loss_fn``.

    Args:
        training_loader: Iterable of batches; each batch is a mapping with
            'input_ids', 'attention_mask', 'token_type_ids' and 'targets'.
        model: Module called as ``model(ids, mask, token_type_ids)`` that
            returns one row of class scores per example.
        optimizer: Optimizer that updates ``model``'s parameters.

    Returns:
        Tuple ``(model, train_accuracy, average_loss)``; ``average_loss`` is
        the unweighted mean of the per-batch losses.

    Raises:
        ValueError: If the loader yields no samples.
    """
    losses = []
    correct_predictions = 0
    num_samples = 0

    # Training mode enables dropout.
    model.train()

    for data in training_loader:
        ids = data['input_ids'].to(device, dtype=torch.long, non_blocking=True)
        mask = data['attention_mask'].to(device, dtype=torch.long, non_blocking=True)
        token_type_ids = data['token_type_ids'].to(device, dtype=torch.long, non_blocking=True)
        targets = data['targets'].to(device, dtype=torch.long, non_blocking=True)

        # Forward pass
        outputs = model(ids, mask, token_type_ids)
        loss = loss_fn(outputs, targets)
        losses.append(loss.item())

        # Keep the running count as a Python int rather than a device tensor.
        preds = outputs.argmax(dim=1)
        correct_predictions += (preds == targets).sum().item()
        num_samples += targets.size(0)

        # Backward pass, gradient clipping, parameter update.
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()
        # Clearing after the step leaves no gradients allocated once the
        # epoch ends.
        optimizer.zero_grad()

    if num_samples == 0:
        # Fail clearly instead of with a ZeroDivisionError below.
        raise ValueError("training_loader yielded no samples; cannot compute accuracy or loss")

    train_accuracy = correct_predictions / num_samples
    average_loss = np.mean(losses)
    print(f"Training Accuracy: {train_accuracy:.4f} | Training Loss: {average_loss:.4f}")

    return model, train_accuracy, average_loss


def eval_model(validation_loader, model):
    """Evaluate ``model`` on a loader, print metrics, return accuracy and loss.

    Relies on the module-level ``device``, ``loss_fn`` and ``target_names``.
    The printed accuracy line is labelled "Validation" whichever split is
    passed in.

    Args:
        validation_loader: Iterable of batches; each batch is a mapping with
            'input_ids', 'attention_mask', 'token_type_ids' and 'targets'.
        model: Module called as ``model(ids, mask, token_type_ids)`` that
            returns one row of class scores per example.

    Returns:
        Tuple ``(acc, average_loss)``; ``average_loss`` is the unweighted
        mean of the per-batch losses.
    """
    model.eval()
    final_targets = []
    final_outputs = []
    final_probs = []
    losses = []

    with torch.no_grad():
        for data in validation_loader:
            ids = data['input_ids'].to(device, dtype=torch.long, non_blocking=True)
            mask = data['attention_mask'].to(device, dtype=torch.long, non_blocking=True)
            token_type_ids = data['token_type_ids'].to(device, dtype=torch.long, non_blocking=True)
            targets = data['targets'].to(device, dtype=torch.long, non_blocking=True)

            # Get model outputs
            outputs = model(ids, mask, token_type_ids)
            losses.append(loss_fn(outputs, targets).item())

            # Class probabilities (for AUC/AUPR) and hard predictions.
            probs = torch.softmax(outputs, dim=1)
            preds = outputs.argmax(dim=1)
            final_outputs.extend(preds.cpu().numpy())
            final_probs.extend(probs.cpu().numpy())
            final_targets.extend(targets.cpu().numpy())

    # Release cached GPU memory once after the pass instead of on every batch.
    torch.cuda.empty_cache()

    # Convert to numpy arrays
    final_targets = np.array(final_targets)
    final_outputs = np.array(final_outputs)
    final_probs = np.array(final_probs)

    # Accuracy
    acc = accuracy_score(final_targets, final_outputs)

    # Weighted metrics
    f1 = f1_score(final_targets, final_outputs, average='weighted')
    precision = precision_score(final_targets, final_outputs, average='weighted')
    recall = recall_score(final_targets, final_outputs, average='weighted')

    # Macro-averaged metrics (named to match both the averaging mode and the
    # labels printed below).
    macro_f1 = f1_score(final_targets, final_outputs, average='macro')
    macro_precision = precision_score(final_targets, final_outputs, average='macro')
    macro_recall = recall_score(final_targets, final_outputs, average='macro')

    # Hamming Loss
    hamming = hamming_loss(final_targets, final_outputs)

    # AUC-ROC and AUPR
    auc_roc = roc_auc_score(final_targets, final_probs, multi_class='ovr', average='macro')
    # NOTE(review): 1-D integer labels with 2-D scores relies on multiclass
    # support in average_precision_score -- confirm the installed
    # scikit-learn version provides it.
    aupr = average_precision_score(final_targets, final_probs, average='macro')

    # Average Loss
    average_loss = np.mean(losses)

    # Print metrics
    print(f"Validation Accuracy: {acc:.4f}")
    print(f"Weighted F1 Score: {f1}")
    print(f"Macro F1 Score: {macro_f1}")
    print(f"Weighted Precision: {precision}")
    print(f"Macro Precision: {macro_precision}")
    print(f"Weighted Recall: {recall}")
    print(f"Macro Recall: {macro_recall}")
    print(f"Hamming Loss: {hamming}")
    print(f"AUC-ROC: {auc_roc}")
    print(f"AUPR: {aupr}")
    print("\nClassification Report:\n", classification_report(final_targets, final_outputs, target_names=target_names))

    return acc, average_loss


# Learning Rate Scheduler
# The scheduler is stepped once per epoch, so T_max must equal the number of
# epochs. With a shorter T_max the cosine reaches its minimum (eta_min, 0 by
# default) before training ends, and a whole epoch runs at a zero learning
# rate before the rate climbs again.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS)

# Training & Evaluation Loop
# Wall-clock start of the whole train + validate run.
start = time.time()

# Per-epoch metric curves, keyed by metric name.
history = defaultdict(list)
# Best validation accuracy seen so far; decides when to checkpoint.
best_acc = 0.0

for epoch in range(1, EPOCHS + 1):
    print(f'Epoch {epoch}/{EPOCHS}')

    # One pass over the training data, then one over the validation data.
    model, train_acc, train_loss = train_model(train_data_loader, model, optimizer)
    val_acc, val_loss = eval_model(val_data_loader, model)

    for metric_name, metric_value in (
        ('train_acc', train_acc),
        ('train_loss', train_loss),
        ('val_acc', val_acc),
        ('val_loss', val_loss),
    ):
        history[metric_name].append(metric_value)

    # Advance the learning-rate schedule once per epoch.
    scheduler.step()

    # Checkpoint only on a strict improvement in validation accuracy.
    if val_acc > best_acc:
        torch.save(model.state_dict(), "emotion_BERTweet32_best.bin")
        best_acc = val_acc

# Wall-clock end of the run.
end = time.time()
print(f"Total training and evaluation time: {end - start} seconds")


## Testing
# Rebuild the network and restore the best checkpoint written during training.
print("\n\nTesting\n\n")
model = BERTweetBase(num_classes=len(target_names))
# map_location lets a GPU-saved checkpoint load on a CPU-only host.
# weights_only=True limits unpickling to tensors and plain containers, which
# is all a state_dict contains.
best_state = torch.load(
    "emotion_BERTweet32_best.bin",
    map_location=device,
    weights_only=True,
)
model.load_state_dict(best_state)
model = model.to(device)

# recording starting time
start = time.time()
# Evaluate the model using the test data
eval_model(test_data_loader, model)
# recording end time
end = time.time()
print(f"Total test-set evaluation time: {end - start} seconds")



The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/558 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/843k [00:00<?, ?B/s]

bpe.codes:   0%|          | 0.00/1.08M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.91M [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/543M [00:00<?, ?B/s]

Epoch 1/12


model.safetensors:   0%|          | 0.00/543M [00:00<?, ?B/s]

Training Accuracy: 0.4962 | Training Loss: 1.1747
Validation Accuracy: 0.7353
Weighted F1 Score: 0.7249736290960181
Macro F1 Score: 0.6592077586651791
Weighted Precision: 0.7332602419398755
Macro Precision: 0.7245877246855976
Weighted Recall: 0.7352941176470589
Macro Recall: 0.6335467568383777
AUC-ROC: 0.8682721009875173
AUPR: 0.7071261057115154

Classification Report:
               precision    recall  f1-score   support

           0       0.74      0.89      0.81       160
           1       0.77      0.70      0.74        97
           2       0.69      0.32      0.44        28
           3       0.70      0.62      0.65        89

    accuracy                           0.74       374
   macro avg       0.72      0.63      0.66       374
weighted avg       0.73      0.74      0.72       374

Epoch 2/12
Training Accuracy: 0.7682 | Training Loss: 0.7307
Validation Accuracy: 0.7861
Weighted F1 Score: 0.7821326190117265
Macro F1 Score: 0.7259556593311037
Weighted Precision: 0.78531619

  model.load_state_dict(torch.load("emotion_BERTweet32_best_colab_final.bin"))


Validation Accuracy: 0.8311
Weighted F1 Score: 0.828980308393496
Macro F1 Score: 0.7983712318588901
Weighted Precision: 0.8297902225570478
Macro Precision: 0.8180754364237659
Weighted Recall: 0.8311048557353976
Macro Recall: 0.7840474375009606
AUC-ROC: 0.9422088051773332
AUPR: 0.8567672733645113

Classification Report:
               precision    recall  f1-score   support

           0       0.84      0.91      0.87       558
           1       0.85      0.82      0.84       358
           2       0.77      0.61      0.68       123
           3       0.81      0.80      0.80       382

    accuracy                           0.83      1421
   macro avg       0.82      0.78      0.80      1421
weighted avg       0.83      0.83      0.83      1421

Total test-set evaluation time: 2.839136838912964 seconds
