In [4]:
import datetime
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from tqdm import tqdm
from transformers import BertTokenizer, BertModel
import spacy
import re

In [5]:
# GPU/CPU Device Setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Load Data
train = pd.read_csv('../public_data/train/track_a/eng.csv')
val = pd.read_csv('../public_data/dev/track_a/eng_a.csv')
emotions = ['Joy', 'Sadness', 'Surprise', 'Fear', 'Anger']

# Initialize BERT Tokenizer & Model
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
bert_model = BertModel.from_pretrained('bert-base-uncased').to(device)
nlp = spacy.load("en_core_web_sm")

Using device: cpu


model.safetensors:  67%|######6   | 294M/440M [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


In [6]:
# Preprocessing Function
def pre_process(text):
    text = re.sub(r"[.,;:!?'\"“”()]", "", text)  # Remove punctuation
    encoded_input = tokenizer(text, return_tensors='pt', truncation=True, padding='max_length', max_length=128)
    return encoded_input['input_ids'].squeeze(0).to(device)

# Convert Text to BERT Embeddings
def get_bert_embeddings(texts):
    embeddings = []
    for text in texts:
        input_ids = pre_process(text).unsqueeze(0)
        with torch.no_grad():
            outputs = bert_model(input_ids)
        embeddings.append(outputs.last_hidden_state[:, 0, :].cpu().numpy())  # Extract [CLS] token
    return np.vstack(embeddings)

X_train = get_bert_embeddings(train["text"])
X_val = get_bert_embeddings(val["text"])

# POS Feature Extraction
def get_pos_features(texts):
    return [[token.pos_ for token in nlp(text)] for text in texts]

train_pos_tags = get_pos_features(train["text"])
val_pos_tags = get_pos_features(val["text"])

# Convert POS Tags to Indices
pos_vocab = {pos: idx for idx, pos in enumerate(set(tag for tags in train_pos_tags for tag in tags))}
train_pos_indices = [[pos_vocab[tag] for tag in tags] for tags in train_pos_tags]
val_pos_indices = [[pos_vocab.get(tag, 0) for tag in tags] for tags in val_pos_tags]

# Pad POS Sequences to Fixed Length
max_length = max(max(len(seq) for seq in train_pos_indices), max(len(seq) for seq in val_pos_indices))
train_pos_indices = [seq + [0] * (max_length - len(seq)) for seq in train_pos_indices]
val_pos_indices = [seq + [0] * (max_length - len(seq)) for seq in val_pos_indices]

# Convert to PyTorch Tensors
train_pos_indices = torch.tensor(train_pos_indices, dtype=torch.long).to(device)
val_pos_indices = torch.tensor(val_pos_indices, dtype=torch.long).to(device)

We strongly recommend passing in an `attention_mask` since your input_ids may be padded. See https://huggingface.co/docs/transformers/troubleshooting#incorrect-output-when-padding-tokens-arent-masked.


In [7]:
# Trainable POS Embedding Layer
class POSEmbedding(nn.Module):
    def __init__(self, num_pos_tags, embedding_dim):
        super(POSEmbedding, self).__init__()
        self.embedding = nn.Embedding(num_embeddings=num_pos_tags, embedding_dim=embedding_dim)

    def forward(self, pos_indices):
        return self.embedding(pos_indices)

pos_embedding_layer = POSEmbedding(len(pos_vocab), embedding_dim=16).to(device)

# Model Definition
class EmotionClassifier(nn.Module):
    def __init__(self, bert_dim=768, pos_dim=16, hidden_dim=128, output_dim=5):
        super(EmotionClassifier, self).__init__()
        self.fc1 = nn.Linear(bert_dim + pos_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, output_dim)
        self.relu = nn.ReLU()

    def forward(self, bert_embeddings, pos_indices):
        pos_embeds = pos_embedding_layer(pos_indices).mean(dim=1)  # Average POS embeddings
        combined_features = torch.cat((bert_embeddings, pos_embeds), dim=1)
        x = self.relu(self.fc1(combined_features))
        return self.fc2(x)

# Initialize Model
model = EmotionClassifier().to(device)
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4, weight_decay=1e-4)

In [8]:
# Prepare Training Data
y_train = torch.tensor(train[emotions].values, dtype=torch.float32).to(device)
y_val = torch.tensor(val[emotions].values, dtype=torch.float32).to(device)

train_features = torch.tensor(X_train, dtype=torch.float32).to(device)
val_features = torch.tensor(X_val, dtype=torch.float32).to(device)

dataset = TensorDataset(train_features, train_pos_indices, y_train)
data_loader = DataLoader(dataset, batch_size=16, shuffle=True)

# Training Loop
epochs = 400
losses = []

for epoch in tqdm(range(epochs + 1), desc="Training Loop"):
    model.train()
    for features, pos_indices, labels in data_loader:
        optimizer.zero_grad()
        outputs = model(features, pos_indices)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    if epoch % 100 == 0:
        print(f"Epoch {epoch}: Loss: {round(loss.item(), 3)}")
        torch.save(model.state_dict(), f'./08-02-25/net_epoch_{epoch}.pth')
        losses.append(round(loss.item(), 3))

print(f"Final Loss after {epochs} epochs: {losses[-1]}")

# Prediction Function
def get_predictions(X_val, pos_indices, model, threshold=0.5):
    model.eval()
    with torch.no_grad():
        yhat = torch.sigmoid(model(X_val, pos_indices)).cpu().numpy()
    return yhat > threshold

# Generate Predictions for Multiple Epochs
for i in range(5):
    epoch = i * 100
    model.load_state_dict(torch.load(f'./08-02-25/net_epoch_{epoch}.pth'))
    y_pred = get_predictions(val_features, val_pos_indices, model, 0.45)

    val_data_with_pred = pd.DataFrame(y_pred, columns=['Anger', 'Fear', 'Joy', 'Sadness', 'Surprise'])
    val_data_with_pred = val_data_with_pred.astype(int)
    val_data_with_pred['id'] = val['id']
    val_data_with_pred = val_data_with_pred[['id', 'Anger', 'Fear', 'Joy', 'Sadness', 'Surprise']]

    formatted_time = datetime.datetime.now().strftime('%Y-%m-%d_%H_%M_%S')
    val_data_with_pred.to_csv(f'../results/alt_exp_4/alt_exp_4_epoch_{epoch}_pred_eng_a_{formatted_time}.csv', index=False)

    print(val_data_with_pred)


Training Loop:   0%|          | 1/401 [00:01<08:52,  1.33s/it]

Epoch 0: Loss: 0.589


Training Loop:  25%|██▌       | 101/401 [06:14<23:24,  4.68s/it]

Epoch 100: Loss: 0.438


Training Loop:  50%|█████     | 201/401 [13:18<12:42,  3.81s/it]

Epoch 200: Loss: 0.457


Training Loop:  75%|███████▌  | 301/401 [20:15<07:44,  4.64s/it]

Epoch 300: Loss: 0.36


Training Loop: 100%|██████████| 401/401 [27:02<00:00,  4.05s/it]

Epoch 400: Loss: 0.273
Final Loss after 400 epochs: 0.273
                        id  Anger  Fear  Joy  Sadness  Surprise
0    eng_dev_track_a_00001      0     0    0        1         0
1    eng_dev_track_a_00002      0     0    0        1         0
2    eng_dev_track_a_00003      0     0    0        1         0
3    eng_dev_track_a_00004      0     0    0        1         0
4    eng_dev_track_a_00005      0     0    0        1         0
..                     ...    ...   ...  ...      ...       ...
111  eng_dev_track_a_00112      0     0    0        1         0
112  eng_dev_track_a_00113      0     0    0        1         0
113  eng_dev_track_a_00114      0     0    0        1         0
114  eng_dev_track_a_00115      0     0    0        1         0
115  eng_dev_track_a_00116      0     0    0        1         0

[116 rows x 6 columns]



  model.load_state_dict(torch.load(f'./08-02-25/net_epoch_{epoch}.pth'))
  model.load_state_dict(torch.load(f'./08-02-25/net_epoch_{epoch}.pth'))


                        id  Anger  Fear  Joy  Sadness  Surprise
0    eng_dev_track_a_00001      0     0    0        1         0
1    eng_dev_track_a_00002      0     0    0        1         0
2    eng_dev_track_a_00003      1     0    0        0         0
3    eng_dev_track_a_00004      0     1    0        1         0
4    eng_dev_track_a_00005      0     0    0        0         0
..                     ...    ...   ...  ...      ...       ...
111  eng_dev_track_a_00112      0     0    0        1         0
112  eng_dev_track_a_00113      0     0    1        0         0
113  eng_dev_track_a_00114      0     1    0        1         0
114  eng_dev_track_a_00115      0     0    0        1         0
115  eng_dev_track_a_00116      0     0    1        1         0

[116 rows x 6 columns]
                        id  Anger  Fear  Joy  Sadness  Surprise
0    eng_dev_track_a_00001      0     0    1        1         0
1    eng_dev_track_a_00002      0     0    0        1         0
2    eng_dev_tra

  model.load_state_dict(torch.load(f'./08-02-25/net_epoch_{epoch}.pth'))
  model.load_state_dict(torch.load(f'./08-02-25/net_epoch_{epoch}.pth'))
  model.load_state_dict(torch.load(f'./08-02-25/net_epoch_{epoch}.pth'))


                        id  Anger  Fear  Joy  Sadness  Surprise
0    eng_dev_track_a_00001      0     0    1        1         0
1    eng_dev_track_a_00002      0     0    0        1         0
2    eng_dev_track_a_00003      1     0    0        0         0
3    eng_dev_track_a_00004      0     1    0        1         0
4    eng_dev_track_a_00005      1     0    0        0         0
..                     ...    ...   ...  ...      ...       ...
111  eng_dev_track_a_00112      0     0    0        1         0
112  eng_dev_track_a_00113      0     0    1        1         0
113  eng_dev_track_a_00114      0     1    0        1         0
114  eng_dev_track_a_00115      0     0    0        0         0
115  eng_dev_track_a_00116      0     0    1        1         0

[116 rows x 6 columns]
