# **Addressing Overfitting**

In [None]:
import torch.nn as nn

class MLP(nn.Module):
    """Fully connected classifier with three hidden stages and a linear head.

    Hidden widths are 512 -> 256 -> 128. Every stage applies ReLU followed by
    dropout (p = 0.4, 0.3, 0.2 respectively); batch normalisation is applied
    in the first two stages only. The final layer emits one raw score per
    class (no softmax is applied here).

    Args:
        input_dim: Number of features in each input vector.
        num_classes: Number of output scores produced per sample.
    """

    def __init__(self, input_dim, num_classes):
        super().__init__()

        # One row per hidden stage: (width, use_batch_norm, dropout_p).
        stage_specs = (
            (512, True, 0.4),
            (256, True, 0.3),
            (128, False, 0.2),
        )

        # Layers are collected in a flat list so the indices inside
        # ``self.net`` stay 0..11, exactly as when written out by hand.
        modules = []
        fan_in = input_dim
        for width, use_batch_norm, drop_p in stage_specs:
            modules.append(nn.Linear(fan_in, width))
            if use_batch_norm:
                modules.append(nn.BatchNorm1d(width))
            modules.append(nn.ReLU())
            modules.append(nn.Dropout(drop_p))
            fan_in = width

        # Classification head.
        modules.append(nn.Linear(fan_in, num_classes))

        self.net = nn.Sequential(*modules)

    def forward(self, x):
        """Return the per-class scores for ``x``.

        ``x`` is expected to be a ``(batch, input_dim)`` float tensor.
        """
        scores = self.net(x)
        return scores

In [None]:
import torch.optim as optim
import numpy as np

# Instantiate the model BEFORE the optimizer. The optimizer captures the
# parameter tensors it is given at construction time, so building it first
# would either raise NameError on a fresh kernel or leave it attached to a
# stale, previously created model that the training loop no longer uses.
model_w2v = MLP(input_dim=768, num_classes=num_classes).to(device)

# Inverse-frequency class weights: the fewer training samples a class has,
# the larger its weight in the loss.
# minlength guarantees one entry per class even when the highest-indexed
# classes never occur in the training labels (assumes labels are integers
# in [0, num_classes) — TODO confirm against the label encoding).
class_counts = np.bincount(y_train_w2v, minlength=num_classes)
class_weights = 1.0 / (class_counts + 1e-6)  # epsilon guards empty classes
class_weights = class_weights / class_weights.sum()  # rescale to sum to 1

weights = torch.tensor(class_weights, dtype=torch.float32).to(device)

criterion = nn.CrossEntropyLoss(weight=weights)

optimizer = optim.Adam(model_w2v.parameters(), lr=1e-3)

In [None]:
from sklearn.metrics import accuracy_score, f1_score
from tqdm import tqdm

# Number of full passes over the training loader.
EPOCHS = 10

for epoch in range(EPOCHS):

    # ---- Training pass -------------------------------------------------
    model_w2v.train()
    train_preds, train_labels = [], []

    for features, targets in tqdm(train_loader_w2v):
        features = features.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()
        logits = model_w2v(features)
        loss = criterion(logits, targets)

        loss.backward()
        optimizer.step()

        # Predictions come from the forward pass made before this step's
        # weight update, so train accuracy is a running, in-epoch estimate.
        batch_pred = logits.argmax(1)
        train_preds.extend(batch_pred.cpu().numpy())
        train_labels.extend(targets.cpu().numpy())

    train_acc = accuracy_score(train_labels, train_preds)

    # ---- Validation pass (no gradient tracking, eval mode) -------------
    model_w2v.eval()
    val_preds, val_labels = [], []

    with torch.no_grad():
        for features, targets in val_loader_w2v:
            features = features.to(device)
            targets = targets.to(device)
            logits = model_w2v(features)

            batch_pred = logits.argmax(1)
            val_preds.extend(batch_pred.cpu().numpy())
            val_labels.extend(targets.cpu().numpy())

    val_acc = accuracy_score(val_labels, val_preds)
    val_f1 = f1_score(val_labels, val_preds, average="macro")

    # One summary line per epoch.
    summary = (
        f"Epoch {epoch+1}: "
        f"Train Acc={train_acc:.4f}, "
        f"Val Acc={val_acc:.4f}, "
        f"Val F1={val_f1:.4f}"
    )
    print(summary)

100%|██████████| 702/702 [00:03<00:00, 192.45it/s]


Epoch 1: Train Acc=0.6723, Val Acc=0.2929, Val F1=0.1822


100%|██████████| 702/702 [00:02<00:00, 311.33it/s]


Epoch 2: Train Acc=0.7758, Val Acc=0.3267, Val F1=0.2238


100%|██████████| 702/702 [00:03<00:00, 222.64it/s]


Epoch 3: Train Acc=0.8041, Val Acc=0.3469, Val F1=0.2046


100%|██████████| 702/702 [00:03<00:00, 203.74it/s]


Epoch 4: Train Acc=0.8256, Val Acc=0.3259, Val F1=0.2152


100%|██████████| 702/702 [00:02<00:00, 314.90it/s]


Epoch 5: Train Acc=0.8365, Val Acc=0.3505, Val F1=0.2121


100%|██████████| 702/702 [00:02<00:00, 318.99it/s]


Epoch 6: Train Acc=0.8531, Val Acc=0.3663, Val F1=0.2252


100%|██████████| 702/702 [00:03<00:00, 221.32it/s]


Epoch 7: Train Acc=0.8618, Val Acc=0.3559, Val F1=0.2269


100%|██████████| 702/702 [00:03<00:00, 208.26it/s]


Epoch 8: Train Acc=0.8690, Val Acc=0.3878, Val F1=0.2349


100%|██████████| 702/702 [00:02<00:00, 314.77it/s]


Epoch 9: Train Acc=0.8785, Val Acc=0.3732, Val F1=0.2301


100%|██████████| 702/702 [00:02<00:00, 304.89it/s]


Epoch 10: Train Acc=0.8830, Val Acc=0.3647, Val F1=0.2251
