In [16]:
from collections import Counter

import numpy as np
import seaborn as sns
import torch
import torch.nn as nn
from sklearn.datasets import make_multilabel_classification
from sklearn.metrics import accuracy_score, f1_score, hamming_loss, jaccard_score
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader

In [17]:
# Synthetic multi-label problem: 10,000 samples, 10 features, 3 labels.
# random_state pins the generated data so a Restart & Run All reproduces it.
X, y = make_multilabel_classification(
    n_samples=10000,
    n_features=10,
    n_classes=3,
    n_labels=2,
    random_state=42,
)

In [18]:
# Copy the NumPy arrays into float32 tensors; the labels are cast to float
# as well because they are later used as targets of a BCE-with-logits loss.
X_torch = torch.tensor(X, dtype=torch.float32)
y_torch = torch.tensor(y, dtype=torch.float32)

In [19]:
# Hold out 20% of the samples for evaluation; random_state fixes the shuffle
# so the split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X_torch, y_torch, test_size=0.2, random_state=42
)

### Dataset

In [20]:
class MultilabelDataset(Dataset):
    """Map-style dataset pairing each feature row with its target row.

    Args:
        X: Indexable collection of samples (must support ``len`` and ``[]``).
        y: Indexable collection of targets, aligned with ``X`` by position.

    Raises:
        ValueError: If ``X`` and ``y`` do not contain the same number of items.
    """

    def __init__(self, X, y):
        # Fail fast: without this check a mismatch only shows up later as an
        # IndexError (or as silently ignored trailing targets) during iteration.
        if len(X) != len(y):
            raise ValueError(
                f"X and y must have the same length, got {len(X)} and {len(y)}"
            )
        self.X = X
        self.y = y

    def __len__(self):
        """Return the number of samples."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(features, target)`` pair stored at position ``idx``."""
        return self.X[idx], self.y[idx]

In [21]:
# One dataset per split, ready to be handed to a DataLoader
train_data = MultilabelDataset(X=X_train, y=y_train)
test_data = MultilabelDataset(X=X_test, y=y_test)

### DataLoader

In [22]:
# Mini-batches of 32. Only the training loader reshuffles every epoch; the
# test loader keeps the dataset order so evaluation batches are identical
# from run to run.
train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
test_loader = DataLoader(test_data, batch_size=32, shuffle=False)

### Model

In [23]:
class MultilabelNet(nn.Module):
    """Feed-forward network with one hidden layer that returns raw logits.

    Args:
        num_features: Width of each input vector.
        num_classes: Number of output logits.
        hidden: Number of units in the hidden layer.
    """

    def __init__(self, num_features, num_classes, hidden):
        super().__init__()
        self.fc1 = nn.Linear(in_features=num_features, out_features=hidden)
        self.fc2 = nn.Linear(in_features=hidden, out_features=num_classes)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Apply linear -> ReLU -> linear; no activation on the output."""
        hidden_act = self.relu(self.fc1(x))
        return self.fc2(hidden_act)

In [24]:
# Network dimensions: the hidden width is picked by hand, while the input
# and output widths are read off the second axis of the training tensors.
hidden = 24
num_features = train_data.X.shape[1]
num_classes = train_data.y.shape[1]

In [25]:
# Seed torch's global RNG before the weights are drawn so that initialisation
# (and the DataLoader shuffling that follows) is repeatable across runs.
torch.manual_seed(42)
model = MultilabelNet(num_features, num_classes, hidden)

In [26]:
# Optimizer: Adam over all model parameters
learning_rate = 0.01
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Loss: binary cross-entropy on raw logits (the sigmoid is applied inside the loss)
criterion = nn.BCEWithLogitsLoss()

### Training

In [27]:
num_epochs = 100
losses = []  # mean training loss of each epoch, in order
num_batches = len(train_loader)  # batches per epoch; constant across epochs

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0

    for X_batch, y_batch in train_loader:
        # clear gradients left over from the previous step
        optimizer.zero_grad()

        # forward pass
        y_pred = model(X_batch)

        # compute loss
        loss = criterion(y_pred, y_batch)

        # backward pass
        loss.backward()

        # update weights
        optimizer.step()

        running_loss += loss.item()

    # Average loss of the epoch (mean of the per-batch losses)
    epoch_loss = running_loss / num_batches
    losses.append(epoch_loss)

    # Report every 10th epoch only, to keep the cell output short
    if epoch % 10 == 0:
        print(f'Epoch {epoch}, Average Loss: {epoch_loss:.10f}')


Epoch 0, Average Loss: 0.3074058051
Epoch 10, Average Loss: 0.2092297895
Epoch 20, Average Loss: 0.2065670801
Epoch 30, Average Loss: 0.2057382312
Epoch 40, Average Loss: 0.2077238427
Epoch 50, Average Loss: 0.2060286120
Epoch 60, Average Loss: 0.2049117711
Epoch 70, Average Loss: 0.2039223326
Epoch 80, Average Loss: 0.2067437657
Epoch 90, Average Loss: 0.2042003299


### Model evaluation

In [28]:
# Predict on the held-out split in eval mode, without tracking gradients
model.eval()
with torch.no_grad():
    y_test_logits = model(X_test)
    y_test_probs = y_test_logits.sigmoid()      # logits -> per-label probabilities
    y_test_pred = y_test_probs.gt(0.5).float()  # 1.0 where the probability exceeds 0.5

# Score the thresholded predictions against the true label rows
accuracy = accuracy_score(y_test.numpy(), y_test_pred.numpy())
print(f"Subset Accuracy: {accuracy:.4f}")

Subset Accuracy: 0.7820


In [29]:
# NumPy versions of the targets and thresholded predictions for scikit-learn.
# (The metric functions are imported in the notebook's top import cell.)
y_test_np = y_test.numpy()
y_pred_np = y_test_pred.numpy()

# Multi-label metrics
print("=== Multi-label Evaluation ===")
print(f"Subset Accuracy: {accuracy_score(y_test_np, y_pred_np):.4f}")
print(f"Hamming Loss: {hamming_loss(y_test_np, y_pred_np):.4f}")
print(f"F1 Micro: {f1_score(y_test_np, y_pred_np, average='micro'):.4f}")
print(f"F1 Macro: {f1_score(y_test_np, y_pred_np, average='macro'):.4f}")
print(f"Jaccard Score: {jaccard_score(y_test_np, y_pred_np, average='micro'):.4f}")

=== Multi-label Evaluation ===
Subset Accuracy: 0.7820
Hamming Loss: 0.0840
F1 Micro: 0.9206
F1 Macro: 0.9160
Jaccard Score: 0.8528
