In [1]:
import torch
import torchvision.models as models
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms, datasets
from torch.utils.data import DataLoader, Dataset
from torch.nn.functional import sigmoid
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix

import numpy as np

In [2]:
class EmojiDataset(Dataset):
    def __init__(self, rgb_images, labels, transform=None):
        self.rgb_images = rgb_images  # (num, 224, 224, 3)
        self.labels = labels  # (num, 11)
        self.transform = transform

    def __len__(self):
        return len(self.rgb_images)

    def __getitem__(self, idx):
        # Convert to PyTorch tensors
        rgb = torch.tensor(self.rgb_images[idx], dtype=torch.float32)
        label = torch.tensor(self.labels[idx], dtype=torch.float32)

        # Permute from (H, W, C) → (C, H, W)
        rgb = rgb.permute(2, 0, 1)  # (3, 224, 224)
        # Normalize image pixels to [0,1]
        rgb /= 255.0

        if self.transform:
            rgb = self.transform(rgb)
        
        return rgb, label

In [3]:
# Load images and labels
rgb_train= np.load("img_train_plaintext_top11_rgb.npy")  # Shape: (num_train, 224, 224, 3)
labels_train = np.load("img_train_plaintext_top11_labels.npy")  # Shape: (num_train, 11) - Multi-label one-hot

rgb_valid= np.load("img_valid_plaintext_top11_rgb.npy")  # Shape: (num_valid, 224, 224, 3)
labels_valid = np.load("img_valid_plaintext_top11_labels.npy")  # Shape: (num_valid, 11)

rgb_test = np.load("img_test_plaintext_top11_rgb.npy")  # Shape: (num_valid, 224, 224, 3)
labels_test = np.load("img_test_plaintext_top11_labels.npy")  # Shape: (num_valid, 11)

print(rgb_train.shape, rgb_train.dtype, type(rgb_train[0]), rgb_train[0].shape)
print(labels_train.shape, labels_train.dtype, type(labels_train[0]), labels_train[0], labels_train[0][4])

(109176, 224, 224, 3) uint8 <class 'numpy.ndarray'> (224, 224, 3)
(109176, 11) int64 <class 'numpy.ndarray'> [0 0 0 0 1 0 0 0 0 0 0] 1


In [4]:
# Define transformations (optional)
transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),  # Data augmentation
    transforms.RandomRotation(10),
])

# Create Dataset instances
train_dataset = EmojiDataset(rgb_train, labels_train, transform=transform)
valid_dataset = EmojiDataset(rgb_valid, labels_valid, transform=None)
test_dataset = EmojiDataset(rgb_test, labels_test, transform=None)

# Create DataLoaders
batch_size = 32  # Adjust as needed
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [5]:
rgb, label = train_dataset[0]

print("Single image shape:", rgb.shape)  # Expected: (3, 224, 224)
print("Single label shape:", label.shape)  # Expected: (11,)

Single image shape: torch.Size([3, 224, 224])
Single label shape: torch.Size([11])


In [6]:
for rgb, labels in train_loader:
    print("Batch image shape:", rgb.shape)  # Expected: (32, 3, 224, 224)
    print("Batch label shape:", labels.shape)  # Expected: (32, 11)
    break

Batch image shape: torch.Size([32, 3, 224, 224])
Batch label shape: torch.Size([32, 11])


In [7]:
# Load Pretrained ResNet18
model = models.resnet50(weights="DEFAULT")  

num_classes = 11  # Top 11

# Modify the last layer
model.fc = nn.Linear(model.fc.in_features, num_classes)
print(model)

# Move model to GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
model = model.to(device)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [8]:
# Define loss function & optimizer
criterion = nn.BCEWithLogitsLoss()  # Multi-label loss function
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training Loop
num_epochs = 20  # Adjust as needed

for epoch in range(num_epochs):
    model.train()
    train_loss = 0.0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)

        loss = criterion(outputs, labels) 
        loss.backward()
        optimizer.step()

        train_loss += loss.item()

    # After each epoch, validate on the validation set
    model.eval()  # Set model to evaluation mode
    valid_loss = 0.0
    with torch.no_grad():  # Disable gradient calculation for validation
        for images, labels in valid_loader:
            images, labels = images.to(device), labels.to(device) 

            outputs = model(images)
            loss = criterion(outputs, labels)

            valid_loss += loss.item()
    print(f"Epoch {epoch+1}, Train Loss: {train_loss/len(train_loader)} , Valid Loss: {valid_loss/len(valid_loader)}")
    

Epoch 1, Train Loss: 0.27652916725362453 , Valid Loss: 0.2755196407121956
Epoch 2, Train Loss: 0.27309497524432813 , Valid Loss: 0.27262956980201936
Epoch 3, Train Loss: 0.2716500133424473 , Valid Loss: 0.2728515892984375
Epoch 4, Train Loss: 0.27018033512207434 , Valid Loss: 0.270565269169984
Epoch 5, Train Loss: 0.2687623936067153 , Valid Loss: 0.270980664386005
Epoch 6, Train Loss: 0.26742139159934325 , Valid Loss: 0.2693634917023321
Epoch 7, Train Loss: 0.2663545874313201 , Valid Loss: 0.26873559623010573
Epoch 8, Train Loss: 0.26514670461992723 , Valid Loss: 0.2677539094257607
Epoch 9, Train Loss: 0.2641660205076546 , Valid Loss: 0.26674363545324437
Epoch 10, Train Loss: 0.2630939532599793 , Valid Loss: 0.26734411909624384
Epoch 11, Train Loss: 0.2619022278405577 , Valid Loss: 0.2666865127979132
Epoch 12, Train Loss: 0.26051960462335266 , Valid Loss: 0.2655696225938974
Epoch 13, Train Loss: 0.25902257548319774 , Valid Loss: 0.26429600489360316
Epoch 14, Train Loss: 0.2575939503462

In [9]:
model.eval()

for threshold in (0.3, 0.5, 0.7):
    print(f"Threshold: {threshold}")
    
    total = 0
    correct = 0
    all_preds = []
    all_labels = []
    
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)

            outputs = model(images)
            # Convert logits to probabilities
            probs = sigmoid(outputs)  
            # Convert logits to binary, 
            # Threshold at 0.5 (adjustable) to get binary labels
            predicted = (probs > threshold).float()
            
            # Store predictions and actual labels
            all_preds.append(predicted.cpu().numpy())
            all_labels.append(labels.cpu().numpy())
            
            correct += (predicted == labels).sum().item()
            total += labels.numel()  # Count total number of elements

    # Convert list of numpy arrays into a single numpy array
    all_preds = np.vstack(all_preds)
    all_labels = np.vstack(all_labels)

    # Compute confusion matrix, precision, and recall for each label (emoji)
    accuracy = 100 * correct / total
    precision = 100 * precision_score(all_labels, all_preds, average="macro")  # Use 'macro' for multi-label
    recall = 100 * recall_score(all_labels, all_preds, average="macro")
    f1 = 100 * f1_score(all_labels, all_preds, average="macro")

    print(f"Test Accuracy: {accuracy:.2f}%")
    print(f"Test Precision: {precision:.2f}%")
    print(f"Test Recall: {recall:.2f}%")
    print(f"Test F1: {f1:.2f}% \n")

Threshold: 0.3
Test Accuracy: 88.29%
Test Precision: 37.15%
Test Recall: 16.45%
Test F1: 18.25% 

Threshold: 0.5
Test Accuracy: 90.33%
Test Precision: 66.39%
Test Recall: 7.00%
Test F1: 11.46% 

Threshold: 0.7
Test Accuracy: 90.44%
Test Precision: 76.42%
Test Recall: 3.48%
Test F1: 6.49% 



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
