In [1]:
# import statements for python, torch and companion libraries and your own modules
import os
import sys
import json
import random
import numpy as np
from glob import glob
from pathlib import Path
from typing import Dict, List, Tuple, Any

from tqdm.notebook import tqdm

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split, Dataset

import torchvision.transforms as transforms
from torchvision.models import shufflenet_v2_x1_0, ShuffleNet_V2_X1_0_Weights
from PIL import Image

# Import dataset classes from dataset.py for Windows multiprocessing support
from dataset import COCOTrainImageDataset, COCOTestImageDataset, ValidationDataset

print("All libraries imported successfully")
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"CUDA device: {torch.cuda.get_device_name(0)}")

All libraries imported successfully
PyTorch version: 2.5.1
CUDA available: True
CUDA device: NVIDIA GeForce RTX 4050 Laptop GPU


In [2]:
def set_seed(seed: int = 42):
    """Set random seed to ensure reproducibility of results"""
    random.seed(seed)
    np.random.seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    torch.backends.cudnn.deterministic = False  
    torch.backends.cudnn.benchmark = True  

set_seed(42)

In [3]:
# global variables defining training hyper-parameters among other things 
BATCH_SIZE = 64  
NUM_EPOCHS = 30
LEARNING_RATE = 1e-4  
WEIGHT_DECAY = 1e-5
NUM_CLASSES = 80
VALIDATION_SPLIT = 0.1
THRESHOLD = 0.5

print("Global variables and hyperparameters defined:")
print(f"  - Batch size: {BATCH_SIZE}")
print(f"  - Number of epochs: {NUM_EPOCHS}")
print(f"  - Learning rate: {LEARNING_RATE}")
print(f"  - Validation split: {VALIDATION_SPLIT}")
print(f"  - Threshold: {THRESHOLD}")

# device initialization
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Global variables and hyperparameters defined:
  - Batch size: 64
  - Number of epochs: 15
  - Learning rate: 0.0001
  - Validation split: 0.1
  - Threshold: 0.5
Using device: cuda


In [4]:
# data directories initialization
DATA_DIR = "ms-coco"
TRAIN_IMG_DIR = os.path.join(DATA_DIR, "images", "train-resized", "train-resized")
TEST_IMG_DIR = os.path.join(DATA_DIR, "images", "test-resized", "test-resized")
TRAIN_LABELS_DIR = os.path.join(DATA_DIR, "labels", "train")
MODEL_SAVE_PATH = "best_coco_shuffle_model.pth"
OUTPUT_JSON_FILE = "coco_predictions_shuffle_v3.json"


In [5]:
# class definitions
classes = ("person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light", 
           "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
           "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",       
           "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
           "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
           "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch", 
           "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone", 
           "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", 
           "hair drier", "toothbrush")

In [6]:
print("Data directories and class names defined:")
print(f"  - Training images: {TRAIN_IMG_DIR}")
print(f"  - Test images: {TEST_IMG_DIR}")
print(f"  - Training labels: {TRAIN_LABELS_DIR}")
print(f"  - Dataset contains {NUM_CLASSES} classes")

Data directories and class names defined:
  - Training images: ms-coco\images\train-resized\train-resized
  - Test images: ms-coco\images\test-resized\test-resized
  - Training labels: ms-coco\labels\train
  - Dataset contains 80 classes


In [7]:
# COCOTrainImageDataset class has been moved to dataset.py for Windows multiprocessing support
print("COCOTrainImageDataset imported from dataset.py")

COCOTrainImageDataset imported from dataset.py


In [8]:
# COCOTestImageDataset class has been moved to dataset.py for Windows multiprocessing support
print("COCOTestImageDataset imported from dataset.py")

COCOTestImageDataset imported from dataset.py


In [9]:
# instantiation of transforms, datasets and data loaders
# TIP : use torch.utils.data.random_split to split the training set into train and validation subsets
train_transforms = transforms.Compose([
    transforms.Resize((224, 224), interpolation=transforms.InterpolationMode.BILINEAR), 
    transforms.RandomHorizontalFlip(p=0.5),  
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

val_transforms = transforms.Compose([
    transforms.Resize((224, 224), interpolation=transforms.InterpolationMode.BILINEAR),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Create full training dataset
print("Loading dataset...")
full_train_dataset = COCOTrainImageDataset(
    img_dir=TRAIN_IMG_DIR,
    annotations_dir=TRAIN_LABELS_DIR,
    transform=train_transforms
)

print(f"Full training dataset size: {len(full_train_dataset)}")

# Split training data into train and validation subsets using torch.utils.data.random_split
train_size = int((1 - VALIDATION_SPLIT) * len(full_train_dataset))
val_size = len(full_train_dataset) - train_size

train_dataset, val_dataset = random_split(
    full_train_dataset, 
    [train_size, val_size],
    generator=torch.Generator().manual_seed(42)
)

print(f"Training set size: {len(train_dataset)}")
print(f"✅ Validation set size: {len(val_dataset)}")

Loading dataset...
Full training dataset size: 65000
Training set size: 58500
✅ Validation set size: 6500


In [10]:
# ValidationDataset class has been moved to dataset.py for Windows multiprocessing support
print("ValidationDataset imported from dataset.py")


ValidationDataset imported from dataset.py


In [11]:
val_dataset_transformed = ValidationDataset(val_dataset, val_transforms)

# Create data loaders with Windows-compatible multiprocessing settings
# For Windows, we can now use num_workers > 0 since dataset classes are in separate .py file

train_loader = DataLoader(
    train_dataset, 
    batch_size=BATCH_SIZE, 
    shuffle=True, 
    num_workers=4,  
    pin_memory=True,  
    drop_last=True,
    persistent_workers=True  # Keep workers alive between epochs
)

val_loader = DataLoader(
    val_dataset_transformed, 
    batch_size=BATCH_SIZE, 
    shuffle=False, 
    num_workers=4,  
    pin_memory=True,
    persistent_workers=True
)

print("Data loaders created successfully with Windows multiprocessing support")
print(f"  - Training loader: {len(train_loader)} batches, {train_loader.num_workers} workers")
print(f"  - Validation loader: {len(val_loader)} batches, {val_loader.num_workers} workers")

Data loaders created successfully with Windows multiprocessing support
  - Training loader: 914 batches, 4 workers
  - Validation loader: 102 batches, 4 workers


In [12]:
class COCOMultiLabelClassifier(nn.Module):
    """Multi-label classifier based on ShuffleNet V2 x1.0"""
    def __init__(self, num_classes: int = 80, pretrained: bool = True):
        super(COCOMultiLabelClassifier, self).__init__()
        
        # Use pre-trained ShuffleNet V2 x1.0 as backbone
        if pretrained:
            self.backbone = shufflenet_v2_x1_0(weights=ShuffleNet_V2_X1_0_Weights.IMAGENET1K_V1)
        else:
            self.backbone = shufflenet_v2_x1_0(weights=None)
        
        # Get feature dimension (ShuffleNet V2 x1.0 has 1024 output features)
        in_features = self.backbone.fc.in_features
        
        # Replace classification head with multi-label classification head
        self.backbone.fc = nn.Sequential(
            nn.Dropout(0.3),
            nn.Linear(in_features, 512),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(512, num_classes)
        )
        
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.backbone(x)

In [13]:
# instantiation and preparation of network model
print("Initializing model...")
model = COCOMultiLabelClassifier(num_classes=NUM_CLASSES, pretrained=True)
model = model.to(device)

print(f"Model loaded to device: {device}")
print(f"  - Total parameters: {sum(p.numel() for p in model.parameters()):,}")
print(f"  - Trainable parameters: {sum(p.numel() for p in model.parameters() if p.requires_grad):,}")


Initializing model...
Model loaded to device: cuda
  - Total parameters: 1,819,444
  - Trainable parameters: 1,819,444


In [14]:
# Metrics for select the best model
def calculate_f1_metrics(predictions, labels, threshold=0.5):

    predictions_binary = (predictions > threshold).float()
    tp = (predictions_binary * labels).sum()
    fp = (predictions_binary * (1 - labels)).sum() 
    fn = ((1 - predictions_binary) * labels).sum()
    
    micro_precision = tp / (tp + fp + 1e-8)
    micro_recall = tp / (tp + fn + 1e-8)
    micro_f1 = 2 * micro_precision * micro_recall / (micro_precision + micro_recall + 1e-8)
    
    class_f1s = []
    for c in range(labels.shape[1]):
        tp_c = (predictions_binary[:, c] * labels[:, c]).sum()
        fp_c = (predictions_binary[:, c] * (1 - labels[:, c])).sum()
        fn_c = ((1 - predictions_binary[:, c]) * labels[:, c]).sum()
        
        prec_c = tp_c / (tp_c + fp_c + 1e-8)
        rec_c = tp_c / (tp_c + fn_c + 1e-8)
        f1_c = 2 * prec_c * rec_c / (prec_c + rec_c + 1e-8)
        class_f1s.append(f1_c)
    
    macro_f1 = torch.stack(class_f1s).mean()
    return float(micro_f1), float(macro_f1)

In [15]:
def train_loop(train_loader: DataLoader, net: nn.Module, criterion: nn.Module, 
               optimizer: optim.Optimizer, device: torch.device) -> float:

    net.train()
    running_loss = 0.0
    
    for images, labels in tqdm(train_loader, desc="Training",position=0, leave=True):
        images, labels = images.to(device), labels.to(device)
        
        optimizer.zero_grad()
        outputs = net(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item() * images.size(0)
    
    epoch_loss = running_loss / len(train_loader.dataset)
    return epoch_loss

In [16]:
def validation_loop(val_loader: DataLoader, net: nn.Module, criterion: nn.Module, 
                   device: torch.device) -> Dict[str, float]:

    net.eval()
    val_loss = 0.0
    all_predictions = []
    all_labels = []
    
    with torch.no_grad():
        for images, labels in tqdm(val_loader, desc="Validating",position=0, leave=True):
            images, labels = images.to(device), labels.to(device)
            outputs = net(images)
            batch_loss = criterion(outputs, labels)
            val_loss += batch_loss.item() * images.size(0)
            
            probabilities = torch.sigmoid(outputs)
            
            all_predictions.append(probabilities.cpu())  # save the probabilities instead of predictions
            all_labels.append(labels.cpu())
    
    val_loss /= len(val_loader.dataset)
    
    all_predictions = torch.cat(all_predictions, dim=0)
    all_labels = torch.cat(all_labels, dim=0)

    micro_f1, macro_f1 = calculate_f1_metrics(all_predictions, all_labels)

    predictions_binary = (all_predictions > THRESHOLD).float()
    exact_match = (all_predictions == all_labels).all(dim=1).float().mean().item()
    
    sample_accuracy = ((all_predictions == all_labels).float().mean(dim=1)).mean().item()
    
    return {
        'loss': val_loss,
        'exact_match_accuracy': exact_match,
        'sample_accuracy': sample_accuracy,
        'micro_f1': micro_f1,
        'macro_f1': macro_f1,
        'predictions': all_predictions,
        'labels': all_labels
    }
    

In [17]:
# instantiation of loss criterion
# instantiation of optimizer, registration of network parameters

criterion = nn.BCEWithLogitsLoss()
print("Loss criterion initialized: BCEWithLogitsLoss")

optimizer = optim.AdamW(
    model.parameters(), 
    lr=LEARNING_RATE, 
    weight_decay=WEIGHT_DECAY
)

# Learning rate scheduler
scheduler = optim.lr_scheduler.CosineAnnealingLR(
    optimizer, 
    T_max=NUM_EPOCHS, 
    eta_min=1e-6
)

print("Optimizer and scheduler initialized:")
print(f"  - Optimizer: AdamW")
print(f"  - Learning rate: {LEARNING_RATE}")
print(f"  - Weight decay: {WEIGHT_DECAY}")
print(f"  - Scheduler: CosineAnnealingLR")

Loss criterion initialized: BCEWithLogitsLoss
Optimizer and scheduler initialized:
  - Optimizer: AdamW
  - Learning rate: 0.0001
  - Weight decay: 1e-05
  - Scheduler: CosineAnnealingLR


In [18]:
log_dir = "runs/coco_multi_label_shuffle"
os.makedirs(log_dir, exist_ok=True)

print(f"Logs will be saved to: {log_dir}")
print("   To view logs, run: tensorboard --logdir=runs")

Logs will be saved to: runs/coco_multi_label_shuffle
   To view logs, run: tensorboard --logdir=runs


In [19]:
# for multiprocessing in windows+jupyter, it's better to put the training process in '__main__' for avoiding pickle problem
if __name__ == '__main__' or 'ipykernel' in sys.modules: 
    print("Starting training...")
    print("=" * 60)

    best_val_loss = float('inf')
    best_val_micro_f1 = 0.0
    best_val_macro_f1 = 0.0

    for epoch in tqdm(range(NUM_EPOCHS)):
        print(f"\nEpoch {epoch+1}/{NUM_EPOCHS}")
        print("-" * 30)
        
        train_loss = train_loop(train_loader, model, criterion, optimizer, device)
        
        val_results = validation_loop(val_loader, model, criterion, device)

        scheduler.step()
        
        print(f"Training Loss: {train_loss:.4f}")
        print(f"Validation Loss: {val_results['loss']:.4f}")
        print(f"Exact Match Accuracy: {val_results['exact_match_accuracy']:.4f}")
        print(f"Sample Accuracy: {val_results['sample_accuracy']:.4f}")
        print(f"Micro F1: {val_results['micro_f1']:.4f}")
        print(f"Macro F1: {val_results['macro_f1']:.4f}")
        print(f"Current learning rate: {scheduler.get_last_lr()[0]:.2e}")
        
        # Model selection: save best model based on validation loss
        '''
        if val_results['loss'] < best_val_loss:
            best_val_loss = val_results['loss']
            torch.save({
                'epoch': epoch + 1,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'scheduler_state_dict': scheduler.state_dict(),
                'best_val_loss': best_val_loss,
                'train_loss': train_loss,
                'val_results': val_results,
            }, MODEL_SAVE_PATH)
            print(f"New best model saved (Validation Loss: {best_val_loss:.4f})")
        '''
        '''
        # Model selection: save best model based on micro f1
        if val_results['micro_f1'] > best_val_micro_f1:
            best_val_micro_f1 = val_results['micro_f1']
            torch.save({
                'epoch': epoch + 1,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'scheduler_state_dict': scheduler.state_dict(),
                'best_val_micro_f1': best_val_micro_f1,
                'train_loss': train_loss,
                'val_results': val_results,
            }, MODEL_SAVE_PATH)
            print(f"New best model saved (Micro F1: {best_val_micro_f1:.4f})")
        '''
        
        # Model selection: save best model based on macro f1
        if val_results['macro_f1'] > best_val_macro_f1:
            best_val_macro_f1 = val_results['macro_f1']
            torch.save({
                'epoch': epoch + 1,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'scheduler_state_dict': scheduler.state_dict(),
                'best_val_macro_f1': best_val_macro_f1,
                'train_loss': train_loss,
                'val_results': val_results,
            }, MODEL_SAVE_PATH)
            print(f"New best model saved (Macro F1: {best_val_macro_f1:.4f})")
        
        
    print("\nTraining completed!")
    print(f"Best model saved to: {MODEL_SAVE_PATH}")

Starting training...


  0%|          | 0/15 [00:00<?, ?it/s]


Epoch 1/15
------------------------------


Training:   0%|          | 0/914 [00:00<?, ?it/s]

Validating:   0%|          | 0/102 [00:00<?, ?it/s]

Training Loss: 0.1687
Validation Loss: 0.1089
Exact Match Accuracy: 0.0000
Sample Accuracy: 0.0000
Micro F1: 0.2811
Macro F1: 0.0148
Current learning rate: 9.89e-05
New best model saved (Macro F1: 0.0148)

Epoch 2/15
------------------------------


Training:   0%|          | 0/914 [00:00<?, ?it/s]

Validating:   0%|          | 0/102 [00:00<?, ?it/s]

Training Loss: 0.1011
Validation Loss: 0.0921
Exact Match Accuracy: 0.0000
Sample Accuracy: 0.0000
Micro F1: 0.3454
Macro F1: 0.0590
Current learning rate: 9.57e-05
New best model saved (Macro F1: 0.0590)

Epoch 3/15
------------------------------


Training:   0%|          | 0/914 [00:00<?, ?it/s]

Validating:   0%|          | 0/102 [00:00<?, ?it/s]

Training Loss: 0.0907
Validation Loss: 0.0842
Exact Match Accuracy: 0.0000
Sample Accuracy: 0.0000
Micro F1: 0.4287
Macro F1: 0.1680
Current learning rate: 9.05e-05
New best model saved (Macro F1: 0.1680)

Epoch 4/15
------------------------------


Training:   0%|          | 0/914 [00:00<?, ?it/s]

Validating:   0%|          | 0/102 [00:00<?, ?it/s]

Training Loss: 0.0837
Validation Loss: 0.0786
Exact Match Accuracy: 0.0000
Sample Accuracy: 0.0000
Micro F1: 0.4900
Macro F1: 0.2555
Current learning rate: 8.36e-05
New best model saved (Macro F1: 0.2555)

Epoch 5/15
------------------------------


Training:   0%|          | 0/914 [00:00<?, ?it/s]

Validating:   0%|          | 0/102 [00:00<?, ?it/s]

Training Loss: 0.0785
Validation Loss: 0.0752
Exact Match Accuracy: 0.0000
Sample Accuracy: 0.0000
Micro F1: 0.5255
Macro F1: 0.3113
Current learning rate: 7.52e-05
New best model saved (Macro F1: 0.3113)

Epoch 6/15
------------------------------


Training:   0%|          | 0/914 [00:00<?, ?it/s]

Validating:   0%|          | 0/102 [00:00<?, ?it/s]

Training Loss: 0.0748
Validation Loss: 0.0727
Exact Match Accuracy: 0.0000
Sample Accuracy: 0.0000
Micro F1: 0.5418
Macro F1: 0.3536
Current learning rate: 6.58e-05
New best model saved (Macro F1: 0.3536)

Epoch 7/15
------------------------------


Training:   0%|          | 0/914 [00:00<?, ?it/s]

Validating:   0%|          | 0/102 [00:00<?, ?it/s]

Training Loss: 0.0722
Validation Loss: 0.0711
Exact Match Accuracy: 0.0000
Sample Accuracy: 0.0000
Micro F1: 0.5575
Macro F1: 0.3791
Current learning rate: 5.57e-05
New best model saved (Macro F1: 0.3791)

Epoch 8/15
------------------------------


Training:   0%|          | 0/914 [00:00<?, ?it/s]

Validating:   0%|          | 0/102 [00:00<?, ?it/s]

Training Loss: 0.0701
Validation Loss: 0.0701
Exact Match Accuracy: 0.0000
Sample Accuracy: 0.0000
Micro F1: 0.5769
Macro F1: 0.4138
Current learning rate: 4.53e-05
New best model saved (Macro F1: 0.4138)

Epoch 9/15
------------------------------


Training:   0%|          | 0/914 [00:00<?, ?it/s]

Validating:   0%|          | 0/102 [00:00<?, ?it/s]

Training Loss: 0.0685
Validation Loss: 0.0692
Exact Match Accuracy: 0.0000
Sample Accuracy: 0.0000
Micro F1: 0.5854
Macro F1: 0.4280
Current learning rate: 3.52e-05
New best model saved (Macro F1: 0.4280)

Epoch 10/15
------------------------------


Training:   0%|          | 0/914 [00:00<?, ?it/s]

Validating:   0%|          | 0/102 [00:00<?, ?it/s]

Training Loss: 0.0673
Validation Loss: 0.0688
Exact Match Accuracy: 0.0000
Sample Accuracy: 0.0000
Micro F1: 0.5867
Macro F1: 0.4354
Current learning rate: 2.58e-05
New best model saved (Macro F1: 0.4354)

Epoch 11/15
------------------------------


Training:   0%|          | 0/914 [00:00<?, ?it/s]

Validating:   0%|          | 0/102 [00:00<?, ?it/s]

Training Loss: 0.0664
Validation Loss: 0.0685
Exact Match Accuracy: 0.0000
Sample Accuracy: 0.0000
Micro F1: 0.5936
Macro F1: 0.4436
Current learning rate: 1.74e-05
New best model saved (Macro F1: 0.4436)

Epoch 12/15
------------------------------


Training:   0%|          | 0/914 [00:00<?, ?it/s]

Validating:   0%|          | 0/102 [00:00<?, ?it/s]

Training Loss: 0.0658
Validation Loss: 0.0683
Exact Match Accuracy: 0.0000
Sample Accuracy: 0.0000
Micro F1: 0.5943
Macro F1: 0.4471
Current learning rate: 1.05e-05
New best model saved (Macro F1: 0.4471)

Epoch 13/15
------------------------------


Training:   0%|          | 0/914 [00:00<?, ?it/s]

Validating:   0%|          | 0/102 [00:00<?, ?it/s]

Training Loss: 0.0653
Validation Loss: 0.0682
Exact Match Accuracy: 0.0000
Sample Accuracy: 0.0000
Micro F1: 0.5936
Macro F1: 0.4492
Current learning rate: 5.28e-06
New best model saved (Macro F1: 0.4492)

Epoch 14/15
------------------------------


Training:   0%|          | 0/914 [00:00<?, ?it/s]

Validating:   0%|          | 0/102 [00:00<?, ?it/s]

Training Loss: 0.0650
Validation Loss: 0.0683
Exact Match Accuracy: 0.0000
Sample Accuracy: 0.0000
Micro F1: 0.5920
Macro F1: 0.4437
Current learning rate: 2.08e-06

Epoch 15/15
------------------------------


Training:   0%|          | 0/914 [00:00<?, ?it/s]

Validating:   0%|          | 0/102 [00:00<?, ?it/s]

Training Loss: 0.0648
Validation Loss: 0.0681
Exact Match Accuracy: 0.0000
Sample Accuracy: 0.0000
Micro F1: 0.5971
Macro F1: 0.4529
Current learning rate: 1.00e-06
New best model saved (Macro F1: 0.4529)

Training completed!
Best model saved to: best_coco_shuffle_model.pth


In [21]:
print("=" * 60)
print("Starting test prediction program")
print("=" * 60)

BATCH_SIZE_TEST = 64

print(f"Test inference hyperparameters:")
print(f"  - Test batch size: {BATCH_SIZE_TEST}")

Starting test prediction program
Test inference hyperparameters:
  - Test batch size: 64


In [22]:
print(f"Test directories and files:")
print(f"  - Test images: {TEST_IMG_DIR}")
print(f"  - Trained model: {MODEL_SAVE_PATH}")
print(f"  - Output JSON: {OUTPUT_JSON_FILE}")


Test directories and files:
  - Test images: ms-coco\images\test-resized\test-resized
  - Trained model: best_coco_shuffle_model.pth
  - Output JSON: coco_predictions_shuffle_v3.json


In [23]:
test_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

test_dataset = COCOTestImageDataset(
    img_dir=TEST_IMG_DIR,
    transform=test_transforms
)

test_loader = DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE_TEST,
    shuffle=False,  # No shuffling needed for testing
    num_workers=4,  
    pin_memory=True if device.type == 'cuda' else False,
    persistent_workers=True
)

print(f"Test dataset size: {len(test_dataset)}")
print(f"Test batch count: {len(test_loader)}")
print(f"Test loader using {test_loader.num_workers} workers")

Test dataset size: 4952
Test batch count: 78
Test loader using 4 workers


In [25]:
test_model = COCOMultiLabelClassifier(num_classes=NUM_CLASSES, pretrained=False)

if os.path.exists(MODEL_SAVE_PATH):
    checkpoint = torch.load(MODEL_SAVE_PATH, map_location=device)
    test_model.load_state_dict(checkpoint['model_state_dict'])
    print(f"Successfully loaded model weights from: {MODEL_SAVE_PATH}")
    print(f"Model training epoch: {checkpoint['epoch']}")
    print(f"Best validation loss: {checkpoint['best_val_macro_f1']:.4f}")
else:
    print(f"Trained model file not found: {MODEL_SAVE_PATH}")
    print("Please run the training program first")
    raise FileNotFoundError(f"Model file not found: {MODEL_SAVE_PATH}")

test_model = test_model.to(device)
test_model.eval()
print("Model ready for inference")

  checkpoint = torch.load(MODEL_SAVE_PATH, map_location=device)


Successfully loaded model weights from: best_coco_shuffle_model.pth
Model training epoch: 15
Best validation loss: 0.4529
Model ready for inference


In [26]:
predictions_dict = {}
print("Output dictionary initialized")


Output dictionary initialized


In [27]:
print("Starting prediction loop...")
print("-" * 40)

with torch.no_grad():
    for batch_idx, (images, filenames) in enumerate(tqdm(test_loader, desc="Predicting")):
        # Get mini-batch
        images = images.to(device)
        
        outputs = test_model(images)
        
        probabilities = torch.sigmoid(outputs)
        predictions = (probabilities > THRESHOLD).cpu().numpy()
        
        # Update dictionary entries, write corresponding class indices
        for i, filename in enumerate(filenames):
            predicted_classes = []
            for class_idx in range(NUM_CLASSES):
                if predictions[i, class_idx]:
                    predicted_classes.append(class_idx)
            
            predictions_dict[filename] = predicted_classes

print(f"Prediction completed, processed {len(predictions_dict)} images")

Starting prediction loop...
----------------------------------------


Predicting:   0%|          | 0/78 [00:00<?, ?it/s]

Prediction completed, processed 4952 images


In [28]:
print(f"Saving prediction results to: {OUTPUT_JSON_FILE}")

# Show some sample predictions
sample_count = 0
for filename, predicted_classes in predictions_dict.items():
    if sample_count < 5:  # Show only first 5 samples
        print(f"  Sample {filename}: predicted classes {predicted_classes}")
        sample_count += 1

try:
    with open(OUTPUT_JSON_FILE, 'w') as f:
        json.dump(predictions_dict, f, indent=2)
    print(f"JSON file successfully saved to: {OUTPUT_JSON_FILE}")
    
    # Check file size
    file_size = os.path.getsize(OUTPUT_JSON_FILE)
    print(f"File size: {file_size / 1024:.2f} KB")
    
except Exception as e:
    print(f"Error saving JSON file: {e}")
    raise

print("=" * 60)
print("Test prediction program completed!")

Saving prediction results to: coco_predictions_shuffle_v3.json
  Sample 000000000139: predicted classes [56, 57, 58, 60, 62]
  Sample 000000000285: predicted classes []
  Sample 000000000632: predicted classes [56, 57, 58, 62, 73]
  Sample 000000000724: predicted classes [11]
  Sample 000000000776: predicted classes [77]
JSON file successfully saved to: coco_predictions_shuffle_v3.json
File size: 196.40 KB
Test prediction program completed!
