In [1]:
import pandas as pd
import numpy as np
import torch
from torchvision.datasets import Caltech256, Caltech101
from lightning.pytorch.callbacks import ModelCheckpoint
from lightning.pytorch import Trainer
from lightning.pytorch import loggers as pl_loggers

from library.taxonomy import Taxonomy
from library.models import UniversalResNetModel
from library.datasets import (
    Caltech256DataModule,
    Caltech101DataModule,
    CombinedDataModule,
)

# Lower the float32 matmul precision to "medium" to speed up training
torch.set_float32_matmul_precision("medium")

# Open both datasets from their local folders (no download is attempted)
# and keep only their category lists.
caltech256_dataset = Caltech256(root="datasets/caltech256", download=False)
caltech101_dataset = Caltech101(root="datasets/caltech101", download=False)
caltech256_labels = caltech256_dataset.categories
caltech101_labels = caltech101_dataset.categories

# Report how many categories each dataset exposes
print(f"Caltech-256 classes: {len(caltech256_labels)}")
print(f"Caltech-101 classes: {len(caltech101_labels)}")

Caltech-256 classes: 257
Caltech-101 classes: 101


In [2]:
# Restore the two taxonomies that were built from the real-world datasets,
# in a fixed order: first the "hypothesis" variant, then the "mcfp" variant.
# NOTE(review): the `.pkl` files are presumably unpickled by `Taxonomy.load`;
# only load files from a trusted source.
hypothesis_taxonomy, mcfp_taxonomy = (
    Taxonomy.load(taxonomy_file)
    for taxonomy_file in (
        "taxonomies/caltech256_caltech101_hypothesis.pkl",
        "taxonomies/caltech256_caltech101_mcfp.pkl",
    )
)

In [3]:
# Multi-domain training setup

# Switch between training from scratch (True) and evaluating the
# checkpoints that already exist on disk (False).
TRAIN = False

dataset_name = "Caltech-101 + Caltech-256 (Multi-Domain)"

# One data module per source dataset
caltech101_dm = Caltech101DataModule(batch_size=32)
caltech256_dm = Caltech256DataModule(batch_size=32)

# Both datasets merged into a single data module; each dataset is tagged
# with a domain id (0 = Caltech-101, 1 = Caltech-256).
domain_modules = [caltech101_dm, caltech256_dm]
dataset_module = CombinedDataModule(
    dataset_modules=domain_modules,
    domain_ids=[0, 1],
    batch_size=64,
    num_workers=11,
)

# Per-taxonomy settings: the taxonomy object itself, the checkpoint file
# stem, and the logger run name.
hypothesis_entry = {
    "taxonomy": hypothesis_taxonomy,
    "model_name": "universal-resnet50-hypothesis-multi-domain-min-val-loss",
    "logger_name": "universal_hypothesis_multi_domain",
}
mcfp_entry = {
    "taxonomy": mcfp_taxonomy,
    "model_name": "universal-resnet50-mcfp-multi-domain-min-val-loss",
    "logger_name": "universal_mcfp_multi_domain",
}
taxonomies_config = {
    "hypothesis": hypothesis_entry,
    "mcfp": mcfp_entry,
}

In [4]:
# Training configuration (shared for both models)

# Single source of truth for the epoch budget. Both the trainer's
# `max_epochs` and the cosine scheduler's `T_max` are derived from it, so
# the two values cannot drift apart when the budget is changed.
MAX_EPOCHS = 50

training_config = {
    "max_epochs": MAX_EPOCHS,
    "optim": "adamw",
    "optim_kwargs": {
        "lr": 0.00005,  # Reduced from 0.0001
        "weight_decay": 0.001,
        "betas": (0.9, 0.999),
        "eps": 1e-8,
    },
    "lr_scheduler": "cosine",  # Changed from multistep
    "lr_scheduler_kwargs": {
        "T_max": MAX_EPOCHS,  # kept equal to max_epochs
        "eta_min": 1e-7,
    },
}

In [None]:
# Train (TRAIN=True) or load (TRAIN=False) one universal model per taxonomy
# and evaluate it on the combined multi-domain data module.
# `results` maps the taxonomy name to the list returned by `trainer.test`.
results = {}

for taxonomy_name, config in taxonomies_config.items():
    if TRAIN:
        # Build a fresh Universal ResNet model only when it is actually
        # trained; the evaluation-only branch below restores the model from
        # its checkpoint, so constructing one there would be wasted work.
        model = UniversalResNetModel(
            taxonomy=config["taxonomy"],
            architecture="resnet50",
            optim=training_config["optim"],
            optim_kwargs=training_config["optim_kwargs"],
            lr_scheduler=training_config["lr_scheduler"],
            lr_scheduler_kwargs=training_config["lr_scheduler_kwargs"],
        )

        tb_logger = pl_loggers.TensorBoardLogger(
            save_dir="logs", name=config["logger_name"]
        )

        # NOTE(review): the checkpoint file name ends in "min-val-loss", but
        # the callback keeps the checkpoint with the highest `val_accuracy`.
        # Confirm which selection criterion is intended.
        trainer = Trainer(
            max_epochs=training_config["max_epochs"],
            logger=tb_logger,
            callbacks=[
                ModelCheckpoint(
                    dirpath="checkpoints",
                    monitor="val_accuracy",
                    mode="max",
                    save_top_k=1,
                    filename=config["model_name"],
                    # Reuse the same file name on every run instead of
                    # appending a version suffix.
                    enable_version_counter=False,
                )
            ],
        )

        # Train the model
        trainer.fit(model, datamodule=dataset_module)

        # Test the best checkpoint recorded during this fit
        test_results = trainer.test(datamodule=dataset_module, ckpt_path="best")

    else:
        # Evaluation only: no logging and no checkpoint writing
        trainer = Trainer(
            logger=False,
            enable_checkpointing=False,
        )

        # Load pre-trained model
        print(f"Loading pre-trained model: {config['model_name']}.ckpt")
        model = UniversalResNetModel.load_from_checkpoint(
            f"checkpoints/{config['model_name']}.ckpt",
            taxonomy=config[
                "taxonomy"
            ],  # Need to pass taxonomy since it's not serialized
        )

        # Test the loaded model
        test_results = trainer.test(model, datamodule=dataset_module)

    # Store results
    results[taxonomy_name] = test_results

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Loading pre-trained model: universal-resnet50-hypothesis-multi-domain-min-val-loss.ckpt


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0:   3%|▎         | 2/62 [00:01<00:51,  1.16it/s]

/home/bjoern/miniconda3/envs/master-thesis/lib/python3.13/site-packages/lightning/pytorch/utilities/data.py:79: Trying to infer the `batch_size` from an ambiguous collection. The batch size we found is 64. To avoid any miscalculations, use `self.log(..., batch_size=batch_size)`.


Testing DataLoader 0: 100%|██████████| 62/62 [00:09<00:00,  6.75it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy          0.840081512928009
        eval_loss           2.2472734451293945
        hp_metric            0.840081512928009
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


/home/bjoern/miniconda3/envs/master-thesis/lib/python3.13/site-packages/lightning/pytorch/utilities/data.py:79: Trying to infer the `batch_size` from an ambiguous collection. The batch size we found is 23. To avoid any miscalculations, use `self.log(..., batch_size=batch_size)`.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
/home/bjoern/miniconda3/envs/master-thesis/lib/python3.13/site-packages/lightning/pytorch/callbacks/model_checkpoint.py:654: Checkpoint directory /home/bjoern/dev/master-thesis/project/checkpoints exists and is not empty.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type             | Params | Mode 
-------------------------------------------------------
0 | model     | ResNet           | 26.0 M | train
1 | criterion | CrossEntropyLoss | 0      | train
-------------------------------------------------------
26.0 M    Trainable params
0         Non-trainable params
26.0 M    Total par

Epoch 49: 100%|██████████| 492/492 [02:29<00:00,  3.30it/s, v_num=4]       

`Trainer.fit` stopped: `max_epochs=50` reached.


Epoch 49: 100%|██████████| 492/492 [02:29<00:00,  3.30it/s, v_num=4]


Restoring states from the checkpoint path at /home/bjoern/dev/master-thesis/project/checkpoints/universal-resnet50-mcfp-multi-domain-min-val-loss.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/bjoern/dev/master-thesis/project/checkpoints/universal-resnet50-mcfp-multi-domain-min-val-loss.ckpt


Testing DataLoader 0: 100%|██████████| 62/62 [00:10<00:00,  5.93it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy         0.8230201005935669
        eval_loss           1.6218763589859009
        hp_metric           0.8230201005935669
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


In [6]:
# Wrap each dataset on its own in a CombinedDataModule so that batches keep
# the (target, domain_id) tuple format expected by the universal models.
loader_settings = {"batch_size": 64, "num_workers": 11}

# Domain 0: Caltech-101
caltech101_combined_dm = CombinedDataModule(
    dataset_modules=[caltech101_dm],
    domain_ids=[0],
    **loader_settings,
)

# Domain 1: Caltech-256
caltech256_combined_dm = CombinedDataModule(
    dataset_modules=[caltech256_dm],
    domain_ids=[1],
    **loader_settings,
)

# Evaluate every taxonomy's checkpoint on each domain separately.
# Layout: domain_results[taxonomy_name]["caltech101" | "caltech256"] holds the
# first entry of the list returned by `trainer.test`.
domain_results = {}
for taxonomy_name, config in taxonomies_config.items():
    # Restore the trained model; the taxonomy is passed in explicitly
    print(f"Loading pre-trained model: {config['model_name']}.ckpt")
    model = UniversalResNetModel.load_from_checkpoint(
        f"checkpoints/{config['model_name']}.ckpt",
        taxonomy=config["taxonomy"],
    )

    # Test-only trainer: no logger, no checkpoint writing
    trainer = Trainer(logger=False, enable_checkpointing=False)

    per_domain_metrics = {}
    domain_results[taxonomy_name] = per_domain_metrics

    # Caltech-101 (Domain 0)
    caltech101_results = trainer.test(model, datamodule=caltech101_combined_dm)
    per_domain_metrics["caltech101"] = caltech101_results[0]
    print(f"Caltech-101 Test Accuracy: {caltech101_results[0]['eval_accuracy']:.4f}")

    # Caltech-256 (Domain 1)
    caltech256_results = trainer.test(model, datamodule=caltech256_combined_dm)
    per_domain_metrics["caltech256"] = caltech256_results[0]
    print(f"Caltech-256 Test Accuracy: {caltech256_results[0]['eval_accuracy']:.4f}")

Loading pre-trained model: universal-resnet50-hypothesis-multi-domain-min-val-loss.ckpt


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 14/14 [00:01<00:00,  8.90it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy         0.9134948253631592
        eval_loss            2.688892364501953
        hp_metric           0.9134948253631592
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


/home/bjoern/miniconda3/envs/master-thesis/lib/python3.13/site-packages/lightning/pytorch/utilities/data.py:79: Trying to infer the `batch_size` from an ambiguous collection. The batch size we found is 35. To avoid any miscalculations, use `self.log(..., batch_size=batch_size)`.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Caltech-101 Test Accuracy: 0.9135
Testing DataLoader 0: 100%|██████████| 48/48 [00:05<00:00,  8.78it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy         0.8209150433540344
        eval_loss           2.0995938777923584
        hp_metric           0.8209150433540344
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


/home/bjoern/miniconda3/envs/master-thesis/lib/python3.13/site-packages/lightning/pytorch/utilities/data.py:79: Trying to infer the `batch_size` from an ambiguous collection. The batch size we found is 52. To avoid any miscalculations, use `self.log(..., batch_size=batch_size)`.


Caltech-256 Test Accuracy: 0.8209
Loading pre-trained model: universal-resnet50-mcfp-multi-domain-min-val-loss.ckpt


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 14/14 [00:01<00:00,  9.10it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy         0.9158016443252563
        eval_loss           2.0957741737365723
        hp_metric           0.9158016443252563
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Caltech-101 Test Accuracy: 0.9158
Testing DataLoader 0: 100%|██████████| 48/48 [00:05<00:00,  8.65it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy         0.8055555820465088
        eval_loss            1.485878348350525
        hp_metric           0.8055555820465088
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Caltech-256 Test Accuracy: 0.8056
