In [1]:
import pandas as pd
import numpy as np
import torch
from torchvision.datasets import Caltech256, Caltech101, CIFAR100
from lightning.pytorch.callbacks import ModelCheckpoint
from lightning.pytorch import Trainer
from lightning.pytorch import loggers as pl_loggers

from library.taxonomy import Taxonomy
from library.models import UniversalResNetModel
from library.datasets import (
    Caltech256DataModule,
    Caltech101DataModule,
    CIFAR100ScaledDataModule,
    CIFAR100DataModule,
    CombinedDataModule,
)

# Read the class-name lists of the three source datasets from their local
# copies under datasets/ (nothing is downloaded here).
caltech256_labels = Caltech256(download=False, root="datasets/caltech256").categories
caltech101_labels = Caltech101(download=False, root="datasets/caltech101").categories
cifar100_labels = CIFAR100(
    train=False, download=False, root="datasets/cifar100"
).classes

# Report the number of classes per dataset, one line each
print(
    f"Caltech-256 classes: {len(caltech256_labels)}",
    f"Caltech-101 classes: {len(caltech101_labels)}",
    f"CIFAR-100 classes: {len(cifar100_labels)}",
    sep="\n",
)

# Trade some float32 matmul precision for faster training
torch.set_float32_matmul_precision("medium")

Caltech-256 classes: 257
Caltech-101 classes: 101
CIFAR-100 classes: 100


In [2]:
# Two-domain taxonomies built from Caltech-256 + Caltech-101
# (hypothesis and mcfp variants)
hypothesis_taxonomy, mcfp_taxonomy = map(
    Taxonomy.load,
    (
        "taxonomies/caltech256_caltech101_hypothesis.pkl",
        "taxonomies/caltech256_caltech101_mcfp.pkl",
    ),
)

# Three-domain taxonomies (same two variants)
three_domain_hypothesis_taxonomy, three_domain_mcfp_taxonomy = map(
    Taxonomy.load,
    ("taxonomies/three_domain_hypothesis.pkl", "taxonomies/three_domain_mcfp.pkl"),
)

In [3]:
# Multi-domain experiment setup: data modules and per-taxonomy run settings

# False -> evaluate existing checkpoints; True -> train the models from scratch
TRAIN = False

# Per-dataset data modules (CIFAR-100 in a scaled and an original variant)
caltech101_dm = Caltech101DataModule(batch_size=32)
caltech256_dm = Caltech256DataModule(batch_size=32)
cifar100_dm = CIFAR100ScaledDataModule(batch_size=32)
cifar100_original_dm = CIFAR100DataModule(batch_size=32)

# Loader settings shared by the combined data modules defined below
combined_dm_kwargs = dict(batch_size=64, num_workers=11)

# Two-domain module -- domain 0: Caltech-101, domain 1: Caltech-256
dataset_module = CombinedDataModule(
    dataset_modules=[caltech101_dm, caltech256_dm],
    domain_ids=[0, 1],
    **combined_dm_kwargs,
)

# Three-domain module -- adds CIFAR-100 (scaled) as domain 2
three_domain_dataset_module = CombinedDataModule(
    dataset_modules=[caltech101_dm, caltech256_dm, cifar100_dm],
    domain_ids=[0, 1, 2],
    **combined_dm_kwargs,
)

# Human-readable names of the two combined datasets
dataset_name = "Caltech-101 + Caltech-256 (Multi-Domain)"
three_domain_dataset_name = "Caltech-101 + Caltech-256 + CIFAR-100 (Three-Domain)"

# One entry per taxonomy: the taxonomy object, the checkpoint file stem
# ("model_name") and the TensorBoard run name ("logger_name").
taxonomies_config = dict(
    hypothesis=dict(
        taxonomy=hypothesis_taxonomy,
        model_name="universal-resnet50-hypothesis-multi-domain-min-val-loss",
        logger_name="universal_hypothesis_multi_domain",
    ),
    mcfp=dict(
        taxonomy=mcfp_taxonomy,
        model_name="universal-resnet50-mcfp-multi-domain-min-val-loss",
        logger_name="universal_mcfp_multi_domain",
    ),
    three_domain_hypothesis=dict(
        taxonomy=three_domain_hypothesis_taxonomy,
        model_name="universal-resnet50-three-domain-hypothesis-min-val-loss",
        logger_name="universal_three_domain_hypothesis",
    ),
    three_domain_mcfp=dict(
        taxonomy=three_domain_mcfp_taxonomy,
        model_name="universal-resnet50-three-domain-mcfp-min-val-loss",
        logger_name="universal_three_domain_mcfp",
    ),
)

In [4]:
# Optimiser and LR-scheduler settings shared by every model in this notebook
training_config = dict(
    max_epochs=50,
    optim="adamw",
    optim_kwargs=dict(
        lr=5e-5,  # lowered from 1e-4
        weight_decay=1e-3,
        betas=(0.9, 0.999),
        eps=1e-8,
    ),
    lr_scheduler="cosine",  # replaces the earlier multistep schedule
    lr_scheduler_kwargs=dict(
        T_max=50,  # same value as max_epochs
        eta_min=1e-7,
    ),
)

In [5]:
# Train (TRAIN=True) or load from checkpoint (TRAIN=False) one model per entry
# of taxonomies_config, run trainer.test on the matching combined data module,
# and collect the returned test results in `results`, keyed by taxonomy name.
results = {}

for taxonomy_name, config in taxonomies_config.items():
    # Three-domain taxonomies use the three-domain data module; all others
    # use the two-domain (Caltech-101 + Caltech-256) module.
    if taxonomy_name in ["three_domain_hypothesis", "three_domain_mcfp"]:
        current_dataset_module = three_domain_dataset_module
    else:
        current_dataset_module = dataset_module

    if TRAIN:
        # A fresh model is only needed when training. The evaluation-only
        # branch below loads its model from a checkpoint, so building one
        # unconditionally would be wasted work.
        model = UniversalResNetModel(
            taxonomy=config["taxonomy"],
            architecture="resnet50",
            optim=training_config["optim"],
            optim_kwargs=training_config["optim_kwargs"],
            lr_scheduler=training_config["lr_scheduler"],
            lr_scheduler_kwargs=training_config["lr_scheduler_kwargs"],
        )

        tb_logger = pl_loggers.TensorBoardLogger(
            save_dir="logs", name=config["logger_name"]
        )

        # NOTE(review): the checkpoint keeps the epoch with the highest
        # "val_accuracy", while the file names in taxonomies_config end in
        # "min-val-loss" -- confirm which selection criterion is intended.
        trainer = Trainer(
            max_epochs=training_config["max_epochs"],
            logger=tb_logger,
            callbacks=[
                ModelCheckpoint(
                    dirpath="checkpoints",
                    monitor="val_accuracy",
                    mode="max",
                    save_top_k=1,
                    filename=config["model_name"],
                    enable_version_counter=False,
                )
            ],
        )

        # Train the model
        trainer.fit(model, datamodule=current_dataset_module)

        # Test using the best checkpoint saved during this fit
        test_results = trainer.test(datamodule=current_dataset_module, ckpt_path="best")

    else:
        # Evaluation only: no logger and no checkpoint writing
        trainer = Trainer(
            logger=False,
            enable_checkpointing=False,
        )

        # Load pre-trained model
        print(f"Loading pre-trained model: {config['model_name']}.ckpt")
        model = UniversalResNetModel.load_from_checkpoint(
            f"checkpoints/{config['model_name']}.ckpt",
            taxonomy=config[
                "taxonomy"
            ],  # Need to pass taxonomy since it's not serialized
        )

        # Test the loaded model
        test_results = trainer.test(model, datamodule=current_dataset_module)

    # Store results
    results[taxonomy_name] = test_results

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Loading pre-trained model: universal-resnet50-hypothesis-multi-domain-min-val-loss.ckpt


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0:   3%|▎         | 2/62 [00:01<00:46,  1.29it/s]

/home/bjoern/miniconda3/envs/master-thesis/lib/python3.13/site-packages/lightning/pytorch/utilities/data.py:79: Trying to infer the `batch_size` from an ambiguous collection. The batch size we found is 64. To avoid any miscalculations, use `self.log(..., batch_size=batch_size)`.


Testing DataLoader 0: 100%|██████████| 62/62 [00:09<00:00,  6.76it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy         0.8492487668991089
        eval_loss           2.2416648864746094
        hp_metric           0.8492487668991089
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy         0.8492487668991089
        eval_loss           2.2416648864746094
 

/home/bjoern/miniconda3/envs/master-thesis/lib/python3.13/site-packages/lightning/pytorch/utilities/data.py:79: Trying to infer the `batch_size` from an ambiguous collection. The batch size we found is 23. To avoid any miscalculations, use `self.log(..., batch_size=batch_size)`.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Loading pre-trained model: universal-resnet50-mcfp-multi-domain-min-val-loss.ckpt


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 62/62 [00:07<00:00,  7.88it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy         0.8250572681427002
        eval_loss            1.626855492591858
        hp_metric           0.8250572681427002
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy         0.8250572681427002
        eval_loss            1.626855492591858
 

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Loading pre-trained model: universal-resnet50-three-domain-hypothesis-min-val-loss.ckpt


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 218/218 [00:27<00:00,  7.80it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy         0.7890428900718689
        eval_loss           2.1360154151916504
        hp_metric           0.7890428900718689
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy         0.7890428900718689
        eval_loss           2.1360154151916504

/home/bjoern/miniconda3/envs/master-thesis/lib/python3.13/site-packages/lightning/pytorch/utilities/data.py:79: Trying to infer the `batch_size` from an ambiguous collection. The batch size we found is 39. To avoid any miscalculations, use `self.log(..., batch_size=batch_size)`.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Loading pre-trained model: universal-resnet50-three-domain-mcfp-min-val-loss.ckpt


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 218/218 [00:27<00:00,  7.82it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy         0.7676455974578857
        eval_loss           1.8525665998458862
        hp_metric           0.7676455974578857
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy         0.7676455974578857
        eval_loss           1.8525665998458862

In [6]:
# Wrap each dataset in its own single-domain CombinedDataModule so that batches
# keep the (target, domain_id) tuple format expected by the universal models.
caltech101_combined_dm = CombinedDataModule(
    batch_size=64,
    num_workers=11,
    dataset_modules=[caltech101_dm],
    domain_ids=[0],  # Caltech-101 -> domain 0
)

caltech256_combined_dm = CombinedDataModule(
    batch_size=64,
    num_workers=11,
    dataset_modules=[caltech256_dm],
    domain_ids=[1],  # Caltech-256 -> domain 1
)

cifar100_combined_dm = CombinedDataModule(
    batch_size=64,
    num_workers=11,
    dataset_modules=[cifar100_dm],
    domain_ids=[2],  # CIFAR-100 -> domain 2
)

# Evaluate every checkpoint on each single domain and on its combined test set.
# NOTE(review): in the recorded outputs the "unified" accuracy computed here
# differs slightly from the earlier combined test run of the same checkpoint
# (e.g. 0.8470 vs 0.8492), so evaluation may not be deterministic -- confirm
# and consider seeding.
domain_results = {}
for taxonomy_name, config in taxonomies_config.items():
    print(f"Loading pre-trained model: {config['model_name']}.ckpt")
    model = UniversalResNetModel.load_from_checkpoint(
        f"checkpoints/{config['model_name']}.ckpt", taxonomy=config["taxonomy"]
    )

    # Evaluation-only trainer: no logger, no checkpoint writing
    trainer = Trainer(enable_checkpointing=False, logger=False)

    # Keys are inserted in the order:
    # name, caltech101, caltech256, cifar100, unified
    accuracies = {"name": taxonomy_name}
    domain_results[taxonomy_name] = accuracies

    # Domain 0: Caltech-101
    caltech101_results = trainer.test(model, datamodule=caltech101_combined_dm)
    accuracies["caltech101"] = caltech101_results[0]["eval_accuracy"]

    # Domain 1: Caltech-256
    caltech256_results = trainer.test(model, datamodule=caltech256_combined_dm)
    accuracies["caltech256"] = caltech256_results[0]["eval_accuracy"]

    if taxonomy_name not in ["three_domain_hypothesis", "three_domain_mcfp"]:
        # Two-domain models have no CIFAR-100 domain, so that entry is None
        accuracies["cifar100"] = None

        # Combined two-domain test set
        original_results = trainer.test(model, datamodule=dataset_module)
        accuracies["unified"] = original_results[0]["eval_accuracy"]
    else:
        # Domain 2: CIFAR-100 (three-domain models only)
        cifar100_results = trainer.test(model, datamodule=cifar100_combined_dm)
        accuracies["cifar100"] = cifar100_results[0]["eval_accuracy"]

        # Combined three-domain test set
        three_domain_results = trainer.test(
            model, datamodule=three_domain_dataset_module
        )
        accuracies["unified"] = three_domain_results[0]["eval_accuracy"]

Loading pre-trained model: universal-resnet50-hypothesis-multi-domain-min-val-loss.ckpt


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 14/14 [00:01<00:00,  7.92it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy         0.9204152226448059
        eval_loss           2.6933491230010986
        hp_metric           0.9204152226448059
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy         0.9204152226448059
        eval_loss           2.6933491230010986
 

/home/bjoern/miniconda3/envs/master-thesis/lib/python3.13/site-packages/lightning/pytorch/utilities/data.py:79: Trying to infer the `batch_size` from an ambiguous collection. The batch size we found is 35. To avoid any miscalculations, use `self.log(..., batch_size=batch_size)`.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 48/48 [00:06<00:00,  7.76it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy         0.8274509906768799
        eval_loss           2.1112678050994873
        hp_metric           0.8274509906768799
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy         0.8274509906768799
        eval_loss           2.1112678050994873
 

/home/bjoern/miniconda3/envs/master-thesis/lib/python3.13/site-packages/lightning/pytorch/utilities/data.py:79: Trying to infer the `batch_size` from an ambiguous collection. The batch size we found is 52. To avoid any miscalculations, use `self.log(..., batch_size=batch_size)`.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 62/62 [00:08<00:00,  7.15it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy         0.8469569683074951
        eval_loss            2.235304355621338
        hp_metric           0.8469569683074951
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy         0.8469569683074951
        eval_loss            2.235304355621338
 

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 14/14 [00:01<00:00,  8.25it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy         0.9088811874389648
        eval_loss            2.092172622680664
        hp_metric           0.9088811874389648
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Testing DataLoader 0: 100%|██████████| 14/14 [00:01<00:00,  8.25it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy       

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 48/48 [00:06<00:00,  7.81it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy         0.8049019575119019
        eval_loss           1.4988317489624023
        hp_metric           0.8049019575119019
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy         0.8049019575119019
        eval_loss           1.4988317489624023
 

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 62/62 [00:07<00:00,  7.95it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy         0.8270944952964783
        eval_loss           1.6252721548080444
        hp_metric           0.8270944952964783
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy         0.8270944952964783
        eval_loss           1.6252721548080444
 

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 14/14 [00:01<00:00,  8.18it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy         0.9573240876197815
        eval_loss            3.164580821990967
        hp_metric           0.9573240876197815
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Testing DataLoader 0: 100%|██████████| 14/14 [00:01<00:00,  8.18it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy       

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 48/48 [00:07<00:00,  6.85it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy         0.8669934868812561
        eval_loss            2.569204807281494
        hp_metric           0.8669934868812561
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy         0.8669934868812561
        eval_loss            2.569204807281494
 

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 157/157 [00:18<00:00,  8.34it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy         0.7519999742507935
        eval_loss           1.9138060808181763
        hp_metric           0.7519999742507935
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Testing DataLoader 0: 100%|██████████| 157/157 [00:18<00:00,  8.34it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy   

/home/bjoern/miniconda3/envs/master-thesis/lib/python3.13/site-packages/lightning/pytorch/utilities/data.py:79: Trying to infer the `batch_size` from an ambiguous collection. The batch size we found is 16. To avoid any miscalculations, use `self.log(..., batch_size=batch_size)`.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 218/218 [00:27<00:00,  7.94it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy         0.7883248329162598
        eval_loss            2.135984182357788
        hp_metric           0.7883248329162598
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy         0.7883248329162598
        eval_loss            2.135984182357788

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 14/14 [00:01<00:00,  7.66it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy         0.8304498195648193
        eval_loss           2.6672072410583496
        hp_metric           0.8304498195648193
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Testing DataLoader 0: 100%|██████████| 14/14 [00:01<00:00,  7.66it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy       

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 48/48 [00:06<00:00,  7.79it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy         0.7676470875740051
        eval_loss            2.230755567550659
        hp_metric           0.7676470875740051
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy         0.7676470875740051
        eval_loss            2.230755567550659
 

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 157/157 [00:19<00:00,  8.23it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy         0.7609999775886536
        eval_loss           1.6658204793930054
        hp_metric           0.7609999775886536
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy         0.7609999775886536
        eval_loss           1.6658204793930054

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 218/218 [00:26<00:00,  8.24it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy         0.7676455974578857
        eval_loss           1.8520941734313965
        hp_metric           0.7676455974578857
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy         0.7676455974578857
        eval_loss           1.8520941734313965

In [7]:
# Calculate training duration for each taxonomy
from csv import DictReader
import datetime


def calculate_training_duration(file_prefix):
    """Return the elapsed training time recorded in a training CSV export.

    Reads ``training_results/{file_prefix}_train.csv`` and takes the difference
    between the "Wall time" value of the last row and that of the first row.

    Args:
        file_prefix: Stem of the CSV file name (without the ``_train.csv``
            suffix).

    Returns:
        A string such as ``"2h 7m"`` (or just ``"7m"`` when less than one hour
        elapsed), or ``"N/A"`` when the file does not exist or has no data rows.
    """
    csv_path = f"training_results/{file_prefix}_train.csv"
    try:
        with open(csv_path, "r") as csv_file:
            records = list(DictReader(csv_file))
    except FileNotFoundError:
        return "N/A"

    # A header-only (or completely empty) file carries no timing information
    if not records:
        return "N/A"

    # Wall-clock values of the first and the last logged row, in seconds
    started = float(records[0]["Wall time"])
    finished = float(records[-1]["Wall time"])

    # Split the elapsed seconds into whole hours and leftover whole minutes
    whole_hours, leftover_seconds = divmod(finished - started, 3600)
    hours = int(whole_hours)
    minutes = int(leftover_seconds // 60)

    return f"{hours}h {minutes}m" if hours > 0 else f"{minutes}m"


# Look up and report how long the model of each taxonomy took to train
training_durations = {}
for taxonomy_name, config in taxonomies_config.items():
    duration = calculate_training_duration(config["logger_name"])
    print(f"{taxonomy_name.capitalize()} taxonomy training duration: {duration}")
    training_durations[taxonomy_name] = duration

# Attach the duration to the per-taxonomy evaluation results
for taxonomy_name, taxonomy_result in domain_results.items():
    taxonomy_result["training_time"] = training_durations[taxonomy_name]

# One row per taxonomy; the taxonomy keys are dropped in favour of a plain
# 0..n-1 index
df = pd.DataFrame.from_dict(domain_results, orient="index").reset_index(drop=True)

# Show the combined results
print(df)

Hypothesis taxonomy training duration: 2h 7m
Mcfp taxonomy training duration: 2h 17m
Three_domain_hypothesis taxonomy training duration: 4h 58m
Three_domain_mcfp taxonomy training duration: 4h 59m
                      name  caltech101  caltech256  cifar100   unified  \
0               hypothesis    0.920415    0.827451       NaN  0.846957   
1                     mcfp    0.908881    0.804902       NaN  0.827094   
2  three_domain_hypothesis    0.957324    0.866993     0.752  0.788325   
3        three_domain_mcfp    0.830450    0.767647     0.761  0.767646   

  training_time  
0         2h 7m  
1        2h 17m  
2        4h 58m  
3        4h 59m  


In [10]:
# Evaluate baseline models and create baseline table
from library.models import ResNetModel

# Single-domain reference models, configured as in the real-world taxonomy
# notebooks. Each entry names the checkpoint to load, the metadata shown in the
# table, and the individual data module the checkpoint is tested on.
# NOTE(review): the recorded output of this cell reports only ~10.9% accuracy
# for the CIFAR-100 baseline, far below the universal models' CIFAR-100 scores.
# Confirm that the checkpoint matches the preprocessing of cifar100_original_dm
# (as opposed to the scaled CIFAR-100 data module) before using it as a baseline.
baseline_configs = {
    "Caltech-101": dict(
        checkpoint="resnet50-caltech101-min-val-loss.ckpt",
        architecture="ResNet-50",
        optimizer="SGD",
        learning_rate=0.01,
        dataset_module=caltech101_dm,
    ),
    "Caltech-256": dict(
        checkpoint="resnet50-caltech256-min-val-loss.ckpt",
        architecture="ResNet-50",
        optimizer="AdamW",
        learning_rate=0.001,
        dataset_module=caltech256_dm,
    ),
    "CIFAR-100": dict(
        checkpoint="resnet152-cifar100-min-val-loss.ckpt",
        architecture="ResNet-152",
        optimizer="AdamW",
        learning_rate=0.001,
        dataset_module=cifar100_original_dm,  # original (unscaled) CIFAR-100 module
    ),
}

# Run every baseline checkpoint through the test loop of its own dataset
baseline_results = {}
trainer = Trainer(logger=False, enable_checkpointing=False)

for dataset_name, config in baseline_configs.items():
    print(f"Evaluating baseline model: {config['checkpoint']}")

    checkpoint_path = f"checkpoints/{config['checkpoint']}"
    baseline_model = ResNetModel.load_from_checkpoint(checkpoint_path)

    # The first entry of the returned list holds the logged test metrics;
    # the accuracy is reported as a percentage from here on
    test_results = trainer.test(baseline_model, datamodule=config["dataset_module"])
    accuracy = test_results[0]["eval_accuracy"] * 100

    baseline_results[dataset_name] = {
        "Dataset": dataset_name,
        "Architecture": config["architecture"],
        "Optimizer": config["optimizer"],
        "Learning Rate": config["learning_rate"],
        "Test Accuracy": f"{accuracy:.2f}",
    }

# Tabulate the baselines, one row per dataset
baseline_df = pd.DataFrame.from_dict(baseline_results, orient="index").reset_index(
    drop=True
)

print("Baseline Model Results:")
print(baseline_df)

# Render the table as LaTeX (index column hidden) and store it with the thesis figures
baseline_latex_table = baseline_df.style.hide(axis="index").to_latex(
    caption="Baseline ResNet model performance on individual datasets. These single-domain models serve as reference points for evaluating the universal models.",
    label="tab:baseline_model_results",
    column_format="lcccc",
    position="ht",
    position_float="centering",
    hrules=True,
)

with open("../thesis/figures/baseline_model_results.tex", "w") as f:
    f.write(baseline_latex_table)

# Baseline accuracies (in percent, as rounded for the table) for use in the
# universal model table
caltech101_baseline, caltech256_baseline, cifar100_baseline = (
    float(baseline_results[dataset_key]["Test Accuracy"])
    for dataset_key in ("Caltech-101", "Caltech-256", "CIFAR-100")
)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Evaluating baseline model: resnet50-caltech101-min-val-loss.ckpt


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
/home/bjoern/miniconda3/envs/master-thesis/lib/python3.13/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:425: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
/home/bjoern/miniconda3/envs/master-thesis/lib/python3.13/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:425: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Testing DataLoader 0: 100%|██████████| 28/28 [00:03<00:00,  8.56it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy         0.9204152226448059
        eval_loss           0.32788488268852234
        hp_metric           0.9204152226448059
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Evaluating baseline model: resnet50-caltech256-min-val-loss.ckpt

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy         0

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 96/96 [00:14<00:00,  6.83it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy         0.6996731758117676
        eval_loss           1.6147780418395996
        hp_metric           0.6996731758117676
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Evaluating baseline model: resnet152-cifar100-min-val-loss.ckpt

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy         0.6

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 313/313 [00:10<00:00, 30.79it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy         0.10859999805688858
        eval_loss            4.112003803253174
        hp_metric           0.10859999805688858
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy         0.10859999805688858
        eval_loss            4.112003803253

In [16]:
# Create LaTeX table from results


def _format_with_delta(accuracy, baseline, bold):
    """Render a fractional accuracy as a percentage with its delta to a baseline.

    Args:
        accuracy: Accuracy as a fraction; it is multiplied by 100 for display.
        baseline: Baseline accuracy, already expressed in percent.
        bold: Whether to wrap the result in ``\\textbf{...}``.

    Returns:
        A string such as ``"92.04 (+0.00)"``. Non-negative deltas get an
        explicit ``+``; negative deltas carry their own minus sign.
    """
    percent = accuracy * 100
    delta = percent - baseline
    sign = "+" if delta >= 0 else ""
    text = f"{percent:.2f} ({sign}{delta:.2f})"
    return f"\\textbf{{{text}}}" if bold else text


# Map taxonomy names to display names for LaTeX export
name_mapping = {
    "hypothesis": "Hypothesis (2 Domain)",
    "mcfp": "MCFP (2 Domain)",
    "three_domain_hypothesis": "Hypothesis (3 Domain)",
    "three_domain_mcfp": "MCFP (3 Domain)",
}

# Rename columns by name rather than by position, so that a change in the
# column order of `df` cannot silently mislabel the table.
column_labels = {
    "name": "Taxonomy",
    "caltech101": "Caltech-101",
    "caltech256": "Caltech-256",
    "cifar100": "CIFAR-100",
    "unified": "Avg",
    "training_time": "Training Time",
}

# Table layout: training time directly after the taxonomy name
table_columns = [
    "Taxonomy",
    "Training Time",
    "Caltech-101",
    "Caltech-256",
    "CIFAR-100",
    "Avg",
]

# .copy() turns the reordered selection into an independent frame; assigning
# columns into a bare column selection can trigger SettingWithCopyWarning.
df_table = (
    df.assign(name=df["name"].map(name_mapping))
    .rename(columns=column_labels)[table_columns]
    .copy()
)

# Single-domain baseline accuracy (in percent) each domain column is compared to
domain_baselines = {
    "Caltech-101": caltech101_baseline,
    "Caltech-256": caltech256_baseline,
    "CIFAR-100": cifar100_baseline,
}

# Replace each domain column by "accuracy (delta)" strings, best value in bold.
# Missing results (e.g. CIFAR-100 for the two-domain models) become "N/A" and
# are ignored when looking for the best value.
for column, baseline in domain_baselines.items():
    scores = pd.to_numeric(df_table[column], errors="coerce")
    valid_scores = scores.dropna()
    best_label = valid_scores.idxmax() if not valid_scores.empty else None
    df_table[column] = [
        _format_with_delta(score, baseline, label == best_label)
        if pd.notna(score)
        else "N/A"
        for label, score in scores.items()
    ]

# Format Avg column as a percentage with two decimals (no bold highlighting)
df_table["Avg"] = (df_table["Avg"] * 100).round(2).map("{:.2f}".format)

# Create LaTeX table; column_format has one entry per visible column
# (the index is hidden, leaving six columns)
latex_table = df_table.style.hide(axis="index").to_latex(
    caption="Universal model evaluation results on multi-domain test datasets. Two-domain models were trained on Caltech-101 + Caltech-256, while three-domain models were trained on all three datasets. Models were evaluated on individual domains as well as the combined test set (no weighting was applied, the individual test sets were simply concatenated). Domain accuracy values show performance differences compared to single-domain baseline models (see Table~\\ref{tab:baseline_model_results}). Best results per column are shown in bold. All accuracy values are shown as percentages.",
    label="tab:universal_model_results",
    column_format="lccccc",
    position="ht",
    position_float="centering",
    hrules=True,
)

# Save to file
with open("../thesis/figures/universal_model_results.tex", "w") as f:
    f.write(latex_table)

In [13]:
from csv import DictReader
import matplotlib

matplotlib.use("pgf")
import matplotlib.pyplot as plt

# LaTeX settings
plt.rcParams.update(
    {
        "text.usetex": True,
        "font.family": "EB Garamond",
        "font.size": 11,
        "pgf.texsystem": "lualatex",
    }
)


def _load_curve(csv_path):
    """Read the "Step" and "Value" columns of an exported metric CSV.

    Args:
        csv_path: Path of the CSV file to read.

    Returns:
        A ``(steps, values)`` tuple of parallel lists, with steps converted to
        ``int`` and values to ``float``.

    Raises:
        FileNotFoundError: If ``csv_path`` does not exist.
    """
    steps, values = [], []
    with open(csv_path, "r") as csv_file:
        for record in DictReader(csv_file):
            steps.append(int(record["Step"]))
            values.append(float(record["Value"]))
    return steps, values


# Create subplot with 4 plots, one for each taxonomy
fig, axes = plt.subplots(2, 2, figsize=(15, 10))

# Configuration for all four taxonomies
# NOTE(review): the third entry is named "three_domain" while other cells use
# "three_domain_hypothesis"; "name" is not read in this cell, so it is left
# unchanged here - confirm whether anything else depends on it.
taxonomy_configs = [
    {
        "name": "hypothesis",
        "title": "Hypothesis Taxonomy (2 Domains)",
        "file_prefix": "universal_hypothesis_multi_domain",
    },
    {
        "name": "mcfp",
        "title": "MCFP Taxonomy (2 Domains)",
        "file_prefix": "universal_mcfp_multi_domain",
    },
    {
        "name": "three_domain",
        "title": "Hypothesis Taxonomy (3 Domains)",
        "file_prefix": "universal_three_domain_hypothesis",
    },
    {
        "name": "three_domain_mcfp",
        "title": "MCFP Taxonomy (3 Domains)",
        "file_prefix": "universal_three_domain_mcfp",
    },
]

# Plot training curves for each taxonomy; axes.flat walks the 2x2 grid row by row
for ax, config in zip(axes.flat, taxonomy_configs):
    curve_prefix = f"training_results/{config['file_prefix']}"

    try:
        # Both files are required; if either is missing nothing is plotted
        steps_train, train = _load_curve(f"{curve_prefix}_train.csv")
        steps_val, val = _load_curve(f"{curve_prefix}_val.csv")
    except FileNotFoundError:
        # If training files don't exist, show a placeholder
        ax.text(
            0.5,
            0.5,
            "Training data\nnot available",
            ha="center",
            va="center",
            transform=ax.transAxes,
            bbox=dict(boxstyle="round,pad=0.3", facecolor="lightgray"),
        )
    else:
        # Plot training and validation curves
        ax.plot(steps_train, train, label="Train", color="blue")
        ax.plot(steps_val, val, label="Validation", color="red")
        # Only axes with curves get a legend; calling legend() on the
        # placeholder axes would complain about missing labelled artists
        ax.legend()

    ax.set_xlabel("Steps")
    ax.set_ylabel("Accuracy")
    ax.set_title(config["title"])
    ax.grid(True, alpha=0.3)

fig.tight_layout()
fig.savefig(
    "../thesis/figures/universal_model_training_curves.pgf", bbox_inches="tight"
)
# The pgf backend is non-interactive, so plt.show() would only emit a warning;
# the saved .pgf file is the result. Release the figure instead.
plt.close(fig)

  plt.show()
