In [1]:
import pandas as pd
import numpy as np
import torch
from torchvision.datasets import Caltech256, Caltech101, CIFAR100
from lightning.pytorch.callbacks import ModelCheckpoint
from lightning.pytorch import Trainer
from lightning.pytorch import loggers as pl_loggers

from library.taxonomy import Taxonomy
from library.models.universal_resnet import UniversalResNetModel
from library.datasets.caltech101 import Caltech101DataModule
from library.datasets.caltech256 import Caltech256DataModule
from library.datasets.cifar100 import CIFAR100ScaledDataModule, CIFAR100DataModule
from library.datasets.util import CombinedDataModule

# Trade float32 matmul precision for speed before any model work happens
torch.set_float32_matmul_precision("medium")

# Class-name lists of the three source datasets. The data must already be
# present under datasets/ because downloading is disabled.
caltech256_labels = Caltech256(download=False, root="datasets/caltech256").categories
caltech101_labels = Caltech101(download=False, root="datasets/caltech101").categories
cifar100_labels = CIFAR100(
    download=False, train=False, root="datasets/cifar100"
).classes

# Report how many classes each dataset contributes (one line per dataset)
print(
    f"Caltech-256 classes: {len(caltech256_labels)}",
    f"Caltech-101 classes: {len(caltech101_labels)}",
    f"CIFAR-100 classes: {len(cifar100_labels)}",
    sep="\n",
)

Caltech-256 classes: 257
Caltech-101 classes: 101
CIFAR-100 classes: 100


In [2]:
# All taxonomies are read from previously saved files under taxonomies/.
# NOTE(review): the .pkl extension suggests pickle-based storage - only load
# files from a trusted source.
_load_taxonomy = Taxonomy.load

# Two-domain taxonomies (Caltech-256 + Caltech-101) built from the real-world datasets
hypothesis_taxonomy = _load_taxonomy(
    "taxonomies/caltech256_caltech101_hypothesis.pkl"
)
mcfp_taxonomy = _load_taxonomy(
    "taxonomies/caltech256_caltech101_mcfp.pkl"
)
mcfp_binary_taxonomy = _load_taxonomy(
    "taxonomies/caltech256_caltech101_mcfp_binary.pkl"
)

# Two-domain threshold-based taxonomies (saved with their optimal parameters)
density_threshold_taxonomy = _load_taxonomy(
    "taxonomies/caltech256_caltech101_density_threshold.pkl"
)
naive_threshold_taxonomy = _load_taxonomy(
    "taxonomies/caltech256_caltech101_naive_threshold.pkl"
)

# Three-domain counterparts of the taxonomies above
three_domain_hypothesis_taxonomy = _load_taxonomy(
    "taxonomies/three_domain_hypothesis.pkl"
)
three_domain_mcfp_taxonomy = _load_taxonomy(
    "taxonomies/three_domain_mcfp.pkl"
)
three_domain_mcfp_binary_taxonomy = _load_taxonomy(
    "taxonomies/three_domain_mcfp_binary.pkl"
)

# Three-domain threshold-based taxonomies
three_domain_density_threshold_taxonomy = _load_taxonomy(
    "taxonomies/three_domain_density_threshold.pkl"
)
three_domain_naive_threshold_taxonomy = _load_taxonomy(
    "taxonomies/three_domain_naive_threshold.pkl"
)

In [3]:
# Multi-domain training setup: run mode, data modules and display names

# Run mode: True trains every model from scratch; False is the alternative
# evaluation-only mode
TRAIN = False

# Human-readable names of the two dataset combinations
dataset_name = "Caltech-101 + Caltech-256 (Multi-Domain)"
three_domain_dataset_name = "Caltech-101 + Caltech-256 + CIFAR-100 (Three-Domain)"

# One data module per source dataset
caltech101_dm = Caltech101DataModule(batch_size=32)
caltech256_dm = Caltech256DataModule(batch_size=32)
cifar100_dm = CIFAR100ScaledDataModule(batch_size=32)
cifar100_original_dm = CIFAR100DataModule(batch_size=32)

# Loader settings shared by both combined data modules
_combined_loader_kwargs = {"batch_size": 64, "num_workers": 11}

# Two-domain module with domain IDs - domain 0: Caltech-101, domain 1: Caltech-256
dataset_module = CombinedDataModule(
    dataset_modules=[caltech101_dm, caltech256_dm],
    domain_ids=[0, 1],
    **_combined_loader_kwargs,
)

# Three-domain module - same as above plus domain 2: CIFAR-100
three_domain_dataset_module = CombinedDataModule(
    dataset_modules=[caltech101_dm, caltech256_dm, cifar100_dm],
    domain_ids=[0, 1, 2],
    **_combined_loader_kwargs,
)

# Registry of every taxonomy handled in this notebook, keyed by a short name.
# Each row below is (key, taxonomy object, model name, logger name); the first
# five rows are the two-domain taxonomies, the last five the three-domain ones.
taxonomies_config = {
    key: {"taxonomy": taxonomy, "model_name": model_name, "logger_name": logger_name}
    for key, taxonomy, model_name, logger_name in (
        (
            "hypothesis",
            hypothesis_taxonomy,
            "universal-resnet50-hypothesis-multi-domain-min-val-loss",
            "universal_hypothesis_multi_domain",
        ),
        (
            "mcfp",
            mcfp_taxonomy,
            "universal-resnet50-mcfp-multi-domain-min-val-loss",
            "universal_mcfp_multi_domain",
        ),
        (
            "mcfp_binary",
            mcfp_binary_taxonomy,
            "universal-resnet50-mcfp-binary-multi-domain-min-val-loss",
            "universal_mcfp_binary_multi_domain",
        ),
        (
            "density_threshold",
            density_threshold_taxonomy,
            "universal-resnet50-density-threshold-multi-domain-min-val-loss",
            "universal_density_threshold_multi_domain",
        ),
        (
            "naive_threshold",
            naive_threshold_taxonomy,
            "universal-resnet50-naive-threshold-multi-domain-min-val-loss",
            "universal_naive_threshold_multi_domain",
        ),
        (
            "three_domain_hypothesis",
            three_domain_hypothesis_taxonomy,
            "universal-resnet50-three-domain-hypothesis-min-val-loss",
            "universal_three_domain_hypothesis",
        ),
        (
            "three_domain_mcfp",
            three_domain_mcfp_taxonomy,
            "universal-resnet50-three-domain-mcfp-min-val-loss",
            "universal_three_domain_mcfp",
        ),
        (
            "three_domain_mcfp_binary",
            three_domain_mcfp_binary_taxonomy,
            "universal-resnet50-three-domain-mcfp-binary-min-val-loss",
            "universal_three_domain_mcfp_binary",
        ),
        (
            "three_domain_density_threshold",
            three_domain_density_threshold_taxonomy,
            "universal-resnet50-three-domain-density-threshold-min-val-loss",
            "universal_three_domain_density_threshold",
        ),
        (
            "three_domain_naive_threshold",
            three_domain_naive_threshold_taxonomy,
            "universal-resnet50-three-domain-naive-threshold-min-val-loss",
            "universal_three_domain_naive_threshold",
        ),
    )
}

In [4]:
# Training hyperparameters shared by every model in this notebook

# Single source of truth for the number of training epochs. It is used both as
# the training length and as the cosine schedule horizon, so the two can no
# longer drift apart when one of them is edited.
MAX_EPOCHS = 50

training_config = {
    "max_epochs": MAX_EPOCHS,
    "optim": "adamw",
    "optim_kwargs": {
        "lr": 0.00005,  # Reduced from 0.0001
        "weight_decay": 0.001,
        "betas": (0.9, 0.999),
        "eps": 1e-8,
    },
    "lr_scheduler": "cosine",  # Changed from multistep
    "lr_scheduler_kwargs": {
        # Kept equal to max_epochs.
        # NOTE(review): assumes the scheduler is stepped once per epoch - confirm
        # in UniversalResNetModel.
        "T_max": MAX_EPOCHS,
        "eta_min": 1e-7,
    },
}

In [5]:
# Train (TRAIN=True) or load (TRAIN=False) one universal model per configured
# taxonomy and evaluate it on the matching combined test set.
# `results` maps taxonomy name -> the list returned by `trainer.test`.
results = {}

for taxonomy_name, config in taxonomies_config.items():
    # Taxonomies whose key starts with "three_domain" also cover CIFAR-100 and
    # therefore use the three-domain data module; all others use the
    # two-domain one.
    if taxonomy_name.startswith("three_domain"):
        current_dataset_module = three_domain_dataset_module
    else:
        current_dataset_module = dataset_module

    if TRAIN:
        # Build a fresh model only when it is actually trained; in evaluation
        # mode the model is restored from its checkpoint, so constructing one
        # here would be wasted work.
        model = UniversalResNetModel(
            taxonomy=config["taxonomy"],
            architecture="resnet50",
            optim=training_config["optim"],
            optim_kwargs=training_config["optim_kwargs"],
            lr_scheduler=training_config["lr_scheduler"],
            lr_scheduler_kwargs=training_config["lr_scheduler_kwargs"],
        )

        tb_logger = pl_loggers.TensorBoardLogger(
            save_dir="logs", name=config["logger_name"]
        )

        # NOTE(review): the checkpoint file names end in "min-val-loss", but the
        # callback keeps the epoch with the highest "val_accuracy". Also confirm
        # that the model logs a metric under exactly that name during validation.
        trainer = Trainer(
            max_epochs=training_config["max_epochs"],
            logger=tb_logger,
            callbacks=[
                ModelCheckpoint(
                    dirpath="checkpoints",
                    monitor="val_accuracy",
                    mode="max",
                    save_top_k=1,
                    filename=config["model_name"],
                    # Keep a stable file name so the evaluation branch below can
                    # find the checkpoint.
                    enable_version_counter=False,
                )
            ],
        )

        # Train the model
        trainer.fit(model, datamodule=current_dataset_module)

        # Test the best checkpoint recorded by the ModelCheckpoint callback
        test_results = trainer.test(datamodule=current_dataset_module, ckpt_path="best")

    else:
        # Evaluation only: no logging and no checkpoint writing
        trainer = Trainer(
            logger=False,
            enable_checkpointing=False,
        )

        # Load pre-trained model
        print(f"Loading pre-trained model: {config['model_name']}.ckpt")
        model = UniversalResNetModel.load_from_checkpoint(
            f"checkpoints/{config['model_name']}.ckpt",
            # Need to pass taxonomy since it's not serialized
            taxonomy=config["taxonomy"],
        )

        # Test the loaded model
        test_results = trainer.test(model, datamodule=current_dataset_module)

    # Store results
    results[taxonomy_name] = test_results

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Loading pre-trained model: universal-resnet50-hypothesis-multi-domain-min-val-loss.ckpt


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0:   3%|▎         | 2/62 [00:01<00:53,  1.11it/s]

/home/bjoern/miniconda3/envs/master-thesis/lib/python3.13/site-packages/lightning/pytorch/utilities/data.py:79: Trying to infer the `batch_size` from an ambiguous collection. The batch size we found is 64. To avoid any miscalculations, use `self.log(..., batch_size=batch_size)`.


Testing DataLoader 0: 100%|██████████| 62/62 [00:10<00:00,  5.77it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy         0.8461930155754089
        eval_loss           2.2396435737609863
        hp_metric           0.8461930155754089
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


/home/bjoern/miniconda3/envs/master-thesis/lib/python3.13/site-packages/lightning/pytorch/utilities/data.py:79: Trying to infer the `batch_size` from an ambiguous collection. The batch size we found is 23. To avoid any miscalculations, use `self.log(..., batch_size=batch_size)`.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Loading pre-trained model: universal-resnet50-mcfp-multi-domain-min-val-loss.ckpt


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 62/62 [00:09<00:00,  6.72it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy         0.8296409249305725
        eval_loss           1.6237250566482544
        hp_metric           0.8296409249305725
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Loading pre-trained model: universal-resnet50-mcfp-binary-multi-domain-min-val-loss.ckpt


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 62/62 [00:07<00:00,  7.80it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy          0.90628981590271
        eval_loss           1.5973289012908936
        hp_metric            0.90628981590271
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Loading pre-trained model: universal-resnet50-density-threshold-multi-domain-min-val-loss.ckpt


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 62/62 [00:07<00:00,  7.80it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy         0.8421186804771423
        eval_loss            2.344407796859741
        hp_metric           0.8421186804771423
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Loading pre-trained model: universal-resnet50-naive-threshold-multi-domain-min-val-loss.ckpt


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 62/62 [00:09<00:00,  6.75it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy          0.847466230392456
        eval_loss           2.2727222442626953
        hp_metric            0.847466230392456
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Loading pre-trained model: universal-resnet50-three-domain-hypothesis-min-val-loss.ckpt


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 218/218 [00:28<00:00,  7.62it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy         0.6674804091453552
        eval_loss           2.4171056747436523
        hp_metric           0.6674804091453552
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


/home/bjoern/miniconda3/envs/master-thesis/lib/python3.13/site-packages/lightning/pytorch/utilities/data.py:79: Trying to infer the `batch_size` from an ambiguous collection. The batch size we found is 39. To avoid any miscalculations, use `self.log(..., batch_size=batch_size)`.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Loading pre-trained model: universal-resnet50-three-domain-mcfp-min-val-loss.ckpt


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 218/218 [00:28<00:00,  7.58it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy         0.7682200074195862
        eval_loss           1.8490538597106934
        hp_metric           0.7682200074195862
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Loading pre-trained model: universal-resnet50-three-domain-mcfp-binary-min-val-loss.ckpt


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 218/218 [00:28<00:00,  7.72it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy         0.8403102159500122
        eval_loss           2.3036134243011475
        hp_metric           0.8403102159500122
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Loading pre-trained model: universal-resnet50-three-domain-density-threshold-min-val-loss.ckpt


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 218/218 [00:28<00:00,  7.76it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy         0.8403819799423218
        eval_loss           2.6332054138183594
        hp_metric           0.8403819799423218
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Loading pre-trained model: universal-resnet50-three-domain-naive-threshold-min-val-loss.ckpt


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 218/218 [00:28<00:00,  7.72it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy         0.7690816521644592
        eval_loss            2.089416265487671
        hp_metric           0.7690816521644592
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


In [6]:
# Create individual combined data modules for each domain
# These maintain the (target, domain_id) tuple format expected by the universal models
caltech101_combined_dm = CombinedDataModule(
    dataset_modules=[caltech101_dm],
    domain_ids=[0],  # Domain 0 for Caltech-101
    batch_size=64,
    num_workers=11,
)

caltech256_combined_dm = CombinedDataModule(
    dataset_modules=[caltech256_dm],
    domain_ids=[1],  # Domain 1 for Caltech-256
    batch_size=64,
    num_workers=11,
)

cifar100_combined_dm = CombinedDataModule(
    dataset_modules=[cifar100_dm],
    domain_ids=[2],  # Domain 2 for CIFAR-100
    batch_size=64,
    num_workers=11,
)

# Test each model on its individual domains.
# `domain_results` maps taxonomy name -> dict with the keys
# "name", "caltech101", "caltech256", "cifar100" (None for two-domain models)
# and "unified" (unweighted mean of the per-domain accuracies that were run).
domain_results = {}
for taxonomy_name, config in taxonomies_config.items():
    # Load the trained model
    print(f"Loading pre-trained model: {config['model_name']}.ckpt")
    model = UniversalResNetModel.load_from_checkpoint(
        f"checkpoints/{config['model_name']}.ckpt", taxonomy=config["taxonomy"]
    )

    # Create trainer for testing
    trainer = Trainer(
        logger=False,
        enable_checkpointing=False,
    )

    # Every model is tested on Caltech-101 (domain 0) and Caltech-256 (domain 1);
    # only the three-domain models (key prefix "three_domain") are also tested
    # on CIFAR-100 (domain 2).
    domain_modules = {
        "caltech101": caltech101_combined_dm,
        "caltech256": caltech256_combined_dm,
    }
    if taxonomy_name.startswith("three_domain"):
        domain_modules["cifar100"] = cifar100_combined_dm

    # Run the tests in insertion order and keep the accuracy of each domain
    domain_accuracies = {
        domain_key: trainer.test(model, datamodule=domain_dm)[0]["eval_accuracy"]
        for domain_key, domain_dm in domain_modules.items()
    }

    domain_results[taxonomy_name] = {
        "name": taxonomy_name,
        "caltech101": domain_accuracies["caltech101"],
        "caltech256": domain_accuracies["caltech256"],
        # For two-domain models, CIFAR-100 accuracy is N/A
        "cifar100": domain_accuracies.get("cifar100"),
        # Unified accuracy: mean of per-domain accuracies (equal weighting)
        "unified": sum(domain_accuracies.values()) / len(domain_accuracies),
    }

Loading pre-trained model: universal-resnet50-hypothesis-multi-domain-min-val-loss.ckpt


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 14/14 [00:01<00:00,  8.00it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy         0.9181084036827087
        eval_loss           2.7212612628936768
        hp_metric           0.9181084036827087
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


/home/bjoern/miniconda3/envs/master-thesis/lib/python3.13/site-packages/lightning/pytorch/utilities/data.py:79: Trying to infer the `batch_size` from an ambiguous collection. The batch size we found is 35. To avoid any miscalculations, use `self.log(..., batch_size=batch_size)`.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 48/48 [00:06<00:00,  7.64it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy         0.8284313678741455
        eval_loss           2.1086208820343018
        hp_metric           0.8284313678741455
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


/home/bjoern/miniconda3/envs/master-thesis/lib/python3.13/site-packages/lightning/pytorch/utilities/data.py:79: Trying to infer the `batch_size` from an ambiguous collection. The batch size we found is 52. To avoid any miscalculations, use `self.log(..., batch_size=batch_size)`.


Loading pre-trained model: universal-resnet50-mcfp-multi-domain-min-val-loss.ckpt


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 14/14 [00:01<00:00,  8.23it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy         0.9123414158821106
        eval_loss            2.100937843322754
        hp_metric           0.9123414158821106
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 48/48 [00:07<00:00,  6.25it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy          0.80751633644104
        eval_loss           1.4989861249923706
        hp_metric            0.80751633644104
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Loading pre-trained model: universal-resnet50-mcfp-binary-multi-domain-min-val-loss.ckpt


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 14/14 [00:01<00:00,  8.15it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy         0.9273356199264526
        eval_loss            2.218851327896118
        hp_metric           0.9273356199264526
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 48/48 [00:06<00:00,  7.73it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy         0.8970588445663452
        eval_loss            1.430214762687683
        hp_metric           0.8970588445663452
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Loading pre-trained model: universal-resnet50-density-threshold-multi-domain-min-val-loss.ckpt


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 14/14 [00:01<00:00,  8.13it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy         0.9296424388885498
        eval_loss            2.761981725692749
        hp_metric           0.9296424388885498
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 48/48 [00:06<00:00,  7.50it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy         0.8153594732284546
        eval_loss            2.232377767562866
        hp_metric           0.8153594732284546
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Loading pre-trained model: universal-resnet50-naive-threshold-multi-domain-min-val-loss.ckpt


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 14/14 [00:01<00:00,  8.12it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy          0.931949257850647
        eval_loss           2.8679933547973633
        hp_metric            0.931949257850647
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 48/48 [00:06<00:00,  7.50it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy          0.822549045085907
        eval_loss            2.103687047958374
        hp_metric            0.822549045085907
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Loading pre-trained model: universal-resnet50-three-domain-hypothesis-min-val-loss.ckpt


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 14/14 [00:01<00:00,  8.12it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy         0.6874279379844666
        eval_loss           3.7115421295166016
        hp_metric           0.6874279379844666
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 48/48 [00:06<00:00,  7.60it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy         0.5816993713378906
        eval_loss            3.058987617492676
        hp_metric           0.5816993713378906
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 157/157 [00:20<00:00,  7.72it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy         0.6902999877929688
        eval_loss           2.1113805770874023
        hp_metric           0.6902999877929688
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


/home/bjoern/miniconda3/envs/master-thesis/lib/python3.13/site-packages/lightning/pytorch/utilities/data.py:79: Trying to infer the `batch_size` from an ambiguous collection. The batch size we found is 16. To avoid any miscalculations, use `self.log(..., batch_size=batch_size)`.


Loading pre-trained model: universal-resnet50-three-domain-mcfp-min-val-loss.ckpt


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 14/14 [00:01<00:00,  8.10it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy         0.8327566385269165
        eval_loss           2.6658236980438232
        hp_metric           0.8327566385269165
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 48/48 [00:06<00:00,  7.27it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy         0.7650327086448669
        eval_loss           2.2333476543426514
        hp_metric           0.7650327086448669
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 157/157 [00:19<00:00,  8.24it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy         0.7609999775886536
        eval_loss           1.6658204793930054
        hp_metric           0.7609999775886536
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Loading pre-trained model: universal-resnet50-three-domain-mcfp-binary-min-val-loss.ckpt


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 14/14 [00:01<00:00,  8.14it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy         0.9457900524139404
        eval_loss           3.0320279598236084
        hp_metric           0.9457900524139404
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 48/48 [00:07<00:00,  6.22it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy          0.851307213306427
        eval_loss           2.3696138858795166
        hp_metric            0.851307213306427
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 157/157 [00:18<00:00,  8.36it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy         0.8270999789237976
        eval_loss            2.225367784500122
        hp_metric           0.8270999789237976
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Loading pre-trained model: universal-resnet50-three-domain-density-threshold-min-val-loss.ckpt


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 14/14 [00:01<00:00,  8.12it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy         0.9538639187812805
        eval_loss           3.2175261974334717
        hp_metric           0.9538639187812805
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 48/48 [00:06<00:00,  7.13it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy         0.8352941274642944
        eval_loss            2.671121120452881
        hp_metric           0.8352941274642944
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 157/157 [00:18<00:00,  8.30it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy         0.8313999772071838
        eval_loss            2.57104754447937
        hp_metric           0.8313999772071838
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Loading pre-trained model: universal-resnet50-three-domain-naive-threshold-min-val-loss.ckpt


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 14/14 [00:01<00:00,  8.11it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy         0.9550173282623291
        eval_loss           3.2000527381896973
        hp_metric           0.9550173282623291
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 48/48 [00:06<00:00,  7.42it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy         0.8535947799682617
        eval_loss           2.6170356273651123
        hp_metric           0.8535947799682617
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 157/157 [00:20<00:00,  7.59it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy          0.725600004196167
        eval_loss            1.833083152770996
        hp_metric            0.725600004196167
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


In [7]:
# Collect the per-model results (one dict entry per model) into a frame whose
# rows are numbered 0..n-1 instead of being keyed by the dict keys.
df = pd.DataFrame.from_dict(domain_results, orient="index").reset_index(drop=True)

# Show the resulting table as plain text
print(df)

                             name  caltech101  caltech256  cifar100   unified
0                      hypothesis    0.918108    0.828431       NaN  0.873270
1                            mcfp    0.912341    0.807516       NaN  0.859929
2                     mcfp_binary    0.927336    0.897059       NaN  0.912197
3               density_threshold    0.929642    0.815359       NaN  0.872501
4                 naive_threshold    0.931949    0.822549       NaN  0.877249
5         three_domain_hypothesis    0.687428    0.581699    0.6903  0.653142
6               three_domain_mcfp    0.832757    0.765033    0.7610  0.786263
7        three_domain_mcfp_binary    0.945790    0.851307    0.8271  0.874732
8  three_domain_density_threshold    0.953864    0.835294    0.8314  0.873519
9    three_domain_naive_threshold    0.955017    0.853595    0.7256  0.844737


In [8]:
# Evaluate the single-domain baseline models and export them as a LaTeX table
from library.models.resnet import ResNetModel

# Baseline setup per dataset, taken from the real-world taxonomy notebooks.
# "learning_rate" is recorded for reference only; nothing below reads it.
baseline_configs = {
    "Caltech-101": dict(
        checkpoint="resnet50-caltech101-min-val-loss.ckpt",
        architecture="ResNet-50",
        optimizer="SGD",
        learning_rate=0.01,
        dataset_module=caltech101_dm,  # individual dataset module
    ),
    "Caltech-256": dict(
        checkpoint="resnet50-caltech256-min-val-loss.ckpt",
        architecture="ResNet-50",
        optimizer="AdamW",
        learning_rate=0.001,
        dataset_module=caltech256_dm,  # individual dataset module
    ),
    "CIFAR-100": dict(
        checkpoint="resnet152-cifar100-min-val-loss.ckpt",
        architecture="ResNet-152",
        optimizer="AdamW",
        learning_rate=0.001,
        dataset_module=cifar100_original_dm,  # original CIFAR-100 dataset module
    ),
}

# Run every baseline checkpoint against the test split of its own dataset
baseline_results = {}
trainer = Trainer(logger=False, enable_checkpointing=False)

for dataset_name, config in baseline_configs.items():
    checkpoint_name = config["checkpoint"]
    print(f"Evaluating baseline model: {checkpoint_name}")

    # Restore the trained baseline from disk
    baseline_model = ResNetModel.load_from_checkpoint(f"checkpoints/{checkpoint_name}")

    # trainer.test returns one metrics dict per dataloader; only the first is read
    metrics = trainer.test(baseline_model, datamodule=config["dataset_module"])
    accuracy_percent = metrics[0]["eval_accuracy"] * 100

    # The accuracy is stored already formatted to two decimals
    baseline_results[dataset_name] = {
        "Dataset": dataset_name,
        "Architecture": config["architecture"],
        "Optimizer": config["optimizer"],
        "Test Accuracy": f"{accuracy_percent:.2f}",
    }

# One row per baseline, in evaluation order, with a plain 0..n-1 index
baseline_df = pd.DataFrame(list(baseline_results.values()))

print("Baseline Model Results:")
print(baseline_df)

# Render the frame as a LaTeX table without the index column
baseline_styler = baseline_df.style.hide(axis="index")
baseline_latex_table = baseline_styler.to_latex(
    caption="Baseline ResNet model performance on individual datasets. These single-domain models serve as reference points for evaluating the universal models. Every baseline model was trained for 50 epochs.",
    label="tab:baseline_model_results",
    column_format="lccc",
    position="ht",
    position_float="centering",
    hrules=True,
)

# Write the table into the thesis figure directory
with open("../thesis/figures/baseline_model_results.tex", "w") as tex_file:
    tex_file.write(baseline_latex_table)

# Baseline accuracies (in percent, parsed back from the two-decimal strings)
# for use in the universal model tables
caltech101_baseline, caltech256_baseline, cifar100_baseline = (
    float(baseline_results[dataset]["Test Accuracy"])
    for dataset in ("Caltech-101", "Caltech-256", "CIFAR-100")
)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Evaluating baseline model: resnet50-caltech101-min-val-loss.ckpt


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
/home/bjoern/miniconda3/envs/master-thesis/lib/python3.13/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:425: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Testing DataLoader 0: 100%|██████████| 28/28 [00:03<00:00,  8.29it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy         0.9181084036827087
        eval_loss           0.3181077837944031
        hp_metric           0.9181084036827087
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Evaluating baseline model: resnet50-caltech256-min-val-loss.ckpt


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 96/96 [00:13<00:00,  6.86it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy         0.6947712302207947
        eval_loss            1.641042709350586
        hp_metric           0.6947712302207947
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Evaluating baseline model: resnet152-cifar100-min-val-loss.ckpt


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 313/313 [00:10<00:00, 28.94it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy         0.6047999858856201
        eval_loss           1.9387381076812744
        hp_metric           0.6047999858856201
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Baseline Model Results:
       Dataset Architecture Optimizer Test Accuracy
0  Caltech-101    ResNet-50       SGD         91.81
1  Caltech-256    ResNet-50     AdamW         69.48
2    CIFAR-100   ResNet-152     AdamW         60.48


In [9]:
# Split results into 2-domain and 3-domain models
is_three_domain = df["name"].str.startswith("three_domain")
df_2domain = df[~is_three_domain].copy().reset_index(drop=True)
df_3domain = df[is_three_domain].copy().reset_index(drop=True)

# Map taxonomy names to display names for LaTeX export (without domain suffix)
name_mapping_2domain = {
    "hypothesis": "Hypothesis",
    "mcfp": "MCFP",
    "mcfp_binary": "MCFP Binary",
    "density_threshold": "Density Threshold",
    "naive_threshold": "Naive Threshold",
}

name_mapping_3domain = {
    "three_domain_hypothesis": "Hypothesis",
    "three_domain_mcfp": "MCFP",
    "three_domain_mcfp_binary": "MCFP Binary",
    "three_domain_density_threshold": "Density Threshold",
    "three_domain_naive_threshold": "Naive Threshold",
}


def format_accuracy_cells(accuracies, baseline=None):
    """Format one table column as strings and mark its best entry in LaTeX bold.

    Args:
        accuracies: 1-D NumPy array of accuracies as fractions (0.91 = 91 %).
        baseline: Optional reference accuracy in percent. When given, each cell
            reads "<accuracy> (<signed difference to baseline>)". When omitted,
            each cell is just the accuracy in percent rounded to two decimals.

    Returns:
        A list with one formatted string per input value.

    Raises:
        ValueError: If ``accuracies`` is empty (raised by ``argmax``).
    """
    percentages = accuracies * 100
    if baseline is None:
        # Plain column: both the ranking and the text use the rounded values.
        percentages = percentages.round(2)
        best_position = percentages.argmax()
        cells = [f"{value:.2f}" for value in percentages]
    else:
        # Delta column: the ranking uses the unrounded accuracies.
        best_position = accuracies.argmax()
        cells = []
        for value in percentages:
            delta = value - baseline
            sign = "+" if delta >= 0 else ""  # negative values carry their own "-"
            cells.append(f"{value:.2f} ({sign}{delta:.2f})")
    cells[best_position] = f"\\textbf{{{cells[best_position]}}}"
    return cells


def build_results_table(results, name_mapping, domain_columns):
    """Build the display table for one group of universal models.

    Source columns are selected by name, so the table does not depend on the
    column order of ``results``.

    Args:
        results: Frame with a "name" column, a "unified" column and one
            accuracy column per key of ``domain_columns`` (all as fractions).
        name_mapping: Maps the values of "name" to their display names.
        domain_columns: Maps a source column name to a
            ``(display name, baseline accuracy in percent)`` pair. The
            insertion order determines the column order of the table.

    Returns:
        A frame of strings with the columns "Taxonomy", one column per domain
        and "Avg", indexed like ``results``.
    """
    table = pd.DataFrame({"Taxonomy": results["name"].map(name_mapping)})
    for source_column, (display_name, baseline) in domain_columns.items():
        table[display_name] = format_accuracy_cells(
            results[source_column].to_numpy(), baseline
        )
    table["Avg"] = format_accuracy_cells(results["unified"].to_numpy())
    return table


def results_table_to_latex(table, model_kind, training_sets, label):
    """Render a results table as a floating LaTeX table.

    Args:
        table: Frame returned by ``build_results_table``.
        model_kind: Phrase inserted into the caption, e.g. "two-domain".
        training_sets: Training dataset description inserted into the caption.
        label: LaTeX label of the table.

    Returns:
        The LaTeX source of the table. The first column is left-aligned and
        all remaining columns are centered.
    """
    caption = (
        f"Universal model evaluation results for {model_kind} models "
        f"trained on {training_sets}. "
        "Models were evaluated on the test sets of the individual domains. "
        "Domain accuracy values show performance differences compared to "
        "single-domain baseline models "
        "(see Table~\\ref{tab:baseline_model_results}). "
        "Best results per column are shown in bold. "
        "All accuracy values are shown as percentages. "
        "Density Threshold models use parameter 0.6, "
        "Naive Threshold models use parameter 0.1."
    )
    return table.style.hide(axis="index").to_latex(
        caption=caption,
        label=label,
        column_format="l" + "c" * (table.shape[1] - 1),
        position="ht",
        position_float="centering",
        hrules=True,
    )


# Process 2-domain models table (no CIFAR-100 column)
df_2domain_table = build_results_table(
    df_2domain,
    name_mapping_2domain,
    {
        "caltech101": ("Caltech-101", caltech101_baseline),
        "caltech256": ("Caltech-256", caltech256_baseline),
    },
)

# Process 3-domain models table
df_3domain_table = build_results_table(
    df_3domain,
    name_mapping_3domain,
    {
        "caltech101": ("Caltech-101", caltech101_baseline),
        "caltech256": ("Caltech-256", caltech256_baseline),
        "cifar100": ("CIFAR-100", cifar100_baseline),
    },
)

# Create LaTeX table for 2-domain models
latex_table_2domain = results_table_to_latex(
    df_2domain_table,
    model_kind="two-domain",
    training_sets="Caltech-101 + Caltech-256",
    label="tab:universal_model_results_2domain",
)

# Create LaTeX table for 3-domain models
latex_table_3domain = results_table_to_latex(
    df_3domain_table,
    model_kind="three-domain",
    training_sets="Caltech-101 + Caltech-256 + CIFAR-100",
    label="tab:universal_model_results_3domain",
)

# Save both tables to separate files
with open("../thesis/figures/universal_model_results_2domain.tex", "w") as f:
    f.write(latex_table_2domain)

with open("../thesis/figures/universal_model_results_3domain.tex", "w") as f:
    f.write(latex_table_3domain)

In [10]:
from csv import DictReader
import matplotlib

# The backend is selected before pyplot is imported. pgf is a non-interactive
# backend: figures are written to file and cannot be shown.
matplotlib.use("pgf")
import matplotlib.pyplot as plt

# LaTeX settings
plt.rcParams.update(
    {
        "text.usetex": True,
        "font.family": "EB Garamond",
        "font.size": 11,
        "pgf.texsystem": "lualatex",
    }
)


def load_metric_curve(csv_path):
    """Read the "Step" and "Value" columns of a CSV file.

    Args:
        csv_path: Path of a CSV file with a header row containing at least the
            columns "Step" (integers) and "Value" (floats).

    Returns:
        A ``(steps, values)`` pair of equally long lists.

    Raises:
        FileNotFoundError: If ``csv_path`` does not exist.
    """
    steps, values = [], []
    # newline="" is the file mode the csv module documents for its readers
    with open(csv_path, "r", newline="") as csv_file:
        for record in DictReader(csv_file):
            steps.append(int(record["Step"]))
            values.append(float(record["Value"]))
    return steps, values


# Create subplot with 10 plots, one for each taxonomy (2x5 grid)
fig, axes = plt.subplots(2, 5, figsize=(25, 10))

# Configuration for all ten taxonomies
taxonomy_configs = [
    {
        "name": "hypothesis",
        "title": "Hypothesis Taxonomy (2 Domains)",
        "file_prefix": "universal_hypothesis_multi_domain",
    },
    {
        "name": "mcfp",
        "title": "MCFP Taxonomy (2 Domains)",
        "file_prefix": "universal_mcfp_multi_domain",
    },
    {
        "name": "mcfp_binary",
        "title": "MCFP Binary Taxonomy (2 Domains)",
        "file_prefix": "universal_mcfp_binary_multi_domain",
    },
    {
        "name": "density_threshold",
        "title": "Density Threshold Taxonomy (2 Domains)",
        "file_prefix": "universal_density_threshold_multi_domain",
    },
    {
        "name": "naive_threshold",
        "title": "Naive Threshold Taxonomy (2 Domains)",
        "file_prefix": "universal_naive_threshold_multi_domain",
    },
    {
        "name": "three_domain_hypothesis",
        "title": "Hypothesis Taxonomy (3 Domains)",
        "file_prefix": "universal_three_domain_hypothesis",
    },
    {
        "name": "three_domain_mcfp",
        "title": "MCFP Taxonomy (3 Domains)",
        "file_prefix": "universal_three_domain_mcfp",
    },
    {
        "name": "three_domain_mcfp_binary",
        "title": "MCFP Binary Taxonomy (3 Domains)",
        "file_prefix": "universal_three_domain_mcfp_binary",
    },
    {
        "name": "three_domain_density_threshold",
        "title": "Density Threshold Taxonomy (3 Domains)",
        "file_prefix": "universal_three_domain_density_threshold",
    },
    {
        "name": "three_domain_naive_threshold",
        "title": "Naive Threshold Taxonomy (3 Domains)",
        "file_prefix": "universal_three_domain_naive_threshold",
    },
]

# zip() below would silently drop configurations beyond the available panels
assert len(taxonomy_configs) <= axes.size, "more taxonomies than subplot panels"

# Plot training curves for each taxonomy; panels are filled row by row
for ax, config in zip(axes.flat, taxonomy_configs):
    file_stem = f"training_results/{config['file_prefix']}"

    try:
        # Both files are loaded before anything is drawn, so a panel shows
        # either both curves or the placeholder.
        steps_train, train = load_metric_curve(f"{file_stem}_train.csv")
        steps_val, val = load_metric_curve(f"{file_stem}_val.csv")
    except FileNotFoundError:
        # If training files don't exist, show a placeholder
        ax.text(
            0.5,
            0.5,
            "Training data\nnot available",
            ha="center",
            va="center",
            transform=ax.transAxes,
            bbox=dict(boxstyle="round,pad=0.3", facecolor="lightgray"),
        )
    else:
        # Plot training and validation curves
        ax.plot(steps_train, train, label="Train", color="blue")
        ax.plot(steps_val, val, label="Validation", color="red")
        # Only panels with curves get a legend; an empty legend just warns
        ax.legend()

    ax.set_xlabel("Steps")
    ax.set_ylabel("Accuracy")
    ax.set_title(config["title"], fontsize=9)
    ax.grid(True, alpha=0.3)

fig.tight_layout()
fig.savefig(
    "../thesis/figures/universal_model_training_curves.pgf", bbox_inches="tight"
)
# No plt.show(): the pgf backend cannot display figures and would only emit a
# UserWarning. Closing the figure releases it from pyplot's figure registry.
plt.close(fig)

  plt.show()
