In [1]:
import pandas as pd
import numpy as np
import torch
from torchvision.datasets import Caltech256, Caltech101, CIFAR100
from lightning.pytorch.callbacks import ModelCheckpoint
from lightning.pytorch import Trainer
from lightning.pytorch import loggers as pl_loggers

from library.taxonomy import Taxonomy
from library.models import UniversalResNetModel
from library.datasets import (
    Caltech256DataModule,
    Caltech101DataModule,
    CIFAR100ScaledDataModule,
    CombinedDataModule,
)

# Use reduced-precision float32 matrix multiplication to speed up training
torch.set_float32_matmul_precision("medium")

# Read the class-name lists from the local dataset copies. download=False
# means nothing is fetched, so the data must already exist under datasets/.
caltech256_labels = Caltech256(
    download=False,
    root="datasets/caltech256",
).categories
caltech101_labels = Caltech101(
    download=False,
    root="datasets/caltech101",
).categories
cifar100_labels = CIFAR100(
    train=False,
    download=False,
    root="datasets/cifar100",
).classes

# Report the number of classes per dataset
# (the recorded run printed 257 for Caltech-256, 101 for Caltech-101, 100 for CIFAR-100)
print(f"Caltech-256 classes: {len(caltech256_labels)}")
print(f"Caltech-101 classes: {len(caltech101_labels)}")
print(f"CIFAR-100 classes: {len(cifar100_labels)}")

Caltech-256 classes: 257
Caltech-101 classes: 101
CIFAR-100 classes: 100


In [2]:
# Two-domain taxonomies (Caltech-256 + Caltech-101): the "hypothesis" and the
# "mcfp" variant, loaded in that order from their pickle files
hypothesis_taxonomy, mcfp_taxonomy = (
    Taxonomy.load(taxonomy_path)
    for taxonomy_path in (
        "taxonomies/caltech256_caltech101_hypothesis.pkl",
        "taxonomies/caltech256_caltech101_mcfp.pkl",
    )
)

# Taxonomy covering all three domains
three_domain_taxonomy = Taxonomy.load(
    "taxonomies/three_domain_hypothesis.pkl"
)

In [3]:
# Multi-domain setup: data modules, display names and per-taxonomy settings

# False -> load existing checkpoints; True -> train the models from scratch
TRAIN = False

# Human-readable names of the two dataset combinations
dataset_name = "Caltech-101 + Caltech-256 (Multi-Domain)"
three_domain_dataset_name = "Caltech-101 + Caltech-256 + CIFAR-100 (Three-Domain)"

# One data module per source dataset; they are combined below
caltech101_dm = Caltech101DataModule(batch_size=32)
caltech256_dm = Caltech256DataModule(batch_size=32)
cifar100_dm = CIFAR100ScaledDataModule(batch_size=32)

# Two-domain module. Domain IDs: 0 = Caltech-101, 1 = Caltech-256
dataset_module = CombinedDataModule(
    batch_size=64,
    num_workers=11,
    dataset_modules=[caltech101_dm, caltech256_dm],
    domain_ids=[0, 1],
)

# Three-domain module. Domain IDs: 0 = Caltech-101, 1 = Caltech-256, 2 = CIFAR-100
three_domain_dataset_module = CombinedDataModule(
    batch_size=64,
    num_workers=11,
    dataset_modules=[caltech101_dm, caltech256_dm, cifar100_dm],
    domain_ids=[0, 1, 2],
)

# Per-taxonomy settings: the taxonomy object, the checkpoint file stem and the
# TensorBoard logger name. Later cells iterate this dict in insertion order.
taxonomies_config = dict(
    hypothesis=dict(
        taxonomy=hypothesis_taxonomy,
        model_name="universal-resnet50-hypothesis-multi-domain-min-val-loss",
        logger_name="universal_hypothesis_multi_domain",
    ),
    mcfp=dict(
        taxonomy=mcfp_taxonomy,
        model_name="universal-resnet50-mcfp-multi-domain-min-val-loss",
        logger_name="universal_mcfp_multi_domain",
    ),
    three_domain=dict(
        taxonomy=three_domain_taxonomy,
        model_name="universal-resnet50-three-domain-hypothesis-min-val-loss",
        logger_name="universal_three_domain_hypothesis",
    ),
)

In [4]:
# Hyperparameters shared by every model trained in this notebook
training_config = dict(
    max_epochs=50,
    # Optimizer: AdamW (learning rate reduced from 0.0001)
    optim="adamw",
    optim_kwargs=dict(
        lr=5e-5,
        weight_decay=1e-3,
        betas=(0.9, 0.999),
        eps=1e-8,
    ),
    # Scheduler: cosine (changed from multistep); T_max matches max_epochs
    lr_scheduler="cosine",
    lr_scheduler_kwargs=dict(
        T_max=50,
        eta_min=1e-7,
    ),
)

In [5]:
# Train (or load) and test one universal model per configured taxonomy.
# `results` maps each taxonomy name to the list returned by `trainer.test`.
results = {}

# Taxonomies that are always trained, even when the notebook-level TRAIN flag
# is False. Kept as a local set so the decision is made per iteration and TRAIN
# itself is never mutated (re-running this cell must not silently start
# retraining every model and overwriting its checkpoint).
force_train_taxonomies = {"three_domain"}

for taxonomy_name, config in taxonomies_config.items():
    # The three-domain taxonomy is paired with the three-domain data module;
    # every other taxonomy uses the two-domain module.
    if taxonomy_name == "three_domain":
        current_dataset_module = three_domain_dataset_module
    else:
        current_dataset_module = dataset_module

    should_train = TRAIN or taxonomy_name in force_train_taxonomies

    if should_train:
        # Build a fresh Universal ResNet model only when it is actually trained;
        # the evaluation-only path below loads its model from a checkpoint.
        model = UniversalResNetModel(
            taxonomy=config["taxonomy"],
            architecture="resnet50",
            optim=training_config["optim"],
            optim_kwargs=training_config["optim_kwargs"],
            lr_scheduler=training_config["lr_scheduler"],
            lr_scheduler_kwargs=training_config["lr_scheduler_kwargs"],
        )

        tb_logger = pl_loggers.TensorBoardLogger(
            save_dir="logs", name=config["logger_name"]
        )

        # NOTE(review): the checkpoint keeps the epoch with the highest
        # "val_accuracy", while the configured file names end in
        # "min-val-loss" - confirm which selection criterion is intended.
        trainer = Trainer(
            max_epochs=training_config["max_epochs"],
            logger=tb_logger,
            callbacks=[
                ModelCheckpoint(
                    dirpath="checkpoints",
                    monitor="val_accuracy",
                    mode="max",
                    save_top_k=1,
                    filename=config["model_name"],
                    enable_version_counter=False,
                )
            ],
        )

        # Train the model
        trainer.fit(model, datamodule=current_dataset_module)

        # Test using the best checkpoint recorded during this fit
        test_results = trainer.test(datamodule=current_dataset_module, ckpt_path="best")

    else:
        # Evaluation only: no logging and no checkpoint writing
        trainer = Trainer(
            logger=False,
            enable_checkpointing=False,
        )

        # Load pre-trained model
        print(f"Loading pre-trained model: {config['model_name']}.ckpt")
        model = UniversalResNetModel.load_from_checkpoint(
            f"checkpoints/{config['model_name']}.ckpt",
            taxonomy=config[
                "taxonomy"
            ],  # Need to pass taxonomy since it's not serialized
        )

        # Test the loaded model
        test_results = trainer.test(model, datamodule=current_dataset_module)

    # Store results
    results[taxonomy_name] = test_results

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Loading pre-trained model: universal-resnet50-hypothesis-multi-domain-min-val-loss.ckpt


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0:   2%|▏         | 1/62 [00:02<02:18,  0.44it/s]

/home/bjoern/miniconda3/envs/master-thesis/lib/python3.13/site-packages/lightning/pytorch/utilities/data.py:79: Trying to infer the `batch_size` from an ambiguous collection. The batch size we found is 64. To avoid any miscalculations, use `self.log(..., batch_size=batch_size)`.


Testing DataLoader 0: 100%|██████████| 62/62 [00:13<00:00,  4.72it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy          0.845683753490448
        eval_loss            2.238147735595703
        hp_metric            0.845683753490448
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


/home/bjoern/miniconda3/envs/master-thesis/lib/python3.13/site-packages/lightning/pytorch/utilities/data.py:79: Trying to infer the `batch_size` from an ambiguous collection. The batch size we found is 23. To avoid any miscalculations, use `self.log(..., batch_size=batch_size)`.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Loading pre-trained model: universal-resnet50-mcfp-multi-domain-min-val-loss.ckpt


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 62/62 [00:10<00:00,  5.75it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy         0.8260759115219116
        eval_loss           1.6332345008850098
        hp_metric           0.8260759115219116
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
/home/bjoern/miniconda3/envs/master-thesis/lib/python3.13/site-packages/lightning/pytorch/callbacks/model_checkpoint.py:654: Checkpoint directory /home/bjoern/dev/master-thesis/project/checkpoints exists and is not empty.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type             | Params | Mode 
-------------------------------------------------------
0 | model     | ResNet           | 26.9 M | train
1 | criterion | CrossEntropyLoss | 0      | train
-------------------------------------------------------
26.9 M    Trainable params
0         Non-trainable params
26.9 M    Total params
107.492   Total estimated model params size (MB)
154       Modules in train mode
0         Modules in eval mode


Epoch 0: 100%|██████████| 1117/1117 [07:46<00:00,  2.40it/s, v_num=4]      

/home/bjoern/miniconda3/envs/master-thesis/lib/python3.13/site-packages/lightning/pytorch/utilities/data.py:79: Trying to infer the `batch_size` from an ambiguous collection. The batch size we found is 41. To avoid any miscalculations, use `self.log(..., batch_size=batch_size)`.


Epoch 5:   8%|▊         | 93/1117 [00:57<10:38,  1.60it/s, v_num=4]  


Detected KeyboardInterrupt, attempting graceful shutdown ...


NameError: name 'exit' is not defined

In [None]:
def _single_domain_module(domain_dm, domain_id):
    """Wrap one dataset module in a CombinedDataModule tagged with `domain_id`."""
    return CombinedDataModule(
        dataset_modules=[domain_dm],
        domain_ids=[domain_id],
        batch_size=64,
        num_workers=11,
    )


def _eval_accuracy(trainer, model, datamodule):
    """Run `trainer.test` and return "eval_accuracy" from its first result dict."""
    return trainer.test(model, datamodule=datamodule)[0]["eval_accuracy"]


# One combined data module per domain. Wrapping single datasets keeps the
# (target, domain_id) tuple format expected by the universal models.
caltech101_combined_dm = _single_domain_module(caltech101_dm, 0)  # Domain 0
caltech256_combined_dm = _single_domain_module(caltech256_dm, 1)  # Domain 1
cifar100_combined_dm = _single_domain_module(cifar100_dm, 2)  # Domain 2

# Evaluate every model on each individual domain and on its combined test set.
# Key order per entry: name, caltech101, caltech256, cifar100, unified.
domain_results = {}
for taxonomy_name, config in taxonomies_config.items():
    # Restore the trained model from its checkpoint
    print(f"Loading pre-trained model: {config['model_name']}.ckpt")
    model = UniversalResNetModel.load_from_checkpoint(
        f"checkpoints/{config['model_name']}.ckpt", taxonomy=config["taxonomy"]
    )

    # Test-only trainer: no logger, no checkpoints
    trainer = Trainer(logger=False, enable_checkpointing=False)

    # Register the entry first so partial results survive a failing test run
    taxonomy_entry = {"name": taxonomy_name}
    domain_results[taxonomy_name] = taxonomy_entry

    # Caltech-101 (Domain 0) and Caltech-256 (Domain 1) apply to every model
    taxonomy_entry["caltech101"] = _eval_accuracy(trainer, model, caltech101_combined_dm)
    taxonomy_entry["caltech256"] = _eval_accuracy(trainer, model, caltech256_combined_dm)

    if taxonomy_name == "three_domain":
        # CIFAR-100 (Domain 2) is only evaluated for the three-domain model
        taxonomy_entry["cifar100"] = _eval_accuracy(trainer, model, cifar100_combined_dm)
        unified_module = three_domain_dataset_module
    else:
        # Two-domain models have no CIFAR-100 accuracy
        taxonomy_entry["cifar100"] = None
        unified_module = dataset_module

    # Combined test set of all domains the model was configured for
    taxonomy_entry["unified"] = _eval_accuracy(trainer, model, unified_module)

In [None]:
# One row per taxonomy; the taxonomy-name index is replaced by a plain
# 0..n-1 range index (the name is still available in the "name" column)
df = pd.DataFrame.from_dict(domain_results, orient="index").reset_index(drop=True)

# Show the table as plain text
print(df)

In [None]:
# Calculate training duration for each taxonomy
from csv import DictReader
import datetime


def calculate_training_duration(file_prefix, results_dir="training_results"):
    """Calculate the training duration from the wall times in a training CSV.

    Reads ``{results_dir}/{file_prefix}_train.csv`` and takes the difference
    between the "Wall time" values of the last and the first data row.

    Args:
        file_prefix: File name prefix; ``_train.csv`` is appended to it.
        results_dir: Directory containing the CSV file. Defaults to
            ``"training_results"``, the location used throughout this notebook.

    Returns:
        ``"<h>h <m>m"`` when the duration is at least one hour, ``"<m>m"``
        otherwise, or ``"N/A"`` when the file does not exist or has no data rows.

    Raises:
        KeyError: If the CSV has no "Wall time" column.
        ValueError: If a "Wall time" value cannot be parsed as a float.
    """
    csv_path = f"{results_dir}/{file_prefix}_train.csv"

    # Only the file access is guarded: a missing file means "no duration
    # available", whereas malformed content should still surface as an error.
    try:
        # newline="" is the csv module's recommended way to open CSV files
        with open(csv_path, "r", newline="") as f:
            rows = list(DictReader(f))
    except FileNotFoundError:
        return "N/A"

    if not rows:
        return "N/A"

    # Duration in seconds between the first and the last logged row
    start_time = float(rows[0]["Wall time"])
    end_time = float(rows[-1]["Wall time"])
    duration_seconds = end_time - start_time

    # Convert to whole hours and remaining whole minutes
    hours = int(duration_seconds // 3600)
    minutes = int((duration_seconds % 3600) // 60)

    if hours > 0:
        return f"{hours}h {minutes}m"
    return f"{minutes}m"


# Determine the training duration of every configured taxonomy; the CSV file
# prefix of a taxonomy is its TensorBoard logger name
training_durations = {}
for taxonomy_name, config in taxonomies_config.items():
    duration = calculate_training_duration(config["logger_name"])
    print(f"{taxonomy_name.capitalize()} taxonomy training duration: {duration}")
    training_durations[taxonomy_name] = duration

# Attach the duration to the evaluation results of the same taxonomy
for taxonomy_name, taxonomy_results in domain_results.items():
    taxonomy_results["training_time"] = training_durations[taxonomy_name]

In [None]:
# Create LaTeX table from results

# Single-domain baseline accuracies (as percentages)
caltech101_baseline = 97.23  # 0.9723 * 100
caltech256_baseline = 75.65  # 0.7565 * 100
# NOTE(review): the caption previously hard-coded 78.26 for CIFAR-100 while the
# deltas were computed against 60.13. The caption is now generated from this
# constant so both always agree - confirm which baseline value is correct.
cifar100_baseline = 60.13


def _format_accuracy_with_delta(accuracy, baseline):
    """Format a fractional accuracy as 'percent (signed delta to baseline)'.

    Args:
        accuracy: Accuracy as a fraction in [0, 1], or None/NaN when the model
            was not evaluated on that domain.
        baseline: Baseline accuracy in percent.

    Returns:
        e.g. "96.50 (-0.73)", or "N/A" for a missing accuracy.
    """
    # pandas stores a missing accuracy as NaN (not None) in a float column,
    # so an `is None` check would never detect it.
    if pd.isna(accuracy):
        return "N/A"
    accuracy_pct = accuracy * 100
    delta = accuracy_pct - baseline
    sign = "+" if delta >= 0 else ""
    return f"{accuracy_pct:.2f} ({sign}{delta:.2f})"


# Build the table from `domain_results` rather than from `df`: `df` was created
# before "training_time" was added to the results and therefore lacks that
# column. Renaming by key keeps the mapping independent of column order.
df_table = (
    pd.DataFrame.from_dict(domain_results, orient="index")
    .reset_index(drop=True)
    .rename(
        columns={
            "name": "Taxonomy Method",
            "caltech101": "Caltech-101",
            "caltech256": "Caltech-256",
            "cifar100": "CIFAR-100",
            "unified": "Combined",
            "training_time": "Training Time",
        }
    )
)

# Reorder to move training time after taxonomy method
df_table = df_table[
    [
        "Taxonomy Method",
        "Training Time",
        "Caltech-101",
        "Caltech-256",
        "CIFAR-100",
        "Combined",
    ]
]

# Combined accuracy stays numeric, expressed in percent
df_table["Combined"] = (df_table["Combined"] * 100).round(2)

# Per-domain columns become "accuracy (delta to baseline)" strings. Each column
# is replaced as a whole so no string is written into a float column.
for column_name, baseline in (
    ("Caltech-101", caltech101_baseline),
    ("Caltech-256", caltech256_baseline),
    ("CIFAR-100", cifar100_baseline),
):
    df_table[column_name] = [
        _format_accuracy_with_delta(accuracy, baseline)
        for accuracy in df_table[column_name]
    ]

# Caption; the baseline figures are interpolated from the constants above
table_caption = (
    "Universal model evaluation results on multi-domain test datasets. Two-domain models were trained on Caltech-101 + Caltech-256, while the three-domain model was trained on all three datasets. Models were evaluated on individual domains as well as the combined test set. Domain accuracy values show performance compared to single-domain ResNet-50 baselines "
    f"(Caltech-101: {caltech101_baseline:.2f}\\%, Caltech-256: {caltech256_baseline:.2f}\\%, CIFAR-100: {cifar100_baseline:.2f}\\%). "
    "All accuracy values are shown as percentages."
)

# Create LaTeX table
latex_table = (
    df_table.style.hide(axis="index")
    # Only "Combined" is formatted numerically; escape="latex" protects string
    # cells such as "three_domain", whose underscore is special in LaTeX.
    .format({"Combined": "{:.2f}"}, escape="latex")
    .to_latex(
        caption=table_caption,
        label="tab:universal_model_results",
        # One left-aligned column followed by centered columns for the rest
        column_format="l" + "c" * (len(df_table.columns) - 1),
        position="ht",
        position_float="centering",
        hrules=True,
    )
)

# Save to file
with open("../thesis/figures/universal_model_results.tex", "w") as f:
    f.write(latex_table)

print("\nLaTeX table preview:")
print(latex_table)

In [None]:
from csv import DictReader
import matplotlib

matplotlib.use("pgf")
import matplotlib.pyplot as plt

# LaTeX settings
plt.rcParams.update(
    {
        "text.usetex": True,
        "font.family": "EB Garamond",
        "font.size": 11,
        "pgf.texsystem": "lualatex",
    }
)


def _load_curve(csv_path):
    """Read a curve from a CSV file with "Step" and "Value" columns.

    Returns:
        A ``(steps, values)`` tuple of lists (ints and floats respectively).

    Raises:
        FileNotFoundError: If `csv_path` does not exist.
    """
    steps, values = [], []
    with open(csv_path, "r") as f:
        for row in DictReader(f):
            steps.append(int(row["Step"]))
            values.append(float(row["Value"]))
    return steps, values


# Create subplot with 3 plots, one for each taxonomy
fig, axes = plt.subplots(1, 3, figsize=(15, 5))

# Configuration for all three taxonomies. Titles use a real newline: a literal
# backslash-n would be handed to LaTeX as the undefined command "\n".
taxonomy_configs = [
    {
        "name": "hypothesis",
        "title": "Hypothesis Taxonomy\n(2 Domains)",
        "file_prefix": "universal_hypothesis_multi_domain",
    },
    {
        "name": "mcfp",
        "title": "MCFP Taxonomy\n(2 Domains)",
        "file_prefix": "universal_mcfp_multi_domain",
    },
    {
        "name": "three_domain",
        "title": "Hypothesis Taxonomy\n(3 Domains)",
        "file_prefix": "universal_three_domain_hypothesis",
    },
]

# Plot training curves for each taxonomy
for idx, config in enumerate(taxonomy_configs):
    ax = axes[idx]

    try:
        steps_train, train = _load_curve(
            f"training_results/{config['file_prefix']}_train.csv"
        )
        steps_val, val = _load_curve(
            f"training_results/{config['file_prefix']}_val.csv"
        )
    except FileNotFoundError:
        # If training files don't exist, show a placeholder
        ax.text(
            0.5,
            0.5,
            "Training data\nnot available",
            ha="center",
            va="center",
            transform=ax.transAxes,
            bbox=dict(boxstyle="round,pad=0.3", facecolor="lightgray"),
        )
    else:
        # Plot training and validation curves
        ax.plot(steps_train, train, label="Train", color="blue")
        ax.plot(steps_val, val, label="Validation", color="red")
        # A legend only makes sense when labelled curves were drawn
        ax.legend()

    ax.set_xlabel("Steps")
    ax.set_ylabel("Accuracy")
    ax.set_title(config["title"])
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig(
    "../thesis/figures/universal_model_training_curves.pgf", bbox_inches="tight"
)
plt.show()