In [4]:
import pandas as pd
from torchvision.datasets import Caltech256


# Open the local Caltech-256 copy (download=False: nothing is fetched here);
# only its list of category names is kept.
caltech256_dataset = Caltech256(root="datasets/caltech256", download=False)
caltech256_labels = caltech256_dataset.categories

# Load the CSV kept under data/ for this dataset.
caltech256_targets = pd.read_csv("data/caltech256.csv")

In [5]:
from lightning.pytorch import seed_everything
from library.synthetic_taxonomy import SyntheticTaxonomy


# Fix the random seed so that a fresh kernel regenerates the same synthetic
# domains. The checkpoints and prediction CSVs reused further down are only
# meaningful for the exact domain mappings they were produced with.
# NOTE(review): this only takes effect if SyntheticTaxonomy samples from the
# global Python / NumPy / PyTorch RNGs -- confirm, or pass a seed to it
# directly if its constructor accepts one. Artifacts written before this seed
# was introduced may belong to a different taxonomy and should be regenerated.
SEED = 42
seed_everything(SEED)

synthetic_taxonomy = SyntheticTaxonomy(
    num_atomic_concepts=257,
    num_domains=2,
    domain_class_count_mean=180,
    domain_class_count_variance=10,
    concept_cluster_size_mean=3,
    concept_cluster_size_variance=1,
)

# One mapping per generated domain; later cells index these by dataset label
# and use the number of distinct values as the class count of each model.
domain_A = synthetic_taxonomy.domains[0].to_mapping()
domain_B = synthetic_taxonomy.domains[1].to_mapping()

In [None]:
import torch
from lightning.pytorch.callbacks import ModelCheckpoint
from lightning.pytorch import Trainer
from lightning.pytorch import loggers as pl_loggers
from library.models import ResNetModel
from library.datasets import Caltech256MappedDataModule


# Lower the float32 matmul precision to speed up training
torch.set_float32_matmul_precision("medium")

# True  -> train a new model, then test the best checkpoint it produced.
# False -> load the previously saved checkpoint and only run the test loop.
TRAIN = False

tb_logger = pl_loggers.TensorBoardLogger(save_dir="logs", name="caltech256_synthetic_A")
dataset = Caltech256MappedDataModule(mapping=domain_A)
model_name = "resnet50-caltech256-synthetic-A-min-val-loss"

# Keep a single checkpoint: the one with the lowest validation loss.
# With the version counter disabled the file is exactly
# checkpoints/{model_name}.ckpt, which is the path the evaluation branch loads.
best_val_loss_checkpoint = ModelCheckpoint(
    dirpath="checkpoints",
    filename=model_name,
    monitor="val_loss",
    mode="min",
    save_top_k=1,
    enable_version_counter=False,
)
trainer = Trainer(max_epochs=20, logger=tb_logger, callbacks=[best_val_loss_checkpoint])

if not TRAIN:
    # Evaluation only: reuse the weights stored by an earlier training run.
    model = ResNetModel.load_from_checkpoint(f"checkpoints/{model_name}.ckpt")
    results = trainer.test(model, datamodule=dataset)
else:
    sgd_settings = {
        "lr": 0.01,
        "momentum": 0.9,
        "weight_decay": 5e-4,
    }
    model = ResNetModel(
        # One output class per distinct value in the domain A mapping.
        num_classes=len(set(domain_A.values())),
        architecture="resnet50",
        optim="sgd",
        optim_kwargs=sgd_settings,
    )
    trainer.fit(model, datamodule=dataset)

    # Test with the best checkpoint instead of the weights of the last epoch.
    results = trainer.test(datamodule=dataset, ckpt_path="best")

print(results)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
/home/sentinel/.conda/envs/master-thesis/lib/python3.13/site-packages/lightning/pytorch/callbacks/model_checkpoint.py:654: Checkpoint directory /home/sentinel/Development/master-thesis/project/checkpoints exists and is not empty.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type             | Params | Mode 
-------------------------------------------------------
0 | model     | ResNet           | 26.3 M | train
1 | criterion | CrossEntropyLoss | 0      | train
-------------------------------------------------------
26.3 M    Trainable params
0         Non-trainable params
26.3 M    Total params
105.213   Total estimated model params size (MB)
162       Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/home/sentinel/.conda/envs/master-thesis/lib/python3.13/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:425: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


                                                                           

/home/sentinel/.conda/envs/master-thesis/lib/python3.13/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Epoch 19: 100%|██████████| 276/276 [01:25<00:00,  3.24it/s, v_num=14]

`Trainer.fit` stopped: `max_epochs=20` reached.


Epoch 19: 100%|██████████| 276/276 [01:25<00:00,  3.24it/s, v_num=14]


Restoring states from the checkpoint path at /home/sentinel/Development/master-thesis/project/checkpoints/resnet50-caltech256-synthetic-A-min-val-loss.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/sentinel/Development/master-thesis/project/checkpoints/resnet50-caltech256-synthetic-A-min-val-loss.ckpt
/home/sentinel/.conda/envs/master-thesis/lib/python3.13/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:425: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Testing DataLoader 0: 100%|██████████| 35/35 [00:09<00:00,  3.72it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      eval_accuracy         0.8330308794975281
        eval_loss           0.7417902946472168
        hp_metric           0.8330308794975281
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
[{'eval_loss': 0.7417902946472168, 'eval_accuracy': 0.8330308794975281, 'hp_metric': 0.8330308794975281}]


In [None]:
# True  -> train a new model, then test the best checkpoint it produced.
# False -> load the previously saved checkpoint and only run the test loop.
TRAIN = True

tb_logger = pl_loggers.TensorBoardLogger(save_dir="logs", name="caltech256_synthetic_B")
dataset = Caltech256MappedDataModule(mapping=domain_B)
model_name = "resnet50-caltech256-synthetic-B-min-val-loss"

# Keep a single checkpoint: the one with the lowest validation loss.
# With the version counter disabled the file is exactly
# checkpoints/{model_name}.ckpt, which is the path the evaluation branch loads.
best_val_loss_checkpoint = ModelCheckpoint(
    dirpath="checkpoints",
    filename=model_name,
    monitor="val_loss",
    mode="min",
    save_top_k=1,
    enable_version_counter=False,
)
trainer = Trainer(max_epochs=20, logger=tb_logger, callbacks=[best_val_loss_checkpoint])

if not TRAIN:
    # Evaluation only: reuse the weights stored by an earlier training run.
    model = ResNetModel.load_from_checkpoint(f"checkpoints/{model_name}.ckpt")
    results = trainer.test(model, datamodule=dataset)
else:
    sgd_settings = {
        "lr": 0.01,
        "momentum": 0.9,
        "weight_decay": 5e-4,
    }
    model = ResNetModel(
        architecture="resnet50",
        optim="sgd",
        optim_kwargs=sgd_settings,
        # One output class per distinct value in the domain B mapping.
        num_classes=len(set(domain_B.values())),
    )
    trainer.fit(model, datamodule=dataset)

    # Test with the best checkpoint instead of the weights of the last epoch.
    results = trainer.test(datamodule=dataset, ckpt_path="best")

print(results)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


TypeError: ResNetModel.__init__() missing 1 required positional argument: 'num_classes'

In [None]:
import pandas as pd
from torchvision.datasets import Caltech256


# Category names come from the local Caltech-256 copy (nothing is downloaded).
caltech256_dataset = Caltech256(root="datasets/caltech256", download=False)
caltech256_labels = caltech256_dataset.categories
caltech256_targets = pd.read_csv("data/caltech256.csv")

# Rebuild both domain mappings from the taxonomy object created in an earlier
# cell (`synthetic_taxonomy` must already exist in the kernel).
domain_A, domain_B = (
    synthetic_taxonomy.domains[index].to_mapping() for index in (0, 1)
)

In [None]:
import pandas as pd
import torch
from library.datasets import Caltech256MappedDataModule
from library.models import ResNetModel
from lightning.pytorch import Trainer


torch.set_float32_matmul_precision("medium")


def _mapped_targets(datamodule, mapping):
    """Collect the labels of ``datamodule.predict_dataloader()`` and translate
    each one through ``mapping``.

    Args:
        datamodule: Object whose ``predict_dataloader()`` yields two-element
            batches with the label tensor as the second item.
        mapping: Lookup indexed by a single label value.

    Returns:
        The concatenated label tensor with every element replaced by
        ``mapping[label]``, in dataloader order.
    """
    labels = torch.cat([label for _, label in datamodule.predict_dataloader()])
    # Tensor.map_ works in place and requires a second tensor operand; the
    # zeros tensor is only a placeholder whose values the callback ignores.
    return labels.map_(torch.zeros_like(labels), lambda x, _: mapping[x])


# True  -> run both models on the other domain's data and (re)write the CSVs.
# False -> skip inference and only read the CSVs written by an earlier run.
PREDICT = False

if PREDICT:
    dataset_domain_A = Caltech256MappedDataModule(mapping=domain_A)
    model_domain_A = ResNetModel.load_from_checkpoint(
        "checkpoints/resnet50-caltech256-synthetic-A-min-val-loss.ckpt"
    )
    model_domain_A.eval()

    dataset_domain_B = Caltech256MappedDataModule(mapping=domain_B)
    model_domain_B = ResNetModel.load_from_checkpoint(
        "checkpoints/resnet50-caltech256-synthetic-B-min-val-loss.ckpt"
    )
    model_domain_B.eval()

    # Inference only: no logger and no checkpointing.
    trainer = Trainer(logger=False, enable_checkpointing=False)
    # Each model is applied to the data of the *other* domain.
    model_A_on_domain_B = trainer.predict(model_domain_A, datamodule=dataset_domain_B)
    model_B_on_domain_A = trainer.predict(model_domain_B, datamodule=dataset_domain_A)

    # Concatenate the per-batch outputs and keep the highest-scoring class.
    predictions_A_on_B = torch.cat(model_A_on_domain_B).argmax(dim=1)  # type: ignore
    predictions_B_on_A = torch.cat(model_B_on_domain_A).argmax(dim=1)  # type: ignore

    # NOTE(review): pairing these targets with the predictions above assumes
    # predict_dataloader() serves samples in the same order on every call (no
    # shuffling), and that it yields labels which still need to be translated
    # through the domain mapping -- confirm against Caltech256MappedDataModule.
    domain_A_targets = _mapped_targets(dataset_domain_A, domain_A)
    domain_B_targets = _mapped_targets(dataset_domain_B, domain_B)

    # Domain A targets next to what the domain B model predicted for them.
    df = pd.DataFrame(
        {
            "domain_A": domain_A_targets,
            "predictions_B_on_A": predictions_B_on_A,
        }
    )
    df.to_csv("data/caltech256_domain_A_predictions.csv", index=False)
    # Domain B targets next to what the domain A model predicted for them.
    df = pd.DataFrame(
        {
            "domain_B": domain_B_targets,
            "predictions_A_on_B": predictions_A_on_B,
        }
    )
    df.to_csv("data/caltech256_domain_B_predictions.csv", index=False)

# Always continue from the CSV files so later cells behave the same whether
# or not inference was re-run in this session.
df_A = pd.read_csv("data/caltech256_domain_A_predictions.csv")
df_B = pd.read_csv("data/caltech256_domain_B_predictions.csv")

In [None]:
import os

from library.taxonomy import Taxonomy
import numpy as np

# All HTML exports below are written to output/; create it up front so a fresh
# checkout does not fail at the first save_graph call.
# NOTE(review): assumes save_graph does not create missing parent directories
# itself -- creating the directory here is harmless either way.
os.makedirs("output", exist_ok=True)

taxonomy = Taxonomy(
    # presumably (domain of the predicting model, domain of the data,
    # predicted classes) -- confirm against Taxonomy's signature
    cross_domain_predictions=[
        (0, 1, np.array(df_B["predictions_A_on_B"], dtype=np.intp)),
        (1, 0, np.array(df_A["predictions_B_on_A"], dtype=np.intp)),
    ],
    # presumably (domain index, ground-truth classes of that domain's data)
    domain_targets=[
        (0, np.array(df_A["domain_A"], dtype=np.intp)),
        (1, np.array(df_B["domain_B"], dtype=np.intp)),
    ],
    domain_labels=synthetic_taxonomy.domain_labels,
)

# Taxonomy derived from the models' cross-domain predictions: export it once
# as constructed and once after build_universal_taxonomy() has been called.
taxonomy.visualize_graph("Synthetic Model Taxonomy").save_graph(
    "output/caltech256_synthetic_model_taxonomy.html"
)

taxonomy.build_universal_taxonomy()
taxonomy.visualize_graph("Synthetic Model Universal Taxonomy").save_graph(
    "output/caltech256_synthetic_model_universal_taxonomy.html"
)

# Same two exports for the synthetic taxonomy generated earlier in the
# notebook, for comparison with the model-derived graphs above.
synthetic_taxonomy.visualize_graph("Synthetic Taxonomy").save_graph(
    "output/caltech256_synthetic_taxonomy.html"
)
synthetic_taxonomy.build_universal_taxonomy()
synthetic_taxonomy.visualize_graph("Synthetic Universal Taxonomy").save_graph(
    "output/caltech256_synthetic_universal_taxonomy.html"
)