In [1]:
import sys
import os

# Resolve the parent of the current working directory (treated as the project
# root) and put it at the front of the module search path, unless it is already
# listed there.
project_root = os.path.join(os.getcwd(), os.pardir)
project_root = os.path.abspath(project_root)
if project_root not in sys.path:
    sys.path.insert(0, project_root)

In [2]:
from utils.datasets import CLASS_NAMES, get_data, base_novel_categories, split_data
import clip
import torch
# Everything in this cell is placed on the CPU.
device = torch.device("cpu")

# CLIP "RN50" model together with the preprocessing object that clip.load returns.
clip_model, preprocess = clip.load("RN50", device=device)

# The CLIP preprocessing is used as both the training and the evaluation transform.
train_set, val_set, test_set = get_data(
    data_dir="../cifar10",
    train_transform=preprocess,
    eval_transform=preprocess,
)

# Split the classes into a base group and a novel group.
base_classes, novel_classes = base_novel_categories(train_set)

# Split each of the three datasets by the base classes, in train / val / test order.
(train_base, train_novel), (val_base, val_novel), (test_base, test_novel) = (
    split_data(subset, base_classes) for subset in (train_set, val_set, test_set)
)
        


In [3]:
from model.cocoop.custom_clip import CustomCLIP
# Checkpoint file to restore, given relative to the notebook's working directory.
pth_path = "../bin/cocoop/after_first_train_from_yaml_base_kl_v2_80_20_kl_03_rot_period_3_4_ctx_balanced_20250720_134756.pth"

# Rebuild the CustomCLIP model from the checkpoint on top of the loaded CLIP
# model; only the names of the base classes are passed as class names.
model = CustomCLIP.load_from_checkpoint(
    checkpoint_path=pth_path,
    clip_model=clip_model,
    classnames=[CLASS_NAMES[class_idx] for class_idx in base_classes],
    n_ctx=4,
    ctx_init="",
    device="cpu",
)

⚠️ Using float32 for meta_net due to MPS
self.dtype=torch.float32
⚠️ Using float32 for meta_net due to MPS


In [4]:
from training_systems.evaluation_methods import FineTunedTestStep


def compute_evaluation(model, batch_size: int = 10):
    """
    Evaluate ``model`` on the base-class and novel-class test splits.

    A single ``FineTunedTestStep`` is built around ``model`` and run once per
    split. The datasets and class lists are not parameters: they are read from
    the notebook globals ``test_base`` / ``base_classes`` and ``test_novel`` /
    ``novel_classes``, so the cell that defines them must be executed first.

    Args:
        model: Model handed to ``FineTunedTestStep`` unchanged.
        batch_size (int): Batch size handed to ``FineTunedTestStep``.
            Defaults to 10, the value that used to be hard-coded here.

    Returns:
        tuple: ``(base_accuracy, novel_accuracy)``, i.e. the ``"accuracy"``
        entry of the metrics returned by ``evaluate`` for the base split and
        for the novel split, in that order.
    """
    finetuned_test_method = FineTunedTestStep(
        model=model,
        batch_size=batch_size,
    )

    # NOTE(review): `classnames` receives the same `base_classes` /
    # `novel_classes` values that are used to index CLASS_NAMES when the model
    # is loaded, not the name strings themselves; confirm that this is what
    # `evaluate` expects.
    base_metrics = finetuned_test_method.evaluate(
        dataset=test_base,
        classnames=base_classes,
        desc_add=" - Base Fine Tuned",
    )
    novel_metrics = finetuned_test_method.evaluate(
        dataset=test_novel,
        classnames=novel_classes,
        desc_add=" - Novel Fine Tuned",
    )

    return base_metrics["accuracy"], novel_metrics["accuracy"]

In [None]:
# Evaluate the restored model on the base and novel test splits. This is slow:
# the recorded progress bar below shows 248 batches for the base split alone,
# at several seconds per batch.
base_acc, novel_acc = compute_evaluation(model)

⚠️ Using float32 for meta_net due to MPS



Test (Finetuned)  - Base Fine Tuned:   0%|          | 0/248 [00:00<?, ?it/s][A
Test (Finetuned)  - Base Fine Tuned:   0%|          | 0/248 [00:34<?, ?it/s, accuracy=0.6][A
Test (Finetuned)  - Base Fine Tuned:   0%|          | 1/248 [00:34<2:21:14, 34.31s/it, accuracy=0.6][A
Test (Finetuned)  - Base Fine Tuned:   0%|          | 1/248 [00:38<2:21:14, 34.31s/it, accuracy=0.5][A
Test (Finetuned)  - Base Fine Tuned:   1%|          | 2/248 [00:38<1:08:54, 16.81s/it, accuracy=0.5][A
Test (Finetuned)  - Base Fine Tuned:   1%|          | 2/248 [00:43<1:08:54, 16.81s/it, accuracy=0.433][A
Test (Finetuned)  - Base Fine Tuned:   1%|          | 3/248 [00:43<46:26, 11.37s/it, accuracy=0.433]  [A
Test (Finetuned)  - Base Fine Tuned:   1%|          | 3/248 [00:48<46:26, 11.37s/it, accuracy=0.375][A
Test (Finetuned)  - Base Fine Tuned:   2%|▏         | 4/248 [00:48<35:26,  8.72s/it, accuracy=0.375][A
Test (Finetuned)  - Base Fine Tuned:   2%|▏         | 4/248 [00:53<35:26,  8.72s/it, accuracy