In [1]:
import os
import json
from pathlib import Path
from tqdm import tqdm
import numpy as np
import matplotlib.pyplot as plt
import torch
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from torchvision import transforms
from sklearn.metrics import roc_auc_score, precision_recall_curve, auc, roc_curve
import pandas as pd

from kymatio.torch import Scattering2D
import pywt
from collections import defaultdict

In [2]:
device = "cuda" if torch.cuda.is_available() else "cpu"
SRC_PATH = "../Data/GenImage/"
generator_names = ["adm", "bgan", "glide", "midj", "sd_14", "sd_15", "vqdm", "wukong"]
with open("classes.json", "r", encoding="utf-8") as f:
    data = json.load(f)
classes_idx = data["1k_idx"]
classes_names = data["21k_idx"]

## Original PaDim (with patch)

In [3]:
import torch.nn.functional as F
import clip
from transformers import AutoImageProcessor, AutoModel, AutoProcessor, AutoModelForImageClassification
import math

  from .autonotebook import tqdm as notebook_tqdm


In [31]:
class ClipDataset(Dataset):
    def __init__(self, image_paths, transform):
        self.image_paths = image_paths
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        path = self.image_paths[idx]
        try:
            image = Image.open(path).convert("RGB")
            image = self.transform(image)
            return image, str(path)
        except:
            print("Failure open image.")
            return None


def clip_collate_fn(batch):
    batch = [item for item in batch if item is not None]
    if not batch:
        return None, None
    images, paths = zip(*batch)
    return torch.stack(images), paths


def clip_images(image_folder, batch_size = 64, device='cuda'):
    model, preprocess = clip.load("ViT-B/32", device=device)
    model.float()
    visual = model.visual
    input_dir = Path(image_folder)
    image_paths = list(input_dir.glob("*"))
    dataset = ClipDataset(image_paths, transform=preprocess)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False, collate_fn=clip_collate_fn)
    patch_tokens_all = []
    for images, paths in tqdm(dataloader, desc="Image Encoding"):
        with torch.no_grad():
            x = images.to(device)
            x = visual.conv1(x)  # shape = [*, width, grid, grid]
            x = x.reshape(x.shape[0], x.shape[1], -1)  # shape = [*, width, grid ** 2]
            x = x.permute(0, 2, 1)  # shape = [*, grid ** 2, width]
            x = torch.cat([visual.class_embedding.to(x.dtype) + torch.zeros(x.shape[0], 1, x.shape[-1], dtype=x.dtype, device=x.device), x], dim=1)  # shape = [*, grid ** 2 + 1, width]
            x = x + visual.positional_embedding.to(x.dtype)
            x = visual.ln_pre(x)
            x = x.permute(1, 0, 2)  # NLD -> LND
            x = visual.transformer(x)
            x = x.permute(1, 0, 2)
            patch_tokens = visual.ln_post(x[:, 1:, :])
            patch_tokens_all.append(patch_tokens.cpu())
    patches = torch.cat(patch_tokens_all, dim=0)
    return patches

In [4]:
class Dinov2Dataset(Dataset):
    def __init__(self, image_paths):
        self.image_paths = image_paths

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        path = self.image_paths[idx]
        try:
            image = Image.open(path).convert("RGB")
            return image, str(path)
        except Exception as e:
            print(f"Failure open image because of {e}")
            return None


def dinov2_collate_fn(batch):
    batch = [item for item in batch if item is not None]
    if not batch:
        return None, None
    images, paths = zip(*batch)
    return list(images), paths


def dinov2_images(image_folder, batch_size = 64, model_name='facebook/dinov2-with-registers-base', device='cuda'):
    processor = AutoImageProcessor.from_pretrained(model_name)
    model = AutoModel.from_pretrained(model_name).to(device)
    model.eval()

    input_dir = Path(image_folder)
    image_paths = list(input_dir.glob("*"))
    dataset = Dinov2Dataset(image_paths)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False, collate_fn=dinov2_collate_fn)
    patch_tokens_all = []

    for images, paths in tqdm(dataloader, desc="Extracting patch tokens"):
        if images is None:
                continue
        # processor expects a list of PIL images
        inputs = processor(images=images, return_tensors="pt").to(device)
        with torch.no_grad():
            outputs = model(**inputs)
            patch_tokens = outputs.last_hidden_state[:, 1:, :]  # remove CLS
            patch_tokens_all.append(patch_tokens.detach().cpu())

    patches = torch.cat(patch_tokens_all, dim=0)
    
    return patches


In [None]:
def dinov2_images_2(image_folder, batch_size = 64, model_name='facebook/dinov2-with-registers-base', device='cuda'):
    processor = AutoImageProcessor.from_pretrained(model_name)
    model = AutoModel.from_pretrained(model_name).to(device)
    model.eval()

    input_dir = Path(image_folder)
    image_paths = list(input_dir.glob("*"))
    dataset = Dinov2Dataset(image_paths)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False, collate_fn=dinov2_collate_fn)
    cls_token_all = []

    for images, paths in tqdm(dataloader, desc="Extracting patch tokens"):
        if images is None:
                continue
        # processor expects a list of PIL images
        inputs = processor(images=images, return_tensors="pt").to(device)
        with torch.no_grad():
            outputs = model(**inputs)
            cls_token = outputs.last_hidden_state[:, 0, :]  # remove CLS
            cls_token_all.append(cls_token.detach().cpu())

    cls_tokens = torch.cat(cls_token_all, dim=0)
    # print(cls_tokens.shape)
    
    return cls_tokens

In [None]:
def resize_short_side(img, target_short_side):
    w, h = img.size
    if w < h:
        new_w = target_short_side
        new_h = int(h * (target_short_side / w))
    else:
        new_h = target_short_side
        new_w = int(w * (target_short_side / h))
    return img.resize((new_w, new_h), Image.BILINEAR)


wst_shape = (256, 256)
wst_preprocess = transforms.Compose(
    [
        transforms.Lambda(lambda img: resize_short_side(img, min(wst_shape))),
        transforms.CenterCrop(wst_shape),
        transforms.ToTensor(),
    ]
)

In [None]:
class WSTDataset(Dataset):
    def __init__(self, image_paths, transform):
        self.image_paths = image_paths
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        path = self.image_paths[idx]
        try:
            image = Image.open(path).convert("L")
            image = self.transform(image)
            return image, str(path)
        except:
            print("Failure open image.")
            return None

def wst_collate_fn(batch):
    batch = [item for item in batch if item is not None]
    if not batch:
        return None, None
    images, paths = zip(*batch)
    return torch.stack(images), paths

def wst_images(image_folder, batch_size = 64, J=2, seed=2025, device = device):
    scattering = Scattering2D(J=J, shape=wst_shape).to(device)
    dataset = WSTDataset(image_folder, transform=wst_preprocess)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False, collate_fn=wst_collate_fn)
    wst_patches = []
    with torch.no_grad():
        for batch in tqdm(dataloader):
            batch_images, batch_paths = batch
            batch_images = batch_images.to(device)  # shape: [batch_size, 1, H, W]
            coeffs = scattering(batch_images)  # 对整个批次进行散射变换，形状: [batch_size, C, C', H', W']
            coeffs = coeffs.squeeze(1)
            num_channels = coeffs.size(1)  # 81
            torch.cuda.manual_seed(seed)
            selected_channels = torch.randperm(num_channels)[:16]
            coeffs = coeffs[:, selected_channels]
            coeffs_pooled = torch.nn.functional.adaptive_avg_pool2d(coeffs, output_size=(32, 32))
            coeffs_flattened = coeffs_pooled.flatten(start_dim = 2)
            wst_patches.append(coeffs_flattened.cpu())
    patches = torch.cat(wst_patches, dim=0)
    return patches

In [None]:
class GaussianRandomProjection:
    def __init__(self, input_dim, target_dim, device='cuda', seed=None):
        """
        input_dim:  D
        target_dim:  d
        device: 'cuda' or 'cpu'
        """
        self.input_dim = input_dim
        self.target_dim = target_dim
        self.device = device

        if seed is not None:
            torch.manual_seed(seed)

        # shape (D, d)
        self.proj_matrix = torch.randn(input_dim, target_dim, device=device, dtype=torch.float32)

    def project(self, X):
        """
        X: Tensor of shape (B, D)
        Return: Tensor of shape (B, d)
        """
        if X.device != self.device:
            X = X.to(self.device)
        return torch.matmul(X, self.proj_matrix)  # shape (B, d)
    
class SparseRandomProjection:
    def __init__(self, input_dim, target_dim, s=None, device='cuda', seed=None):
        """
        input_dim:  D
        target_dim:  d
        s:  parameter of sparsity, by default sqrt(D)
        device: cuda or cpu
        """
        self.input_dim = input_dim
        self.target_dim = target_dim
        self.device = device
        self.s = s or int(math.sqrt(input_dim))

        if seed is not None:
            torch.manual_seed(seed)

        self.proj_matrix = self._generate_sparse_projection_matrix()

    def _generate_sparse_projection_matrix(self):
        D, d, s = self.input_dim, self.target_dim, self.s
        R = torch.zeros(D, d, device=self.device)

        rand_vals = torch.rand(D, d, device=self.device)

        pos_mask = rand_vals < (1 / (2 * s))
        neg_mask = (rand_vals >= (1 / (2 * s))) & (rand_vals < (1 / s))

        R[pos_mask] = math.sqrt(s)
        R[neg_mask] = -math.sqrt(s)

        return R  # shape: (D, d)

    def project(self, X):
        """
        X: Tensor of shape (B, D)
        Return: Tensor of shape (B, d)
        """
        if X.device != self.device:
            X = X.to(self.device)
        return X @ self.proj_matrix  # shape (B, d)

In [15]:
def extract_patches(image_folder, extract_fn):
    patches = extract_fn(image_folder)
    return patches

def gt_compute(embeddings, eps=1e-3):
    mean = embeddings.mean(dim=0, keepdim=True)
    X = embeddings - mean
    cov = X.T @ X / (embeddings.size(0) - 1)
    cov += eps * torch.eye(cov.size(0), device=embeddings.device)
    return mean, cov

def mahalanobis_distance(x, mean, cov):
    x = x.to(torch.float32).view(-1)
    mean = mean.to(torch.float32).view(-1)
    delta = x - mean
    cov = cov.to(torch.float32)

    try:
        sol = torch.linalg.solve(cov, delta.unsqueeze(1))  # [D, 1]
        dist_squared = delta @ sol.squeeze()
        if dist_squared < 0:
            print("Warning: distance squared < 0", dist_squared.item())
            dist_squared = torch.clamp(dist_squared, min=0.0)
        dist = torch.sqrt(dist_squared)
        return dist
    except RuntimeError as e:
        print("Runtime error in Mahalanobis:", e)
        return torch.tensor(float("nan"), device=x.device)

In [39]:

def padim_detector(ai_path, cls_path, extract_fn, save_path):
    os.makedirs(os.path.dirname(save_path), exist_ok=True)
    projector = SparseRandomProjection(input_dim=768, target_dim=100, device = device, seed = 2025) 
    gt_patches = extract_patches(cls_path + "/nature", extract_fn)
    baseline_patches = extract_patches(cls_path + "/nature_2", extract_fn)
    ai_patches = extract_patches(ai_path, extract_fn)
    test_patches = torch.cat([ai_patches,baseline_patches], dim=0)
    gt_patches = projector.project(gt_patches)
    test_patches = projector.project(test_patches)
    labels = np.concatenate((np.zeros(ai_patches.shape[0]), np.ones(baseline_patches.shape[0])))
    gt_coeffs =[]
    test_scores = []
      

    for i in range(gt_patches.shape[1]):
        # gt_tensor = projector.project(gt_patches[:,i,:])
        gt_tensor = gt_patches[:, i, :]
        gt_mean, gt_cov = gt_compute(gt_tensor)
        gt_mean, gt_cov = gt_mean.to(device), gt_cov.to(device)
        gt_coeffs.append((gt_mean, gt_cov))
    
    for sample in test_patches:
        score_per_patch = []
        for i in range(test_patches.shape[1]):
            distance = mahalanobis_distance(sample[i], gt_coeffs[i][0], gt_coeffs[i][1])
            score_per_patch.append(-distance.cpu())
        test_scores.append(min(score_per_patch))
    scores = np.array(test_scores)

    fpr, tpr, thresholds = roc_curve(labels, scores)
    idx = np.where(tpr >= 0.95)[0][0]
    fpr_95 = fpr[idx]

    distances = np.sqrt((1 - tpr) ** 2 + fpr**2)
    best_threshold = thresholds[np.argmin(distances)]
    print("Best threshold(ROC):", best_threshold)

    roc_auc = roc_auc_score(labels, scores)
    # print("AUROC:", roc_auc)

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))
    ax1.plot(fpr, tpr, color="darkorange", lw=2, label=f"ROC curve (area = {roc_auc:.2f})")
    ax1.plot([0, 1], [0, 1], color="navy", lw=2, linestyle="--")
    ax1.set_xlim([0.0, 1.0])
    ax1.set_ylim([0.0, 1.05])
    ax1.set_xlabel("False Positive Rate (FPR)")
    ax1.set_ylabel("True Positive Rate (TPR)")
    ax1.set_title("Receiver Operating Characteristic (ROC) Curve")
    ax1.legend(loc="lower right")

    precision, recall, thresholds = precision_recall_curve(labels, scores)
    pr_auc = auc(recall, precision)
    # print("AUPRC:", pr_auc)

    ax2.plot(recall, precision, color="blue", lw=2, label=f"PR curve (area = {pr_auc:.2f})")
    ax2.set_xlim([0.0, 1.0])
    ax2.set_ylim([0.0, 1.05])
    ax2.set_xlabel("Recall")
    ax2.set_ylabel("Precision")
    ax2.set_title("Precision-Recall Curve")
    ax2.legend(loc="best")

    plt.tight_layout()
    plt.savefig(save_path)
    plt.close()
    return roc_auc, pr_auc, fpr_95


In [None]:
data = {"CLASS": [x for x in classes_idx for _ in range(3)], "GENERATOR": generators, "AUROC": auroc, "AUPRC": auprc}
df = pd.DataFrame(data)
df.to_csv(embedder + "_patchcore_result.csv", index=False)
print(embedder + f" auroc: {np.mean(auroc)}, auprc : {np.mean(auprc)}")

In [40]:
extract_fns = [clip_images, dinov2_images, wst_images]
for idx, embedder in enumerate(["clip", "dinov2", "wst"]):
    auroc = []
    auprc = []
    fpr95 = []
    generator = []
    extract_fn = extract_fns[idx]
    for cls in classes_idx:
        r1, p1, f1 = padim_detector(
            SRC_PATH + cls + "/bgan",
            SRC_PATH + cls,
            extract_fn,
            f"../Data/Results/PaDim/{embedder}/{cls}/bgan.png",
        )
        generator.append("bgan")
        auroc.append(r1)
        auprc.append(p1)
        fpr95.append(f1)
        r2, p2, f2 = padim_detector(
            SRC_PATH + cls + "/midj",
            SRC_PATH + cls,
            extract_fn,
            f"../Data/Results/PaDim/{embedder}/{cls}/midj.png",
        )
        generator.append("midj")
        auroc.append(r2)
        auprc.append(p2)
        fpr95.append(f2)
        r3, p3, f3 = padim_detector(
            SRC_PATH + cls + "/sd_15",
            SRC_PATH + cls,
            extract_fn,
            f"../Data/Results/PaDim/{embedder}/{cls}/sd_15.png",
        )
        generator.append("sd_15")
        auroc.append(r3)
        auprc.append(p3)
        fpr95.append(f3)
    data = {
        "CLASS": [x for x in classes_idx for _ in range(3)],
        "GENERATOR": generator,
        "AUROC": auroc,
        "AUPRC": auprc,
        "FPR95": fpr95,
    }
    df = pd.DataFrame(data)
    df.to_csv(embedder + "_PaDim_s_result.csv", index=False)
    print(f"{embedder} auroc: {np.mean(auroc)}, auprc: {np.mean(auprc)}, fpr95: {np.mean(fpr95)}")

Image Encoding: 100%|██████████| 3/3 [00:01<00:00,  1.83it/s]
Image Encoding: 100%|██████████| 3/3 [00:01<00:00,  1.57it/s]
Image Encoding: 100%|██████████| 3/3 [00:00<00:00,  4.15it/s]


Best threshold(ROC): -21.30688


Image Encoding: 100%|██████████| 3/3 [00:01<00:00,  1.92it/s]
Image Encoding: 100%|██████████| 3/3 [00:01<00:00,  2.07it/s]
Image Encoding: 100%|██████████| 3/3 [00:05<00:00,  1.90s/it]


Best threshold(ROC): -23.630703


Image Encoding: 100%|██████████| 3/3 [00:01<00:00,  1.89it/s]
Image Encoding: 100%|██████████| 3/3 [00:01<00:00,  2.00it/s]
Image Encoding: 100%|██████████| 3/3 [00:02<00:00,  1.22it/s]


Best threshold(ROC): -23.93758


Image Encoding: 100%|██████████| 3/3 [00:01<00:00,  2.36it/s]
Image Encoding: 100%|██████████| 3/3 [00:01<00:00,  2.03it/s]
Image Encoding: 100%|██████████| 3/3 [00:00<00:00,  3.34it/s]


Best threshold(ROC): -22.699675


Image Encoding: 100%|██████████| 3/3 [00:01<00:00,  2.25it/s]
Image Encoding: 100%|██████████| 3/3 [00:01<00:00,  2.28it/s]
Image Encoding: 100%|██████████| 3/3 [00:06<00:00,  2.04s/it]


Best threshold(ROC): -22.655165


Image Encoding: 100%|██████████| 3/3 [00:01<00:00,  2.36it/s]
Image Encoding: 100%|██████████| 3/3 [00:01<00:00,  2.28it/s]
Image Encoding: 100%|██████████| 3/3 [00:02<00:00,  1.20it/s]


Best threshold(ROC): -22.699675


Image Encoding: 100%|██████████| 3/3 [00:01<00:00,  1.99it/s]
Image Encoding: 100%|██████████| 3/3 [00:01<00:00,  1.97it/s]
Image Encoding: 100%|██████████| 3/3 [00:00<00:00,  3.82it/s]


Best threshold(ROC): -23.55883


Image Encoding: 100%|██████████| 3/3 [00:01<00:00,  2.09it/s]
Image Encoding: 100%|██████████| 3/3 [00:01<00:00,  2.16it/s]
Image Encoding: 100%|██████████| 3/3 [00:06<00:00,  2.09s/it]


Best threshold(ROC): -25.736683


Image Encoding: 100%|██████████| 3/3 [00:01<00:00,  2.00it/s]
Image Encoding: 100%|██████████| 3/3 [00:01<00:00,  2.10it/s]
Image Encoding: 100%|██████████| 3/3 [00:02<00:00,  1.29it/s]


Best threshold(ROC): -21.37226


Image Encoding: 100%|██████████| 3/3 [00:01<00:00,  2.21it/s]
Image Encoding: 100%|██████████| 3/3 [00:01<00:00,  2.42it/s]
Image Encoding: 100%|██████████| 3/3 [00:00<00:00,  3.28it/s]


Best threshold(ROC): -23.479492


Image Encoding: 100%|██████████| 3/3 [00:01<00:00,  2.11it/s]
Image Encoding: 100%|██████████| 3/3 [00:01<00:00,  1.97it/s]
Image Encoding: 100%|██████████| 3/3 [00:06<00:00,  2.06s/it]


Best threshold(ROC): -23.560026


Image Encoding: 100%|██████████| 3/3 [00:01<00:00,  2.65it/s]
Image Encoding: 100%|██████████| 3/3 [00:01<00:00,  2.09it/s]
Image Encoding: 100%|██████████| 3/3 [00:02<00:00,  1.27it/s]


Best threshold(ROC): -25.36506


Image Encoding: 100%|██████████| 3/3 [00:01<00:00,  2.42it/s]
Image Encoding: 100%|██████████| 3/3 [00:01<00:00,  2.24it/s]
Image Encoding: 100%|██████████| 3/3 [00:00<00:00,  3.31it/s]


Best threshold(ROC): -24.048674


Image Encoding: 100%|██████████| 3/3 [00:01<00:00,  2.49it/s]
Image Encoding: 100%|██████████| 3/3 [00:01<00:00,  2.16it/s]
Image Encoding: 100%|██████████| 3/3 [00:05<00:00,  1.99s/it]


Best threshold(ROC): -20.995638


Image Encoding: 100%|██████████| 3/3 [00:01<00:00,  2.33it/s]
Image Encoding: 100%|██████████| 3/3 [00:01<00:00,  2.14it/s]
Image Encoding: 100%|██████████| 3/3 [00:02<00:00,  1.37it/s]


Best threshold(ROC): -20.884888


Image Encoding: 100%|██████████| 3/3 [00:01<00:00,  2.71it/s]
Image Encoding: 100%|██████████| 3/3 [00:01<00:00,  2.38it/s]
Image Encoding: 100%|██████████| 3/3 [00:00<00:00,  3.52it/s]


Best threshold(ROC): -24.814566


Image Encoding: 100%|██████████| 3/3 [00:01<00:00,  2.69it/s]
Image Encoding: 100%|██████████| 3/3 [00:01<00:00,  2.32it/s]
Image Encoding: 100%|██████████| 3/3 [00:05<00:00,  1.86s/it]


Best threshold(ROC): -22.807056


Image Encoding: 100%|██████████| 3/3 [00:01<00:00,  2.78it/s]
Image Encoding: 100%|██████████| 3/3 [00:01<00:00,  2.46it/s]
Image Encoding: 100%|██████████| 3/3 [00:02<00:00,  1.29it/s]


Best threshold(ROC): -24.624828


Image Encoding: 100%|██████████| 3/3 [00:01<00:00,  2.79it/s]
Image Encoding: 100%|██████████| 3/3 [00:01<00:00,  2.41it/s]
Image Encoding: 100%|██████████| 3/3 [00:00<00:00,  3.33it/s]


Best threshold(ROC): -25.372501


Image Encoding: 100%|██████████| 3/3 [00:01<00:00,  2.28it/s]
Image Encoding: 100%|██████████| 3/3 [00:01<00:00,  2.49it/s]
Image Encoding: 100%|██████████| 3/3 [00:05<00:00,  1.99s/it]


Best threshold(ROC): -25.483753


Image Encoding: 100%|██████████| 3/3 [00:01<00:00,  2.66it/s]
Image Encoding: 100%|██████████| 3/3 [00:01<00:00,  2.24it/s]
Image Encoding: 100%|██████████| 3/3 [00:02<00:00,  1.27it/s]


Best threshold(ROC): -20.141703


Image Encoding: 100%|██████████| 3/3 [00:01<00:00,  2.45it/s]
Image Encoding: 100%|██████████| 3/3 [00:01<00:00,  2.05it/s]
Image Encoding: 100%|██████████| 3/3 [00:00<00:00,  3.39it/s]


Best threshold(ROC): -24.341545


Image Encoding: 100%|██████████| 3/3 [00:01<00:00,  2.33it/s]
Image Encoding: 100%|██████████| 3/3 [00:01<00:00,  2.33it/s]
Image Encoding: 100%|██████████| 3/3 [00:05<00:00,  1.71s/it]


Best threshold(ROC): -25.184122


Image Encoding: 100%|██████████| 3/3 [00:01<00:00,  2.51it/s]
Image Encoding: 100%|██████████| 3/3 [00:01<00:00,  2.38it/s]
Image Encoding: 100%|██████████| 3/3 [00:02<00:00,  1.26it/s]


Best threshold(ROC): -24.4355


Image Encoding: 100%|██████████| 3/3 [00:01<00:00,  2.05it/s]
Image Encoding: 100%|██████████| 3/3 [00:01<00:00,  2.13it/s]
Image Encoding: 100%|██████████| 3/3 [00:00<00:00,  3.31it/s]


Best threshold(ROC): -23.805202


Image Encoding: 100%|██████████| 3/3 [00:01<00:00,  2.34it/s]
Image Encoding: 100%|██████████| 3/3 [00:01<00:00,  2.11it/s]
Image Encoding: 100%|██████████| 3/3 [00:06<00:00,  2.10s/it]


Best threshold(ROC): -23.805202


Image Encoding: 100%|██████████| 3/3 [00:01<00:00,  1.97it/s]
Image Encoding: 100%|██████████| 3/3 [00:01<00:00,  2.07it/s]
Image Encoding: 100%|██████████| 3/3 [00:02<00:00,  1.28it/s]


Best threshold(ROC): -23.485922


Image Encoding: 100%|██████████| 3/3 [00:01<00:00,  2.52it/s]
Image Encoding: 100%|██████████| 3/3 [00:01<00:00,  2.07it/s]
Image Encoding: 100%|██████████| 3/3 [00:00<00:00,  3.71it/s]


Best threshold(ROC): -23.101141


Image Encoding: 100%|██████████| 3/3 [00:01<00:00,  2.72it/s]
Image Encoding: 100%|██████████| 3/3 [00:01<00:00,  2.40it/s]
Image Encoding: 100%|██████████| 3/3 [00:06<00:00,  2.22s/it]


Best threshold(ROC): -22.97986


Image Encoding: 100%|██████████| 3/3 [00:01<00:00,  2.66it/s]
Image Encoding: 100%|██████████| 3/3 [00:01<00:00,  2.21it/s]
Image Encoding: 100%|██████████| 3/3 [00:02<00:00,  1.31it/s]


Best threshold(ROC): -23.494844
clip auroc: 0.3748983894731494, auprc: 0.43654577297612573, fpr95: 0.9843621399176953


Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.
Extracting patch tokens: 100%|██████████| 3/3 [00:03<00:00,  1.15s/it]
Extracting patch tokens: 100%|██████████| 3/3 [00:03<00:00,  1.03s/it]
Extracting patch tokens: 100%|██████████| 3/3 [00:02<00:00,  1.33it/s]


Best threshold(ROC): -28.292461


Extracting patch tokens: 100%|██████████| 3/3 [00:03<00:00,  1.08s/it]
Extracting patch tokens: 100%|██████████| 3/3 [00:03<00:00,  1.01s/it]
Extracting patch tokens: 100%|██████████| 3/3 [00:07<00:00,  2.50s/it]


Best threshold(ROC): -30.082832


Extracting patch tokens: 100%|██████████| 3/3 [00:03<00:00,  1.07s/it]
Extracting patch tokens: 100%|██████████| 3/3 [00:02<00:00,  1.01it/s]
Extracting patch tokens: 100%|██████████| 3/3 [00:03<00:00,  1.25s/it]


Best threshold(ROC): -28.335688


Extracting patch tokens: 100%|██████████| 3/3 [00:02<00:00,  1.02it/s]
Extracting patch tokens: 100%|██████████| 3/3 [00:02<00:00,  1.07it/s]
Extracting patch tokens: 100%|██████████| 3/3 [00:02<00:00,  1.33it/s]


Best threshold(ROC): -34.487125


Extracting patch tokens: 100%|██████████| 3/3 [00:02<00:00,  1.09it/s]
Extracting patch tokens: 100%|██████████| 3/3 [00:02<00:00,  1.03it/s]
Extracting patch tokens: 100%|██████████| 3/3 [00:07<00:00,  2.67s/it]


Best threshold(ROC): -32.630066


Extracting patch tokens: 100%|██████████| 3/3 [00:02<00:00,  1.08it/s]
Extracting patch tokens: 100%|██████████| 3/3 [00:02<00:00,  1.08it/s]
Extracting patch tokens: 100%|██████████| 3/3 [00:03<00:00,  1.31s/it]


Best threshold(ROC): -41.53404


Extracting patch tokens: 100%|██████████| 3/3 [00:03<00:00,  1.12s/it]
Extracting patch tokens: 100%|██████████| 3/3 [00:03<00:00,  1.09s/it]
Extracting patch tokens: 100%|██████████| 3/3 [00:02<00:00,  1.25it/s]


Best threshold(ROC): -32.458443


Extracting patch tokens: 100%|██████████| 3/3 [00:03<00:00,  1.10s/it]
Extracting patch tokens: 100%|██████████| 3/3 [00:03<00:00,  1.05s/it]
Extracting patch tokens: 100%|██████████| 3/3 [00:08<00:00,  2.79s/it]


Best threshold(ROC): -32.58898


Extracting patch tokens: 100%|██████████| 3/3 [00:03<00:00,  1.08s/it]
Extracting patch tokens: 100%|██████████| 3/3 [00:03<00:00,  1.03s/it]
Extracting patch tokens: 100%|██████████| 3/3 [00:03<00:00,  1.29s/it]


Best threshold(ROC): -30.047487


Extracting patch tokens: 100%|██████████| 3/3 [00:03<00:00,  1.04s/it]
Extracting patch tokens: 100%|██████████| 3/3 [00:03<00:00,  1.01s/it]
Extracting patch tokens: 100%|██████████| 3/3 [00:02<00:00,  1.26it/s]


Best threshold(ROC): -26.330925


Extracting patch tokens: 100%|██████████| 3/3 [00:03<00:00,  1.04s/it]
Extracting patch tokens: 100%|██████████| 3/3 [00:03<00:00,  1.04s/it]
Extracting patch tokens: 100%|██████████| 3/3 [00:08<00:00,  2.77s/it]


Best threshold(ROC): -28.31334


Extracting patch tokens: 100%|██████████| 3/3 [00:02<00:00,  1.01it/s]
Extracting patch tokens: 100%|██████████| 3/3 [00:03<00:00,  1.02s/it]
Extracting patch tokens: 100%|██████████| 3/3 [00:04<00:00,  1.34s/it]


Best threshold(ROC): -26.524956


Extracting patch tokens: 100%|██████████| 3/3 [00:02<00:00,  1.03it/s]
Extracting patch tokens: 100%|██████████| 3/3 [00:02<00:00,  1.04it/s]
Extracting patch tokens: 100%|██████████| 3/3 [00:02<00:00,  1.30it/s]


Best threshold(ROC): -27.312551


Extracting patch tokens: 100%|██████████| 3/3 [00:03<00:00,  1.04s/it]
Extracting patch tokens: 100%|██████████| 3/3 [00:03<00:00,  1.04s/it]
Extracting patch tokens: 100%|██████████| 3/3 [00:08<00:00,  2.67s/it]


Best threshold(ROC): -25.720102


Extracting patch tokens: 100%|██████████| 3/3 [00:02<00:00,  1.06it/s]
Extracting patch tokens: 100%|██████████| 3/3 [00:02<00:00,  1.07it/s]
Extracting patch tokens: 100%|██████████| 3/3 [00:03<00:00,  1.15s/it]


Best threshold(ROC): -25.33463


Extracting patch tokens: 100%|██████████| 3/3 [00:02<00:00,  1.09it/s]
Extracting patch tokens: 100%|██████████| 3/3 [00:02<00:00,  1.09it/s]
Extracting patch tokens: 100%|██████████| 3/3 [00:02<00:00,  1.32it/s]


Best threshold(ROC): -25.446054


Extracting patch tokens: 100%|██████████| 3/3 [00:02<00:00,  1.10it/s]
Extracting patch tokens: 100%|██████████| 3/3 [00:02<00:00,  1.08it/s]
Extracting patch tokens: 100%|██████████| 3/3 [00:07<00:00,  2.42s/it]


Best threshold(ROC): -24.81379


Extracting patch tokens: 100%|██████████| 3/3 [00:02<00:00,  1.18it/s]
Extracting patch tokens: 100%|██████████| 3/3 [00:02<00:00,  1.18it/s]
Extracting patch tokens: 100%|██████████| 3/3 [00:03<00:00,  1.14s/it]


Best threshold(ROC): -25.950356


Extracting patch tokens: 100%|██████████| 3/3 [00:02<00:00,  1.19it/s]
Extracting patch tokens: 100%|██████████| 3/3 [00:02<00:00,  1.19it/s]
Extracting patch tokens: 100%|██████████| 3/3 [00:02<00:00,  1.44it/s]


Best threshold(ROC): -29.50078


Extracting patch tokens: 100%|██████████| 3/3 [00:02<00:00,  1.19it/s]
Extracting patch tokens: 100%|██████████| 3/3 [00:02<00:00,  1.20it/s]
Extracting patch tokens: 100%|██████████| 3/3 [00:07<00:00,  2.42s/it]


Best threshold(ROC): -29.30211


Extracting patch tokens: 100%|██████████| 3/3 [00:02<00:00,  1.18it/s]
Extracting patch tokens: 100%|██████████| 3/3 [00:02<00:00,  1.20it/s]
Extracting patch tokens: 100%|██████████| 3/3 [00:03<00:00,  1.12s/it]


Best threshold(ROC): -26.02436


Extracting patch tokens: 100%|██████████| 3/3 [00:02<00:00,  1.11it/s]
Extracting patch tokens: 100%|██████████| 3/3 [00:02<00:00,  1.09it/s]
Extracting patch tokens: 100%|██████████| 3/3 [00:02<00:00,  1.36it/s]


Best threshold(ROC): -29.42842


Extracting patch tokens: 100%|██████████| 3/3 [00:02<00:00,  1.11it/s]
Extracting patch tokens: 100%|██████████| 3/3 [00:02<00:00,  1.07it/s]
Extracting patch tokens: 100%|██████████| 3/3 [00:07<00:00,  2.42s/it]


Best threshold(ROC): -27.990944


Extracting patch tokens: 100%|██████████| 3/3 [00:02<00:00,  1.03it/s]
Extracting patch tokens: 100%|██████████| 3/3 [00:02<00:00,  1.07it/s]
Extracting patch tokens: 100%|██████████| 3/3 [00:03<00:00,  1.18s/it]


Best threshold(ROC): -27.875927


Extracting patch tokens: 100%|██████████| 3/3 [00:03<00:00,  1.02s/it]
Extracting patch tokens: 100%|██████████| 3/3 [00:02<00:00,  1.00it/s]
Extracting patch tokens: 100%|██████████| 3/3 [00:02<00:00,  1.30it/s]


Best threshold(ROC): -31.44082


Extracting patch tokens: 100%|██████████| 3/3 [00:02<00:00,  1.01it/s]
Extracting patch tokens: 100%|██████████| 3/3 [00:02<00:00,  1.06it/s]
Extracting patch tokens: 100%|██████████| 3/3 [00:07<00:00,  2.65s/it]


Best threshold(ROC): -30.74945


Extracting patch tokens: 100%|██████████| 3/3 [00:03<00:00,  1.04s/it]
Extracting patch tokens: 100%|██████████| 3/3 [00:02<00:00,  1.00it/s]
Extracting patch tokens: 100%|██████████| 3/3 [00:03<00:00,  1.17s/it]


Best threshold(ROC): -32.020275


Extracting patch tokens: 100%|██████████| 3/3 [00:02<00:00,  1.11it/s]
Extracting patch tokens: 100%|██████████| 3/3 [00:02<00:00,  1.05it/s]
Extracting patch tokens: 100%|██████████| 3/3 [00:02<00:00,  1.31it/s]


Best threshold(ROC): -26.8487


Extracting patch tokens: 100%|██████████| 3/3 [00:02<00:00,  1.12it/s]
Extracting patch tokens: 100%|██████████| 3/3 [00:02<00:00,  1.05it/s]
Extracting patch tokens: 100%|██████████| 3/3 [00:08<00:00,  2.76s/it]


Best threshold(ROC): -26.299387


Extracting patch tokens: 100%|██████████| 3/3 [00:02<00:00,  1.14it/s]
Extracting patch tokens: 100%|██████████| 3/3 [00:02<00:00,  1.08it/s]
Extracting patch tokens: 100%|██████████| 3/3 [00:03<00:00,  1.23s/it]


Best threshold(ROC): -24.253609
dinov2 auroc: 0.5070200172737895, auprc: 0.5548537427102228, fpr95: 0.9337448559670781


  0%|          | 0/1 [00:00<?, ?it/s]


Failure open image.
Failure open image.
Failure open image.
Failure open image.
Failure open image.
Failure open image.
Failure open image.
Failure open image.
Failure open image.
Failure open image.
Failure open image.
Failure open image.
Failure open image.
Failure open image.
Failure open image.
Failure open image.
Failure open image.
Failure open image.
Failure open image.
Failure open image.
Failure open image.
Failure open image.
Failure open image.
Failure open image.
Failure open image.
Failure open image.
Failure open image.


AttributeError: 'NoneType' object has no attribute 'to'