In [None]:
# Import necessary libraries

import torch
import torch.nn as nn
import torchvision.models as models
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from tqdm.notebook import tqdm
from glob import glob
from PIL import Image
import numpy as np
import pickle

# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
print(f"Using device: {device}")

In [None]:
# Load the universal feature extractor

# Build the backbone architecture without pretrained weights; the trained
# parameters come from the checkpoint loaded below.
backbone_arch = models.wide_resnet50_2(weights=None)
# Chain every child module except the last two (presumably the pooling and
# classification head -- confirm against the torchvision model definition).
feature_extractor = nn.Sequential(*list(backbone_arch.children())[:-2]).to(device)

# map_location remaps the checkpoint's tensors onto the active device, so a
# checkpoint that was saved from a GPU session still loads on a CPU-only host.
# NOTE: torch.load unpickles the file; only load checkpoints from a trusted source.
extractor_state = torch.load("universal_feature_extractor.pth", map_location=device)
feature_extractor.load_state_dict(extractor_state)
feature_extractor.eval()
print("‚úÖ Universal feature extractor loaded and set to evaluation mode.")

In [None]:
# Dataset Preparation for Golden Samples

class GoldenSampleDataset(Dataset):
    """Dataset that yields transformed RGB images read from a list of file paths.

    Args:
        image_paths: Sequence of image file paths; its length is the dataset size.
        transform: Callable applied to each loaded RGB image; its result is
            what ``__getitem__`` returns.
    """

    def __init__(self, image_paths, transform):
        self.image_paths = image_paths
        self.transform = transform

    def __len__(self):
        """Return the number of image paths held by the dataset."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Open the image at position ``idx``, convert it to RGB and transform it."""
        sample_path = self.image_paths[idx]
        rgb_image = Image.open(sample_path).convert("RGB")
        return self.transform(rgb_image)

# Per-channel statistics passed to Normalize (the values commonly used for
# ImageNet-pretrained backbones).
NORMALIZE_MEAN = [0.485, 0.456, 0.406]
NORMALIZE_STD = [0.229, 0.224, 0.225]

# Resize to 256x256, take the central 224x224 crop, convert to a tensor and
# normalise each channel.
preprocessing_steps = [
    transforms.Resize((256, 256)),
    transforms.CenterCrop((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=NORMALIZE_MEAN, std=NORMALIZE_STD),
]
inference_transform = transforms.Compose(preprocessing_steps)

golden_sample_pattern = "/kaggle/input/mvtec-ad/carpet/train/good/*.png"
# glob returns matches in arbitrary (filesystem-dependent) order; sorting makes
# the sample order -- and therefore the row order of the memory bank built
# from it -- reproducible across runs and machines.
golden_sample_paths = sorted(glob(golden_sample_pattern))
if not golden_sample_paths:
    # Fail here with a clear message instead of later, when concatenating an
    # empty list of feature batches would raise an unrelated-looking error.
    raise FileNotFoundError(f"No golden sample images matched {golden_sample_pattern!r}")

golden_dataset = GoldenSampleDataset(golden_sample_paths, inference_transform)
# shuffle=False keeps batches in the (sorted) path order.
golden_loader = DataLoader(golden_dataset, batch_size=32, shuffle=False)
print(f"Prepared {len(golden_dataset)} golden samples for on-site adaptation.")

In [None]:
# Feature Extraction

# Most recent output captured from each hooked module, keyed by the name
# given to get_features_hook.
features = {}


def get_features_hook(name):
    """Build a forward hook that stores a module's output in ``features[name]``.

    Args:
        name: Key under which the hooked module's output is stored.

    Returns:
        A callable with the ``(module, inputs, output)`` forward-hook signature.
        Each call overwrites ``features[name]`` with the latest output.
    """
    def store_output(module, inputs, output):
        features[name] = output

    return store_output

# Capture the outputs of the extractor's children at index 5 and 6 under the
# keys 'layer2' and 'layer3'.
for child_index, layer_name in ((5, 'layer2'), (6, 'layer3')):
    feature_extractor[child_index].register_forward_hook(get_features_hook(layer_name))

memory_bank = []
print("\nüîç Extracting features from golden samples...")

with torch.no_grad():
    for images in tqdm(golden_loader, desc="Feature Extraction"):
        images = images.to(device)
        # The forward pass is run only for its side effect: the hooks fill `features`.
        feature_extractor(images)
        layer2_features, layer3_features = features['layer2'], features['layer3']

        # Bring the 'layer3' map to the spatial size of the 'layer2' map so the
        # two can be stacked along the channel axis (dim 1).
        upsampled_layer3 = nn.functional.interpolate(
            layer3_features,
            size=layer2_features.shape[2:],
            mode='bilinear',
            align_corners=False,
        )
        combined_features = torch.cat([layer2_features, upsampled_layer3], dim=1)

        # Move the channel axis last, then merge the first three axes so each
        # row is the feature vector of one spatial location of one image.
        channels_last = combined_features.permute(0, 2, 3, 1)
        patch_embeddings = channels_last.flatten(0, 2).cpu().numpy()
        memory_bank.append(patch_embeddings)

# Stack the per-batch arrays into a single 2-D array of feature vectors.
memory_bank = np.concatenate(memory_bank, axis=0)
print(f"‚úÖ Memory bank created with {memory_bank.shape[0]} feature vectors.")

In [None]:
# Coreset Subsampling

def greedy_coreset_subsampling(feature_vectors, percentage=0.01, seed=None):
    """Select a spread-out subset of rows using greedy farthest-point sampling.

    Starting from one randomly chosen row, repeatedly adds the row whose
    Euclidean distance to its nearest already-selected row is largest.

    Args:
        feature_vectors: 2-D array-like of shape (n_vectors, n_features).
        percentage: Fraction of rows to keep. The target count is
            ``int(n_vectors * percentage)``, clamped to ``[1, n_vectors]``.
        seed: Optional seed for picking the first row. With ``None`` (the
            default) the first row is drawn from NumPy's global random state,
            so ``np.random.seed`` set by the caller is still honoured.

    Returns:
        Array holding the selected rows, in selection order.

    Raises:
        ValueError: If ``feature_vectors`` contains no rows.
    """
    feature_vectors = np.asarray(feature_vectors)
    n_vectors = len(feature_vectors)
    if n_vectors == 0:
        raise ValueError("feature_vectors is empty; cannot select a coreset.")

    # Always keep at least one row, and never ask for more rows than exist
    # (asking for more could only ever add duplicate rows).
    n_samples = min(max(int(n_vectors * percentage), 1), n_vectors)

    print(f"\nüß† Starting coreset subsampling to select {n_samples} representative features...")
    if seed is None:
        first_idx = int(np.random.randint(n_vectors))
    else:
        first_idx = int(np.random.default_rng(seed).integers(n_vectors))

    coreset_indices = [first_idx]
    # min_distances[i] is the distance from row i to its nearest selected row.
    min_distances = np.linalg.norm(feature_vectors - feature_vectors[first_idx], axis=1)

    progress = tqdm(range(1, n_samples), desc="Coreset Subsampling")
    for _ in progress:
        # The row farthest from everything chosen so far is selected next.
        next_idx = int(np.argmax(min_distances))
        coreset_indices.append(next_idx)
        new_distances = np.linalg.norm(feature_vectors - feature_vectors[next_idx], axis=1)
        min_distances = np.minimum(min_distances, new_distances)
    return feature_vectors[coreset_indices]

# Keep 1% of the memory-bank feature vectors as the coreset.
coreset = greedy_coreset_subsampling(
    feature_vectors=memory_bank,
    percentage=0.01,
)
print(f"‚úÖ Coreset created. Final size: {coreset.shape[0]} feature vectors.")

In [None]:
# Product Specific Coreset Preparation

# Placeholder value: it becomes the prefix of the output file name, so set it
# to the real product identifier before running this cell.
product_name = "ENTER_PRODUCT_NAME"

# Serialise the coreset array into the current working directory.
with open(f"{product_name}_coreset.pkl", mode="wb") as coreset_file:
    pickle.dump(coreset, coreset_file)

print(f"\nüíæ Product-specific memory coreset saved to {product_name}_coreset.pkl")

Using device: cuda
‚úÖ Universal feature extractor loaded and set to evaluation mode.
Prepared 280 golden samples for on-site adaptation.

üîç Extracting features from golden samples...


Feature Extraction:   0%|          | 0/9 [00:00<?, ?it/s]

‚úÖ Memory bank created with 219520 feature vectors.

üß† Starting coreset subsampling to select 2195 representative features...


Coreset Subsampling:   0%|          | 0/2194 [00:00<?, ?it/s]