In [4]:
import numpy as np  
from matplotlib import pyplot as plt 
from PIL import Image   
from pytorch_lightning import Trainer   
import torch

from anomalib.config import get_configurable_parameters
from anomalib.data import get_datamodule
from anomalib.models import get_model  
from anomalib.data.utils import read_image
from anomalib.utils.callbacks import LoadModelCallback, get_callbacks
from anomalib.models.components import feature_extractors

In [5]:
MODEL = "patchcore"
CONFIG_PATH = f"/home/students/tyang/anomalib/src/anomalib/models/{MODEL}/config.yaml"
with open(file=CONFIG_PATH, mode="r",encoding="utf-8") as f:
    print(f.read())
    
config = get_configurable_parameters(config_path=CONFIG_PATH)


dataset:
  name: airogs
  format: airogs
  path: /home/students/tyang/airogs 
  task: classification # options: [classification, segmentation]
  category: 0
  number_of_samples: 17999
  pre_selection: False
  train_batch_size: 1000
  eval_batch_size: 32
  num_workers: 8
  image_size: 256 # dimensions to which images are resized (mandatory)
  center_crop: 224 # dimensions to which images are center-cropped after resizing (optional)
  normalization: imagenet # data distribution to which the images will be normalized: [none, imagenet]
  transform_config:
    train: null
    eval: null
  test_split_mode: from_dir # options: [from_dir, synthetic]
  test_split_ratio: 0.1 # fraction of train images held out testing (usage depends on test_split_mode)
  val_split_mode: same_as_test # options: [same_as_test, from_test, synthetic]
  val_split_ratio: 0.1 # fraction of train/test images held out for validation (usage depends on val_split_mode)
  tiling:
    apply: false
    tile_size: null
    stri

In [6]:

data_module = get_datamodule(config=config)
data_module.prepare_data() # check if the dataset is avaliable
data_module.setup()




i, train_data = next(enumerate(data_module.train_dataloader()))





In [7]:
import torchvision
from anomalib.models.components.feature_extractors import TorchFXFeatureExtractor
from torchvision.models.densenet import DenseNet201_Weights

import torchvision.models as models
print(torchvision.models.feature_extraction.get_graph_node_names(models.densenet201(pretrained=True)))

feature_extractor = TorchFXFeatureExtractor(
                    backbone="densenet201",
                    return_nodes=["features.denseblock1.denselayer6.conv2", "features.denseblock2.denselayer12.conv2"],
                    weights=DenseNet201_Weights.IMAGENET1K_V1,
                )

#features = feature_extractor(train_data["image"])

  torch.has_cuda,
  torch.has_cudnn,
  torch.has_mps,
  torch.has_mkldnn,


(['x', 'features.conv0', 'features.norm0', 'features.relu0', 'features.pool0', 'features.denseblock1.denselayer1.cat', 'features.denseblock1.denselayer1.norm1', 'features.denseblock1.denselayer1.relu1', 'features.denseblock1.denselayer1.conv1', 'features.denseblock1.denselayer1.norm2', 'features.denseblock1.denselayer1.relu2', 'features.denseblock1.denselayer1.conv2', 'features.denseblock1.denselayer2.cat', 'features.denseblock1.denselayer2.norm1', 'features.denseblock1.denselayer2.relu1', 'features.denseblock1.denselayer2.conv1', 'features.denseblock1.denselayer2.norm2', 'features.denseblock1.denselayer2.relu2', 'features.denseblock1.denselayer2.conv2', 'features.denseblock1.denselayer3.cat', 'features.denseblock1.denselayer3.norm1', 'features.denseblock1.denselayer3.relu1', 'features.denseblock1.denselayer3.conv1', 'features.denseblock1.denselayer3.norm2', 'features.denseblock1.denselayer3.relu2', 'features.denseblock1.denselayer3.conv2', 'features.denseblock1.denselayer4.cat', 'feat

In [5]:
#print(features["features.denseblock1.denselayer6.conv2"].shape)
#print(features["features.denseblock2.denselayer12.conv2"].shape)
#print(train_data["image"].shape)

torch.Size([1000, 32, 56, 56])
torch.Size([1000, 32, 28, 28])
torch.Size([1000, 3, 224, 224])


In [8]:
feature_list = []

for  i, train_data in enumerate(data_module.train_dataloader()):
    features = feature_extractor(train_data["image"])["features.denseblock1.denselayer6.conv2"]
    feature_list.append(features)



KeyboardInterrupt: 

In [None]:
global_feature_tensor = torch.vstack(feature_list)   

In [None]:
print(global_feature_tensor.shape)

torch.Size([15658, 32, 56, 56])


In [None]:
from anomalib.models.components.cluster.kmeans import KMeans

def get_kmeans_centers(feature_t, n_clusters):
    feature_t = feature_t.permute(1,0,2,3)
    feature_t = feature_t.flatten(start_dim=1)
    feature_t= feature_t.permute(1,0)


    kmeans = KMeans(n_clusters=n_clusters)
    kmeans.fit(feature_t)
    cluster_center = kmeans.cluster_centers_
   
    return cluster_center, kmeans



In [None]:
clusters_centers,kmeans = get_kmeans_centers(global_feature_tensor, n_clusters=12)

print(clusters_centers.shape)
print(len(clusters_centers))

torch.Size([12, 32])
12


In [None]:
def bag_of_words_statistics(Ptst, Cref, S):
    """Params:

    Ptst: Feature tensor of a set of images, tensor of shape (N, C, H, W)
    Cref: Cluster centers, tensor of shape (K, C)
    S: number of subregions per image dimension, integer

    Returns:
    bow_stats: list of Bag-of-words statistics, length N, each element is a tensor of shape (S * S, K)
    """
    Ptst = torch.vsplit(Ptst, Ptst.shape[0])
    bow_stats = []
    for Itst in Ptst:
        Itst = Itst.squeeze(0)
        
        subtensors = torch.chunk(Itst, S, dim=1)
        subtensor = [torch.chunk(st, S, dim=2) for st in subtensors]
        
        
        image_bow_stats = torch.zeros(S * S, len(Cref), dtype=torch.float32)
        for i in range(S):
            for j in range(S):
                new_subtensor = subtensor[i][j]
                
                new_subtensor = new_subtensor.flatten(start_dim=1)
                
                new_subtensor = new_subtensor.permute(1,0)
                
                cluster_idx = kmeans.predict(new_subtensor)
                
                cluster_idx = cluster_idx.float()
                hist = torch.histc(cluster_idx, bins = len(Cref), min = torch.min(cluster_idx), max = torch.max(cluster_idx))
                normalized_hist = hist / torch.sum(hist)
                
                image_bow_stats[i * S + j] = normalized_hist
        

        bow_stats.append(image_bow_stats)
            
        
    return bow_stats


In [None]:
global_bow_stats = bag_of_words_statistics(global_feature_tensor, clusters_centers, S=4)

In [None]:
import torch.nn.functional as F

stacked_bow_stats = torch.vstack(global_bow_stats)
stacked_bow_stats_reshaped = stacked_bow_stats.view(len(global_bow_stats),1,16,12)
stacked_bow_stats_transposed = stacked_bow_stats_reshaped.permute(1,0,2,3)
stacked_bow_stats_reshaped[stacked_bow_stats_reshaped == 0 ] = 1e-10
stacked_bow_stats_transposed[stacked_bow_stats_transposed == 0 ] = 1e-10


RuntimeError: [enforce fail at alloc_cpu.cpp:83] err == 0. DefaultCPUAllocator: can't allocate memory: you tried to allocate 188292836352 bytes. Error code 12 (Cannot allocate memory)

In [None]:
kl_divegence = F.kl_div(stacked_bow_stats_reshaped[:2000].log(), stacked_bow_stats_transposed,reduction="none")
print(kl_divegence.shape)
print(kl_divegence[0][1])
sum = torch.sum(kl_divegence,dim=3)
print(sum.shape)
print(sum[0][1])
topk,ids = torch.topk(sum, k=11, dim=2, largest=False, sorted=True)
print(topk.shape)
print(topk[0][1])
dist_matrix = torch.mean(topk,dim=2)
print(dist_matrix.shape)
print(dist_matrix[0][1])
dist_l = torch.sum(dist_matrix,dim=1)
print(dist_l.shape)
topk_far,idxs = torch.topk(sum, k=int(len(dist_l) * 0.1), dim=0, largest=True, sorted=True)
print(idxs)


NameError: name 'F' is not defined

In [None]:
merged_data = {"image_path":[], "label":[] }
for  i, train_data in enumerate(data_module.train_dataloader()):
    selected_data = {index: {"image_path": train_data["image_path"][index], "label": train_data["label"][index]} for index in idxs}
    for key, values in selected_data.items():
        merged_data[key].append(values)

print(merged_data["image_path"])

In [None]:
import csv 
import os
csv_path = "/home/students/tyang/Documents/cpr_trainingdata.csv"

#if not os.path.isfile(csv_path):
with open(csv_path, mode="w", newline="") as csv_file:
        fieldnames = ["image_path", "label"]
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        for i in range(len(merged_data["image_path"])):
            rowdict = {"image_path": merged_data["image_path"][i], "label": merged_data["label"][i]}
            writer.writerow(rowdict)

In [98]:
i, train_data = next(enumerate(data_module.train_dataloader()))
features = feature_extractor(train_data["image"])
feature = features["features.denseblock1.denselayer6.conv2"]
#bow_stats = bag_of_words_statistics(feature, clusters_centers, 4)


In [99]:
#print(len(bow_stats))
#print(bow_stats[1].shape)
#print(bow_stats)

1000
torch.Size([16, 12])
[tensor([[0.0714, 0.0000, 0.0000, 0.0000, 0.7704, 0.0051, 0.0000, 0.0000, 0.1122,
         0.0000, 0.0000, 0.0408],
        [0.1276, 0.0000, 0.0816, 0.0714, 0.1071, 0.1429, 0.0459, 0.0459, 0.0255,
         0.2449, 0.1020, 0.0051],
        [0.0867, 0.0714, 0.0510, 0.0459, 0.1020, 0.0969, 0.0357, 0.1378, 0.0612,
         0.1122, 0.0867, 0.1122],
        [0.0408, 0.0408, 0.0102, 0.7959, 0.0102, 0.0000, 0.0000, 0.0408, 0.0102,
         0.0102, 0.0306, 0.0102],
        [0.0816, 0.0102, 0.0102, 0.0000, 0.5561, 0.0408, 0.0816, 0.0000, 0.1020,
         0.0000, 0.0000, 0.1173],
        [0.0663, 0.0051, 0.0867, 0.0000, 0.1429, 0.1122, 0.0204, 0.0204, 0.1276,
         0.3061, 0.0969, 0.0153],
        [0.1020, 0.0204, 0.0969, 0.0102, 0.1582, 0.1071, 0.0867, 0.0510, 0.0357,
         0.1429, 0.1327, 0.0561],
        [0.0816, 0.0102, 0.5459, 0.0000, 0.0765, 0.1020, 0.0000, 0.0000, 0.0663,
         0.0000, 0.1071, 0.0102],
        [0.0153, 0.0816, 0.0204, 0.5510, 0.0255, 0.09

In [100]:
import torch.nn.functional as F

def kl_distance(i_hist, j_hist):
    """Params:
    i_hist: bow histogram of image i, tensor of shape (1, s * s, K )
    j_hist: bow histogram of image j, tensor of shape (1, s * s, K )

    Returns:
    kl_dist: kl distance between image i and image j, tensor of shape (1)
    """
    i_hist[i_hist == 0] = 1e-10
    j_hist[j_hist == 0] = 1e-10
    kl_divegence = F.kl_div(j_hist.log(), i_hist,reduction="none")
    print(kl_divegence.shape)
    sum = torch.sum(kl_divegence,dim=-1)
    print(sum.shape)
    topk,ids = torch.topk(sum, k=11, dim=0, largest=False, sorted=True)
    print(topk.shape)
    #print(topk)
    dist = torch.mean(topk)
    print(dist.shape)
    return dist
    
    

    

In [116]:
stacked_bow_stats = torch.vstack(bow_stats)
stacked_bow_stats_reshaped = stacked_bow_stats.view(1000,1,16,12)
stacked_bow_stats_transposed = stacked_bow_stats_reshaped.permute(1,0,2,3)
stacked_bow_stats_reshaped[stacked_bow_stats_reshaped == 0 ] = 1e-10
stacked_bow_stats_transposed[stacked_bow_stats_transposed == 0 ] = 1e-10
kl_divegence = F.kl_div(stacked_bow_stats_reshaped.log(), stacked_bow_stats_transposed,reduction="none")
print(kl_divegence.shape)
print(kl_divegence[0][1])
sum = torch.sum(kl_divegence,dim=3)
print(sum.shape)
print(sum[0][1])
topk,ids = torch.topk(sum, k=11, dim=2, largest=False, sorted=True)
print(topk.shape)
print(topk[0][1])
dist_matrix = torch.mean(topk,dim=2)
print(dist_matrix.shape)
print(dist_matrix[0][1])
dist_l = torch.sum(dist_matrix,dim=1)
print(dist_l.shape)
topk_far,idxs = torch.topk(sum, k=int(len(dist_l) * 0.1), dim=0, largest=True, sorted=True)
print(idxs)

torch.Size([1000, 1000, 16, 12])
tensor([[ 1.3497e-01,  4.9381e-01,  9.0550e-02,  8.9146e+00, -1.2949e-01,
         -1.7748e-09,  1.3473e+00,  3.6904e+00, -2.4468e-02,  3.9049e-01,
          1.6751e+00, -1.0609e-02],
        [-1.4349e-02,  1.8817e-01, -2.8292e-02, -2.5567e-02,  0.0000e+00,
          1.1651e-01, -1.1210e-02, -4.8075e-03,  1.2941e-01,  1.0414e-02,
         -3.5365e-02,  4.1057e-02],
        [ 1.0782e-02, -1.9856e-02,  1.1163e-02, -4.8075e-03,  5.4976e-02,
          4.8407e-02, -1.2969e-02, -4.6043e-02,  9.3618e-02,  8.2912e-02,
          1.6584e-02, -4.0227e-02],
        [-1.5013e-02,  9.3499e-02,  1.0970e-01, -2.7584e-01,  1.2415e-01,
          7.0334e-01,  2.8846e-01,  2.5100e-01,  0.0000e+00,  4.4742e-02,
          3.4018e-02,  3.3631e-02],
        [ 3.5745e-02, -1.8441e-09,  6.2061e-03,  2.6743e+00, -1.5178e-01,
         -1.9827e-09,  2.6905e-01,  2.5623e+00, -2.3496e-02,  9.0550e-02,
          6.1047e+00, -1.5997e-02],
        [ 2.3268e-02,  2.8292e-02, -2.1837e-02,

In [14]:
import torch.nn.functional as F

def k_l_divergence(bow_stats_tst, bow_stats_ref):
    """Params:
    bow_stats_image: Bag-of-words statistics of an image, tensor of shape (S * S, K)
    bow_stats_ref: Bag-of-words statistics of reference images, tensor of shape (N, S * S, K)
    
    Returns: Global feautres distance betweetn the test image and the reference images"""
    kl_sum = []
    for i in range(len(bow_stats_ref)):
        kl_sorted =[]
        for j in range(bow_stats_ref[i].shape[0]):
            block_ref = torch.vsplit(bow_stats_ref[i], bow_stats_ref[i].shape[0])[j]
            block_tst = torch.vsplit(bow_stats_tst, bow_stats_tst.shape[0])[j]
            kl = F.kl_div((F.softmax(block_tst)).log(), F.softmax(block_ref))
            kl_sorted.append(kl)
        kl_sorted = torch.Tensor(kl_sorted)
        kl_sorted = torch.topk(kl_sorted, k=11, dim=0, largest=False, sorted=True, out=None)
        kl_sum.append(torch.sum(kl_sorted.values))

    kl_sum = torch.Tensor(kl_sum)
    #print(kl_sum)
    global_distance = torch.sum(kl_sum) / (bow_stats_tst.shape[0] - 5)
    #print(global_distance)
    return global_distance

In [15]:
distance = k_l_divergence(bow_stats[0], bow_stats)
print(distance)

  kl = F.kl_div((F.softmax(block_tst)).log(), F.softmax(block_ref))


tensor(0.3477)


In [26]:
def process_train_data(train_data,  S):
   
   features = feature_extractor(train_data["image"])["features.denseblock1.denselayer6.conv2"]
   bow_stats = bag_of_words_statistics(features, clusters_centers, S)

   distances = []
   for i in range(len(bow_stats)):
      distance = k_l_divergence(bow_stats[i], bow_stats)
      distances.append(distance)
    
   distances = torch.Tensor(distances)
   distances,idx  = torch.topk(distances, k=int(len(train_data["image"])*0.1),  largest=True, sorted=True, out=None)
   
   idx = idx.tolist()

   most_unique_data = {i: {"image_path": data["image_path"], "label": data["label"]} for i, data in enumerate(train_data) if i in idx}
   return most_unique_data,idx

In [27]:
i, data = next(enumerate(data_module.train_dataloader()))
unique_data,idx = process_train_data(data,  S=4)


  kl = F.kl_div((F.softmax(block_tst)).log(), F.softmax(block_ref))


In [28]:
print(unique_data)
print(idx)

{}
[721, 461, 917, 147, 22, 906, 976, 716, 827, 325, 574, 858, 766, 393, 474, 183, 373, 486, 209, 508, 224, 221, 616, 620, 494, 468, 345, 791, 244, 918, 665, 651, 662, 103, 438, 346, 5, 650, 272, 255, 799, 314, 72, 870, 530, 703, 36, 609, 784, 893, 972, 990, 371, 454, 603, 770, 687, 597, 34, 391, 722, 898, 731, 384, 714, 553, 735, 299, 891, 199, 987, 481, 599, 200, 301, 501, 327, 652, 965, 136, 186, 594, 120, 944, 810, 928, 56, 109, 158, 971, 344, 420, 488, 798, 524, 559, 871, 623, 560, 710]


In [21]:
merged_data = {"image_path":[], "label":[]}
for  i, train_data in enumerate(data_module.train_dataloader()):
    unique_data = process_train_data(train_data, 4)
    
    for key, values in unique_data.items():
        merged_data[key].append(values)

KeyboardInterrupt: 

In [None]:
import csv 
import os
csv_path = "/home/students/tyang/Documents/cpr_trainingdata.csv"


with open(csv_path, mode="w", newline="") as csv_file:
        fieldnames = ["image_path", "label"]
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        for i in range(len(merged_data["image_path"])):
            rowdict = {"image_path": merged_data["image_path"][i], "label": merged_data["label"][i]}
            writer.writerow(rowdict)