In [1]:
import numpy as np  
from matplotlib import pyplot as plt 
from PIL import Image    
import torch

from anomalib.config import get_configurable_parameters
from anomalib.data import get_datamodule
from anomalib.models import get_model
from anomalib.models.components import feature_extractors
import torchvision
from anomalib.models.components.feature_extractors import TorchFXFeatureExtractor
from torchvision.models.densenet import DenseNet201_Weights
import torch.nn.functional as F
from anomalib.models.components.cluster.kmeans import KMeans
import torchvision.models as models

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
MODEL = "patchcore"
CONFIG_PATH = f"/home/students/tyang/anomalib/src/anomalib/models/{MODEL}/config.yaml"
with open(file=CONFIG_PATH, mode="r",encoding="utf-8") as f:
    print(f.read())
    
config = get_configurable_parameters(config_path=CONFIG_PATH)


dataset:
  name: airogs
  format: airogs
  path: /home/students/tyang/airogs 
  task: classification # options: [classification, segmentation]
  category: 0
  number_of_samples: 17999
  pre_selection: False
  train_batch_size: 1000
  eval_batch_size: 32
  num_workers: 8
  image_size: 256 # dimensions to which images are resized (mandatory)
  center_crop: 224 # dimensions to which images are center-cropped after resizing (optional)
  normalization: imagenet # data distribution to which the images will be normalized: [none, imagenet]
  transform_config:
    train: null
    eval: null
  test_split_mode: from_dir # options: [from_dir, synthetic]
  test_split_ratio: 0.1 # fraction of train images held out testing (usage depends on test_split_mode)
  val_split_mode: same_as_test # options: [same_as_test, from_test, synthetic]
  val_split_ratio: 0.1 # fraction of train/test images held out for validation (usage depends on val_split_mode)
  tiling:
    apply: false
    tile_size: null
    stri

  warn(
  warn(


In [3]:

data_module = get_datamodule(config=config)
data_module.prepare_data() # check if the dataset is avaliable
data_module.setup()

i, train_data = next(enumerate(data_module.train_dataloader()))


In [4]:

feature_extractor = TorchFXFeatureExtractor(
                    backbone="densenet201",
                    return_nodes=["features.denseblock1.denselayer6.conv2"],
                    weights=DenseNet201_Weights.IMAGENET1K_V1,
                )


  torch.has_cuda,
  torch.has_cudnn,
  torch.has_mps,
  torch.has_mkldnn,


In [5]:
# feature shape is (batch_size, channel, height, width) 
feature = feature_extractor(train_data["image"])
print(feature["features.denseblock1.denselayer6.conv2"].shape)


torch.Size([1000, 32, 56, 56])


In [60]:
print(train_data["label"].shape)

torch.Size([658])


In [6]:
# extract features from the whol training dataset

feature_list = []

for  i, train_data in enumerate(data_module.train_dataloader()):
    features = feature_extractor(train_data["image"])["features.denseblock1.denselayer6.conv2"]
    feature_list.append(features)



In [7]:
# convert the list of features to a tensor
global_feature_tensor = torch.vstack(feature_list)   

In [8]:
# global feature shape is (train_data size, channel, height, width)
print(global_feature_tensor.shape)

torch.Size([15658, 32, 56, 56])


In [10]:
from sklearn.cluster import KMeans


def calculate_WSS(points, kmax):
    feature_t = points.permute(1,0,2,3)
    feature_t = feature_t.flatten(start_dim=1)
    feature_t= feature_t.permute(1,0)
    sse = []
    for k in range(1, kmax+1):
        kmeans = KMeans(n_clusters = k).fit(feature_t)
        centroids = kmeans.cluster_centers_
        pred_clusters = kmeans.predict(feature_t)
        curr_sse = 0

        # calculate square of Euclidean distance of each point from its cluster center and add to current WSS
        for i in range(len(points)):
            curr_center = centroids[pred_clusters[i]]
            curr_sse += (points[i, 0] - curr_center[0]) ** 2 + (points[i, 1] - curr_center[1]) ** 2

        sse.append(curr_sse)
    return sse


In [11]:
calculate_WSS(global_feature_tensor, 30)

  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super().

[tensor([[104.5042, 105.8957,  25.3389,  ...,  38.5916,  10.6874,  47.0947],
         [138.2239, 395.0480, 290.1508,  ..., 375.0502, 377.7718,  20.4888],
         [ 42.3854,  32.9052,  14.5907,  ...,   6.9127,  38.5064,  85.6371],
         ...,
         [ 15.5357,  34.0918,  12.2421,  ...,  20.8760,  23.0416,  27.8948],
         [ 64.7280,  90.0123,  48.8952,  ...,  24.1064,  46.9748, 313.3716],
         [  9.8934,  20.8227,  26.5474,  ...,  10.0774,  83.5677,  47.3176]]),
 tensor([[107.2809, 112.0834,  29.9378,  ...,  43.0899,  16.1677,  52.6081],
         [140.5007, 398.3813, 292.0989,  ..., 375.6502, 383.5330,  26.2437],
         [ 49.5899,  38.0357,  21.5539,  ...,  13.3059,  42.7677,  91.0283],
         ...,
         [ 21.3181,  40.3930,  18.1389,  ...,  25.8063,  28.5770,  33.7376],
         [ 69.0450,  94.2504,  54.4442,  ...,  29.1268,  50.8470, 320.8605],
         [ 16.7719,  27.9320,  32.8961,  ...,  17.1227,  89.5425,  55.0643]]),
 tensor([[108.8602, 115.0274,  34.5736,  ...

In [14]:

from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

import matplotlib.pyplot as plt
import numpy as np

feature_t = global_feature_tensor.permute(1,0,2,3)
feature_t = feature_t.flatten(start_dim=1)
feature_t= feature_t.permute(1,0)

silhouette_avg = []
for i in range(10,20):
    kmeans_fit = KMeans(n_clusters = i).fit(feature_t)
    silhouette_avg.append(silhouette_score(feature_t, kmeans_fit.labels_))


x_ticks = np.linspace(10,20,11)
plt.xticks(x_ticks)
plt.plot(range(10,20), silhouette_avg)

  super()._check_params_vs_input(X, default_n_init=10)


KeyboardInterrupt: 

Exception ignored in: 'sklearn.cluster._k_means_common._relocate_empty_clusters_dense'
Traceback (most recent call last):
  File "/work/scratch/tyang/miniconda3/env/anomalib_env/lib/python3.10/site-packages/numpy/core/multiarray.py", line 346, in where
    @array_function_from_c_func_and_dispatcher(_multiarray_umath.where)
KeyboardInterrupt: 


KeyboardInterrupt: 

In [10]:
from anomalib.models.components.cluster.kmeans import KMeans

def get_kmeans_centers(feature_tensor, n_clusters):
    """
    Args:
        feature_t: feature tensor, shape is (batch_size, channel, height, width)
        n_clusters: number of clusters
        
    Returns:
        cluster_center: shape is (n_clusters, channel)
        kmeans: kmeans model   """
    
    feature_t = feature_tensor.permute(1,0,2,3)
    feature_t = feature_t.flatten(start_dim=1)
    feature_t= feature_t.permute(1,0)


    kmeans = KMeans(n_clusters=n_clusters)
    kmeans.fit(feature_t)
    cluster_center = kmeans.cluster_centers_
   
    return cluster_center, kmeans



In [38]:
clusters_centers,kmeans = get_kmeans_centers(global_feature_tensor, n_clusters=12)

print(clusters_centers.shape)
print(len(clusters_centers))

torch.Size([10, 32])
10


In [40]:
def bag_of_words_statistics(Ptst, Cref, S):
    """
    Params:
    Ptst: Feature tensor of a set of images, tensor of shape (N, C, H, W)
    Cref: reference Cluster centers, tensor of shape (K, C)
    S: number of subregions per image dimension, integer

    Returns:
    bow_stats: list of normalized Bag-of-words statistics, possibility-like , length N, each element is a tensor of shape (S * S, K)
    """
    Ptst = torch.vsplit(Ptst, Ptst.shape[0])
    bow_stats = []
    for Itst in Ptst:
        Itst = Itst.squeeze(0)
        #print(Itst.shape)
        subtensors = torch.chunk(Itst, S, dim=1)
        subtensor = [torch.chunk(st, S, dim=2) for st in subtensors]
        
        
        image_bow_stats = torch.zeros(S * S, len(Cref), dtype=torch.float32)
        for i in range(S):
            for j in range(S):
                st_value = subtensor[i][j]
                st_value = st_value.flatten(start_dim=1)
                st_value = st_value.permute(1,0)
               # print(st_value.shape)
                
                cluster_idx = kmeans.predict(st_value)
                #print(cluster_idx.shape)
                cluster_idx = cluster_idx.float()

                hist = torch.histc(cluster_idx, bins = len(Cref), min = torch.min(cluster_idx), max = torch.max(cluster_idx))
                normalized_hist = hist / torch.sum(hist)
                image_bow_stats[i * S + j] = normalized_hist
        
        bow_stats.append(image_bow_stats)
            
        
    return bow_stats


In [41]:
global_bow_stats = bag_of_words_statistics(global_feature_tensor, clusters_centers, S=4)

In [42]:
print(len(global_bow_stats))
print(global_bow_stats[0].shape)

15658
torch.Size([16, 10])


In [14]:
import torch.nn.functional as F

def kl_distance(i_hist, j_hist):
    """Params:
    i_hist: bow histogram of image i, tensor of shape (1, s * s, K )
    j_hist: bow histogram of image j, tensor of shape (1, s * s, K )

    Returns:
    kl_dist: kl distance between image i and image j, tensor of shape (1)
    """
    i_hist[i_hist == 0] = 1e-10
    j_hist[j_hist == 0] = 1e-10
    kl_divegence = F.kl_div(j_hist.log(), i_hist,reduction="none")
    
    sum = torch.sum(kl_divegence,dim=-1)
    topk,ids = torch.topk(sum, k=11, dim=0, largest=False, sorted=True)
   
    dist = torch.mean(topk)
  
    return dist
    

In [43]:
def kl_select(bow_stats, select_ratio, step_size):
    """
    Params:
    bow_stats: list of normalized Bag-of-words statistics, possibility-like , length N, each element is a tensor of shape (S * S, K)
    select_ratio: ratio of selected images compare to all images, float
    step_size: batch size, used for big taining dataset, integer

    Returns:
    selected_idx: list of selected image indices with highest kl-divergence in each batch, length number of batches, each element has the size of batch_size * select_ratio
    """
    
    stacked_bow_stats = torch.vstack(bow_stats)
    stacked_bow_stats_reshaped = stacked_bow_stats.view(len(bow_stats),1,bow_stats[0].shape[0],bow_stats[0].shape[1])
    stacked_bow_stats_transposed = stacked_bow_stats_reshaped.permute(1,0,2,3)

    stacked_bow_stats_reshaped[stacked_bow_stats_reshaped == 0 ] = 1e-10
    stacked_bow_stats_transposed[stacked_bow_stats_transposed == 0 ] = 1e-10

    selected_idxs =[]
    selected_distances = []
    for i in range(0, stacked_bow_stats_reshaped.shape[0], step_size):
        current_bow = stacked_bow_stats_reshaped[i:i+step_size if i+step_size < stacked_bow_stats_reshaped.shape[0] else stacked_bow_stats_reshaped.shape[0], :, :, :]
        kl_divegence = F.kl_div(current_bow.log(), stacked_bow_stats_transposed,reduction="none")
       # print(kl_divegence.shape)

        sum = torch.sum(kl_divegence,dim=3)
        #print(sum.shape)
        #topk,ids = torch.topk(sum, k=11, dim=2, largest=False, sorted=True)
        dist_matrix = torch.mean(sum,dim=2)
        #print(dist_matrix.shape)
  
        dist_l = torch.sum(dist_matrix,dim=1)
        topk_far_distances,f_idx = torch.topk(dist_l, k=int(len(dist_l) * select_ratio), dim=0, largest=True, sorted=True)
        #topk_near_distances,n_idx = torch.topk(dist_l, k=int(len(dist_l) * select_ratio ), dim=0, largest=False, sorted=True)
        #selected_idx = torch.cat((f_idx,n_idx),dim=0)
        selected_idxs.append(f_idx)
        #selected_distance = torch.cat((topk_far_distances,topk_near_distances),dim=0)
        selected_distances.append(topk_far_distances)

    
    return selected_idxs, selected_distances

In [15]:
def kl_select(bow_stats, select_ratio, step_size):
    """
    Params:
    bow_stats: list of normalized Bag-of-words statistics, possibility-like , length N, each element is a tensor of shape (S * S, K)
    select_ratio: ratio of selected images compare to all images, float
    step_size: batch size, used for big taining dataset, integer

    Returns:
    selected_idx: list of selected image indices with highest kl-divergence in each batch, length number of batches, each element has the size of batch_size * select_ratio
    """
    
    stacked_bow_stats = torch.vstack(bow_stats)
    stacked_bow_stats_reshaped = stacked_bow_stats.view(len(bow_stats),1,bow_stats[0].shape[0],bow_stats[0].shape[1])
    stacked_bow_stats_transposed = stacked_bow_stats_reshaped.permute(1,0,2,3)

    stacked_bow_stats_reshaped[stacked_bow_stats_reshaped == 0 ] = 1e-10
    stacked_bow_stats_transposed[stacked_bow_stats_transposed == 0 ] = 1e-10

    selected_idxs =[]
    selected_distances = []
    for i in range(0, stacked_bow_stats_reshaped.shape[0], step_size):
        current_bow = stacked_bow_stats_reshaped[i:i+step_size if i+step_size < stacked_bow_stats_reshaped.shape[0] else stacked_bow_stats_reshaped.shape[0], :, :, :]
        kl_divegence = F.kl_div(current_bow.log(), stacked_bow_stats_transposed,reduction="none")

        sum = torch.sum(kl_divegence,dim=3)
        dist_matrix = torch.mean(topk,dim=2)
  
        dist_l = torch.sum(dist_matrix,dim=1)
        topk_far_distances,f_idx = torch.topk(dist_l, k=int(len(dist_l) * select_ratio), dim=0, largest=True, sorted=True)
        selected_idxs.append(f_idx)
        selected_distances.append(topk_far_distances)

    
    return selected_idxs, selected_distances

In [50]:
idxs,distances = kl_select(global_bow_stats, select_ratio=0.4, step_size=1000)

In [51]:
print(len(idxs))
print(idxs[0].shape)
print(idxs[0].shape[0])
print(idxs[1].shape)
stack_distances = torch.cat(distances)
print(stack_distances.shape)

16
torch.Size([400])
400
torch.Size([400])
torch.Size([6263])


In [52]:
def second_selection(index, distance, select_ratio):
    """
    Params:
    idxs: list of selected image indices in each batch , length number of batches
    distances: list of selected image distances, length number of batches
    select_ratio: ratio of selected images compare to all images, float

    Returns:
    selected_idx: list of selected image indices with biggest distances over all in each batches, length number of batches
    """
    selected_distances = [[] for _ in range(len(distance))]
    distance_t = torch.cat(distance)
    topk, global_ids = torch.topk(distance_t, k=int(len(distance_t) * select_ratio ), dim=0, largest=True, sorted=True)
    selected_idxs = [[] for _ in range(len(index))]
    
    for global_id in global_ids:
        batch_id = global_id // index[0].shape[0]
        local_id = global_id % index[0].shape[0]
        selected_idxs[batch_id].append(index[batch_id][local_id])
        selected_distances[batch_id].append(distance_t[global_id])
        
  
    
    return selected_idxs, selected_distances

In [53]:
second_indices,second_dists = second_selection(idxs, distances, select_ratio=0.2)

In [54]:
print(len(second_indices))
print(len(second_dists))
print(second_indices[0])
print(len(second_indices[0]))
print(len(second_indices[1]))

16
16
[tensor(505), tensor(936), tensor(819), tensor(715), tensor(839), tensor(712), tensor(733), tensor(986), tensor(136), tensor(624), tensor(878), tensor(80), tensor(487), tensor(291), tensor(846), tensor(272), tensor(309), tensor(12), tensor(176), tensor(756), tensor(124), tensor(729), tensor(941), tensor(193), tensor(304), tensor(778), tensor(40), tensor(874), tensor(370), tensor(158), tensor(510), tensor(897), tensor(8), tensor(716), tensor(608), tensor(896), tensor(442), tensor(613), tensor(353), tensor(630), tensor(875), tensor(786), tensor(713), tensor(820), tensor(680), tensor(28), tensor(20), tensor(314), tensor(384), tensor(726), tensor(59), tensor(35), tensor(720), tensor(542), tensor(905), tensor(787), tensor(949), tensor(827), tensor(421), tensor(371), tensor(580), tensor(70), tensor(760), tensor(646), tensor(261), tensor(336)]
66
79


In [55]:
merged_datas = {"image_path": [], "label": []}
for  i, train_data in enumerate(data_module.train_dataloader()):
    selected_data = ( {"image_path": train_data["image_path"][second_indice], "label": train_data["label"][second_indice]} for second_indice in second_indices[i] )
    
    
    for data in selected_data:
        for key, values in data.items():
            merged_datas[key].append(values)
    

print(merged_datas["image_path"])

['/home/students/tyang/airogs/0/TRAIN000580.jpg', '/home/students/tyang/airogs/0/TRAIN001072.jpg', '/home/students/tyang/airogs/0/TRAIN000946.jpg', '/home/students/tyang/airogs/0/TRAIN000822.jpg', '/home/students/tyang/airogs/0/TRAIN000967.jpg', '/home/students/tyang/airogs/0/TRAIN000819.jpg', '/home/students/tyang/airogs/0/TRAIN000845.jpg', '/home/students/tyang/airogs/0/TRAIN001135.jpg', '/home/students/tyang/airogs/0/TRAIN000158.jpg', '/home/students/tyang/airogs/0/TRAIN000718.jpg', '/home/students/tyang/airogs/0/TRAIN001011.jpg', '/home/students/tyang/airogs/0/TRAIN000093.jpg', '/home/students/tyang/airogs/0/TRAIN000562.jpg', '/home/students/tyang/airogs/0/TRAIN000335.jpg', '/home/students/tyang/airogs/0/TRAIN000975.jpg', '/home/students/tyang/airogs/0/TRAIN000313.jpg', '/home/students/tyang/airogs/0/TRAIN000356.jpg', '/home/students/tyang/airogs/0/TRAIN000015.jpg', '/home/students/tyang/airogs/0/TRAIN000202.jpg', '/home/students/tyang/airogs/0/TRAIN000870.jpg', '/home/students/tya

In [56]:
print(len(merged_datas["image_path"]))

1252


In [57]:
import csv 

csv_path = "/home/students/tyang/Documents/no_robust_mean10_1252trainingdata.csv"


with open(csv_path, mode="w", newline="") as csv_file:
        fieldnames = ["image_path", "label"]
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        for i in range(len(merged_datas["image_path"])):
            rowdict = {"image_path": merged_datas["image_path"][i], "label": merged_datas["label"][i]}
            writer.writerow(rowdict)