In [1]:
import numpy as np  
from matplotlib import pyplot as plt 
from PIL import Image    
import torch

from anomalib.config import get_configurable_parameters
from anomalib.data import get_datamodule
from anomalib.models import get_model
from anomalib.models.components import feature_extractors
import torchvision
from anomalib.models.components.feature_extractors import TorchFXFeatureExtractor
from torchvision.models.densenet import DenseNet201_Weights
import torch.nn.functional as F
from anomalib.models.components.cluster.kmeans import KMeans
import torchvision.models as models

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
MODEL = "patchcore"
CONFIG_PATH = f"/home/students/tyang/anomalib/src/anomalib/models/{MODEL}/config_select.yaml"
with open(file=CONFIG_PATH, mode="r",encoding="utf-8") as f:
    print(f.read())
    
config = get_configurable_parameters(config_path=CONFIG_PATH)


dataset:
  name: airogs
  format: airogs
  path: /home/students/tyang/yolov5/runs/detect/exp5/crops
  task: classification # options: [classification, segmentation]
  category: optic disk
  pre_selection: False
  number_of_samples: 17900
  train_batch_size: 1000
  eval_batch_size: 1000
  num_workers: 8
  image_size: 240 # dimensions to which images are resized (mandatory)
  center_crop:  # dimensions to which images are center-cropped after resizing (optional)
  normalization: imagenet # data distribution to which the images will be normalized: [none, imagenet]
  transform_config:
    train: null
    eval: null
  test_split_mode: from_dir # options: [from_dir, synthetic]
  test_split_ratio: 0.5 # fraction of train images held out testing (usage depends on test_split_mode)
  val_split_mode: same_as_test # options: [same_as_test, from_test, synthetic]
  val_split_ratio: 0.5 # fraction of train/test images held out for validation (usage depends on val_split_mode)

  tiling:
    apply: fal

  warn(
  warn(


In [3]:

data_module = get_datamodule(config=config)
data_module.prepare_data() # check if the dataset is avaliable
data_module.setup()



In [4]:
i, train_data = next(enumerate(data_module.train_dataloader()))


In [5]:

feature_extractor = TorchFXFeatureExtractor(
                    backbone="densenet201",
                    return_nodes=["features.denseblock1.denselayer6.conv2"],
                    weights=DenseNet201_Weights.IMAGENET1K_V1,
                )


  torch.has_cuda,
  torch.has_cudnn,
  torch.has_mps,
  torch.has_mkldnn,


In [6]:
# feature shape is (batch_size, channel, height, width) 
feature = feature_extractor(train_data["image"])
print(feature["features.denseblock1.denselayer6.conv2"].shape)


torch.Size([1000, 32, 60, 60])


In [7]:
print(train_data["label"].shape)

torch.Size([1000])


In [8]:
# extract features from the whol training dataset

feature_list = []

for  i, train_data in enumerate(data_module.train_dataloader()):
    features = feature_extractor(train_data["image"])["features.denseblock1.denselayer6.conv2"]
    feature_list.append(features)



In [9]:
# convert the list of features to a tensor
global_feature_tensor = torch.vstack(feature_list)   

In [10]:
# global feature shape is (train_data size, channel, height, width)
print(global_feature_tensor.shape)

torch.Size([8651, 32, 60, 60])


In [None]:
from sklearn.cluster import KMeans


def calculate_WSS(points, kmax):
    feature_t = points.permute(1,0,2,3)
    feature_t = feature_t.flatten(start_dim=1)
    feature_t= feature_t.permute(1,0)
    sse = []
    for k in range(1, kmax+1):
        kmeans = KMeans(n_clusters = k).fit(feature_t)
        centroids = kmeans.cluster_centers_
        pred_clusters = kmeans.predict(feature_t)
        curr_sse = 0

        # calculate square of Euclidean distance of each point from its cluster center and add to current WSS
        for i in range(len(points)):
            curr_center = centroids[pred_clusters[i]]
            curr_sse += (points[i, 0] - curr_center[0]) ** 2 + (points[i, 1] - curr_center[1]) ** 2

        sse.append(curr_sse)
    return sse


In [None]:

from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

import matplotlib.pyplot as plt
import numpy as np

feature_t = global_feature_tensor.permute(1,0,2,3)
feature_t = feature_t.flatten(start_dim=1)
feature_t= feature_t.permute(1,0)

silhouette_avg = []
for i in range(10,20):
    kmeans_fit = KMeans(n_clusters = i).fit(feature_t)
    silhouette_avg.append(silhouette_score(feature_t, kmeans_fit.labels_))


x_ticks = np.linspace(10,20,11)
plt.xticks(x_ticks)
plt.plot(range(10,20), silhouette_avg)

In [11]:
from anomalib.models.components.cluster.kmeans import KMeans

def get_kmeans_centers(feature_tensor, n_clusters):
    """
    Args:
        feature_t: feature tensor, shape is (batch_size, channel, height, width)
        n_clusters: number of clusters
        
    Returns:
        cluster_center: shape is (n_clusters, channel)
        kmeans: kmeans model   """
    
    feature_t = feature_tensor.permute(1,0,2,3)
    feature_t = feature_t.flatten(start_dim=1)
    feature_t= feature_t.permute(1,0)


    kmeans = KMeans(n_clusters=n_clusters)
    kmeans.fit(feature_t)
    cluster_center = kmeans.cluster_centers_
   
    return cluster_center, kmeans



In [12]:
clusters_centers,kmeans = get_kmeans_centers(global_feature_tensor, n_clusters=12)

print(clusters_centers.shape)
print(len(clusters_centers))

torch.Size([12, 32])
12


In [13]:
def bag_of_words_statistics(Ptst, Cref, S):
    """
    Params:
    Ptst: Feature tensor of a set of images, tensor of shape (N, C, H, W)
    Cref: reference Cluster centers, tensor of shape (K, C)
    S: number of subregions per image dimension, integer

    Returns:
    bow_stats: list of normalized Bag-of-words statistics, possibility-like , length N, each element is a tensor of shape (S * S, K)
    """
    Ptst = torch.vsplit(Ptst, Ptst.shape[0])
    bow_stats = []
    for Itst in Ptst:
        Itst = Itst.squeeze(0)
        #print(Itst.shape)
        subtensors = torch.chunk(Itst, S, dim=1)
        subtensor = [torch.chunk(st, S, dim=2) for st in subtensors]
        
        
        image_bow_stats = torch.zeros(S * S, len(Cref), dtype=torch.float32)
        for i in range(S):
            for j in range(S):
                st_value = subtensor[i][j]
                st_value = st_value.flatten(start_dim=1)
                st_value = st_value.permute(1,0)
               # print(st_value.shape)
                
                cluster_idx = kmeans.predict(st_value)
                #print(cluster_idx.shape)
                cluster_idx = cluster_idx.float()

                hist = torch.histc(cluster_idx, bins = len(Cref), min = torch.min(cluster_idx), max = torch.max(cluster_idx))
                normalized_hist = hist / torch.sum(hist)
                image_bow_stats[i * S + j] = normalized_hist
        
        bow_stats.append(image_bow_stats)
            
        
    return bow_stats


In [14]:
global_bow_stats = bag_of_words_statistics(global_feature_tensor, clusters_centers, S=4)

In [15]:
print(len(global_bow_stats))
print(global_bow_stats[0].shape)

8651
torch.Size([16, 12])


In [None]:
import torch.nn.functional as F

def kl_distance(i_hist, j_hist):
    """Params:
    i_hist: bow histogram of image i, tensor of shape (1, s * s, K )
    j_hist: bow histogram of image j, tensor of shape (1, s * s, K )

    Returns:
    kl_dist: kl distance between image i and image j, tensor of shape (1)
    """
    i_hist[i_hist == 0] = 1e-10
    j_hist[j_hist == 0] = 1e-10
    kl_divegence = F.kl_div(j_hist.log(), i_hist,reduction="none")
    
    sum = torch.sum(kl_divegence,dim=-1)
    topk,ids = torch.topk(sum, k=11, dim=0, largest=False, sorted=True)
   
    dist = torch.mean(topk)
  
    return dist
    

In [None]:
def kl_select(bow_stats, select_ratio, step_size):
    """
    Params:
    bow_stats: list of normalized Bag-of-words statistics, possibility-like , length N, each element is a tensor of shape (S * S, K)
    select_ratio: ratio of selected images compare to all images, float
    step_size: batch size, used for big taining dataset, integer

    Returns:
    selected_idx: list of selected image indices with highest kl-divergence in each batch, length number of batches, each element has the size of batch_size * select_ratio
    """
    
    stacked_bow_stats = torch.vstack(bow_stats)
    stacked_bow_stats_reshaped = stacked_bow_stats.view(len(bow_stats),1,bow_stats[0].shape[0],bow_stats[0].shape[1])
    stacked_bow_stats_transposed = stacked_bow_stats_reshaped.permute(1,0,2,3)

    stacked_bow_stats_reshaped[stacked_bow_stats_reshaped == 0 ] = 1e-10
    stacked_bow_stats_transposed[stacked_bow_stats_transposed == 0 ] = 1e-10

    selected_idxs =[]
    selected_distances = []
    for i in range(0, stacked_bow_stats_reshaped.shape[0], step_size):
        current_bow = stacked_bow_stats_reshaped[i:i+step_size if i+step_size < stacked_bow_stats_reshaped.shape[0] else stacked_bow_stats_reshaped.shape[0], :, :, :]
        kl_divegence = F.kl_div(current_bow.log(), stacked_bow_stats_transposed,reduction="none")
       # print(kl_divegence.shape)

        sum = torch.sum(kl_divegence,dim=3)
        #print(sum.shape)
        #topk,ids = torch.topk(sum, k=11, dim=2, largest=False, sorted=True)
        dist_matrix = torch.mean(sum,dim=2)
        #print(dist_matrix.shape)
  
        dist_l = torch.sum(dist_matrix,dim=1)
        topk_far_distances,f_idx = torch.topk(dist_l, k=int(len(dist_l) * select_ratio), dim=0, largest=True, sorted=True)
        #topk_near_distances,n_idx = torch.topk(dist_l, k=int(len(dist_l) * select_ratio ), dim=0, largest=False, sorted=True)
        #selected_idx = torch.cat((f_idx,n_idx),dim=0)
        selected_idxs.append(f_idx)
        #selected_distance = torch.cat((topk_far_distances,topk_near_distances),dim=0)
        selected_distances.append(topk_far_distances)

    
    return selected_idxs, selected_distances

In [16]:
def kl_select(bow_stats, select_ratio, step_size):
    """
    Params:
    bow_stats: list of normalized Bag-of-words statistics, possibility-like , length N, each element is a tensor of shape (S * S, K)
    select_ratio: ratio of selected images compare to all images, float
    step_size: batch size, used for big taining dataset, integer

    Returns:
    selected_idx: list of selected image indices with highest kl-divergence in each batch, length number of batches, each element has the size of batch_size * select_ratio
    """
    
    stacked_bow_stats = torch.vstack(bow_stats)
    stacked_bow_stats_reshaped = stacked_bow_stats.view(len(bow_stats),1,bow_stats[0].shape[0],bow_stats[0].shape[1])
    stacked_bow_stats_transposed = stacked_bow_stats_reshaped.permute(1,0,2,3)

    stacked_bow_stats_reshaped[stacked_bow_stats_reshaped == 0 ] = 1e-10
    stacked_bow_stats_transposed[stacked_bow_stats_transposed == 0 ] = 1e-10

    selected_idxs =[]
    selected_distances = []
    for i in range(0, stacked_bow_stats_reshaped.shape[0], step_size):
        current_bow = stacked_bow_stats_reshaped[i:i+step_size if i+step_size < stacked_bow_stats_reshaped.shape[0] else stacked_bow_stats_reshaped.shape[0], :, :, :]
        kl_divegence = F.kl_div(current_bow.log(), stacked_bow_stats_transposed,reduction="none")

        sum = torch.sum(kl_divegence,dim=3)
        dist_matrix = torch.mean(sum,dim=2)
  
        dist_l = torch.sum(dist_matrix,dim=1)
        topk_far_distances,f_idx = torch.topk(dist_l, k=int(len(dist_l) * select_ratio), dim=0, largest=True, sorted=True)
        selected_idxs.append(f_idx)
        selected_distances.append(topk_far_distances)

    
    return selected_idxs, selected_distances

In [17]:
idxs,distances = kl_select(global_bow_stats, select_ratio=0.4, step_size=1000)

In [18]:
print(len(idxs))
print(idxs[0].shape)
print(idxs[0].shape[0])
print(idxs[1].shape)
stack_distances = torch.cat(distances)
print(stack_distances.shape)

9
torch.Size([400])
400
torch.Size([400])
torch.Size([3460])


In [19]:
def second_selection(index, distance, select_ratio):
    """
    Params:
    idxs: list of selected image indices in each batch , length number of batches
    distances: list of selected image distances, length number of batches
    select_ratio: ratio of selected images compare to all images, float

    Returns:
    selected_idx: list of selected image indices with biggest distances over all in each batches, length number of batches
    """
    selected_distances = [[] for _ in range(len(distance))]
    distance_t = torch.cat(distance)
    topk, global_ids = torch.topk(distance_t, k=int(len(distance_t) * select_ratio ), dim=0, largest=True, sorted=True)
    selected_idxs = [[] for _ in range(len(index))]
    
    for global_id in global_ids:
        batch_id = global_id // index[0].shape[0]
        local_id = global_id % index[0].shape[0]
        selected_idxs[batch_id].append(index[batch_id][local_id])
        selected_distances[batch_id].append(distance_t[global_id])
        
  
    
    return selected_idxs, selected_distances

In [25]:
second_indices,second_dists = second_selection(idxs, distances, select_ratio=0.3)

In [26]:
print(len(second_indices))
print(len(second_dists))
print(second_indices[0])
print(len(second_indices[0]))
print(len(second_indices[1]))

9
9
[tensor(525), tensor(431), tensor(324), tensor(917), tensor(585), tensor(617), tensor(707), tensor(442), tensor(726), tensor(981), tensor(195), tensor(764), tensor(283), tensor(674), tensor(721), tensor(98), tensor(843), tensor(519), tensor(777), tensor(81), tensor(719), tensor(631), tensor(876), tensor(921), tensor(372), tensor(474), tensor(942), tensor(844), tensor(373), tensor(498), tensor(382), tensor(24), tensor(711), tensor(790), tensor(325), tensor(922), tensor(364), tensor(793), tensor(936), tensor(262), tensor(666), tensor(929), tensor(447), tensor(539), tensor(124), tensor(808), tensor(311), tensor(445), tensor(586), tensor(691), tensor(957), tensor(57), tensor(121), tensor(385), tensor(188), tensor(307), tensor(526), tensor(393), tensor(10), tensor(359), tensor(362), tensor(427), tensor(606), tensor(342), tensor(181), tensor(99), tensor(582), tensor(759), tensor(867), tensor(593), tensor(713), tensor(613), tensor(608), tensor(224), tensor(959), tensor(496), tensor(230), 

In [27]:
merged_datas = {"image_path": [], "label": []}
for  i, train_data in enumerate(data_module.train_dataloader()):
    selected_data = ( {"image_path": train_data["image_path"][second_indice], "label": train_data["label"][second_indice]} for second_indice in second_indices[i] )
    
    
    for data in selected_data:
        for key, values in data.items():
            merged_datas[key].append(values)
    

print(merged_datas["image_path"])

['/home/students/tyang/yolov5/runs/detect/exp5/crops/optic disk/TRAIN001168.jpg', '/home/students/tyang/yolov5/runs/detect/exp5/crops/optic disk/TRAIN000946.jpg', '/home/students/tyang/yolov5/runs/detect/exp5/crops/optic disk/TRAIN000704.jpg', '/home/students/tyang/yolov5/runs/detect/exp5/crops/optic disk/TRAIN002012.jpg', '/home/students/tyang/yolov5/runs/detect/exp5/crops/optic disk/TRAIN001281.jpg', '/home/students/tyang/yolov5/runs/detect/exp5/crops/optic disk/TRAIN001352.jpg', '/home/students/tyang/yolov5/runs/detect/exp5/crops/optic disk/TRAIN001550.jpg', '/home/students/tyang/yolov5/runs/detect/exp5/crops/optic disk/TRAIN000967.jpg', '/home/students/tyang/yolov5/runs/detect/exp5/crops/optic disk/TRAIN001594.jpg', '/home/students/tyang/yolov5/runs/detect/exp5/crops/optic disk/TRAIN002133.jpg', '/home/students/tyang/yolov5/runs/detect/exp5/crops/optic disk/TRAIN000427.jpg', '/home/students/tyang/yolov5/runs/detect/exp5/crops/optic disk/TRAIN001678.jpg', '/home/students/tyang/yolov

In [28]:
print(len(merged_datas["image_path"]))

1038


In [29]:
import csv 

csv_path = "/home/students/tyang/Documents/no_robust_1038od.csv"


with open(csv_path, mode="w", newline="") as csv_file:
        fieldnames = ["image_path", "label"]
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        for i in range(len(merged_datas["image_path"])):
            rowdict = {"image_path": merged_datas["image_path"][i], "label": merged_datas["label"][i]}
            writer.writerow(rowdict)