In [1]:
# Mount Google Drive at /content/drive; every path used later in this notebook
# (package folder, dataset, results) lives below that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import os
# Switch the working directory to the package folder on the mounted Drive.
# NOTE(review): the later `from src import ...` cell is presumably resolved
# relative to this directory — confirm.
_colab_dir = "/content/drive/MyDrive/mvtec_ad_package"
os.chdir(_colab_dir)
# Bare last expression: the notebook displays the new working directory.
os.getcwd()

'/content/drive/MyDrive/mvtec_ad_package'

In [3]:
# `os` was already imported in the previous cell; repeating the import is harmless.
import os
import numpy as np
import torch

# Shell escape: report the interpreter version of the runtime
# (Python 3.8.10 in the recorded run).
!python --version

Python 3.8.10


In [4]:
!pip3 install torch torchvision
!pip3 install faiss_gpu
!pip3 install timm

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting faiss_gpu
  Downloading faiss_gpu-1.7.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (85.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.5/85.5 MB[0m [31m11.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss_gpu
Successfully installed faiss_gpu-1.7.2
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting timm
  Downloading timm-0.6.12-py3-none-any.whl (549 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m549.1/549.1 KB[0m [31m24.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting huggingface-hub
  Downloading huggingface_hub-0.12.0-py3-none-any.whl (190 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m190.3/190.3 KB[0m [31m26.8 MB

In [5]:
from src import *
from src import _BACKBONES, _CLASSNAMES

In [6]:
# data set path
# Root folder of the MVTec AD data on the mounted Drive; passed to MVTecDataset.
data_path = "/content/drive/MyDrive/Data/ImageData/mvtec_ad"
# Dataset tag: prefix of `dataset_name` and attached to the train dataloader as `.name`.
name = 'mvtec'

In [7]:
# ----------
# 'cuda' or 'cpu'
# Compute device handed to the sampler, the PatchCore instances and `fix_seeds`.
# NOTE(review): `set_torch_device([0])` presumably selects GPU 0 when CUDA is
# available — confirm against its definition in `src`.
device = set_torch_device([0])
# device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Arguments of `FaissNN(faiss_on_gpu, faiss_num_workers)`.
faiss_on_gpu = True
faiss_num_workers = 12
# `num_workers` argument of both PyTorch DataLoaders.
num_workers = 12

In [8]:
# ----------
# result is saved here
# Base directory and project name; both are passed to `create_storage_folder`,
# whose return value becomes `run_save_path`.
results_path = '/content/drive/MyDrive/mvtec_ad_package/results'
log_project = 'MVTecAD_Results'

In [9]:
# ----------
# backbones
# Show the identifiers registered in `_BACKBONES`.
print(_BACKBONES.keys())

# set backbones
# set layers manually
# With a single backbone the layer names are used as written. With several
# backbones the layer-grouping cell expects each layer name to be prefixed with
# the backbone's position, e.g. '0.layer2'.

# WideResNet50
# backbone_names = ['wideresnet50']
# layers_to_extract_from = ['layer2', 'layer3']

# MobileNetV2_100
backbone_names = ['mobilenetv2_100']
layers_to_extract_from = ['blocks.2', 'blocks.3']

dict_keys(['alexnet', 'bninception', 'resnet50', 'resnet101', 'resnext101', 'resnet200', 'resnest50', 'resnetv2_50_bit', 'resnetv2_50_21k', 'resnetv2_101_bit', 'resnetv2_101_21k', 'resnetv2_152_bit', 'resnetv2_152_21k', 'resnetv2_152_384', 'resnetv2_101', 'vgg11', 'vgg19', 'vgg19_bn', 'wideresnet50', 'wideresnet101', 'mnasnet_100', 'mnasnet_a1', 'mnasnet_b1', 'densenet121', 'densenet201', 'inception_v4', 'vit_small', 'vit_base', 'vit_large', 'vit_r50', 'vit_deit_base', 'vit_deit_distilled', 'vit_swin_base', 'vit_swin_large', 'efficientnet_b7', 'efficientnet_b5', 'efficientnet_b3', 'efficientnet_b1', 'efficientnetv2_m', 'efficientnetv2_l', 'efficientnet_b3a', 'mobilenetv2_100'])


In [10]:
# ----------
# resize and crop size
# Passed to MVTecDataset as `resize=` and `imagesize=`; the recorded run reports
# an image size of (3, 224, 224).
resize = 256
cropsize = 224

# embedding dimension
# Both values are forwarded unchanged to `PatchCore.load`.
pretrain_embed_dimension = 1024
target_embed_dimension = 1024

# coreset subsampling
# Forwarded to the sampler as `percentage`
# (presumably the fraction of patch features that is kept — confirm in `src`).
percentage = 0.1

# number of nearest neighbours
# Forwarded to `PatchCore.load` as `anomaly_scorer_num_nn`.
anomaly_scorer_num_nn = 5

# patchsize
# Forwarded to `PatchCore.load` as `patchsize`.
patchsize = 3

In [11]:
# ----------
# Build the log-group tag of this run and create its storage folder.
# Tag used for the WideResNet50 configuration:
# log_group = f'IM{cropsize}_WR50_L2-3_P01_D{pretrain_embed_dimension}-{target_embed_dimension}_PS-{patchsize}_AN-1_S0'
log_group = (
    f'IM{cropsize}_MBNV2100_B2-3_P01'
    f'_D{pretrain_embed_dimension}-{target_embed_dimension}'
    f'_PS-{patchsize}'
)

run_save_path = create_storage_folder(results_path, log_project, log_group, mode="iterate")

print(run_save_path)

/content/drive/MyDrive/mvtec_ad_package/results/MVTecAD_Results/IM224_MBNV2100_B2-3_P01_D1024-1024_PS-3_0


In [12]:
# ----------------------------------------------------------------------------------------------------------------------
# base setting-2:  select category (for MVTec)
# ----------------------------------------------------------------------------------------------------------------------

# Position of the category inside `_CLASSNAMES`; index 9 resolved to 'screw'
# in the recorded run.
idx = 9
mvtec_classname = _CLASSNAMES[idx]

print(f'classname: {mvtec_classname}')

# Identifier reused for the output sub-folders and for the row of the results table.
dataset_name = f'{name}_{mvtec_classname}'

classname: screw


In [14]:
########################################################################################################################
# ----------------------------------------------------------------------------------------------------------------------
# data loaders
# ----------------------------------------------------------------------------------------------------------------------

# Loader / dataset settings.
batch_size = 32
train_val_split = 1.0
seed = 0

# Keyword arguments that the train and the test dataset have in common.
_dataset_common = dict(
    classname=mvtec_classname,
    resize=resize,
    imagesize=cropsize,
    seed=seed,
)

train_dataset = MVTecDataset(
    data_path,
    split=DatasetSplit.TRAIN,
    train_val_split=train_val_split,
    augment=True,
    **_dataset_common,
)

test_dataset = MVTecDataset(
    data_path,
    split=DatasetSplit.TEST,
    **_dataset_common,
)

# Both loaders use identical settings and iterate without shuffling.
_loader_common = dict(
    batch_size=batch_size,
    shuffle=False,
    num_workers=num_workers,
    pin_memory=True,
)

train_dataloader = torch.utils.data.DataLoader(train_dataset, **_loader_common)
test_dataloader = torch.utils.data.DataLoader(test_dataset, **_loader_common)


# Attach the dataset tag to the training loader.
train_dataloader.name = name


# ----------
torch.cuda.empty_cache()

# Image shape as reported by the training dataset.
imagesize = train_dataloader.dataset.imagesize

print(f'image size: {imagesize}')

image size: (3, 224, 224)


In [15]:
def _group_layers_by_backbone(backbone_names, layers):
    """Split the flat layer list into one list of layer names per backbone.

    With at most one backbone the list is used as is. With several backbones
    every entry must look like "<backbone index>.<layer name>" (e.g. "0.layer2");
    the numeric prefix selects the backbone and is stripped from the stored name.

    Doing this inside a function keeps the loop variables out of the notebook
    namespace, so the category index `idx` chosen earlier is not overwritten.

    Args:
        backbone_names: list of backbone identifiers.
        layers: flat list of layer names (prefixed when several backbones are used).

    Returns:
        A list with one list of layer names per backbone.

    Raises:
        ValueError: if a prefix is not an integer.
        IndexError: if a prefix is out of range for the backbone list.
    """
    if len(backbone_names) <= 1:
        return [layers]
    grouped = [[] for _ in backbone_names]
    for entry in layers:
        # Everything before the first "." is the backbone index, the rest is the layer name.
        prefix, _, layer_name = entry.partition(".")
        grouped[int(prefix)].append(layer_name)
    return grouped


layers_to_extract_from_coll = _group_layers_by_backbone(
    backbone_names, layers_to_extract_from
)

print(f'layers: {layers_to_extract_from_coll}')


layers: [['blocks.2', 'blocks.3']]


In [16]:
# ----------------------------------------------------------------------------------------------------------------------
# set sampler:  ApproximateGreedyCoresetSampler
# ----------------------------------------------------------------------------------------------------------------------

# Alternative samplers, currently disabled:
# sampler = IdentitySampler()


# sampler = GreedyCoresetSampler(
#     percentage=percentage,
#     device=device,
#     dimension_to_project_features_to=128
# )


# Sampler actually used: it is handed to every PatchCore instance as
# `featuresampler`, so this cell has to run before the PatchCore loading cell.
sampler = ApproximateGreedyCoresetSampler(
    percentage=percentage,
    device=device,
    number_of_starting_points=10,
    dimension_to_project_features_to=128
)

In [17]:
########################################################################################################################
# ----------------------------------------------------------------------------------------------------------------------
# patchcore instance, loader
# ----------------------------------------------------------------------------------------------------------------------

loaded_patchcores = []

# Build one PatchCore instance per backbone.
# The loop variables deliberately use their own names: iterating with
# `layers_to_extract_from` as the loop target would overwrite the notebook-level
# setting of the same name, and the layer-grouping cell could then no longer be
# re-run when several backbones are configured.
for backbone_spec, backbone_layers in zip(backbone_names, layers_to_extract_from_coll):
    # An optional ".seed-<int>" suffix on the backbone name carries a seed.
    backbone_name, backbone_seed = backbone_spec, None
    if ".seed-" in backbone_spec:
        backbone_name = backbone_spec.split(".seed-")[0]
        backbone_seed = int(backbone_spec.split("-")[-1])
    backbone = load(backbone_name)
    # The training cell reads `backbone.seed` to decide whether to fix the seeds.
    backbone.name, backbone.seed = backbone_name, backbone_seed

    # A separate nearest-neighbour object is created for every model.
    nn_method = FaissNN(faiss_on_gpu, faiss_num_workers)

    patchcore_instance = PatchCore(device)
    patchcore_instance.load(
        backbone=backbone,
        layers_to_extract_from=backbone_layers,
        device=device,
        input_shape=imagesize,
        pretrain_embed_dimension=pretrain_embed_dimension,
        target_embed_dimension=target_embed_dimension,
        patchsize=patchsize,
        featuresampler=sampler,
        anomaly_scorer_num_nn=anomaly_scorer_num_nn,
        nn_method=nn_method,
    )
    loaded_patchcores.append(patchcore_instance)
    

Downloading: "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/mobilenetv2_100_ra-b33bc2c4.pth" to /root/.cache/torch/hub/checkpoints/mobilenetv2_100_ra-b33bc2c4.pth


In [None]:
# ----------
# Sanity check: number of loaded models and the printed form of the first one.
print(len(loaded_patchcores))
print(loaded_patchcores[0])

In [19]:

########################################################################################################################
# ----------------------------------------------------------------------------------------------------------------------
# Training
# ----------------------------------------------------------------------------------------------------------------------

# Fit every loaded PatchCore model on the training loader.
for i, PC in enumerate(loaded_patchcores):
    torch.cuda.empty_cache()
    print(f"Training models ({i + 1}/{len(loaded_patchcores)})")
    # Seeds are only fixed for backbones that carry one.
    backbone_seed_value = PC.backbone.seed
    if backbone_seed_value is not None:
        fix_seeds(backbone_seed_value, device)
    torch.cuda.empty_cache()
    PC.fit(train_dataloader)
  

Training models (1/1)



Computing support features...:   0%|          | 0/10 [00:00<?, ?it/s][A
Computing support features...:  10%|█         | 1/10 [00:18<02:50, 18.90s/it][A
Computing support features...:  20%|██        | 2/10 [00:19<01:05,  8.24s/it][A
Computing support features...:  30%|███       | 3/10 [00:20<00:33,  4.77s/it][A
Computing support features...:  40%|████      | 4/10 [00:21<00:19,  3.18s/it][A
Computing support features...:  50%|█████     | 5/10 [00:21<00:11,  2.32s/it][A
Computing support features...:  60%|██████    | 6/10 [00:22<00:07,  1.85s/it][A
Computing support features...:  70%|███████   | 7/10 [00:23<00:04,  1.52s/it][A
Computing support features...:  80%|████████  | 8/10 [00:24<00:02,  1.35s/it][A
Computing support features...:  90%|█████████ | 9/10 [00:25<00:01,  1.18s/it][A
Computing support features...: 100%|██████████| 10/10 [00:26<00:00,  1.07s/it][A
Subsampling...: 100%|██████████| 25088/25088 [00:47<00:00, 528.05it/s]


In [20]:
# ----------------------------------------------------------------------------------------------------------------------
# Embedding test data
# ----------------------------------------------------------------------------------------------------------------------

torch.cuda.empty_cache()

# Collects, per model, the image-level scores and the pixel-level anomaly maps.
aggregator = {"scores": [], "segmentations": []}

for i, PC in enumerate(loaded_patchcores):
    torch.cuda.empty_cache()
    print(f"Embedding test data with models ({i + 1}/{len(loaded_patchcores)})")
    # labels_gt / masks_gt are rebound on every pass, so after the loop they hold
    # the values returned for the last model.
    scores, segmentations, labels_gt, masks_gt = PC.predict(test_dataloader)
    aggregator["scores"].append(scores)
    aggregator["segmentations"].append(segmentations)


# ----------
# results of the first model: one entry per test image
first_model_scores = aggregator['scores'][0]
first_model_maps = aggregator['segmentations'][0]
print(len(first_model_scores))
print(len(first_model_maps))


# ----------
# 1st image of the test data
idx_img = 0
print(first_model_scores[idx_img])
print(first_model_maps[idx_img].shape)  # (cropsize, cropsize)
print(first_model_maps[idx_img])


Embedding test data with models (1/1)


                                                           

160
160
18228.297
(224, 224)
[[ 5870.67    5886.0713  5917.024  ... 10350.856  10382.912  10399.035 ]
 [ 5869.366   5885.335   5917.411  ... 10347.728  10378.965  10394.677 ]
 [ 5867.2197  5884.3027  5918.587  ... 10341.645  10371.269  10386.168 ]
 ...
 [ 6295.427   6315.8784  6356.268  ...  5710.446   5562.359   5487.712 ]
 [ 6228.9507  6250.857   6294.1187 ...  5738.443   5586.8037  5510.3525]
 [ 6195.5356  6218.1772  6262.8906 ...  5752.615   5599.185   5521.824 ]]




In [21]:
########################################################################################################################
# ----------------------------------------------------------------------------------------------------------------------
# score - 1:  anomaly scores  (normalize by min-max scaling)
# ----------------------------------------------------------------------------------------------------------------------

# Stack the per-model image scores: one row per model.
scores = np.array(aggregator["scores"])

# Row-wise extrema, kept as column vectors so they broadcast across the images.
min_scores = scores.min(axis=-1, keepdims=True)
max_scores = scores.max(axis=-1, keepdims=True)
score_span = max_scores - min_scores
scores = (scores - min_scores) / score_span
# Average over the model axis.
scores = scores.mean(axis=0)

print(f'min score: {min_scores}    max score:  {max_scores}')
print(len(scores))

# normalized anomaly score of the image selected by idx_img
print(scores[idx_img])


min score: [[17260.562]]    max score:  [[53751.22]]
160
0.02652006


In [22]:
# ----------------------------------------------------------------------------------------------------------------------
# get label  (note that this is ground truth not prediction)
#  - False:  good
#  - True:  defective
# ----------------------------------------------------------------------------------------------------------------------

idx_img = 100
# Entries of the test dataset; each one is laid out as
#   [0] category, [1] anomaly type ('good' for defect-free parts),
#   [2] image path, [3] ground-truth mask path
tmp = test_dataloader.dataset.data_to_iterate
print(tmp[idx_img])


# Ground-truth flag per test image: True for every anomaly type other than "good".
anomaly_labels = [entry[1] != "good" for entry in tmp]

print(anomaly_labels)


['screw', 'scratch_neck', '/content/drive/MyDrive/Data/ImageData/mvtec_ad/screw/test/scratch_neck/011.png', '/content/drive/MyDrive/Data/ImageData/mvtec_ad/screw/ground_truth/scratch_neck/011_mask.png']
[False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True,

In [23]:
# ----------------------------------------------------------------------------------------------------------------------
# score - 2:  segmentation  (normalize by min-max scaling)
# ----------------------------------------------------------------------------------------------------------------------

# Stack the per-model anomaly maps.
segmentations = np.array(aggregator["segmentations"])

# Flatten each model's maps once and take the extrema over all of its pixels;
# the (-1, 1, 1, 1) shape lets them broadcast against the 4-D stack.
flat_maps = segmentations.reshape(len(segmentations), -1)
min_scores = flat_maps.min(axis=-1).reshape(-1, 1, 1, 1)
max_scores = flat_maps.max(axis=-1).reshape(-1, 1, 1, 1)

print(f'min score: {min_scores}    max score:  {max_scores}')


# ----------
# min-max scaling per model
score_span = max_scores - min_scores
segmentations = (segmentations - min_scores) / score_span
# (1, # of test images, cropsize, cropsize) in the recorded run
print(segmentations.shape)


# average over the model axis (axis=0), which removes that axis
segmentations = segmentations.mean(axis=0)
# now the dimension is (# of test images, cropsize, cropsize)
print(segmentations.shape)


min score: [[[[2653.461]]]]    max score:  [[[[47514.04]]]]
(1, 160, 224, 224)
(160, 224, 224)


In [24]:
########################################################################################################################
# ----------------------------------------------------------------------------------------------------------------------
# plot and save
#  - input test image, mask (ground truth), segmentation by each image
# ----------------------------------------------------------------------------------------------------------------------

# Each dataset entry carries the image path at index 2 and the path of the
# ground-truth mask at index 3.
_test_entries = test_dataloader.dataset.data_to_iterate
image_paths = [entry[2] for entry in _test_entries]
mask_paths = [entry[3] for entry in _test_entries]

# Output folder for the segmentation figures of this category.
image_save_path = os.path.join(run_save_path, "segmentation_images", dataset_name)
os.makedirs(image_save_path, exist_ok=True)

def image_transform(image):
    """Apply the test dataset's image transform and convert the result to uint8.

    The transformed tensor is multiplied by the per-channel std, shifted by the
    per-channel mean, scaled by 255, clipped to [0, 255] and cast to ``np.uint8``.
    """
    # Shape (3, 1, 1): one value per channel, broadcast over height and width.
    channel_std = np.array([0.229, 0.224, 0.225]).reshape(-1, 1, 1)
    channel_mean = np.array([0.485, 0.456, 0.406]).reshape(-1, 1, 1)
    transformed = test_dataloader.dataset.transform_img(image).numpy()
    rescaled = (transformed * channel_std + channel_mean) * 255
    return np.clip(rescaled, 0, 255).astype(np.uint8)


def mask_transform(mask):
    """Apply the test dataset's mask transform and return the result as a NumPy array."""
    transformed = test_dataloader.dataset.transform_mask(mask)
    return transformed.numpy()


# Plot input image, ground-truth mask and predicted segmentation for the test
# images; `image_save_path` is passed as the output location.
plot_segmentation_images(
    image_save_path,
    image_paths,
    segmentations,
    scores,
    mask_paths,
    image_transform=image_transform,
    mask_transform=mask_transform,
    save_depth=4,
)



In [25]:
########################################################################################################################
# ----------------------------------------------------------------------------------------------------------------------
# compute evaluation metrics
# ----------------------------------------------------------------------------------------------------------------------

# Image-wise AUROC from the normalized scores and the ground-truth labels.
auroc = compute_imagewise_retrieval_metrics(scores, anomaly_labels)["auroc"]


# ----------
# Pixel-wise AUROC over all test images
pixel_scores = compute_pixelwise_retrieval_metrics(segmentations, masks_gt)
full_pixel_auroc = pixel_scores["auroc"]


# ----------
# Pixel-wise AUROC restricted to images whose ground-truth mask is not empty
sel_idxs = [i for i, mask in enumerate(masks_gt) if np.sum(mask) > 0]

pixel_scores = compute_pixelwise_retrieval_metrics(
    [segmentations[i] for i in sel_idxs],
    [masks_gt[i] for i in sel_idxs],
)

anomaly_pixel_auroc = pixel_scores["auroc"]


In [26]:
# ----------------------------------------------------------------------------------------------------------------------
# arrange metrics and save
#  - note that this part should be run for all category, but here only 1 category
# ----------------------------------------------------------------------------------------------------------------------

# Metrics of this category. "dataset_name" has to stay the first key: the
# slicing further down drops the first entry of every record.
result_collect = [
    {
        "dataset_name": dataset_name,
        "instance_auroc": auroc,
        "full_pixel_auroc": full_pixel_auroc,
        "anomaly_pixel_auroc": anomaly_pixel_auroc,
    }
]

for key, item in result_collect[-1].items():
    if key == "dataset_name":
        continue
    print(f"{key}: {item:3.3f}")

print(result_collect)


# ---------
# Store all results and mean scores to a csv-file

latest_record = result_collect[-1]
result_metric_names = list(latest_record)[1:]

result_dataset_names = [record["dataset_name"] for record in result_collect]

result_scores = [list(record.values())[1:] for record in result_collect]

# Kept as the last expression so the notebook displays the returned value.
compute_and_store_final_results(
    run_save_path,
    result_scores,
    column_names=result_metric_names,
    row_names=result_dataset_names,
)


instance_auroc: 0.953
full_pixel_auroc: 0.995
anomaly_pixel_auroc: 0.994
[{'dataset_name': 'mvtec_screw', 'instance_auroc': 0.9534740725558516, 'full_pixel_auroc': 0.9949314588309308, 'anomaly_pixel_auroc': 0.9935867047774528}]


{'mean_instance_auroc': 0.9534740725558516,
 'mean_full_pixel_auroc': 0.9949314588309308,
 'mean_anomaly_pixel_auroc': 0.9935867047774528}

In [27]:
########################################################################################################################
# ----------------------------------------------------------------------------------------------------------------------
# Store PatchCore model for later re-use
# ----------------------------------------------------------------------------------------------------------------------

# Folder that receives the stored models of this category.
save_path = os.path.join(run_save_path, "models", dataset_name)
os.makedirs(save_path, exist_ok=True)

print(save_path)


# With several models an "Ensemble-<k>-<n>_" prefix is passed to the save call;
# a single model is saved with an empty prefix.
model_count = len(loaded_patchcores)
for i, PC in enumerate(loaded_patchcores):
    prepend = f"Ensemble-{i + 1}-{model_count}_" if model_count > 1 else ""
    PC.save_to_path(save_path, prepend)


/content/drive/MyDrive/mvtec_ad_package/results/MVTecAD_Results/IM224_MBNV2100_B2-3_P01_D1024-1024_PS-3_0/models/mvtec_screw
