In [22]:
# Mount Google Drive into the Colab runtime at /content/drive.
from google.colab import drive

drive.mount("/content/drive")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [23]:
# Switch the working directory to the package folder on the mounted Drive.
# The trailing expression echoes the directory actually in effect.
import os

_colab_dir = "/content/drive/MyDrive/mvtec_ad_package"

os.chdir(_colab_dir)
os.getcwd()

'/content/drive/MyDrive/mvtec_ad_package'

In [24]:
import os
import numpy as np
import torch
import torch.nn.functional as F
import timm
import torchvision.models as models

!python --version

Python 3.8.10


In [25]:
!pip3 install torch torchvision
!pip3 install faiss_gpu
!pip3 install timm

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [26]:
from src import *
from src import _BACKBONES, _CLASSNAMES

In [27]:
##################################################################################################################
# ----------------------------------------------------------------------------------------------------------------
# list of backbones and their loading code
# ----------------------------------------------------------------------------------------------------------------

# every backbone name known to the registry
print(_BACKBONES.keys())


# ----------
# each registry value is the loading code for that backbone, kept as text;
# show it for the two backbones used in this walkthrough (one per line)
print(
    *(_BACKBONES[name] for name in ('wideresnet50', 'mobilenetv2_100')),
    sep="\n",
)


dict_keys(['alexnet', 'bninception', 'resnet50', 'resnet101', 'resnext101', 'resnet200', 'resnest50', 'resnetv2_50_bit', 'resnetv2_50_21k', 'resnetv2_101_bit', 'resnetv2_101_21k', 'resnetv2_152_bit', 'resnetv2_152_21k', 'resnetv2_152_384', 'resnetv2_101', 'vgg11', 'vgg19', 'vgg19_bn', 'wideresnet50', 'wideresnet101', 'mnasnet_100', 'mnasnet_a1', 'mnasnet_b1', 'densenet121', 'densenet201', 'inception_v4', 'vit_small', 'vit_base', 'vit_large', 'vit_r50', 'vit_deit_base', 'vit_deit_distilled', 'vit_swin_base', 'vit_swin_large', 'efficientnet_b7', 'efficientnet_b5', 'efficientnet_b3', 'efficientnet_b1', 'efficientnetv2_m', 'efficientnetv2_l', 'efficientnet_b3a', 'mobilenetv2_100'])
models.wide_resnet50_2(pretrained=True)
timm.create_model("mobilenetv2_100", pretrained=True)


In [28]:
  # ----------------------------------------------------------------------------------------------------------------
# set backbone and target layers
# ----------------------------------------------------------------------------------------------------------------

# Exactly one backbone is configured here, so `backbone_names` has a single entry.

# Alternative: WideResNet50
# backbone_names = ['wideresnet50']
# layers_to_extract_from = ['layer2', 'layer3']


# Active choice: MobileNetV2_100, tapping two of its `blocks` stages
backbone_names = ["mobilenetv2_100"]
layers_to_extract_from = ["blocks.2", "blocks.3"]


In [29]:
# ----------------------------------------------------------------------------------------------------------------
# arrange layers
# ----------------------------------------------------------------------------------------------------------------

# Build one list of layer names per backbone.
if len(backbone_names) <= 1:
    # Single backbone: all requested layers belong to it.
    layers_to_extract_from_coll = [layers_to_extract_from]
else:
    # Several backbones: each entry is "<backbone index>.<layer path>".
    # The integer prefix picks the bucket; the remainder is the layer name.
    layers_to_extract_from_coll = [[] for _ in backbone_names]
    for layer in layers_to_extract_from:
        parts = layer.split(".")
        idx = int(parts[0])
        layer = ".".join(parts[1:])
        layers_to_extract_from_coll[idx].append(layer)

print(f'layers: {layers_to_extract_from_coll}')


layers: [['blocks.2', 'blocks.3']]


In [30]:
# ----------------------------------------------------------------------------------------------------------------
# check backbone
# ----------------------------------------------------------------------------------------------------------------

# This walkthrough uses a single backbone: the first configured name.
backbone_name = backbone_names[0]


# ----------
# load
# The registry stores the constructor call as source text: show it, then evaluate it.
# NOTE(review): eval() runs whatever string the registry holds; that is acceptable
# only as long as _BACKBONES comes from the project's own `src` package.
load_code = _BACKBONES[backbone_name]
print(load_code)

backbone = eval(load_code)
backbone.name = backbone_name


# ----------
# check backbone: full repr first, then the qualified name of every submodule
print(backbone)

for module_name, module in backbone.named_modules():
    print(module_name)


timm.create_model("mobilenetv2_100", pretrained=True)
EfficientNet(
  (conv_stem): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
  (bn1): BatchNormAct2d(
    32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
    (drop): Identity()
    (act): ReLU6(inplace=True)
  )
  (blocks): Sequential(
    (0): Sequential(
      (0): DepthwiseSeparableConv(
        (conv_dw): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
        (bn1): BatchNormAct2d(
          32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): ReLU6(inplace=True)
        )
        (se): Identity()
        (conv_pw): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn2): BatchNormAct2d(
          16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): Identity()
        )
        (drop_path): Identity()
 

In [31]:
##################################################################################################################
# ----------------------------------------------------------------------------------------------------------------
# PatchCore instance
# ----------------------------------------------------------------------------------------------------------------

# ----------------------------------------------------------------------------------------------------------------
# load backbone to device
# ----------------------------------------------------------------------------------------------------------------

# Use the GPU when torch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Put the backbone on the selected device.
backbone = backbone.to(device)

In [32]:
# ----------------------------------------------------------------------------------------------------------------
# patch maker
# ----------------------------------------------------------------------------------------------------------------

# Patch geometry handed to PatchMaker: patch size and stride between patches.
patchsize, patchstride = 3, 1

patch_maker = PatchMaker(patchsize, stride=patchstride)

In [33]:
# ----------------------------------------------------------------------------------------------------------------
# construct forward modules
# ----------------------------------------------------------------------------------------------------------------

# Container for all forward-pass modules; filled step by step below.
forward_modules = torch.nn.ModuleDict()

# Network input: 3 channels at cropsize x cropsize.
cropsize = 224
input_shape = (3, cropsize, cropsize)


# ----------
# 1. feature aggregator
# Wraps the backbone together with the names of the layers to read from.
feature_aggregator = NetworkFeatureAggregator(
    backbone, layers_to_extract_from, device
)

# Ask the aggregator for its per-layer feature dimensions given `input_shape`.
feature_dimensions = feature_aggregator.feature_dimensions(input_shape)

forward_modules["feature_aggregator"] = feature_aggregator


In [34]:
# ----------
# 2. preprocessing
# Original author's note: adaptive_avg_pool1d (MeanMapper) maps each layer's
# features to `pretrain_embed_dimension`.
pretrain_embed_dimension = 1024
# pretrain_embed_dimension = 500  # smaller alternative

preprocessing = Preprocessing(
    feature_dimensions,
    pretrain_embed_dimension,
)
forward_modules["preprocessing"] = preprocessing


In [35]:
# ----------
# 3. preadapt aggregator
# Original author's note: adaptive_avg_pool1d,
#   batchsize x number_of_layers x input_dim -> batchsize x target_embed_dimension
target_embed_dimension = 1024
# target_embed_dimension = 250  # smaller alternative

preadapt_aggregator = Aggregator(target_dim=target_embed_dimension)
# Bind the return value to `_` so the module repr is not echoed by the cell.
_ = preadapt_aggregator.to(device)

forward_modules["preadapt_aggregator"] = preadapt_aggregator


# ----------
# All forward modules are registered now; list the qualified name of each submodule.
for module_name, module in forward_modules.named_modules():
    print(module_name)


feature_aggregator
feature_aggregator.backbone
feature_aggregator.backbone.conv_stem
feature_aggregator.backbone.bn1
feature_aggregator.backbone.bn1.drop
feature_aggregator.backbone.bn1.act
feature_aggregator.backbone.blocks
feature_aggregator.backbone.blocks.0
feature_aggregator.backbone.blocks.0.0
feature_aggregator.backbone.blocks.0.0.conv_dw
feature_aggregator.backbone.blocks.0.0.bn1
feature_aggregator.backbone.blocks.0.0.bn1.drop
feature_aggregator.backbone.blocks.0.0.bn1.act
feature_aggregator.backbone.blocks.0.0.se
feature_aggregator.backbone.blocks.0.0.conv_pw
feature_aggregator.backbone.blocks.0.0.bn2
feature_aggregator.backbone.blocks.0.0.bn2.drop
feature_aggregator.backbone.blocks.0.0.bn2.act
feature_aggregator.backbone.blocks.0.0.drop_path
feature_aggregator.backbone.blocks.1
feature_aggregator.backbone.blocks.1.0
feature_aggregator.backbone.blocks.1.0.conv_pw
feature_aggregator.backbone.blocks.1.0.bn1
feature_aggregator.backbone.blocks.1.0.bn1.drop
feature_aggregator.back

In [36]:
# ----------------------------------------------------------------------------------------------------------------
# construct others
#  - nearest neighbour scorer
#  - segmentor
#  - sampler
# ----------------------------------------------------------------------------------------------------------------

# Nearest-neighbour backend.
# Enable the FAISS GPU path only when the pipeline itself runs on a CUDA device.
# `device` falls back to 'cpu' when no GPU is available, and a hard-coded True
# would be inconsistent with that fallback. On a GPU runtime this is still True.
faiss_on_gpu = str(device).startswith('cuda')
faiss_num_workers = 12
nn_method = FaissNN(faiss_on_gpu, faiss_num_workers)

# Scorer configured with the number of nearest neighbours and the backend above.
anomaly_score_num_nn = 5
anomaly_scorer = NearestNeighbourScorer(
    n_nearest_neighbours=anomaly_score_num_nn, nn_method=nn_method
)

# Segmentor: target_size is the spatial part of input_shape, i.e. (cropsize, cropsize).
anomaly_segmentor = RescaleSegmentor(
    device=device, target_size=input_shape[-2:]
)

# ----------
# sampler
# `percentage` is the sampling ratio handed to the coreset sampler.
percentage = 0.1

feature_sampler = ApproximateGreedyCoresetSampler(
    percentage=percentage,
    device=device,
    number_of_starting_points=10,
    dimension_to_project_features_to=128
)


In [40]:
##################################################################################################################
# ----------------------------------------------------------------------------------------------------------------
# data loader
# ----------------------------------------------------------------------------------------------------------------

# Dataset location on the mounted Drive and the MVTec AD category to load.
data_path = '/content/drive/MyDrive/Data/ImageData/mvtec_ad/'
mvtec_classname = 'screw'

# Dataset options: `resize` is passed together with `imagesize=cropsize`.
resize = 256
train_val_split = 1.0
seed = 0

# Loader options.
batch_size = 32
num_workers = 12

# Training split of the selected category.
train_dataset = MVTecDataset(
    data_path,
    classname=mvtec_classname,
    split=DatasetSplit.TRAIN,
    resize=resize,
    imagesize=cropsize,
    train_val_split=train_val_split,
    seed=seed,
    augment=True,
)

# Fixed iteration order (shuffle=False); pinned host memory for the transfer to device.
train_dataloader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    num_workers=num_workers,
    shuffle=False,
    pin_memory=True,
)




In [41]:
# ----------------------------------------------------------------------------------------------------------------
# eval model
# ----------------------------------------------------------------------------------------------------------------

# Switch every registered forward module to evaluation mode.
# The result is bound to `_` so the module repr is not echoed as cell output.
_ = forward_modules.eval()


In [42]:
# ----------------------------------------------------------------------------------------------------------------
# get one batch from train image
# ----------------------------------------------------------------------------------------------------------------

# Pull the first batch from the loader and keep only its 'image' entry.
input_images = next(iter(train_dataloader))['image']


# Cast to float and move to the compute device.
images = input_images.to(torch.float).to(device)

# (batchsize, 3, cropsize, cropsize) = (32, 3, 224, 224)
print(images.shape)


torch.Size([32, 3, 224, 224])


In [43]:
# ----------------------------------------------------------------------------------------------------------------
# feature aggregation
# ----------------------------------------------------------------------------------------------------------------

# Make sure the aggregator is in eval mode before extracting features.
_ = forward_modules["feature_aggregator"].eval()

# Inference only: no gradients are needed for feature extraction.
with torch.no_grad():
    features = forward_modules["feature_aggregator"](images)

# `features` is a dict keyed by the requested layer names.
print(features.keys())
# first layer:
# (32, 512, 28, 28) <-- WideResNet50 (layer2)
# (32, 32, 28, 28) <-- MobileNetV2_100 (blocks.2)
print(features[layers_to_extract_from[0]].shape)
# second layer:
# (32, 1024, 14, 14) <-- WideResNet50 (layer3)
# (32, 64, 14, 14) <-- MobileNetV2_100 (blocks.3)
print(features[layers_to_extract_from[1]].shape)


# From here on `features` is a list ordered like `layers_to_extract_from`
# (the name is rebound from the dict above).
features = [features[layer] for layer in layers_to_extract_from]
print(len(features))
# (32, 512, 28, 28) <-- WideResNet50
# (32, 32, 28, 28) <-- MobileNetV2_100
print(features[0].shape)
# (32, 1024, 14, 14) <-- WideResNet50
# (32, 64, 14, 14) <-- MobileNetV2_100
print(features[1].shape)


dict_keys(['blocks.2', 'blocks.3'])
torch.Size([32, 32, 28, 28])
torch.Size([32, 64, 14, 14])
2
torch.Size([32, 32, 28, 28])
torch.Size([32, 64, 14, 14])


In [44]:
# ----------------------------------------------------------------------------------------------------------------
# patchify
# ----------------------------------------------------------------------------------------------------------------

# Patchify every layer's feature map. With return_spatial_info=True each list
# entry becomes a 2-item result: (patches, patch-grid size), unpacked in the next cell.
features = [
    patch_maker.patchify(x, return_spatial_info=True) for x in features
]

# number of layers, then the 2 items per layer
print(len(features))
print(len(features[0]))
print(len(features[1]))

# first layer patches = (batchsize, 28*28, original channel, patchsize, patchsize)
# (32, 784, 512, 3, 3) <-- WideResNet50
# (32, 784, 32, 3, 3) <-- MobileNetV2_100
print(features[0][0].shape)

# second layer patches = (batchsize, 14*14, original channel, patchsize, patchsize)
# (32, 196, 1024, 3, 3) <-- WideResNet50
# (32, 196, 64, 3, 3) <-- MobileNetV2_100
print(features[1][0].shape)

# one single patch: first image, first patch position, first channel
# here patchsize = 3 (3 * 3)
print(features[0][0][0][0][0])


2
2
2
torch.Size([32, 784, 32, 3, 3])
torch.Size([32, 196, 64, 3, 3])
tensor([[ 0.0000,  0.0000,  0.0000],
        [ 0.0000, -1.4482,  3.7063],
        [ 0.0000, 11.4163,  1.6032]], device='cuda:0')


In [45]:
# ----------------------------------------------------------------------------------------------------------------
# get basic info (shapes)
# ----------------------------------------------------------------------------------------------------------------

# Second item of each patchify result: the patch-grid size of that layer.
patch_shapes = [x[1] for x in features]
# The first layer's grid is the reference that the other layers are resized to.
ref_num_patches = patch_shapes[0]

# Keep only the patch tensors (first item); `features` is rebound here.
features = [x[0] for x in features]


# [[28, 28], [14, 14]]
print(patch_shapes)
# [28, 28]
print(ref_num_patches)

# (32, 784, 512, 3, 3) <-- WideResNet50
# (32, 784, 32, 3, 3) <-- MobileNetV2_100
print(features[0].shape)
# (32, 196, 1024, 3, 3) <-- WideResNet50
# (32, 196, 64, 3, 3) <-- MobileNetV2_100
print(features[1].shape)


[[28, 28], [14, 14]]
[28, 28]
torch.Size([32, 784, 32, 3, 3])
torch.Size([32, 196, 64, 3, 3])


In [46]:
# ----------------------------------------------------------------------------------------------------------------
# reshape
# ----------------------------------------------------------------------------------------------------------------

# Bring every layer after the first onto the patch grid of the first layer
# (ref_num_patches), so that all layers end up with the same number of patches.
# Shapes in the comments are for the MobileNetV2_100 run above
# (batch 32, second layer with 64 channels on a 14 x 14 grid, reference grid 28 x 28).
for i in range(1, len(features)):
    _features = features[i]
    patch_dims = patch_shapes[i]

    # Unflatten the patch axis back into its 2-D grid:
    # (32, 196, 64, 3, 3) -> (32, 14, 14, 64, 3, 3)
    _features = _features.reshape(
        _features.shape[0], patch_dims[0], patch_dims[1], *_features.shape[2:]
    )
    # Move the grid axes to the end: -> (32, 64, 3, 3, 14, 14)
    _features = _features.permute(0, -3, -2, -1, 1, 2)
    # Remember this layout so it can be restored after resizing.
    perm_base_shape = _features.shape
    # Collapse all leading axes so each 14 x 14 grid is one 2-D map: -> (N, 14, 14)
    _features = _features.reshape(-1, *_features.shape[-2:])
    # Add a channel axis for F.interpolate and resize each map bilinearly
    # to the reference grid: (N, 1, 14, 14) -> (N, 1, 28, 28)
    _features = F.interpolate(
        _features.unsqueeze(1),
        size=(ref_num_patches[0], ref_num_patches[1]),
        mode="bilinear",
        align_corners=False,
    )
    # Drop the channel axis again: -> (N, 28, 28)
    _features = _features.squeeze(1)
    # Restore the leading axes saved above: -> (32, 64, 3, 3, 28, 28)
    _features = _features.reshape(
        *perm_base_shape[:-2], ref_num_patches[0], ref_num_patches[1]
    )
    # Move the grid axes back in front of the patch content: -> (32, 28, 28, 64, 3, 3)
    _features = _features.permute(0, -2, -1, 1, 2, 3)
    # Flatten the grid into one patch axis: -> (32, 784, 64, 3, 3)
    _features = _features.reshape(len(_features), -1, *_features.shape[-3:])
    features[i] = _features

# Merge the batch and patch axes for every layer: (32, 784, C, 3, 3) -> (25088, C, 3, 3)
features = [x.reshape(-1, *x.shape[-3:]) for x in features]

print(len(features))
# (batchsize * ref_num_patches, original channel, patchsize, patchsize)
# (25088, 512, 3, 3) <-- WideResNet50
# (25088, 32, 3, 3) <-- MobileNetV2_100
print(features[0].shape)
# (batchsize * ref_num_patches, original channel, patchsize, patchsize)
# (25088, 1024, 3, 3) <-- WideResNet50
# (25088, 64, 3, 3) <-- MobileNetV2_100
print(features[1].shape)


2
torch.Size([25088, 32, 3, 3])
torch.Size([25088, 64, 3, 3])


In [47]:
# ----------------------------------------------------------------------------------------------------------------
# preprocessing to 'pretrain_embed_dimension'
# ----------------------------------------------------------------------------------------------------------------

# As different feature backbones & patching provide differently
# sized features, these are brought into the correct form here.
features = forward_modules["preprocessing"](features)

# 25088 = batchsize * ref_num_patches
print(len(features))
# (2, 1024) = (num of layers, pretrain_embed_dimension)
# (would be (2, 500) with the alternative pretrain_embed_dimension = 500)
print(features[0].shape)
# (2, 1024) = (num of layers, pretrain_embed_dimension)
print(features[1].shape)


25088
torch.Size([2, 1024])
torch.Size([2, 1024])


In [54]:
# ----------------------------------------------------------------------------------------------------------------
# preadapt aggregation to 'target_embed_dimension'
# ----------------------------------------------------------------------------------------------------------------

# Collapse the per-layer axis: every patch ends up as one vector of
# target_embed_dimension values.
features = forward_modules["preadapt_aggregator"](features)

# 25088 = batchsize * ref_num_patches
print(len(features))
# (1024,) = (target_embed_dimension,)
# (would be (250,) with the alternative target_embed_dimension = 250)
print(features[0].shape)
# (1024,) = (target_embed_dimension,)
print(features[1].shape)



25088
torch.Size([1024])
torch.Size([1024])


In [55]:
# ----------------------------------------------------------------------------------------------------------------
# detach
# ----------------------------------------------------------------------------------------------------------------

# Turn every per-patch feature vector into a detached NumPy array on the host.
batch_features = [
    row.detach().cpu().numpy()
    for row in features
]

# 25088 = batchsize * ref_num_patches
print(len(batch_features))

# (1024,) = (target_embed_dimension,)
print(batch_features[0].shape)
print(batch_features[1].shape)



25088
(1024,)
(1024,)


In [56]:
# ----------------------------------------------------------------------------------------------------------------
# all features (here only 1 batch)
# ----------------------------------------------------------------------------------------------------------------

# Per-batch collection; this walkthrough processes a single batch only.
all_features = [batch_features]



In [57]:
# ----------------------------------------------------------------------------------------------------------------
# concatenate all batches  (here only 1 batch)
# ----------------------------------------------------------------------------------------------------------------

# Stack the per-batch collections along the first axis (the default axis of
# np.concatenate). NOTE: this rebinds `all_features` from a list to an array.
all_features = np.concatenate(all_features)

# number of feature vectors, then the shape of one of them
print(len(all_features))
print(all_features[0].shape)



25088
(1024,)


In [58]:
# ----------------------------------------------------------------------------------------------------------------
# sampler
# ----------------------------------------------------------------------------------------------------------------

# Subsample the feature vectors with the coreset sampler configured earlier.
features_embed = feature_sampler.run(all_features)

# 2508 ~= percentage (0.1) * 25088 input feature vectors
# (exact rounding is decided inside the sampler -- TODO confirm)
print(len(features_embed))

# (1024,) = (target_embed_dimension,)
print(features_embed[0].shape)



Subsampling...: 100%|██████████| 2508/2508 [00:00<00:00, 3231.48it/s]

2508
(1024,)





In [None]:
# ----------------------------------------------------------------------------------------------------------------
# anomaly scorer
# ----------------------------------------------------------------------------------------------------------------

# anomaly_scorer.fit(detection_features=[features_embed])
