In [None]:
from torchvision.models import detection
import numpy as np
import argparse
import pickle
import torch
import torchvision.models as models
import torchvision.transforms as transforms
import cv2
from torch.autograd import Variable
from PIL import Image
from sklearn.metrics.pairwise import cosine_distances,pairwise_distances,cosine_similarity
import os

In [None]:
_FEATURE_EXTRACTOR = None  # lazily built ResNet-152 trunk, shared by every get_vector call


def _get_feature_extractor():
    """Build the headless ResNet-152 on first use and return the cached instance afterwards."""
    global _FEATURE_EXTRACTOR
    if _FEATURE_EXTRACTOR is None:
        # Load resnet pretrained model
        backbone = models.resnet152(pretrained=True)
        # Remove the last classification layer and create a new model
        extractor = torch.nn.Sequential(*list(backbone.children())[:-1])
        # Inference mode: freezes batch-norm statistics.
        extractor.eval()
        _FEATURE_EXTRACTOR = extractor
    return _FEATURE_EXTRACTOR


def get_vector(image_name):
    """
        Get embeddings for a single instance of image

    The image is forced to RGB, resized to 224x224, normalised with the
    ImageNet mean/std and passed through a pretrained ResNet-152 whose final
    classification layer has been removed.

    The network is built once and reused across calls; loading the pretrained
    weights on every call (once per sliding-window patch) dominated runtime.

    :param image_name: path to an image file, or an already-loaded pillow image
    :type image_name: str or pillow image
    :return: 1D embedding tensor (2048 values for ResNet-152), computed under
        ``torch.no_grad()`` so it carries no autograd graph
    :rtype: torch.Tensor
    """
    # Run some image processing steps. Resize and normalize
    preprocess = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])

    # Normalize expects exactly three channels, so grayscale / RGBA / palette
    # inputs are converted to RGB first.
    if isinstance(image_name, str):
        # Context manager closes the underlying file once pixels are decoded.
        with Image.open(image_name) as opened:
            img = opened.convert('RGB')
    else:
        img = image_name.convert('RGB')

    # Add the batch dimension the network expects: (1, 3, 224, 224).
    t_img = preprocess(img).unsqueeze(0)

    # No gradients are needed for feature extraction; skipping the autograd
    # graph saves memory and time.
    with torch.no_grad():
        # Pooled output is (1, C, 1, 1); squeeze() flattens it to a 1D vector.
        my_embedding = _get_feature_extractor()(t_img).squeeze()
    return my_embedding

In [None]:

def sliding_window(image, stepSize_x, stepSize_y, windowSize):
    """
    Lazily walk a rectangular window over ``image`` in row-major order.

    :param image: array indexed as ``image[row, col]`` exposing ``.shape``
    :param stepSize_x: horizontal stride between consecutive windows
    :param stepSize_y: vertical stride between consecutive windows
    :param windowSize: ``(width, height)`` of the window
    :return: generator of ``(x, y, window)`` tuples, where ``(x, y)`` is the
        window's top-left corner; windows overlapping the right or bottom
        border are truncated by slicing rather than skipped
    """
    for top in range(0, image.shape[0], stepSize_y):
        for left in range(0, image.shape[1], stepSize_x):
            # windowSize is (width, height): index 1 bounds rows, index 0 bounds columns.
            row_band = slice(top, top + windowSize[1])
            col_band = slice(left, left + windowSize[0])
            yield (left, top, image[row_band, col_band])


In [None]:
if __name__ == '__main__':
    # Sliding-window geometry: the stride is one grid cell, the window spans 4x3 cells.
    STEP_X, STEP_Y = 96, 122
    WINDOW_SIZE = (STEP_X * 4, STEP_Y * 3)
    # Patches whose mean cosine similarity to the patches of the same image
    # falls below this value are tagged as anomalies.
    SIMILARITY_THRESHOLD = 0.65
    OUTPUT_DIR = './data/embeddings/'

    image_name_1 = './data/2021-04-07 21_39_06_exai_rpi_001_85.jpg'
    image_name_2 = './data/2021-04-07 21_37_18_exai_rpi_001_58.jpg'
    image_name_3 = './data/2021-04-07 21_40_10_exai_rpi_001_101.jpg'
    image_name_4 = './data/2021-04-07 21_45_00_exai_rpi_004_176.jpg'
    image_name_5 = './data/2021-04-07 21_43_40_exai_rpi_004_156.jpg'
    image_name_6 = './data/2021-04-07 21_39_20_exai_rpi_004_91.jpg'
    image_name_7 = './data/2021-04-07 21_36_40_exai_rpi_004_51.jpg'

    all_images = [image_name_1, image_name_2, image_name_3, image_name_4, image_name_5, image_name_6, image_name_7]

    # np.save and cv2.imwrite do not create missing directories.
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    window_shown = False

    for index_img, image_name in enumerate(all_images):
        # Read the entire image
        whole_image = cv2.imread(image_name)
        # cv2.imread signals failure by returning None instead of raising.
        if whole_image is None:
            raise OSError('Could not read image: ' + image_name)

        # Get the list of sliding window images as (x, y, patch) tuples
        all_imgs = list(sliding_window(whole_image, STEP_X, STEP_Y, WINDOW_SIZE))

        all_embeds = []
        # Here we give the sliding window images to the network one by one. Batching could be more efficient.
        # In this loop, convert opencv image to pillow image and get the embedding for it
        for _, _, patch in all_imgs:
            # OpenCV loads images as BGR; the conversion gives the RGB order Pillow expects.
            color_converted = cv2.cvtColor(patch, cv2.COLOR_BGR2RGB)
            pil_patch = Image.fromarray(color_converted)
            embeds = get_vector(pil_patch)
            all_embeds.append(embeds.detach().numpy())

        # Stack all embeddings into one (num_patches, embedding_dim) array and persist it.
        all_embeds = np.stack(all_embeds, axis=0)
        np.save(OUTPUT_DIR + 'embedding' + str(index_img) + '.npy', all_embeds)

        # Top-left x,y positions of the sliding window images.
        x_pos_set = np.array([img[0] for img in all_imgs])
        y_pos_set = np.array([img[1] for img in all_imgs])

        # Obtain cosine similarity matrix of all embeddings.
        cosine_sim_mat = cosine_similarity(all_embeds)
        # Mean similarity of each patch to every patch of the image
        # (the patch itself is included in the average).
        mean_similarities = np.mean(cosine_sim_mat, axis=0)

        count = 0
        # Loop over mean similarities; if one is below the threshold, tag the
        # image patch as anomaly, report it and save the patch.
        for inds, similarity in enumerate(mean_similarities):
            if similarity < SIMILARITY_THRESHOLD:
                count = count + 1
                print(similarity)
                print(count)
                print(x_pos_set[inds])
                print(y_pos_set[inds])
                # Display the patch. imshow only paints once the GUI event loop
                # is pumped, hence the short waitKey.
                cv2.imshow('test', mat=all_imgs[inds][2])
                cv2.waitKey(1)
                window_shown = True
                cv2.imwrite(OUTPUT_DIR + 'detected_patches_' + str(index_img) + '_' + str(count) + '.jpg', all_imgs[inds][2])

    # Only touch the GUI again if a window was actually opened.
    if window_shown:
        cv2.destroyAllWindows()