In [34]:
import os
import cv2
import torch
import torch.nn as nn
import torchvision
import numpy as np
import pandas as pd

In [35]:
# All data folders are resolved relative to the directory the kernel was started in.
base_dir = os.getcwd()
optional_path = "Downloads"

# Folder names of the two image collections inside base_dir/optional_path.
relative_path_nk = "nk_collection_meubels_cleaned"
relative_path_munich = "scraped_images_grayscaled_big"

# Absolute locations of the NK and Munich image folders.
abs_path_nk, abs_path_munich = (
    os.path.join(base_dir, optional_path, folder)
    for folder in (relative_path_nk, relative_path_munich)
)

In [37]:
# Pretrained VGG16 whose first convolution is replaced by a newly constructed layer
# that takes a single input channel instead of the stock layer's input.
model = torchvision.models.vgg16(pretrained=True)
model.features[0] = nn.Conv2d(
    in_channels=1,
    out_channels=64,
    kernel_size=(3, 3),
    stride=(1, 1),
    padding=(1, 1),
)
# Keep only the convolutional feature stack, without its last 10 layers, as a flat
# Sequential (avgpool and classifier are dropped).
model = nn.Sequential(*model.features[:-10])

def preprocess_image(image_path):
    """
    Load one image from disk and convert it into an input tensor for the CNN.

    Processing steps:
    1. Read the file unchanged and apply CLAHE contrast equalisation.
    2. Binarise at 127 and locate the contour with the largest area.
    3. Build a white mask in which only that contour is filled black, add it to the
       binarised image (saturating), erode once with a 5x5 kernel and invert.
    4. Resize to 50x50 and convert to a tensor with a leading batch dimension.

    Parameters:
    - image_path, path to a single image file. CLAHE requires a single-channel image,
      so the file is presumably already grayscale -- TODO confirm for new data sources.

    Returns a torch tensor; for 8-bit single-channel input it has shape (1, 1, 50, 50)
    with values scaled to [0, 1] by ToTensor.

    Raises ValueError when the file cannot be read as an image.
    """

    img = cv2.imread(image_path, cv2.IMREAD_UNCHANGED)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising; fail here
        # with the offending path rather than with an opaque error further down.
        raise ValueError("Could not read image: {!r}".format(image_path))

    # Local contrast enhancement before thresholding.
    clahe = cv2.createCLAHE(clipLimit=2.5, tileGridSize=(8, 8))
    img = clahe.apply(img)

    _, thresh = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY)
    contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    contours = sorted(contours, key=cv2.contourArea, reverse=True)

    # White mask with only the largest contour (index 0 after sorting) filled black;
    # the saturating add then turns everything outside that contour white.
    mask = np.full(img.shape, 255, dtype=np.uint8)
    cv2.drawContours(mask, contours, 0, 0, -1)
    img = cv2.add(thresh, mask)

    # NOTE: `iterations` must be passed by keyword -- the third positional parameter
    # of cv2.erode is `dst`, so a bare positional number there does not set the
    # iteration count. One pass is what has effectively been running.
    # TODO(review): confirm whether 10 passes were intended; changing it alters every
    # descriptor and therefore every similarity score.
    kernel = np.ones((5, 5), dtype=np.uint8)
    img = cv2.erode(img, kernel, iterations=1)

    # Invert so the previously dark pixels become the bright ones.
    img = img.max() - img

    img = cv2.resize(img, (50, 50), interpolation=cv2.INTER_AREA)
    # ToTensor adds the channel dimension; unsqueeze adds the batch dimension.
    return torchvision.transforms.ToTensor()(img).unsqueeze(0)

def extract_features(image_path):
    """
    Compute the CNN feature descriptor for a single image file.

    The image is first converted to an input tensor by preprocess_image. That tensor is
    then passed through the module-level `model` with gradient tracking disabled, and the
    output is returned as a NumPy array with the batch dimension removed.
    """

    batch = preprocess_image(image_path)
    with torch.no_grad():
        output = model(batch)
    descriptor = output.squeeze(0)
    return descriptor.numpy()

def normalize_features(features):
    """
    Scale a feature descriptor to unit Euclidean (L2) length.

    With unit-length descriptors the dot product of two descriptors is their cosine
    similarity, so scores of different image pairs are directly comparable.

    Parameters:
    - features, array-like feature descriptor.

    Returns the descriptor divided by its L2 norm. An all-zero descriptor has no
    direction and is returned as zeros instead of dividing by zero (which would
    produce NaNs that propagate into every similarity computed from it).
    """

    features = np.asarray(features)
    norm = np.linalg.norm(features)
    if norm == 0:
        # Dividing by 1 keeps shape and float dtype while avoiding 0/0 = NaN.
        norm = 1.0
    return features / norm
    

In [38]:
# Provenance: the checkpoint was written from a previous session with
#   torch.save(model.state_dict(), "best2_vgg16_weights.pth")
# map_location="cpu" loads the tensors onto the CPU even if they were saved from a GPU;
# inference in this notebook stays on the CPU (outputs are converted straight to NumPy).
# NOTE(review): torch.load unpickles the file -- only load checkpoints from a trusted source.
best_model_state_dict = torch.load("best2_vgg16_weights.pth", map_location="cpu")
model.load_state_dict(best_model_state_dict)
# Switch to inference mode before extracting features.
model.eval()

Sequential(
  (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): ReLU(inplace=True)
  (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (3): ReLU(inplace=True)
  (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (6): ReLU(inplace=True)
  (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (8): ReLU(inplace=True)
  (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (11): ReLU(inplace=True)
  (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (13): ReLU(inplace=True)
  (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (15): ReLU(inplace=True)
  (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (17): Conv2d(256, 512, kernel_si

In [46]:
# Query image: one file from the NK test set.
nk_img = os.path.join(
    base_dir,
    optional_path,
    "nk_testset",
    "kast_nk.jpg",
)
# File names (not full paths) of every entry in the Munich image folder.
munich_imgs = os.listdir(abs_path_munich)

def compute_similarities(nk_img, munich_imgs, 
                         path=abs_path_munich):
    """
    Compare one NK collection image against a list of Munich Database images.

    Parameters:
    - nk_img, path to a single image from the NK collection.
    - munich_imgs, file names of the Munich Database images to compare against.
    - path, directory that contains the files listed in munich_imgs.

    The feature descriptor of every image is extracted, flattened and normalized; the
    similarity of a pair is the dot product of the two descriptors.

    Returns a dictionary that maps (nk file name, munich file name) to the similarity
    as a Python float.
    """

    similarities = {}
    # The query image is the same for every pair, so its descriptor and display name
    # are computed once up front.
    nk_img_feature_descriptor = normalize_features(extract_features(nk_img).flatten())
    # os.path.basename handles the platform's path separators, unlike searching for "/".
    nk_img_name = os.path.basename(nk_img)

    for img in munich_imgs:
        img_path = os.path.join(path, img)
        munich_img_feature_descriptor = normalize_features(extract_features(img_path).flatten())
        similarity = np.dot(
            nk_img_feature_descriptor,
            munich_img_feature_descriptor
        )
        munich_img_name = os.path.basename(img_path)
        similarities[(nk_img_name, munich_img_name)] = similarity.item()

    return similarities
    
# Score the query image against every listed Munich image; the CNN runs once per
# Munich image, so this can take a while on a large folder.
sims_complete = compute_similarities(nk_img=nk_img, munich_imgs=munich_imgs)

In [33]:
# Requires `sims_complete` from the compute_similarities cell; running this cell
# before that one raises NameError.
# Pairs scoring above this similarity are kept as candidate matches.
SIMILARITY_THRESHOLD = 0.89
filtered = {
    pair: score
    for pair, score in sims_complete.items()
    if score > SIMILARITY_THRESHOLD
}
imgs = os.listdir(abs_path_munich)

NameError: name 'sims_complete' is not defined

In [32]:
# Show one Munich image in a native OpenCV window; waitKey(0) blocks the cell until a
# key is pressed in that window.
preview_path = os.path.join(abs_path_munich, '0640_3818_id=cp132632_linz.jpg')
img = cv2.imread(preview_path, cv2.IMREAD_UNCHANGED)
if img is None:
    # cv2.imread returns None instead of raising when the file is missing or unreadable.
    raise FileNotFoundError("Could not read image: {}".format(preview_path))
try:
    cv2.imshow("img", img)
    cv2.waitKey(0)
finally:
    # Always close the window, even if displaying fails or the cell is interrupted.
    cv2.destroyAllWindows()

In [39]:
# File names found in the two small test-set folders (NK first, then Munich).
nk_testset, munich_testset = (
    os.listdir(os.path.join(base_dir, optional_path, folder))
    for folder in ("nk_testset", "munich_testset")
)

def compute_similarities_testsets(munich_testset, nk_testset, 
                                  munich_path=os.path.join(base_dir, optional_path, "munich_testset"), 
                                  nk_path=os.path.join(base_dir, optional_path, "nk_testset")):
    """
    Compute the similarity of every (NK image, Munich image) pair of two test sets.

    Parameters:
    - munich_testset, file names of the Munich test images.
    - nk_testset, file names of the NK collection test images.
    - munich_path, directory that contains the Munich test images.
    - nk_path, directory that contains the NK test images.

    Every image is converted to a flattened, normalized feature descriptor exactly
    once; the similarity of a pair is the dot product of its two descriptors.

    Returns a dictionary that maps (nk file name, munich file name) to the similarity
    as a Python float. Entries are ordered by NK image first, then by Munich image.
    """

    def describe(directory, file_name):
        # Flattened, normalized CNN descriptor of one image file.
        image_path = os.path.join(directory, file_name)
        return normalize_features(extract_features(image_path).flatten())

    # Descriptors do not depend on the pairing, so compute each Munich descriptor once
    # instead of once per NK image (N + M CNN passes rather than 2 * N * M).
    munich_descriptors = [
        (munich_img, describe(munich_path, munich_img))
        for munich_img in munich_testset
    ]

    similarities = {}
    for nk_img in nk_testset:
        nk_img_feature_descriptor = describe(nk_path, nk_img)
        for munich_img, munich_img_feature_descriptor in munich_descriptors:
            similarity = np.dot(
                nk_img_feature_descriptor,
                munich_img_feature_descriptor
            )
            similarities[(nk_img, munich_img)] = similarity.item()

    return similarities
    
# Keyword arguments make the (munich, nk) parameter order explicit.
sims = compute_similarities_testsets(
    munich_testset=munich_testset,
    nk_testset=nk_testset,
)

In [40]:
def get_table(sims):
    """
    Arrange pairwise similarities in a pandas DataFrame.

    Parameters:
    - sims, dictionary that maps (first name, second name) to a similarity, as
      produced by compute_similarities or compute_similarities_testsets.

    Returns a DataFrame with one row per first name and one column per second name,
    both in order of first appearance and with the file extension removed. Every
    first name must come with a value for every second name, in the same order.

    The table is not written to disk; call .to_excel('output.xlsx') on the result to
    export it.
    """

    def strip_extension(name):
        # splitext leaves names without an extension untouched, whereas slicing at
        # rfind(".") cuts off the last character when no "." is present.
        return os.path.splitext(name)[0]

    values_by_first = {}
    second_names = {}  # dict used as an insertion-ordered set

    for (first_name, second_name), value in sims.items():
        values_by_first.setdefault(first_name, []).append(value)
        second_names.setdefault(second_name, None)

    data = {strip_extension(name): values for name, values in values_by_first.items()}
    rows = [strip_extension(name) for name in second_names]

    df = pd.DataFrame(data, index=rows)
    return df.T
    
# Display the test-set similarities as a table (rows: NK images, columns: Munich images).
get_table(sims=sims)

Unnamed: 0,stoel_mccp,tafel_mccp,kast_mccp,dressoir_mccp,speeltafel_mccp
kast_nk,0.660421,0.661928,0.853438,0.841007,0.686943
speeltafel_nk,0.548683,0.570109,0.554375,0.545816,0.636547
tafel_nk,0.482328,0.558129,0.517739,0.484007,0.552639
dressoir_nk,0.69672,0.688964,0.709567,0.752601,0.590738
stoel_nk,0.778562,0.685584,0.7317,0.749232,0.589983
