In [15]:
import requests
import pandas as pd
import xml.etree.ElementTree as ET
import os
import cv2
import torch
import torch.nn as nn
import torchvision
import numpy as np
import pandas as pd
base_dir = os.getcwd()

In [16]:
munich_imgs = os.listdir("scraped_images_grayscaled_big")
furnitures = [r for r in munich_imgs if "linz" and "cp13150" in r ]
furnitures

['1949_33905_id=cp131507_linz.jpg',
 '0711_4263_id=cp131504_linz.jpg',
 '0271_1458_id=cp131506_linz.jpg',
 '0606_3010_id=cp131502_linz.jpg',
 '0791_4970_id=cp131503_linz.jpg',
 '1950_33912_id=cp131509_linz.jpg',
 '1141_8563_id=cp131500_linz.jpg',
 '0270_1455_id=cp131505_linz.jpg',
 '1153_8691_id=cp131501_linz.jpg',
 '1950_33908_id=cp131508_linz.jpg']

In [17]:
#vgg models (16 and 19 similar, but 16 is better)
model = torchvision.models.vgg16(pretrained=True)
# model = torchvision.models.vgg19(pretrained=True)

model.features[0] = nn.Conv2d(1,64,kernel_size=(3,3), stride=(1,1),padding=(1,1))
model = nn.Sequential(*[*list(model.children())[:-1][0][:-10]])

def preprocess_image(image_path):
    """
    This function takes a path to a single image, it then resizes it to size 50x50 \
    and normalizes it to the range [0,1]. Lastly, it adds an extra dimension to the image \
    which represents the batch size. These steps are needed, because we want to pass the image \
    to a CNN. 
    """
    
    img = cv2.imread(image_path, -1)
    clahe = cv2.createCLAHE(clipLimit=2.5, tileGridSize=(8,8))
    img = clahe.apply(img)
    _, thresh = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY)
    contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    contours = sorted(contours, key=cv2.contourArea, reverse=True)
    mask = np.ones(img.shape, np.uint8)
    mask.fill(255)
    cv2.drawContours(mask, contours, 0, 0, -1)
    img = cv2.add(thresh, mask)
    kernel = np.ones((5,5), dtype=np.uint8)
    img = cv2.erode(img, kernel, 10)
    img = np.abs(np.max(img) - img)
    
    img = cv2.resize(img, (50, 50), interpolation=cv2.INTER_AREA)
    preprocess = torchvision.transforms.Compose([
        torchvision.transforms.ToTensor()
    ])
    img = preprocess(img).unsqueeze(0)
    return img

def extract_features(image_path):
    """
    This function takes a path to a single image, it then preprocesses the image with the \
    function preprocess_image. Afterwards it passes the image to the pretrained CNN to extract \
    a feature descriptor. 
    """
    
    img = preprocess_image(image_path)
    with torch.no_grad():
        features = model(img)
    return features.squeeze(0).numpy()

def normalize_features(features):
    """
    This function takes the feature descriptor and normalizes it. This is needed as we want \
    to compute the dot-product similarity between feature descriptors of different images. \
    And for similarity it is convenient to have all pixels on the same scale without too \
    much magnitude differences and this also ensures stability. 
    """
    
    return features / np.linalg.norm(features)
    



In [18]:
# # Resnet models (gives high scores regardless)
# # model = torchvision.models.resnet50(pretrained=True)
# # model = torchvision.models.resnet101(pretrained=True) 
# # model.conv1 = nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
# # model = nn.Sequential(*list(model.children())[:-1])

# def preprocess_image(image_path):
#     """
#     This function takes a path to a single image, it then resizes it to size 50x50 \
#     and normalizes it to the range [0,1]. Lastly, it adds an extra dimension to the image \
#     which represents the batch size. These steps are needed, because we want to pass the image \
#     to a CNN. 
#     """
    
#     img = cv2.imread(image_path, -1)
#     clahe = cv2.createCLAHE(clipLimit=2.5, tileGridSize=(8,8))
#     img = clahe.apply(img)
#     _, thresh = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY)
#     contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
#     contours = sorted(contours, key=cv2.contourArea, reverse=True)
#     mask = np.ones(img.shape, np.uint8)
#     mask.fill(255)
#     cv2.drawContours(mask, contours, 0, 0, -1)
#     img = cv2.add(thresh, mask)
#     kernel = np.ones((5,5), dtype=np.uint8)
#     img = cv2.erode(img, kernel, 10)
#     img = np.abs(np.max(img) - img)
    
#     img = cv2.resize(img, (50, 50), interpolation=cv2.INTER_AREA)
#     preprocess = torchvision.transforms.Compose([
#         torchvision.transforms.ToTensor()
#     ])
#     img = preprocess(img).unsqueeze(0)
#     return img

# def extract_features(image_path):
#     """
#     This function takes a path to a single image, it then preprocesses the image with the \
#     function preprocess_image. Afterwards it passes the image to the pretrained CNN to extract \
#     a feature descriptor. 
#     """
    
#     img = preprocess_image(image_path)
#     with torch.no_grad():
#         features = model(img)
#     return features.squeeze(0).numpy()

# def normalize_features(features):
#     """
#     This function takes the feature descriptor and normalizes it. This is needed as we want \
#     to compute the dot-product similarity between feature descriptors of different images. \
#     And for similarity it is convenient to have all pixels on the same scale without too \
#     much magnitude differences and this also ensures stability. 
#     """
    
#     return features / np.linalg.norm(features)
    

In [19]:
# # InceptionV3 model (potential)
# model = torchvision.models.inception_v3(pretrained=True, aux_logits=True)

# def preprocess_image(image_path):
#     """
#     Preprocesses a single image for InceptionV3 input.
#     """
#     # img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)  # Read the image as grayscale
#     # img = cv2.resize(img, (299, 299))  # Resize to 299x299 for InceptionV3
#     # img = np.array(img, dtype=np.float32) / 255.0  # Normalize to [0, 1]
    
#     # # Since the model expects 3 channels, we stack the grayscale image to create 3 channels
#     # img = np.stack([img, img, img], axis=-1)
    
#     # img = (img - np.array([0.485, 0.456, 0.406])) / np.array([0.229, 0.224, 0.225])  # Normalize with ImageNet mean and std
#     # img = np.transpose(img, (2, 0, 1))  # Transpose to (channels, height, width)
#     # img = np.expand_dims(img, axis=0)  # Add batch dimension
#     # img = torch.tensor(img, dtype=torch.float32)  # Convert to PyTorch tensor with dtype float32
#         # Read the image as grayscale
#     img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    
#     # Resize to 299x299 for InceptionV3
#     img = cv2.resize(img, (299, 299))
    
#     # Apply CLAHE (Contrast Limited Adaptive Histogram Equalization)
#     clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
#     img = clahe.apply(img)
    
#     # Normalize to [0, 1]
#     img = np.array(img, dtype=np.float32) / 255.0
    
#     # Since the model expects 3 channels, we stack the grayscale image to create 3 channels
#     img = np.stack([img, img, img], axis=-1)
    
#     # Apply Gamma correction
#     gamma = 1.2  # Example gamma value, adjust as necessary
#     img = np.power(img, gamma)
    
#     # Normalize with ImageNet mean and std
#     img = (img - np.array([0.485, 0.456, 0.406])) / np.array([0.229, 0.224, 0.225])
    
#     # Transpose to (channels, height, width)
#     img = np.transpose(img, (2, 0, 1))
    
#     # Add batch dimension
#     img = np.expand_dims(img, axis=0)
    
#     # Convert to PyTorch tensor with dtype float32
#     img = torch.tensor(img, dtype=torch.float32)

#     return img

# def extract_features(image_path):
#     """
#     Extract features from a single image using InceptionV3.
#     """
#     img = preprocess_image(image_path)
#     model.eval()  # Set model to evaluation mode
#     with torch.no_grad():
#         features = model(img)
#     return features.squeeze(0).numpy()

# def normalize_features(features):
#     """
#     Normalize the extracted features.
#     """
#     return features / np.linalg.norm(features)


In [20]:
# # Mobile net v2 (gives to high of scores to everything)
# model = torchvision.models.mobilenet_v2(pretrained=True)

# # Modify the first convolutional layer to accept grayscale images
# model.features[0][0] = nn.Conv2d(1, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)

# def preprocess_image(image_path):
#     """
#     Preprocesses a single grayscale image for MobileNetV2 input.
#     """
#     img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)  # Read the image as grayscale
#     img = cv2.resize(img, (224, 224))  # Resize to 224x224 for MobileNetV2
#     img = np.array(img, dtype=np.float32) / 255.0  # Normalize to [0, 1]
    
#     img = np.expand_dims(img, axis=0)  # Add channel dimension
#     img = np.expand_dims(img, axis=0)  # Add batch dimension
#     img = torch.tensor(img, dtype=torch.float32)  # Convert to PyTorch tensor with dtype float32
#     return img

# def extract_features(image_path):
#     """
#     Extract features from a single image using MobileNetV2.
#     """
#     img = preprocess_image(image_path)
#     model.eval()  # Set model to evaluation mode
#     with torch.no_grad():
#         features = model.features(img)
#     return features.squeeze(0).numpy()

# def normalize_features(features):
#     """
#     Normalize the extracted features.
#     """
#     return features / np.linalg.norm(features)

In [21]:
# # DenseNet (gives similar scores to everything)
# model = torchvision.models.densenet121(pretrained=True)

# # Modify the first convolutional layer to accept grayscale images
# model.features.conv0 = nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)

# def preprocess_image(image_path):
#     """
#     Preprocesses a single grayscale image for DenseNet input.
#     """
#     img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)  # Read the image as grayscale
#     img = cv2.resize(img, (224, 224))  # Resize to 224x224 for DenseNet
#     img = np.array(img, dtype=np.float32) / 255.0  # Normalize to [0, 1]
    
#     img = np.expand_dims(img, axis=0)  # Add channel dimension
#     img = np.expand_dims(img, axis=0)  # Add batch dimension
#     img = torch.tensor(img, dtype=torch.float32)  # Convert to PyTorch tensor with dtype float32
#     return img

# def extract_features(image_path):
#     """
#     Extract features from a single image using DenseNet.
#     """
#     img = preprocess_image(image_path)
#     model.eval()  # Set model to evaluation mode
#     with torch.no_grad():
#         features = model.features(img)
#     return features.squeeze(0).numpy()

# def normalize_features(features):
#     """
#     Normalize the extracted features.
#     """
#     return features / np.linalg.norm(features)


In [22]:
# # Alexnet (gives good scores unconditionally)
# model = torchvision.models.alexnet(pretrained=True)

# # Modify the first convolutional layer to accept grayscale images
# model.features[0] = nn.Conv2d(1, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))

# def preprocess_image(image_path):
#     """
#     Preprocesses a single grayscale image for AlexNet input.
#     """
#     img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)  # Read the image as grayscale
#     img = cv2.resize(img, (224, 224))  # Resize to 224x224 for AlexNet
#     img = np.array(img, dtype=np.float32) / 255.0  # Normalize to [0, 1]
    
#     img = np.expand_dims(img, axis=0)  # Add channel dimension
#     img = np.expand_dims(img, axis=0)  # Add batch dimension
#     img = torch.tensor(img, dtype=torch.float32)  # Convert to PyTorch tensor with dtype float32
#     return img

# def extract_features(image_path):
#     """
#     Extract features from a single image using AlexNet.
#     """
#     img = preprocess_image(image_path)
#     model.eval()  # Set model to evaluation mode
#     with torch.no_grad():
#         features = model.features(img)
#     return features.squeeze(0).numpy()

# def normalize_features(features):
#     """
#     Normalize the extracted features.
#     """
#     return features / np.linalg.norm(features)

In [23]:
list(model.children())

[Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
 ReLU(inplace=True),
 Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
 ReLU(inplace=True),
 MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False),
 Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
 ReLU(inplace=True),
 Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
 ReLU(inplace=True),
 MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False),
 Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
 ReLU(inplace=True),
 Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
 ReLU(inplace=True),
 Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
 ReLU(inplace=True),
 MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False),
 Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
 ReLU(inplace=True),
 Conv2d(512, 512, kernel_size=(3, 3), stride=(1

In [24]:
def compute_similarities_testsets(munich_testset, nk_testset, 
                                  munich_path="munich_test_no_back", 
                                  nk_path="nk_test_no_back"):
    """
    This function takes four arguments: 
    - munich_testset, which contains 5 grayscaled images from the munich database.
    - nk_testset, which contains 5 grayscaled images from the nk collection API.
    - munich path, the path to the directory of the munich images. 
    - nk_path, the path to the directory of the nk images. 
    
    It then computes the feature descriptors for the munich images and all the \
    nk collection images. Afterwards takes the dot-product to get the dot-product similiarity. 
    It then saves the similarity and the two images as key-value pairs in a dictionary. 
    """
    
    similarities = {}
    for nk_img in nk_testset:
        nk_img_path = os.path.join(nk_path, nk_img)
        for munich_img in munich_testset:
            munich_img_path = os.path.join(munich_path, munich_img)
            nk_img_feature_descriptor = normalize_features(extract_features(nk_img_path).flatten())
            munich_img_feature_descriptor = normalize_features(extract_features(munich_img_path).flatten())
            similarity = np.dot(
                nk_img_feature_descriptor,
                munich_img_feature_descriptor
            )
            similarities[(nk_img, munich_img)] = similarity.item()
        
    return similarities

def compute_similarities_img_to_set(nk_img, munich_testset, 
                                    munich_path="mc_no_back", 
                                    nk_path="nk_no_back"):
    similarities = {}
    nk_img_path = os.path.join(nk_path, nk_img)
    
    # Ensure the NK image file exists
    if not os.path.exists(nk_img_path):
        raise FileNotFoundError(f"NK image file {nk_img_path} not found.")
    
    nk_img_feature_descriptor = normalize_features(extract_features(nk_img_path).flatten())
    
    for munich_img in munich_testset:
        munich_img_path = os.path.join(munich_path, munich_img)
        
        # Ensure the Munich image file exists
        if not os.path.exists(munich_img_path):
            raise FileNotFoundError(f"Munich image file {munich_img_path} not found.")
        
        munich_img_feature_descriptor = normalize_features(extract_features(munich_img_path).flatten())
        similarity = np.dot(
            nk_img_feature_descriptor,
            munich_img_feature_descriptor
        )
        similarities[(nk_img, munich_img)] = similarity.item()
        
    return similarities

In [26]:
nk_testset = os.listdir("nk_test_no_back")
munich_testset = os.listdir("munich_test_no_back")

sims_test = compute_similarities_testsets(munich_testset, nk_testset)
sims_test

error: OpenCV(4.9.0) /io/opencv/modules/imgproc/src/clahe.cpp:353: error: (-215:Assertion failed) _src.type() == CV_8UC1 || _src.type() == CV_16UC1 in function 'apply'


In [None]:
munich_set = os.listdir("scraped_images_grayscaled_big")
nk_img = ("speeltafel_nk.png")

sims = compute_similarities_img_to_set(nk_img, munich_set, munich_path='scraped_images_grayscaled_big' ,nk_path='nk_test_no_back')
# print(sims)

In [None]:
# def get_table(sims):
#     """
#     This function takes the output produced by either the compute_similarities \ 
#     or compute_similarities_testsets function, and returns a pandas dataframe/table \
#     and also saves it in excel.
#     """
    
#     data = {}
#     rows = []

#     for key, value in sims.items():
#         if key[0] not in data:
#             data[key[0]] = []
#         if key[1] not in rows:
#             rows.append(key[1])
#         data[key[0]].append(value)
        
#     data = {key[:key.rfind(".")]:value for key, value in data.items()}
#     rows = [row[:row.rfind(".")] for row in rows]
        
#     df = pd.DataFrame(data, index=rows)
#     #df.to_excel('output.xlsx')
#     return df.T
    
# get_table(sims_test)

In [None]:
def get_top_10_similarities(similarities):
    """
    Get the top 10 highest similarity values for each NK image from the similarities dictionary.
    
    Parameters:
    - similarities: A dictionary where keys are (NK image name, Munich image name) tuples
                    and values are similarity scores.
                    
    Returns:
    - A dictionary where keys are NK image names and values are lists of tuples
      (Munich image name, similarity) sorted by similarity in descending order.
    """
    top_10_similarities = {}
    
    for nk_img_name in set(key[0] for key in similarities.keys()):
        # Filter similarities for current NK image
        nk_similarities = [(munich_img_name, similarity) for (nk, munich_img_name), similarity in similarities.items() if nk == nk_img_name]
        
        # Sort by similarity in descending order
        sorted_similarities = sorted(nk_similarities, key=lambda x: x[1], reverse=True)
        
        # Get top 10 similarities
        top_10_similarities[nk_img_name] = sorted_similarities[:10]
    
    return top_10_similarities


get_top_10_similarities(sims)


{'speeltafel_nk.png': [('0543_2543_id=cp151474_badv.jpg', 0.5409100651741028),
  ('0543_2543_id=cp151473_badv.jpg', 0.5401169657707214),
  ('2075_41133_id=cp159122_badv.jpg', 0.5321546196937561),
  ('2075_41136_id=cp159123_badv.jpg', 0.5199837684631348),
  ('1110_8010-17_id=cp174107_badv.jpg', 0.5186924338340759),
  ('0238_1286-1_id=cp139334_badv.jpg', 0.5139502882957458),
  ('2152_43796_id=cp162079_badv.jpg', 0.48791271448135376),
  ('2278_46800_id=cp164916_badv.jpg', 0.4876925051212311),
  ('2152_43797_id=cp162080_badv.jpg', 0.48460230231285095),
  ('1554_13932_id=cp140925_badv.jpg', 0.48370492458343506)]}