In [1]:
import os
import cv2
import torch
import torch.nn as nn
import torchvision
import numpy as np
import pandas as pd

In [2]:
base_dir = os.getcwd()
optional_path = "Downloads"
relative_path_nk = "nk_collection_meubels_cleaned"
relative_path_munich = "scraped_images_grayscaled_big"
abs_path_nk = os.path.join(base_dir, optional_path, relative_path_nk)
abs_path_munich = os.path.join(base_dir, optional_path, relative_path_munich)

In [44]:
model = torchvision.models.vgg16(pretrained=True)
model.features[0] = nn.Conv2d(1,64,kernel_size=(3,3), stride=(1,1),padding=(1,1))
model = nn.Sequential(*[*list(model.children())[:-1][0][:-10]])

def preprocess_image(image_path):
    """
    This function takes a path to a single image, it then resizes it to size 50x50 \
    and normalizes it to the range [0,1]. Lastly, it adds an extra dimension to the image \
    which represents the batch size. These steps are needed, because we want to pass the image \
    to a CNN. 
    """
    
    img = cv2.imread(image_path, -1)
    clahe = cv2.createCLAHE(clipLimit=2.5, tileGridSize=(8,8))
    img = clahe.apply(img)
    _, thresh = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY)
    contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    contours = sorted(contours, key=cv2.contourArea, reverse=True)
    mask = np.ones(img.shape, np.uint8)
    mask.fill(255)
    cv2.drawContours(mask, contours, 0, 0, -1)
    img = cv2.add(thresh, mask)
    kernel = np.ones((5,5), dtype=np.uint8)
    img = cv2.erode(img, kernel, 10)
    img = np.abs(np.max(img) - img)
    
    img = cv2.resize(img, (50, 50), interpolation=cv2.INTER_AREA)
    preprocess = torchvision.transforms.Compose([
        torchvision.transforms.ToTensor()
    ])
    img = preprocess(img).unsqueeze(0)
    return img

def extract_features(image_path):
    """
    This function takes a path to a single image, it then preprocesses the image with the \
    function preprocess_image. Afterwards it passes the image to the pretrained CNN to extract \
    a feature descriptor. 
    """
    
    img = preprocess_image(image_path)
    with torch.no_grad():
        features = model(img)
    return features.squeeze(0).numpy()

def normalize_features(features):
    """
    This function takes the feature descriptor and normalizes it. This is needed as we want \
    to compute the dot-product similarity between feature descriptors of different images. \
    And for similarity it is convenient to have all pixels on the same scale without too \
    much magnitude differences and this also ensures stability. 
    """
    
    return features / np.linalg.norm(features)
    

In [45]:
#best_model_state_dict = model.state_dict()
#torch.save(best_model_state_dict, "best2_vgg16_weights.pth")
best_model_state_dict = torch.load("best2_vgg16_weights.pth")
model.load_state_dict(best_model_state_dict)
model.eval()

Sequential(
  (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): ReLU(inplace=True)
  (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (3): ReLU(inplace=True)
  (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (6): ReLU(inplace=True)
  (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (8): ReLU(inplace=True)
  (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (11): ReLU(inplace=True)
  (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (13): ReLU(inplace=True)
  (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (15): ReLU(inplace=True)
  (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (17): Conv2d(256, 512, kernel_si

In [7]:
nk_img = os.path.join(base_dir, optional_path, "nk_testset", "kast_nk.jpg")
munich_imgs = os.listdir(abs_path_munich)

def compute_similarities(nk_img, munich_imgs, 
                         path=abs_path_munich):
    """
    This function takes three arguments: 
    - nk_img, which is a single image from the nk collection. 
    - munich_imgs, this contains all images from the Munich Database. 
    - path, this is the path to the gray scaled Munich Database.
    
    It then computes the feature descriptor for the nk collection image and all the images in the \
    Munich Database. Afterwards takes the dot-product to get the dot-product similiarity. It then \
    saves the similarity and the two images as key-value pairs in a dictionary. 
    """
    
    similarities = {}
    nk_img_feature_descriptor = normalize_features(extract_features(nk_img).flatten())
    for img in munich_imgs:
        img_path = os.path.join(path, img)
        munich_img_feature_descriptor = normalize_features(extract_features(img_path).flatten())
        similarity = np.dot(
            nk_img_feature_descriptor,
            munich_img_feature_descriptor
        )
        munich_img_name = img_path[img_path.rfind("/")+1:]
        nk_img_name = nk_img[nk_img.rfind("/")+1:]
        similarities[(nk_img_name, munich_img_name)] = similarity.item()
        
    return similarities
    
sims_complete = compute_similarities(nk_img, munich_imgs)

In [214]:
nk_no_back = os.listdir("/home/hamid/Downloads/nk_no_back")
img = cv2.imread(os.path.join(base_dir, optional_path, "nk_no_back", "meubel_22.jpg"), -1)
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
"""
cv2.imshow("img", cv2.cvtColor(img, cv2.COLOR_BGR2GRAY))
cv2.waitKey(0)
cv2.destroyAllWindows()
"""
_, thresh = cv2.threshold(img, 110, 255, cv2.THRESH_BINARY)
cv2.imshow("img", thresh)
cv2.waitKey(0)
cv2.destroyAllWindows()

In [29]:
filtered = {k:v for k,v in sims_complete.items() if v > 0.82}
sorted_filtered = sorted(filtered.items(), key=lambda item: item[1], reverse=True)[20:30]
sorted_filtered

[(('kast_nk.jpg', '0270_1450_id=cp131480_linz.jpg'), 0.9085464477539062),
 (('kast_nk.jpg', '1083_7988-67_id=cp173744_badv.jpg'), 0.9084235429763794),
 (('kast_nk.jpg', '0858_5593-1_id=cp168777_badv.jpg'), 0.9077965021133423),
 (('kast_nk.jpg', '0718_4316_id=cp131435_linz.jpg'), 0.9069232940673828),
 (('kast_nk.jpg', '1085_7988-96_id=cp173776_badv.jpg'), 0.9068341255187988),
 (('kast_nk.jpg', '0107_613-4_id=cp169665_badv.jpg'), 0.9065513014793396),
 (('kast_nk.jpg', '0396_1794-8_id=cp145569_badv.jpg'), 0.9059417843818665),
 (('kast_nk.jpg', '1070_7951-24_id=cp173281_badv.jpg'), 0.9056847095489502),
 (('kast_nk.jpg', '0228_1252-11_id=cp138846_badv.jpg'), 0.9056298732757568),
 (('kast_nk.jpg', '0082_420-6_id=cp160089_badv.jpg'), 0.9056044816970825)]

In [31]:
img = cv2.imread(os.path.join(abs_path_munich, '1083_7988-67_id=cp173744_badv.jpg'), -1)
cv2.imshow("img", img)
cv2.waitKey(0)
cv2.destroyAllWindows()

In [46]:
nk_testset = os.listdir(os.path.join(base_dir, optional_path, "nk_testset"))
munich_testset = os.listdir(os.path.join(base_dir, optional_path, "munich_testset"))

def compute_similarities_testsets(munich_testset, nk_testset, 
                                  munich_path=os.path.join(base_dir, optional_path, "munich_testset"), 
                                  nk_path=os.path.join(base_dir, optional_path, "nk_testset")):
    """
    This function takes four arguments: 
    - munich_testset, which contains 5 grayscaled images from the munich database.
    - nk_testset, which contains 5 grayscaled images from the nk collection API.
    - munich path, the path to the directory of the munich images. 
    - nk_path, the path to the directory of the nk images. 
    
    It then computes the feature descriptors for the munich images and all the \
    nk collection images. Afterwards takes the dot-product to get the dot-product similiarity. 
    It then saves the similarity and the two images as key-value pairs in a dictionary. 
    """
    
    similarities = {}
    for nk_img in nk_testset:
        nk_img_path = os.path.join(nk_path, nk_img)
        for munich_img in munich_testset:
            munich_img_path = os.path.join(munich_path, munich_img)
            nk_img_feature_descriptor = normalize_features(extract_features(nk_img_path).flatten())
            munich_img_feature_descriptor = normalize_features(extract_features(munich_img_path).flatten())
            similarity = np.dot(
                nk_img_feature_descriptor,
                munich_img_feature_descriptor
            )
            similarities[(nk_img, munich_img)] = similarity.item()
        
    return similarities
    
sims = compute_similarities_testsets(munich_testset, nk_testset)

In [49]:
def get_table(sims):
    """
    This function takes the output produced by either the compute_similarities \ 
    or compute_similarities_testsets function, and returns a pandas dataframe/table \
    and also saves it in excel.
    """
    
    data = {}
    rows = []

    for key, value in sims.items():
        if key[0] not in data:
            data[key[0]] = []
        if key[1] not in rows:
            rows.append(key[1])
        value = np.round(value, 3)
        data[key[0]].append(value)
        
    data = {key[:key.rfind(".")]:value for key, value in data.items()}
    rows = [row[:row.rfind(".")] for row in rows]
        
    df = pd.DataFrame(data, index=rows)
    #df.to_excel('output.xlsx')
    return df.T
    
get_table(sims)

Unnamed: 0,stoel_mccp,tafel_mccp,kast_mccp,dressoir_mccp,speeltafel_mccp
kast_nk,0.66,0.662,0.853,0.841,0.687
speeltafel_nk,0.549,0.57,0.554,0.546,0.637
tafel_nk,0.482,0.558,0.518,0.484,0.553
dressoir_nk,0.697,0.689,0.71,0.753,0.591
stoel_nk,0.779,0.686,0.732,0.749,0.59


In [34]:
model2 = torchvision.models.vgg16(pretrained=True)
model2.features[0] = nn.Conv2d(1,64,kernel_size=(3,3), stride=(1,1),padding=(1,1))
model2.classifier[-1] = nn.Linear(model2.classifier[-1].in_features, 2)

In [43]:
import torch.nn.functional as F
import random
import shutil
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from PIL import Image

def select_subset_mun(sample_size, source_dir=abs_path_munich, 
                     target_dir=os.path.join(base_dir, optional_path, "mun_cleaned")):
    mun_imgs = os.listdir(abs_path_munich)
    subset_mun = random.sample(mun_imgs, sample_size)
    for img in subset_mun:
        source_img = os.path.join(source_dir, img)
        target_img = os.path.join(target_dir, img)
        shutil.copyfile(source_img, target_img)
        
def delete_subset_mun(sample_size, dir_path=os.path.join(base_dir, optional_path, "mun_cleaned")):
    imgs = os.listdir(dir_path)
    random_subset = random.sample(imgs, sample_size)
    for img in random_subset:
        img_path = os.path.join(dir_path, img)
        if os.path.isfile(img_path) or os.path.islink(img_path):
            os.unlink(img_path)

class CustomDataset(Dataset):
    def __init__(self, image_paths, labels, transform=None):
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform
        
    def __len__(self):
        return len(self.image_paths)
    
    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        img = cv2.imread(img_path, -1)
        img = cv2.resize(img, (50,50), interpolation=cv2.INTER_AREA)
        if self.transform:
            img = self.transform(img)
        label = self.labels[idx]
        return img, label
    
def load_data(base_dir=base_dir, optional_path=optional_path):
    nk_imgs = os.listdir(os.path.join(base_dir, "nk_collection_meubels_cleaned"))
    mun_imgs = os.listdir(os.path.join(base_dir, optional_path, "mun_cleaned"))
    nk_imgs_paths = [os.path.join(
        base_dir, "nk_collection_meubels_cleaned", img
    ) for img in nk_imgs]
    mun_imgs_paths = [os.path.join(
        base_dir, optional_path, "mun_cleaned", img
    ) for img in mun_imgs]
    
    img_paths = nk_imgs_paths + mun_imgs_paths
    labels = [0] * len(nk_imgs_paths) + [1] * len(mun_imgs_paths)
    
    return img_paths, labels

image_paths, labels = load_data()
train_paths, val_paths, train_labels, val_labels = train_test_split(image_paths, labels, 
                                                                    test_size=0.2, random_state=42)

transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor()
])

train_dataset = CustomDataset(train_paths, train_labels, transform=transform)
val_dataset = CustomDataset(val_paths, val_labels, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model2.parameters(), lr=0.05)

def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=15):
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        for (images, labels) in train_loader:
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            
        model.eval()
        val_loss = 0.0
        correct = 0
        total = 0
        with torch.no_grad():
            for (images, labels) in val_loader:
                outputs = model(images)
                loss = criterion(outputs, labels)
                val_loss += loss.item()
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
                
        print(
            f"Epoch: {epoch+1}/{num_epochs}",
            f"Train Loss: {running_loss/len(train_loader):.4f}",
            f"Validation Loss: {val_loss/len(val_loader):.4f}",
            f"Validation Accuracy: {correct/total:.4f}",
            sep="\n"
        )

train_model(model2, train_loader, val_loader, criterion, optimizer)

Epoch: 1/15
Train Loss: 0.6964
Validation Loss: 0.6925
Validation Accuracy: 0.5310
Epoch: 2/15
Train Loss: 0.6945
Validation Loss: 0.6940
Validation Accuracy: 0.4690
Epoch: 3/15
Train Loss: 0.6941
Validation Loss: 0.6940
Validation Accuracy: 0.4690
Epoch: 4/15
Train Loss: 0.6941
Validation Loss: 0.6980
Validation Accuracy: 0.4690
Epoch: 5/15
Train Loss: 0.6952
Validation Loss: 0.6916
Validation Accuracy: 0.5310
Epoch: 6/15
Train Loss: 0.6960
Validation Loss: 0.7036
Validation Accuracy: 0.4690
Epoch: 7/15
Train Loss: 0.6948
Validation Loss: 0.6950
Validation Accuracy: 0.4690
Epoch: 8/15
Train Loss: 0.6946
Validation Loss: 0.6989
Validation Accuracy: 0.4690
Epoch: 9/15
Train Loss: 0.6961
Validation Loss: 0.6919
Validation Accuracy: 0.5310
Epoch: 10/15
Train Loss: 0.6931
Validation Loss: 0.7018
Validation Accuracy: 0.4690
Epoch: 11/15
Train Loss: 0.6939
Validation Loss: 0.7011
Validation Accuracy: 0.4690
Epoch: 12/15
Train Loss: 0.6941
Validation Loss: 0.6920
Validation Accuracy: 0.5310
E

In [153]:
nk_no_back = os.listdir(os.path.join(base_dir, optional_path, "nk_no_back"))
mc_no_back = os.listdir(os.path.join(base_dir, optional_path, "mc_no_back"))
test_img = os.path.join(base_dir, optional_path, "nk_no_back", nk_no_back[0])

def preprocess_image2(image_path):
    img = cv2.imread(image_path, -1)
    img = cv2.resize(img, (50,50), interpolation=cv2.INTER_AREA)
    preprocess = torchvision.transforms.Compose([
        torchvision.transforms.ToTensor()
    ])
    img = preprocess(img)
    img = img.unsqueeze(0)
    return img

with torch.no_grad():
    img = preprocess_image2(test_img)
    outputs = model2(img)
    outputs = F.softmax(outputs, 1)
    print(outputs)
    pred = torch.argmax(outputs).item()
    label = (pred == 0)
    print(label)
    

tensor([[0.3444, 0.6556]])
False


In [146]:
model2_state_dict = model2.state_dict()
torch.save(model2_state_dict, "classify_vgg16.pth")
model2_state_dict = torch.load("classify_vgg16.pth")
model2.load_state_dict(model2_state_dict)
model2.eval()

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1