In [1]:
import cv2
import numpy as np
from sklearn.cluster import KMeans

import os
from typing import List

In [2]:
# path_images = r'C:\Fotos y videos\test bovw'
path_images = r'C:\TRABAJO\Willdom\Sr Machine Learning Engineer\Challenge'
working_resolution = [512,512]
vocabulary_size = 64

# Load images

In [3]:
# Function to load images massively
def load_images_from_folder(folder: str, resolution: list) -> list:
    images = {}
    for filename in os.listdir(folder):
        if filename.split('.')[-1] == 'jpg':
            path = os.path.join(folder, filename)
            img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
            img = cv2.resize(img, resolution)
            if img is not None:
                images[filename] = img
    return images

In [4]:
images = load_images_from_folder(path_images, working_resolution)
cv2.namedWindow('image', cv2.WINDOW_GUI_EXPANDED)
for filename in images.keys():
    cv2.imshow('image', images[filename])
    cv2.waitKey(0)
cv2.destroyAllWindows()

# Extract global descriptors

## Create a dictionary of descriptors with SIFT

In [5]:
def sift_dictionary(images: dict) -> list:
    sift = cv2.SIFT_create()
    visual_dictionary = []
    descriptors_by_image = {}
    for name, img in images.items():
        kp, des = sift.detectAndCompute(img, None)
        print(f'image {name} has {len(des)} descriptors')
        visual_dictionary.extend(des)
        descriptors_by_image[name] = des
    return visual_dictionary, descriptors_by_image

In [6]:
visual_dictionary, descriptors_by_image = sift_dictionary(images)
print(f'The visual dictionary has {len(visual_dictionary)} descriptors')

image im1.jpg has 2920 descriptors
image im2.jpg has 183 descriptors
image im3.jpg has 504 descriptors
image IMG_20220203_152013349.jpg has 1545 descriptors
image IMG_20220203_155830590.jpg has 986 descriptors
image IMG_20220207_131922403.jpg has 656 descriptors
image IMG_20220214_124603956.jpg has 1594 descriptors
image IMG_20220214_133333313.jpg has 1986 descriptors
image IMG_20220214_134233140.jpg has 1742 descriptors
image IMG_20220214_144907525.jpg has 1572 descriptors
image IMG_20220215_144642554.jpg has 1961 descriptors
image IMG_20220216_143433424.jpg has 1739 descriptors
image IMG_20220216_145949270.jpg has 1467 descriptors
image IMG_20220216_171549908.jpg has 1826 descriptors
image IMG_20220218_153152657.jpg has 1647 descriptors
image IMG_20220330_143611689.jpg has 1531 descriptors
image IMG_20220330_143707804.jpg has 495 descriptors
image IMG_20220330_143722955.jpg has 1988 descriptors
image IMG_20221013_095154171.jpg has 926 descriptors
image IMG_20221013_095209402.jpg has 

## Create a vocabulary with k-means quantization

In [7]:
def kmeans_visual_vocabulary(k: int, visual_dictionary: List) -> tuple:
    kmeans = KMeans(n_init=10, n_clusters=64)
    kmeans.fit(visual_dictionary)
    visual_words = kmeans.cluster_centers_
    word_classifier = lambda x: kmeans.predict(np.array(x, dtype='float')) 
    return visual_words, word_classifier

In [8]:
visual_words, word_classifier = kmeans_visual_vocabulary(vocabulary_size, visual_dictionary)
print(f'The visual vocabulary has {len(visual_words)} visual words')

The visual vocabulary has 64 visual words


## Get the image global descriptors

### Get the visual words in each image by using kmeans predictor

In [9]:
def get_words_by_image(descriptors_by_image: dict, word_classifier) -> dict:
    words_by_image = {}
    for name, descriptors in descriptors_by_image.items():
        words_by_image[name] = word_classifier(descriptors)
    return words_by_image



In [10]:
words_by_image = get_words_by_image(descriptors_by_image, word_classifier)

print(descriptors_by_image[list(descriptors_by_image.keys())[0]].shape)
print(words_by_image[list(descriptors_by_image.keys())[0]].shape)

(2920, 128)
(2920,)


### Create a histogram of visual words for each image

In [11]:
def get_global_descriptor_by_image(words_by_image: dict, vocabulary_size: int) -> dict:
    global_descriptor_by_image = {}
    for name, words in words_by_image.items():
        global_descriptor_by_image[name] = np.array(
            np.histogram(words, range=(-.5,vocabulary_size+.5), bins=vocabulary_size)[0],
        dtype='float32')
    return global_descriptor_by_image


In [12]:
global_descriptor_by_image = get_global_descriptor_by_image(words_by_image, vocabulary_size)

print(global_descriptor_by_image[list(global_descriptor_by_image.keys())[0]])

[ 37.  10.  22.  53.  43.  23.  22.  40.  44.  38.  34.  22.  26.  15.
  69.  18.   0.  23.  69.  41.  60.  30.  41.  39.  92.  31.  39.  42.
  23.  39.  45. 103. 107.  69.  21.  28. 133.  20.  54.  21.  49.  52.
  30.  32.  29.  22.  49.  83.  30.  89.  21.  43. 110.  43.  77.  63.
  31.  68.  74.  82.  45.  79.  33.   0.]


# Query

## Definition of the query image and result size

In [13]:
query_image_name = list(images.keys())[3]
query_image_name = 'IMG_20220203_152013349.jpg'
result_size = 4

win_name = f"Query image: {query_image_name}"
cv2.namedWindow(win_name)
cv2.imshow(win_name, images[query_image_name])
cv2.waitKey(0)
cv2.destroyAllWindows()

## Results


In [14]:
# Create a matcher
bf = cv2.FlannBasedMatcher()

# global_descriptor_by_image to array of descriptors
global_descriptors = np.zeros((len(images), vocabulary_size), 
                              dtype=global_descriptor_by_image[query_image_name].dtype)
i = 0
for name, global_descriptor in global_descriptor_by_image.items():
    global_descriptors[i, :] = global_descriptor_by_image[name]
    i += 1

# Match descriptors
matches = bf.knnMatch(global_descriptor_by_image[query_image_name].reshape((1, -1)), global_descriptors, k=result_size)[0]
matches_idx = [match.trainIdx for match in matches]
print(matches_idx)
matches_names = [list(images.keys())[i] for i in matches_idx]
print(matches_names)


for i in range(result_size):
    name = matches_names[i]
    cv2.imshow(f'Query result: {name} - Distance: {matches[i].distance}', cv2.resize(cv2.imread(name), working_resolution))
cv2.waitKey(0)
cv2.destroyAllWindows()


[3, 9, 8, 11]
['IMG_20220203_152013349.jpg', 'IMG_20220214_144907525.jpg', 'IMG_20220214_134233140.jpg', 'IMG_20220216_143433424.jpg']
