## Step A

In [None]:
# No category or module filter is given, so this silences every warning
# raised in the notebook from this cell onwards.
import warnings
warnings.filterwarnings('ignore')

In [None]:
import numpy as np
import cv2
from matplotlib import pyplot as plt
import collections
import os

The `compute_all_keypoints` function computes the keypoints and descriptors of every query and train image and stores them, together with each image's shape, in a dictionary keyed by image name, so that they can be easily accessed later.

In [None]:
def compute_all_keypoints(query_imgs, train_imgs, sift):
    """Detect keypoints and descriptors for every query and train image.

    Parameters
    ----------
    query_imgs : iterable of str
        Model image names; each one is read from ``models/<name>.jpg``.
    train_imgs : iterable of str
        Scene image names; each one is read from ``scenes/<name>.png``.
    sift : object
        Feature extractor exposing ``detectAndCompute(image, mask)``.

    Returns
    -------
    dict
        Maps every image name to ``{'kp': keypoints, 'des': descriptors,
        'shape': shape of the grayscale image}``.

    Raises
    ------
    FileNotFoundError
        If an image file cannot be read.
    """

    def describe(path):
        # cv2.imread does not raise on a missing or unreadable file, it
        # returns None; fail here with the offending path instead of later.
        gray = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        if gray is None:
            raise FileNotFoundError('Cannot read image: ' + path)
        kp, des = sift.detectAndCompute(gray, None)
        return {'kp': kp, 'des': des, 'shape': gray.shape}

    img_dict = {}

    for img in query_imgs:
        img_dict[img] = describe('models/' + img + '.jpg')

    for img in train_imgs:
        img_dict[img] = describe('scenes/' + img + '.png')

    return img_dict

The `apply_ratio_test` function takes all the matches found between a query image and a train image, keeps only the good ones according to the usual ratio test, and stores them in a dictionary that maps the index of each query keypoint to the index of its matched train keypoint.

In [None]:
def apply_ratio_test(all_matches, lowe_coeff=None):
    """Keep the matches that pass the ratio test.

    Parameters
    ----------
    all_matches : iterable of sequences
        For each query descriptor, its nearest neighbours ordered from best
        to worst. Each neighbour exposes ``distance``, ``queryIdx`` and
        ``trainIdx``.
    lowe_coeff : float, optional
        Ratio threshold. When omitted, the module-level ``LOWE_COEFF`` is used.

    Returns
    -------
    dict
        Maps query keypoint index -> train keypoint index for every match
        whose best distance is below ``lowe_coeff`` times the second-best one.
    """

    if lowe_coeff is None:
        lowe_coeff = LOWE_COEFF

    # map of matches kp_query_idx -> kp_train_idx
    good_matches = {}

    for neighbours in all_matches:
        # Fewer than two neighbours can be returned (e.g. the train image has
        # a single descriptor); the ratio is undefined, so skip the entry.
        if len(neighbours) < 2:
            continue
        best, second = neighbours[0], neighbours[1]
        if best.distance < lowe_coeff * second.distance:
            good_matches[best.queryIdx] = best.trainIdx

    return good_matches

The `check_matches` function sorts the query images by decreasing number of good matches and runs a series of tests on each of them, checking the geometric arrangement and the color consistency.

In [None]:
def check_matches(global_matches, train_img, img_dict):
    """Validate the candidate products of one scene with geometric and color tests.

    Candidates are visited by decreasing number of good matches. A candidate
    is accepted when all of the following hold:

    * it has more than ``MIN_MATCH_COUNT`` good matches;
    * a homography from the model to the scene can be estimated;
    * the Euclidean distance between the mean BGR color of the model and the
      mean BGR color of the scene region it maps to is below ``COLOR_T``;
    * its center does not fall inside an already accepted product, and no
      accepted product's center falls inside its own box.

    Parameters
    ----------
    global_matches : dict
        Maps query name -> ``(number of good matches, {query_kp_idx: train_kp_idx})``.
    train_img : str
        Scene name; the color image is read from ``scenes/<name>.png``.
    img_dict : dict
        Per-image data with the keys ``'kp'`` and ``'shape'``.

    Returns
    -------
    dict
        Maps each accepted query name to the float32 array of shape (4, 1, 2)
        holding its projected corners in the order top-left, bottom-left,
        bottom-right, top-right.

    Raises
    ------
    FileNotFoundError
        If the scene image or a model image cannot be read.
    """

    candidates = sorted(global_matches.items(), key=lambda item: item[1][0], reverse=True)

    recognised = {}

    train_file = 'scenes/' + train_img + '.png'
    train_bgr = cv2.imread(train_file)
    if train_bgr is None:
        raise FileNotFoundError('Cannot read image: ' + train_file)

    train_kp = img_dict[train_img]['kp']
    scene_h, scene_w = img_dict[train_img]['shape'][:2]

    for k, v in candidates:

        n_good, matches = v[0], v[1]

        # Candidates are sorted by match count, so once one is at or below
        # the threshold every remaining one is too.
        if n_good <= MIN_MATCH_COUNT:
            break

        # A homography needs at least four correspondences.
        if len(matches) < 4:
            continue

        query_file = 'models/' + k + '.jpg'
        query_bgr = cv2.imread(query_file)
        if query_bgr is None:
            raise FileNotFoundError('Cannot read image: ' + query_file)

        src_pts = np.float32([img_dict[k]['kp'][p].pt for p in matches.keys()]).reshape(-1, 1, 2)
        dst_pts = np.float32([train_kp[p].pt for p in matches.values()]).reshape(-1, 1, 2)
        M, _ = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0)

        # findHomography returns None when no model can be estimated.
        if M is None:
            continue

        h, w = query_bgr.shape[:2]
        pts = np.float32([[0, 0], [0, h - 1], [w - 1, h - 1], [w - 1, 0]]).reshape(-1, 1, 2)
        dst = cv2.perspectiveTransform(pts, M)

        # A degenerate homography can project corners to inf/NaN, which
        # cannot be converted to pixel coordinates.
        if not np.all(np.isfinite(dst)):
            continue

        center = tuple((dst[0, 0, i] + dst[1, 0, i] + dst[2, 0, i] + dst[3, 0, i]) / 4 for i in (0, 1))

        # Axis-aligned box from the averaged edges, clamped to the scene.
        # The upper bounds are also clamped to 0: a negative slice end would
        # wrap around and crop the wrong region.
        x_min = int(max((dst[0, 0, 0] + dst[1, 0, 0]) / 2, 0))
        y_min = int(max((dst[0, 0, 1] + dst[3, 0, 1]) / 2, 0))
        x_max = int(max(min((dst[2, 0, 0] + dst[3, 0, 0]) / 2, scene_w), 0))
        y_max = int(max(min((dst[1, 0, 1] + dst[2, 0, 1]) / 2, scene_h), 0))

        train_crop = train_bgr[y_min:y_max, x_min:x_max]

        # An empty region has no mean color to compare against.
        if train_crop.size == 0:
            continue

        query_color = query_bgr.mean(axis=0).mean(axis=0)
        train_color = train_crop.mean(axis=0).mean(axis=0)
        color_diff = np.sqrt(np.sum((query_color - train_color) ** 2))

        if not color_diff < COLOR_T:
            continue

        overlaps = False
        for corners in recognised.values():
            r_center = tuple((corners[0, 0, i] + corners[1, 0, i] + corners[2, 0, i] + corners[3, 0, i]) / 4 for i in (0, 1))

            center_in_other = (
                min(corners[0, 0, 0], corners[1, 0, 0]) < center[0] < max(corners[2, 0, 0], corners[3, 0, 0])
                and min(corners[0, 0, 1], corners[3, 0, 1]) < center[1] < max(corners[1, 0, 1], corners[2, 0, 1])
            )
            other_in_box = x_min < r_center[0] < x_max and y_min < r_center[1] < y_max

            if center_in_other or other_in_box:
                overlaps = True
                break

        if not overlaps:
            recognised[k] = dst

    return recognised

The ```print_matches``` function takes all the recognised images and prints their details, i.e. their position, width, and height.

In [None]:
def print_matches(train_img, query_imgs, recognised, true_imgs, verbose):
    """Print a per-product report for one scene.

    For every name in ``query_imgs`` the number of found instances (0 or 1,
    depending on whether the name is a key of ``recognised``) is compared with
    the expected number (0 or 1, depending on membership in
    ``true_imgs[train_img]``). Disagreements are printed in bold; agreements
    are printed when the product was found or when ``verbose`` is true. For a
    found product the center, width and height of its box are printed too.
    """

    bold_on, bold_off = '\033[1m', '\033[0m'

    def headline(name, found, expected):
        return 'Product ' + name + ' – ' + str(found) + '/' + str(expected) + ' instances found'

    print('Scene: ' + train_img + '\n')

    for query_img in query_imgs:

        found = 1 if query_img in recognised else 0
        expected = 1 if query_img in true_imgs[train_img] else 0

        if found != expected:
            print(bold_on + headline(query_img, found, expected) + bold_off)
        elif found == 1 or verbose == True:
            print(headline(query_img, found, expected))

        if found != 1:
            continue

        # corners of the projected box: top-left, bottom-left, bottom-right, top-right
        dst = recognised[query_img]
        tl, bl, br, tr = dst[0, 0], dst[1, 0], dst[2, 0], dst[3, 0]

        center = tuple(int((tl[i] + bl[i] + br[i] + tr[i]) / 4) for i in (0, 1))
        w = int(((tr[0] - tl[0]) + (br[0] - bl[0])) / 2)
        h = int(((bl[1] - tl[1]) + (br[1] - tr[1])) / 2)

        print(f'\tPosition: {center}\tWidth: {w}\tHeight: {h}')

The `draw_matches` function draws on the train image the bounding box of each recognised product, obtained through its homography, together with the name of the corresponding query image.

In [None]:
def draw_matches(recognised, train_img, color):
    """Draw the recognised products on a darkened copy of the scene and show it.

    Parameters
    ----------
    recognised : dict
        Maps query name -> array of projected corners indexed as
        ``[corner, 0, xy]`` (top-left, bottom-left, bottom-right, top-right).
    train_img : str
        Scene name; the image is read from ``scenes/<name>.png``.
    color : bool
        When true the scene is drawn in color and the result is also written
        to ``output/step_A/<name>.png``; otherwise it is drawn in grayscale
        and only displayed.

    Raises
    ------
    FileNotFoundError
        If the scene image cannot be read.
    """

    train_file = 'scenes/' + train_img + '.png'
    flag = cv2.IMREAD_COLOR if color else cv2.IMREAD_GRAYSCALE
    scene = cv2.imread(train_file, flag)
    if scene is None:
        raise FileNotFoundError('Cannot read image: ' + train_file)

    # Halve the brightness so the green overlays stand out. Integer halving of
    # the 8-bit image gives the same values as scaling each channel by 0.5 and
    # truncating, without a Python loop over every pixel.
    if color:
        train_rgb = cv2.cvtColor(scene, cv2.COLOR_BGR2RGB) // 2
    else:
        train_rgb = cv2.cvtColor(scene // 2, cv2.COLOR_GRAY2RGB)

    font = cv2.FONT_HERSHEY_SIMPLEX

    for k, v in recognised.items():

        train_rgb = cv2.polylines(train_rgb, [np.int32(v)], True, (0, 255, 0), 3, cv2.LINE_AA)

        # label anchor: 25% along the top edge, 67% down the left edge
        label_pos = (int((v[3, 0, 0] - v[0, 0, 0]) * 0.25 + v[0, 0, 0]),
                     int((v[1, 0, 1] - v[0, 0, 1]) * 0.67 + v[0, 0, 1]))
        cv2.putText(train_rgb, k, label_pos, font, 5, (0, 255, 0), 10, cv2.LINE_AA)

    plt.imshow(train_rgb)
    plt.show()

    if color:
        # makedirs also creates the parent 'output' directory and does not
        # fail when the directory already exists.
        os.makedirs('output/step_A/', exist_ok=True)

        cv2.imwrite('output/step_A/' + train_img + '.png', cv2.cvtColor(train_rgb, cv2.COLOR_RGB2BGR))

The ```step_A``` function takes the lists of query and train images and performs the product recognition.

In [None]:
def step_A(query_imgs, train_imgs, true_imgs, verbose, color):
    """Recognise the query products in every train scene and report the results.

    For each scene, every query image is matched against it, the matches are
    filtered with ``apply_ratio_test``, the candidates are validated with
    ``check_matches``, and the outcome is printed with ``print_matches`` and
    drawn with ``draw_matches``.

    Parameters
    ----------
    query_imgs : list of str
        Names of the model images.
    train_imgs : list of str
        Names of the scene images.
    true_imgs : dict
        Maps scene name -> collection of the query names expected in it.
    verbose : bool
        Forwarded to ``print_matches``.
    color : bool
        Forwarded to ``draw_matches``.
    """

    # SIFT is part of the main cv2 module since OpenCV 4.4; older builds only
    # expose it through the contrib module.
    if hasattr(cv2, 'SIFT_create'):
        sift = cv2.SIFT_create()
    else:
        sift = cv2.xfeatures2d.SIFT_create()

    bf = cv2.BFMatcher()

    img_dict = compute_all_keypoints(query_imgs, train_imgs, sift)

    for train_img in train_imgs:

        des_train = img_dict[train_img]['des']

        global_matches = {}

        for query_img in query_imgs:

            des_query = img_dict[query_img]['des']

            # Descriptors are None when no keypoint was detected, and the
            # ratio test needs two neighbours per query descriptor.
            if des_query is None or des_train is None or len(des_train) < 2:
                good_matches = {}
            else:
                all_matches = bf.knnMatch(des_query, des_train, k=2)
                good_matches = apply_ratio_test(all_matches)

            global_matches[query_img] = (len(good_matches), good_matches)

        recognised = check_matches(global_matches, train_img, img_dict)

        print_matches(train_img, query_imgs, recognised, true_imgs, verbose)

        draw_matches(recognised, train_img, color)

        print('\n')

Parameters:

In [None]:
# Ratio-test coefficient: a match is kept when its distance is below
# LOWE_COEFF times the distance of the second-best match.
LOWE_COEFF = 0.5
# A query image is considered only when it has more than this many good matches.
MIN_MATCH_COUNT = 30
# The Euclidean distance between the mean BGR color of the model and that of
# the matched scene region must be below this value.
COLOR_T = 50

In [None]:
# Model images, read from 'models/<name>.jpg'
query_imgs = ['0', '1', '11', '19', '24', '25', '26']
# Scene images, read from 'scenes/<name>.png'
train_imgs = ['e1', 'e2', 'e3', 'e4', 'e5']
# Ground truth: for each scene, the query images expected to be found in it
true_imgs = {
    'e1': {'0', '11'},
    'e2': {'24', '25', '26'},
    'e3': {'0', '1', '11'},
    'e4': {'0', '11', '25', '26'},
    'e5': {'19', '25'},
}

In [None]:
# verbose=False hides the true negatives (products that are neither expected nor found)
# color=True draws every scene in color instead of grayscale and saves it under output/step_A/,
# but it can be noticeably slower
step_A(query_imgs, train_imgs, true_imgs, verbose=False, color=False)