# Feature Detection with SIFT and ORB on a Grayscale Image

In [None]:
from expt_utils import *
from matplotlib.patches import Rectangle
import seaborn as sns
from sklearn.metrics import confusion_matrix

In [None]:
def img_to_prewitt_edges(img, kernel_size=(3, 3)):
    """Return the Prewitt gradient-magnitude image of ``img``.

    The image is first smoothed with a box filter of ``kernel_size``, then
    filtered with the two 3x3 Prewitt kernels; the two responses are combined
    per pixel with ``cv.magnitude``.

    Args:
        img: Input image as accepted by ``cv.blur`` and ``cv.filter2D``.
        kernel_size: ``(width, height)`` of the box-blur window.

    Returns:
        Array of gradient magnitudes, computed at ``cv.CV_64F`` depth.
    """
    img_blur = cv.blur(img, kernel_size)
    # Explicit float32 kernels, matching the Roberts and Frei-Chen helpers,
    # so the kernel dtype does not depend on the platform's default int width.
    kernelx = np.array(
        [[1, 1, 1], [0, 0, 0], [-1, -1, -1]], dtype=np.float32)
    kernely = np.array(
        [[-1, 0, 1], [-1, 0, 1], [-1, 0, 1]], dtype=np.float32)
    img_prewittx = cv.filter2D(img_blur, cv.CV_64F, kernelx)
    img_prewitty = cv.filter2D(img_blur, cv.CV_64F, kernely)
    return cv.magnitude(img_prewittx, img_prewitty)


def img_to_rebert_cross_edges(img, kernel_size=(3, 3)):
    """Return the Roberts-cross gradient-magnitude image of ``img``.

    Args:
        img: Input image as accepted by ``cv.blur`` and ``cv.filter2D``.
        kernel_size: ``(width, height)`` of the box blur applied beforehand.

    Returns:
        Per-pixel magnitude of the two 2x2 diagonal-difference responses,
        computed at ``cv.CV_64F`` depth.
    """
    smoothed = cv.blur(img, kernel_size)
    # The two 2x2 kernels difference the image along its two diagonals.
    diagonal_kernels = (
        np.array([[1, 0], [0, -1]], dtype=np.float32),
        np.array([[0, 1], [-1, 0]], dtype=np.float32),
    )
    first_response, second_response = (
        cv.filter2D(smoothed, cv.CV_64F, kernel) for kernel in diagonal_kernels
    )
    return cv.magnitude(first_response, second_response)


def img_to_frei_chen_edges(img, kernel_size=(3, 3)):
    """Return the Frei-Chen gradient-magnitude image of ``img``.

    Args:
        img: Input image as accepted by ``cv.blur`` and ``cv.filter2D``.
        kernel_size: ``(width, height)`` of the box blur applied beforehand.

    Returns:
        Per-pixel magnitude of the two 3x3 kernel responses, computed at
        ``cv.CV_64F`` depth.
    """
    root_two = np.sqrt(2)  # weight of the centre row/column of each kernel
    smoothed = cv.blur(img, kernel_size)

    first_kernel = np.array(
        [[1, root_two, 1], [0, 0, 0], [-1, -root_two, -1]], dtype=np.float32)
    second_kernel = np.array(
        [[-1, 0, 1], [-root_two, 0, root_two], [-1, 0, 1]], dtype=np.float32)

    return cv.magnitude(
        cv.filter2D(smoothed, cv.CV_64F, first_kernel),
        cv.filter2D(smoothed, cv.CV_64F, second_kernel),
    )


def img_to_cragis_edges(img, kernel_size=(3, 3)):
    """Return a gradient-magnitude image built from Sobel derivatives.

    Args:
        img: Input image as accepted by ``cv.GaussianBlur``.
        kernel_size: ``(width, height)`` of the Gaussian blur applied first;
            sigma is passed as 0.

    Returns:
        Per-pixel magnitude of the first-order x and y Sobel responses
        (``ksize=3``), computed at ``cv.CV_64F`` depth.
    """
    smoothed = cv.GaussianBlur(img, kernel_size, 0)
    # (dx, dy) derivative orders: x-derivative first, then y-derivative.
    grad_x, grad_y = (
        cv.Sobel(smoothed, cv.CV_64F, dx, dy, ksize=3)
        for dx, dy in ((1, 0), (0, 1))
    )
    return cv.magnitude(grad_x, grad_y)

In [None]:
# Load the sample training image and the matching ground-truth image.
img_path = f"{DS_DIR}/train/images/00029_1640.jpg"
gnd_trth_path = f"{IMG_DIR}/ground_truth/00029_1640.jpg"

img = cv.imread(img_path)
gnd_trth_gray = cv.imread(gnd_trth_path, 0)  # flag 0: load as single-channel

# cv.imread returns None instead of raising when a file cannot be read;
# fail here with the offending path rather than later inside cvtColor.
for path, loaded in ((img_path, img), (gnd_trth_path, gnd_trth_gray)):
    if loaded is None:
        raise FileNotFoundError(f"could not read image: {path}")

img_gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)

# Run every edge detector on the same grayscale input.
img_canny = img_to_canny_edges(img_gray)
img_mexican_hat = img_to_mexican_hat(img_gray)
img_hanny = img_to_hanny(img_gray)
img_prewitt = img_to_prewitt_edges(img_gray)
img_roberts = img_to_rebert_cross_edges(img_gray)
img_frei_chen = img_to_frei_chen_edges(img_gray)
img_craigs = img_to_cragis_edges(img_gray)

# (title, image) pairs in display order; slot 9 of the 3x3 grid stays empty.
panels = [
    ('Canny', img_canny),
    ('Mexican Hat', img_mexican_hat),
    ('Hanny', img_hanny),
    ('prewitt', img_prewitt),
    ('roberts', img_roberts),
    ('frei_chen', img_frei_chen),
    ('craigs', img_craigs),
    ('img_gray', img_gray),
]

plt.figure(figsize=(20,10))
for position, (title, panel) in enumerate(panels, start=1):
    plt.subplot(3, 3, position)
    plt.imshow(panel, cmap='gray')
    plt.title(title)

# evaluate kernel size

In [None]:
# Odd kernel sizes 1, 3, ..., 31 to sweep over.
kernel_sizes = list(range(1, 32, 2))

# One row per kernel size, one column per edge detector.
result_columns = [
    'Kernel Size',
    'Canny',
    'Mexican Hat',
    'Hanny',
    'Prewitt',
    'Roberts',
    'Frei-Chen',
    'Craigs',
]
results_df = pd.DataFrame(columns=result_columns)

# img_blur = cv.blur(img_gray, (3,3))
# for kernel_size in kernel_sizes:
    
#     img_canny_kernel = img_to_canny_edges(img_gray,(kernel_size,kernel_size))
#     corrcof_canny = np.corrcoef(img_canny_kernel.flatten(), gnd_trth_gray.flatten())[0,1]

#     img_mexican_hat_kernel = img_to_mexican_hat(img_gray,(kernel_size))
#     corrcof_mexican_hat = np.corrcoef(img_mexican_hat_kernel.flatten(), gnd_trth_gray.flatten())[0,1]

#     img_hanny_kernel = img_to_hanny(img_gray,(kernel_size,kernel_size))
#     corrcof_hanny = np.corrcoef(img_hanny_kernel.flatten(), gnd_trth_gray.flatten())[0,1]
    
#     img_prewitt_kernel = img_to_prewitt_edges(img_gray,(kernel_size,kernel_size))
#     corrcof_prewitt = np.corrcoef(img_prewitt_kernel.flatten(), gnd_trth_gray.flatten())[0,1]

#     img_roberts_kernel = img_to_rebert_cross_edges(img_gray,(kernel_size,kernel_size))
#     corrcof_roberts = np.corrcoef(img_roberts_kernel.flatten(), gnd_trth_gray.flatten())[0,1]

#     img_frei_chen_kernel = img_to_frei_chen_edges(img_gray,(kernel_size,kernel_size))
#     corrcof_frei_chen = np.corrcoef(img_frei_chen_kernel.flatten(), gnd_trth_gray.flatten())[0,1]

#     img_craigs_kernel = img_to_cragis_edges(img_gray,(kernel_size,kernel_size))
#     corrcof_craigs = np.corrcoef(img_craigs_kernel.flatten(), gnd_trth_gray.flatten())[0,1]
    
#     results_df.loc[len(results_df)] = [kernel_size, corrcof_canny, corrcof_mexican_hat, corrcof_hanny, corrcof_prewitt, corrcof_roberts, corrcof_frei_chen, corrcof_craigs]

# results_df
    
    # plt.figure(figsize=(10,8))
    # plt.imshow(detected_edges,cmap='gray')
    # plt.title(f"figure with size : {kernel_size}")

# rescale the image

In [None]:
# def mysize(image):
#     resizing_percentages = [0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2]

#     for percentage in resizing_percentages:
#         new_width = int(image.shape[1] * percentage)
#         new_height = int(image.shape[0] * percentage)

#         resized_img = cv.resize(image, (new_width, new_height))
#         resized_img_ground_truth = cv.resize(img_ground_truth_gray, (new_width, new_height))
#         corrcof = np.corrcoef(resized_img.flatten(), resized_img_ground_truth.flatten())[0,1]
#         print(percentage, corrcof)

# mysize(img_mexican_hat)

# print(np.unique(img_mexican_hat))
# img_gray = cv.resize(img_gray, (int(img_gray.shape[1] * 0.4), int(img_gray.shape[0] * 0.4)))
# img_mexican_hat = cv.resize(img_mexican_hat, (int(img_mexican_hat.shape[1] * 0.4), int(img_mexican_hat.shape[0] * 0.4)))
# img_ground_truth_gray = cv.resize(img_ground_truth_gray, (int(img_ground_truth_gray.shape[1] * 0.4), int(img_ground_truth_gray.shape[0] * 0.4)))
# plt.imshow(img_mexican_hat,cmap='gray')

In [None]:
# img_mexican_hat = cv.normalize(img_mexican_hat, None, alpha=0, beta=255, norm_type=cv.NORM_MINMAX, dtype=cv.CV_8U)
# sns.histplot(x=np.unique(gnd_trth_gray),bins=100)
# _, binary_thresholded_image = cv.threshold(img_mexican_hat, 80, 255, cv.THRESH_BINARY)
# # plt.imshow(binary_thresholded_image,cmap='gray')
# print(np.unique(img_mexican_hat))

# Feature Matching

In [None]:
# feature detection
sift = cv.SIFT_create()

keypoints_gray, descriptors_gray = sift.detectAndCompute(gnd_trth_gray, None)
keypoints_edge, descriptors_edge = sift.detectAndCompute(img_mexican_hat, None)

# Descriptors can come back as None when an image yields no keypoints;
# fail with a clear message instead of a cryptic matcher error.
if descriptors_gray is None or descriptors_edge is None:
    raise ValueError("SIFT found no keypoints in at least one image; nothing to match")

# object matching: for each ground-truth descriptor, fetch its two nearest
# descriptors in the edge image
bf = cv.BFMatcher()

matches = bf.knnMatch(descriptors_gray, descriptors_edge, k=2)

# Ratio test: keep the best candidate only when its distance is below
# 0.9 x the distance of the runner-up. Entries with fewer than two
# neighbours cannot be tested and are skipped.
good_matches = [
    pair[0]
    for pair in matches
    if len(pair) == 2 and pair[0].distance < 0.9 * pair[1].distance
]
total_correct = len(good_matches)

img_matches = cv.drawMatches(gnd_trth_gray, keypoints_gray, img_mexican_hat, keypoints_edge, good_matches, None, flags=cv.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS)

ground_truth_keypoints = total_correct
predicted_keypoints = len(keypoints_edge)

ground_truth_labels = np.zeros(len(matches))
predicted_labels = np.zeros(len(matches))
# A query descriptor is "predicted positive" when it passed the ratio test.
predicted_labels[np.array([m.queryIdx for m in good_matches], dtype=int)] = 1
# NOTE(review): this marks the first len(good_matches) entries as positive
# regardless of which queries actually matched, so it is not an independent
# ground truth. Behaviour kept as-is; confirm the intended labelling.
ground_truth_labels[:len(good_matches)] = 1

conf_matrix = confusion_matrix(ground_truth_labels, predicted_labels)

# "accuracy" here is good matches divided by keypoints found in the edge image.
print(f"accuracy: {ground_truth_keypoints / predicted_keypoints :.2%}")
print("Confusion Matrix:")
print(conf_matrix)
print("All Truth Keypoints", len(keypoints_gray))
print("All Estimated Keypoints", len(keypoints_edge))
plt.figure(figsize=(20,5))
plt.imshow(img_matches)

In [None]:
# feature detection

orb = cv.ORB_create()

keypoints_gray, descriptors_gray = orb.detectAndCompute(gnd_trth_gray, None)
keypoints_edge, descriptors_edge = orb.detectAndCompute(img_mexican_hat, None)

# Descriptors can come back as None when an image yields no keypoints;
# fail with a clear message instead of a cryptic matcher error.
if descriptors_gray is None or descriptors_edge is None:
    raise ValueError("ORB found no keypoints in at least one image; nothing to match")

# feature matching
# ORB descriptors are binary strings, so compare them with Hamming distance
# rather than the matcher's default L2 norm.
bf = cv.BFMatcher(cv.NORM_HAMMING)

matches = bf.knnMatch(descriptors_gray, descriptors_edge, k=2)

# Ratio test: keep the best candidate only when its distance is below
# 0.9 x the distance of the runner-up. Entries with fewer than two
# neighbours cannot be tested and are skipped.
good_matches = [
    pair[0]
    for pair in matches
    if len(pair) == 2 and pair[0].distance < 0.9 * pair[1].distance
]
total_correct = len(good_matches)

img_matches = cv.drawMatches(gnd_trth_gray, keypoints_gray, img_mexican_hat, keypoints_edge, good_matches, None, flags=cv.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS)

# Recompute the counts for THIS detector; otherwise the accuracy printed
# below would silently reuse whatever an earlier cell left in these names.
ground_truth_keypoints = total_correct
predicted_keypoints = len(keypoints_edge)

ground_truth_labels = np.zeros(len(matches))
predicted_labels = np.zeros(len(matches))
# A query descriptor is "predicted positive" when it passed the ratio test.
predicted_labels[np.array([m.queryIdx for m in good_matches], dtype=int)] = 1
# NOTE(review): this marks the first len(good_matches) entries as positive
# regardless of which queries actually matched, so it is not an independent
# ground truth. Behaviour kept as-is; confirm the intended labelling.
ground_truth_labels[:len(good_matches)] = 1

conf_matrix = confusion_matrix(ground_truth_labels, predicted_labels)

# "accuracy" here is good matches divided by keypoints found in the edge image.
print(f"accuracy: {ground_truth_keypoints / predicted_keypoints :.2%}")
print("Confusion Matrix:")
print(conf_matrix)
print("All Truth Keypoints", len(keypoints_gray))
print("All Estimated Keypoints", len(keypoints_edge))
plt.figure(figsize=(20,5))
plt.imshow(img_matches)

We use the matcher's `match()` method to find the matches between the descriptors of the two sets of keypoints.
This method returns a list of matches, where each match contains information about the corresponding keypoints in the two sets and the distance between their descriptors.

Optionally, we can perform a k-nearest-neighbors (KNN) search instead of simple matching by using the matcher's `knnMatch()` method: for each descriptor in the first set, `knnMatch()` finds the k nearest neighbors in the second set of descriptors.

In [None]:
# Collect the pixel coordinates of each good match: the query keypoint from
# keypoints_gray and its partner (train keypoint) from keypoints_edge.
sourse_points = [keypoints_gray[match.queryIdx].pt for match in good_matches]
distination_points = [keypoints_edge[match.trainIdx].pt for match in good_matches]

# Shape (N, 1, 2) float32 point arrays.
src_pts = np.float32(sourse_points).reshape(-1, 1, 2)
dst_pts = np.float32(distination_points).reshape(-1, 1, 2)

# Axis-aligned bounding box around all matched destination points.
dst_xs = dst_pts[:, 0, 0]
dst_ys = dst_pts[:, 0, 1]
x_min, x_max = min(dst_xs), max(dst_xs)
y_min, y_max = min(dst_ys), max(dst_ys)

rectangle = Rectangle(
    (x_min, y_min),
    (x_max - x_min),
    (y_max - y_min),
    linewidth=2,
    edgecolor='r',
    facecolor='none',
)

# NOTE(review): the box uses coordinates from keypoints_edge but is drawn on
# img_matches, the side-by-side canvas produced by cv.drawMatches. Confirm
# whether an x-offset by the first image's width is intended.
fig, ax = plt.subplots(figsize=(20, 5))
ax.imshow(img_matches, cmap='gray')
ax.add_patch(rectangle)

# fig, ax = plt.subplots(figsize=(20, 5))
# ax.imshow(img_matches, cmap='gray')

# # Iterate over each point in dst_pts
# for pt in dst_pts:
#     x, y = pt[0]
#     # Define the width and height of the rectangle (adjust as needed)
#     rect_width = 10
#     rect_height = 10
#     # Calculate the coordinates of the top-left corner of the rectangle
#     x_min = x - rect_width / 2
#     y_min = y - rect_height / 2
#     # Create and add the rectangle patch to the plot
#     rectangle = Rectangle((x_min, y_min), rect_width, rect_height, linewidth=1, edgecolor='r', facecolor='none')
#     ax.add_patch(rectangle)

In [None]:
fig, ax = plt.subplots(figsize=(20, 5))
ax.imshow(img_matches, cmap='gray')

# Every matched destination point gets a fixed-size 10x10 square marker.
rect_width = 10
rect_height = 10

for x, y in dst_pts[:, 0]:
    # Rectangle takes a corner as its anchor, so shift by half the size
    # to centre the square on the point.
    x_min = x - rect_width / 2
    y_min = y - rect_height / 2
    rectangle = Rectangle(
        (x_min, y_min),
        rect_width,
        rect_height,
        linewidth=1,
        edgecolor='r',
        facecolor='none',
    )
    ax.add_patch(rectangle)

In [None]:

# A homography needs at least four point correspondences.
if len(src_pts) < 4:
    raise ValueError(
        f"need at least 4 good matches to estimate a homography, got {len(src_pts)}"
    )

# Estimate the src_pts -> dst_pts mapping, using RANSAC to reject outliers.
homography, _ = cv.findHomography(src_pts, dst_pts, cv.RANSAC)
if homography is None:
    raise ValueError("cv.findHomography could not estimate a homography from the matches")

# Project the four corners of the edge image through the homography.
# NOTE(review): the homography maps src_pts (keypoints_gray) to dst_pts
# (keypoints_edge), yet the corners come from img_mexican_hat and the box is
# drawn on gnd_trth_gray. Confirm the intended direction.
h, w = img_mexican_hat.shape[:2]
pts = np.float32([[0, 0], [0, h - 1], [w - 1, h - 1], [w - 1, 0]]).reshape(-1, 1, 2)
transformed_pts = cv.perspectiveTransform(pts, homography)

# Plain Python ints (truncated, as np.int32 did) so cv.rectangle always
# accepts the corner tuples.
min_x, min_y = (int(v) for v in transformed_pts.min(axis=0).ravel())
max_x, max_y = (int(v) for v in transformed_pts.max(axis=0).ravel())

# Draw on a copy so the ground-truth image itself is left unmodified.
result_image = gnd_trth_gray.copy()
rectangle = cv.rectangle(result_image, (min_x, min_y), (max_x, max_y), (255, 255, 255), 1)

plt.imshow(rectangle ,cmap='gray')