### Import libraries 

In [None]:
import cv2 as OpenCV
import numpy as np
import os
from matplotlib import pyplot as plt

### Helper functions


In [None]:
def displayImage(image, title = None):
    """Show an image with matplotlib, optionally under a title.

    Two-dimensional arrays are drawn with the gray colormap; arrays with any
    other number of dimensions are handed to ``plt.imshow`` with its default.

    Args:
        image: Array-like image exposing an ``ndim`` attribute.
        title: Optional text placed above the figure.
    """
    # Select the colormap per call: plt.gray() would also change the default
    # colormap used by every later plot in the session.
    colorMap = "gray" if image.ndim == 2 else None
    plt.imshow(image, cmap = colorMap)
    if title is not None:
        plt.title(title)
    plt.show()

# OpenCV uses the BGR channel order by default when reading images;
# to get RGB images we need to convert the format.
def readImageRGB(imagePath):
    """Read the image at ``imagePath`` and return it in RGB channel order.

    Args:
        imagePath: Path of the image file to load.

    Raises:
        OSError: If OpenCV could not read an image from ``imagePath``.
    """
    bgrImage = OpenCV.imread(imagePath)
    # imread reports failure by returning None rather than raising, which
    # would otherwise surface as an opaque error inside cvtColor.
    if bgrImage is None:
        raise OSError(f"Could not read image (missing or unreadable): {imagePath}")
    return OpenCV.cvtColor(bgrImage, OpenCV.COLOR_BGR2RGB)

def RGBtoGRAY(images):
    """Return a list with the grayscale version of each RGB image, in order."""
    grayscale = []
    for rgbImage in images:
        grayscale.append(OpenCV.cvtColor(rgbImage, OpenCV.COLOR_RGB2GRAY))
    return grayscale

### Read all images from file into an array: images

In [None]:
def readImages(folderPath):
    """Load every ``.jpg`` file in ``folderPath`` as an RGB image.

    Files are processed in sorted file-name order, so the position of an
    image in the returned list is stable between runs.

    Args:
        folderPath: Directory that contains the images.

    Returns:
        A list with one ``readImageRGB`` result per ``.jpg`` file.
    """
    return [
        readImageRGB(os.path.join(folderPath, file))
        for file in sorted(os.listdir(folderPath))
        # Match the extension only: a substring test would also pick up
        # names such as "photo.jpg.txt".
        if file.endswith(".jpg")
    ]

# Load the data set once; the cells below index into this list by position.
images = readImages("../src/images/snow-man/")


### Get gray images

In [None]:
# Grayscale copies of `images`, in the same order; the keypoint helpers read from this list.
grayImages = RGBtoGRAY(images)

In [None]:
# Visual sanity check: show the first image in color and in grayscale.
displayImage(images[0])
displayImage(grayImages[0])

## Feature Extraction
### Get **SIFT** keypoints & descriptors on single image

In [None]:
# Single SIFT instance shared by the helpers below.
SIFT = OpenCV.SIFT_create()

def getSiftKeypoints(imageIndex):
    """Detect SIFT keypoints and compute their descriptors for one image.

    The image is taken from the module-level ``grayImages`` list.

    Args:
        imageIndex: Position of the image in ``grayImages``.

    Returns:
        The ``(keypoints, descriptors)`` pair produced by
        ``SIFT.detectAndCompute``.
    """
    grayImage = grayImages[imageIndex]
    return SIFT.detectAndCompute(grayImage, None)

### Get **MSER** keypoints & descriptors on single image

In [None]:
# MSER region detector; it only detects keypoints and cannot describe them.
MSER = OpenCV.MSER_create()

def getMserKeypoints(imageIndex):
    """Detect MSER keypoints for one image and describe them with SIFT.

    The image is taken from the module-level ``grayImages`` list.

    Args:
        imageIndex: Position of the image in ``grayImages``.

    Returns:
        A ``(keypoints, descriptors)`` pair. The keypoints are the ones
        returned by ``SIFT.compute``, which may drop keypoints it cannot
        describe.
    """
    grayImage = grayImages[imageIndex]
    keyPoint = MSER.detect(grayImage, None)
    # OpenCV's MSER does not implement compute(); calling it raises a
    # "not implemented" error. Use the module-level SIFT extractor to build
    # descriptors for the MSER keypoints instead.
    keyPoint, descriptor = SIFT.compute(grayImage, keyPoint)
    return keyPoint, descriptor

### Draw the keypoints 

In [None]:
def drawKeypoints(images, grayImages, keyPoints, method = "SIFT"):
    """Draw each image's keypoints on its grayscale version and save the result.

    One file per image is written to ``keyPoints/<method>/<index>.jpg``.

    Args:
        images: Source images; only used to pair each image with its
            grayscale version and keypoints. They are not modified.
        grayImages: Grayscale images the keypoints are drawn on.
        keyPoints: One keypoint collection per image.
        method: Name of the detector, used as the output sub-folder.
    """
    outputFolder = f"keyPoints/{method}"
    # imwrite does not create folders and only reports failure through its
    # return value, so make sure the target exists first.
    os.makedirs(outputFolder, exist_ok = True)
    for i, (_, grayImage, imageKeyPoints) in enumerate(zip(images, grayImages, keyPoints)):
        img = OpenCV.drawKeypoints(
            grayImage,
            # The keypoints may arrive as a NumPy object array; hand OpenCV a plain list.
            list(imageKeyPoints),
            # Let OpenCV allocate the output: passing images[i] here would
            # overwrite the original RGB image with the drawing.
            None,
            flags = OpenCV.DrawMatchesFlags_DRAW_RICH_KEYPOINTS,
        )
        OpenCV.imwrite(f"{outputFolder}/{str(i)}.jpg", img)

### Get keypoints & descriptors for all images

In [None]:
def getAllImagesKeyPoints(images, method = "SIFT"):
    """Compute keypoints and descriptors for every image with one detector.

    Args:
        images: The image list; only its length is used, the per-image
            helpers read the pixels by index.
        method: ``"SIFT"`` or ``"MSER"``.

    Returns:
        A ``(keyPoints, descriptors)`` pair of lists holding one NumPy array
        per image.

    Raises:
        ValueError: If ``method`` is not a supported detector name.
    """
    # Reject unknown names up front; otherwise the loop would fail with an
    # unbound variable or silently reuse the previous image's results.
    if method not in ("SIFT", "MSER"):
        raise ValueError(f"Unsupported method {method!r}; expected 'SIFT' or 'MSER'")

    keyPoints = []
    descriptors = []
    for i in range(len(images)):
        if method == "SIFT":
            keyPoint, descriptor = getSiftKeypoints(i)
        else:
            keyPoint, descriptor = getMserKeypoints(i)
        keyPoints.append(np.array(keyPoint))
        descriptors.append(np.array(descriptor))
    return keyPoints, descriptors


In [None]:
# import multiprocessing as mp
# import cv2

# def getSiftKeypoints(image):
#     gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
#     sift = cv2.SIFT_create()
#     keyPoints, descriptors = sift.detectAndCompute(gray, None)
#     return keyPoints, descriptors

# def process_image(i, images):
#     return getSiftKeypoints(images[i])

# if __name__ == '__main__':
#     images = [...]  # list of images
#     num_images = len(images)
#     pool = mp.Pool(mp.cpu_count())
#     results = [pool.apply_async(process_image, args=(i, images)) for i in range(num_images)]
#     pool.close()
#     pool.join()
#     keyPoints = []
#     descriptors = []
#     for res in results:
#         k, d = res.get()
#         keyPoints.append(k)
#         descriptors.append(d)
#     print(keyPoints, descriptors)


### SIFT keypoints & descriptors on all images

In [None]:
# Per-image SIFT keypoints and descriptors, in the same order as `images`.
SkeyPoints, Sdescriptors = getAllImagesKeyPoints(images, "SIFT")

In [None]:
# Inspect the attributes of the first keypoint of the first image.
print(SkeyPoints[0][0].pt, SkeyPoints[0][0].size, SkeyPoints[0][0].angle, SkeyPoints[0][0].response, SkeyPoints[0][0].octave, SkeyPoints[0][0].class_id)
# [
#         [
#             print(kp.pt, kp.size, kp.angle, kp.response, kp.octave, kp.class_id)
#             for kp in kp_list
#         ] for kp_list in SkeyPoints 
# ]


In [None]:
import pickle
# NOTE: pickle.load can execute arbitrary code while loading; only open
# feature files that were produced by this project.
with open("../src/bak/snow-man/sift-features.pkl", 'rb') as f:
    keypoints_tuple, descriptors = pickle.load(f)
print(keypoints_tuple[0][0])
print(keypoints_tuple[0][1])
# Rebuild cv2.KeyPoint objects from the stored
# ((x, y), size, angle, response, octave, class_id) tuples.
# The arguments are passed positionally because the keyword spelling is not
# stable across OpenCV versions (this notebook shows both `size=` and `_size=`).
obj_list = [
    OpenCV.KeyPoint(kp[0][0], kp[0][1], kp[1], kp[2], kp[3], kp[4], kp[5])
    for kp_array in keypoints_tuple
    for kp in kp_array
]
# One summary line instead of two prints per keypoint.
print(f"success: rebuilt {len(obj_list)} keypoints")
# [
#         [
#             OpenCV.KeyPoint(x=kp[0][0], y=kp[0][1], _size=kp[1], _angle=kp[2], _response=kp[3], _octave=kp[4], _class_id=kp[5]) 
#             for kp in kp_array
#         ] for kp_array in keypoints_tuple
#     ]

In [None]:
# SkeyPoints, Sdescriptors = getAllImagesKeyPointsParallel(images)

### ⛔️ MSER keypoints & descriptors on all images
This part still raises an error (most likely due to the GPU type used).
Try running it on a different machine.

In [None]:
# MkeyPoints, Mdescriptors = getAllImagesKeyPoints(images, "MSER")

### Draw keypoints for all images (SIFT)

In [None]:
# makedirs creates the parent folder as well and does nothing when the
# folders already exist, so no separate existence checks are needed.
os.makedirs("keyPoints/SIFT", exist_ok = True)

drawKeypoints(images, grayImages, SkeyPoints, "SIFT")

### ⛔️ Draw keypoints for all images (MSER)


In [None]:
# if (not os.path.exists("keyPoints")):
#     os.mkdir("keyPoints")
# if (not os.path.exists("keyPoints/MSER")):
#     os.mkdir("keyPoints/MSER")

# drawKeypoints(images, grayImages, MkeyPoints, "MSER")

## Image Matching

In [None]:
# Join the per-image descriptor arrays row-wise into one array in a single
# vectorized call instead of collecting the rows one at a time in Python.
# NOTE(review): assumes every entry of Sdescriptors is a 2-D array with the
# same number of columns — confirm no image produced zero keypoints.
allDescriptors = np.concatenate(Sdescriptors, axis = 0)

### Create clusters from descriptors using **Kmeans** 
#### Don't run this cell; only load the saved file below


In [None]:
# from scipy.cluster.vq import kmeans

# clusters = 400
# iter = 2
# centroids, variance = kmeans(allDescriptors, clusters, iter)

### Save the centroids for later use instead of training multiple times


In [None]:
# import joblib
# joblib.dump((clusters,centroids), "centroids.pkl", compress = 3)

### Load the file that stores the centroids


In [None]:
import joblib
# Restores the (clusters, centroids) pair; presumably the tuple written by the
# commented-out joblib.dump cell above.
# NOTE(review): joblib.load unpickles the file — only load a "centroids.pkl" you trust.
clusters, centroids = joblib.load("centroids.pkl")

### Create the visualWords 
⛔️ Make sure to run **getSiftKeypoints** first

In [None]:
from scipy.cluster.vq import vq

# For each image, keep only the codes returned by vq (the centroid index
# assigned to every descriptor); the distances are discarded.
visualWords = [
    vq(imageDescriptors, centroids)[0]
    for imageDescriptors in Sdescriptors
]

### Calculate the frequency of each word

In [None]:
# One histogram row per image: entry k counts how often visual word k occurs.
# np.bincount does the counting in one call; minlength keeps every row
# `clusters` wide and the cast keeps the float dtype of the np.zeros version.
frequency_vectors = np.stack([
    np.bincount(img_words, minlength = clusters).astype(float)
    for img_words in visualWords
])

### Plot the frequency of words

In [None]:
# Bar chart of the visual-word counts of one image.
# NOTE(review): index 100 is hard-coded and fails for data sets with fewer than 101 images.
plt.bar(list(range(clusters)), frequency_vectors[100])
plt.show()

### **TFIDF**: find the visual words that have the most effect

In [None]:
n = len(images)

# Document frequency: the number of images in which each visual word occurs.
df = np.sum(frequency_vectors > 0, axis = 0)
# A word that occurs in no image has df == 0; log(n / 0) would be inf and
# 0 * inf would put NaN into tfidf, which then turns every cosine similarity
# into NaN. Give those words an idf of 0 instead.
idf = np.zeros(df.shape, dtype = float)
present = df > 0
idf[present] = np.log(n / df[present])
tfidf = frequency_vectors * idf

In [None]:
# Bar chart of the tf-idf weights of one image.
# NOTE(review): index 100 is hard-coded and fails for data sets with fewer than 101 images.
plt.bar(list(range(clusters)), tfidf[100])
plt.show()

### Scoring and searching for image matches

In [None]:
from numpy.linalg import norm
b = tfidf
def search_display(i: int, top_clusters: int = 5):
    """Show image ``i`` followed by its ``top_clusters`` highest-scoring matches.

    Scores are the cosine similarity between row ``i`` of ``tfidf`` and every
    row of the module-level ``b``. Each match is printed as ``index: score``
    and displayed.
    """
    print("Search image:")
    # the query image itself
    displayImage(images[i])
    print("-----------------------------------------------------")

    query = tfidf[i]
    scale = norm(query) * norm(b, axis=1)
    cosine_similarity = np.dot(query, b.T) / scale
    # negate so that argsort orders from most to least similar
    ranking = np.argsort(-cosine_similarity)

    for matchID in ranking[:top_clusters]:
        print(f"{str(matchID)}: {str(cosine_similarity[matchID])}")
        displayImage(images[matchID])

In [None]:
b = tfidf
# def search(i: int, top_clusters: int = 5):
#     a = tfidf[i]
#     cosine_similarity = np.dot(a, b.T)/(norm(a) * norm(b, axis=1))
#     idx = np.argsort(-cosine_similarity)[:top_clusters]
#     return idx
def search(i: int, top_clusters: int = 5):
    """Return the ``top_clusters`` best matches for image ``i``.

    Scores are the cosine similarity between row ``i`` of ``tfidf`` and the
    rows of the module-level ``b``.

    Returns:
        A list of ``(image index, similarity)`` tuples, most similar first.
    """
    query = tfidf[i]
    # limit b to as many rows as tfidf has
    candidates = b[:tfidf.shape[0]]
    scale = norm(query) * norm(candidates, axis=1)
    scores = np.dot(query, candidates.T) / scale
    # negate so that argsort orders from most to least similar
    best = np.argsort(-scores)[:top_clusters]
    return [(imageID, similarity) for imageID, similarity in zip(best, scores[best])]

In [None]:
# Example query: show image 8 and its 10 highest-scoring matches.
search_display(8,10)

### Get the best 10 matches for all images

In [None]:
# matchesIDs[i] holds the 10 best (image index, similarity) pairs for image i.
matchesIDs = [search(i, 10) for i in range(len(images))]
print(*matchesIDs[0])


## Feature Matching
### Get the matching features between 2 images

In [None]:
def featureMatching(imgID1, imgID2, Sdescriptors):
    """Match the descriptors of two images with a default brute-force matcher.

    Args:
        imgID1: Index of the query image in ``Sdescriptors``.
        imgID2: Index of the train image in ``Sdescriptors``.
        Sdescriptors: Per-image descriptor arrays.

    Returns:
        The result of ``BFMatcher.match`` for the two descriptor sets.
    """
    queryDescriptors = Sdescriptors[imgID1]
    trainDescriptors = Sdescriptors[imgID2]
    return OpenCV.BFMatcher().match(queryDescriptors, trainDescriptors)

In [None]:
# Example: match the descriptors of the first two images.
match = featureMatching(0, 1, Sdescriptors)

## Match all images

In [None]:
def processMatches(matches, keepRatio = 0.2):
    """Keep the best share of the matches, ordered by ascending distance.

    Args:
        matches: Iterable of objects exposing a ``distance`` attribute.
        keepRatio: Fraction of the matches to keep, between 0 and 1.
            Defaults to the previously hard-coded 20%.

    Returns:
        A list with the ``int(len(matches) * keepRatio)`` lowest-distance
        matches, best first.

    Raises:
        ValueError: If ``keepRatio`` is outside ``[0, 1]``.
    """
    if not 0 <= keepRatio <= 1:
        raise ValueError(f"keepRatio must be between 0 and 1, got {keepRatio}")
    ranked = sorted(matches, key = lambda m: m.distance)
    return ranked[:int(len(ranked) * keepRatio)]

In [None]:
# import logging
# import time
# # Initialize loging to file tune.log in current directory
# logging.basicConfig(filename='tune.log', level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
# logging.info('Hello, world!')
# def dataFeatureMatching(matchesIDs, Sdescriptors):
#     num_images = len(Sdescriptors)
#     checked = np.zeros((num_images, num_images), dtype=int)
#     matches_list = []
#     for imageID in range(len(matchesIDs)):
#         logging.info(f"---------- START Matches for: {str(imageID)}")
#         for i, (matchedID, probability) in enumerate(matchesIDs[imageID]):
#             if ((checked[imageID][matchedID] == 0 or checked[matchedID][imageID] == 0) and imageID != matchedID and probability > 0.93):
#                 start_time = time.time()
#                 matches_list.append([imageID, matchedID, featureMatching(imageID, matchedID, Sdescriptors)])
#                 checked[imageID][matchedID], checked[matchedID][imageID] = 1, 1
#                 logging.info(f"done [{i}/{len(matchesIDs[imageID])}] in {(time.time() - start_time):.4f}: {str(imageID)} - {str(matchedID)}")
#         # Flush the log file force write to disk
#         logging.shutdown()
#         # print(f"---------- DONE Matches for: {str(imageID)}")
#     return matches_list

# featuresMatches = dataFeatureMatching(matchesIDs, Sdescriptors)

import logging
import time
import concurrent.futures

# Initialize logging to the file tune.log in the current directory
logging.basicConfig(filename='tune.log', level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logging.info('Hello, world!')

def dataFeatureMatching(matchesIDs, Sdescriptors, minSimilarity = 0.93, maxWorkers = None):
    """Run feature matching for every sufficiently similar pair of images.

    Args:
        matchesIDs: One list per image of ``(matchedID, similarity)`` pairs.
        Sdescriptors: Per-image descriptors, forwarded to ``featureMatching``.
        minSimilarity: A pair is matched only if its similarity is strictly
            greater than this value. Defaults to the previously hard-coded 0.93.
        maxWorkers: Number of worker threads; ``None`` uses the
            ``ThreadPoolExecutor`` default.

    Returns:
        A list of ``[imageID, matchedID, matches]`` entries, one per unordered
        image pair, ordered by ``imageID`` and then by the pair's position in
        ``matchesIDs[imageID]``. Empty when no pair qualifies.
    """
    num_images = len(Sdescriptors)

    # Select the unique pairs up front on a single thread. Doing the
    # "already checked?" test inside the workers is a check-then-set race
    # that lets both (a, b) and (b, a) be matched.
    seenPairs = set()
    pairs = []
    for imageID in range(num_images):
        for matchedID, probability in matchesIDs[imageID]:
            matchedID = int(matchedID)
            pairKey = (min(imageID, matchedID), max(imageID, matchedID))
            # `not >` (rather than `<=`) also rejects NaN similarities
            if imageID == matchedID or pairKey in seenPairs or not probability > minSimilarity:
                continue
            seenPairs.add(pairKey)
            pairs.append((imageID, matchedID))

    # Worker: match one pair and log how long it took
    def match_pair(job):
        position, (imageID, matchedID) = job
        logging.info(f"---------- START Matches for: {str(imageID)} - {str(matchedID)}")
        start_time = time.time()
        matches = featureMatching(imageID, matchedID, Sdescriptors)
        logging.info(f"done [{position}/{len(pairs)}] in {(time.time() - start_time):.4f}: {str(imageID)} - {str(matchedID)}")
        return [imageID, matchedID, matches]

    matches_list = []
    # Skip the pool when there is nothing to do (this also covers empty input,
    # where a pool sized by the number of images would be rejected).
    if pairs:
        with concurrent.futures.ThreadPoolExecutor(max_workers=maxWorkers) as executor:
            # map() returns the results in submission order, so the output is deterministic
            matches_list = list(executor.map(match_pair, enumerate(pairs, start=1)))

    # Flush the log handlers so the file is written to disk; logging.shutdown()
    # is meant for interpreter exit and would close the handlers.
    for handler in logging.getLogger().handlers:
        handler.flush()
    return matches_list

# Feature matches for the candidate pairs selected from matchesIDs.
featuresMatches = dataFeatureMatching(matchesIDs, Sdescriptors)


In [None]:
import pickle

In [None]:
# Save featuresMatches to file

def convert_matches_to_dicts(matches):
    """Convert match objects into plain dictionaries.

    Each result holds the ``queryIdx``, ``trainIdx`` and ``distance``
    attributes of the corresponding match, in the original order.
    """
    return [
        {'queryIdx': m.queryIdx, 'trainIdx': m.trainIdx, 'distance': m.distance}
        for m in matches
    ]

# Convert cv2.DMatch objects to dictionaries before pickling
# Keep the two image IDs of every entry and replace its match objects with
# plain dictionaries so the structure can be pickled.
matches_dicts = [
    [firstID, secondID, convert_matches_to_dicts(pairMatches)]
    for firstID, secondID, pairMatches in featuresMatches
]
# Write the converted list to disk
with open('featuresMatches.pkl', 'wb') as f:
    pickle.dump(matches_dicts, f)

In [None]:
# Load the matches_dicts list from the file
# NOTE(review): this file name differs from the 'featuresMatches.pkl' written
# by the save cell — confirm that loading a different file is intended.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
with open('featuresMatches_12_3_Mohamed.pkl', 'rb') as f:
    loaded_matches_dicts = pickle.load(f)

# Convert dictionaries back to cv2.DMatch objects after unpickling
# Each entry is [first image ID, second image ID, list of match dictionaries].
loaded_featuresMatches = []
for match_dict in loaded_matches_dicts:
    matches = [OpenCV.DMatch(match['queryIdx'], match['trainIdx'], match['distance']) for match in match_dict[2]]
    loaded_featuresMatches.append([match_dict[0], match_dict[1], matches])

In [None]:
# Sanity check: print the container type of the reloaded matches.
print(type(loaded_featuresMatches))