In [43]:
import cv2
import numpy as np
import os
from PIL import Image

In [49]:
import cv2
import numpy as np

def extract_sift_features(image_path):
    # Load the image
    image = cv2.imread(image_path)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    
    # Initialize SIFT detector with a lower threshold to get more keypoints
    sift = cv2.SIFT_create(contrastThreshold=0.01, edgeThreshold=10)
    
    # Detect and compute SIFT features
    keypoints, descriptors = sift.detectAndCompute(gray, None)
    
    # Set a minimum threshold for the number of keypoints
    min_keypoints = 128
    
    if len(keypoints) < min_keypoints:
        print(f"Not enough keypoints in {image_path} to create a {min_keypoints}-dimensional feature vector")
        if len(keypoints) == 0:
            print("No keypoints detected. Filling descriptors with zeros.")
            descriptors = np.zeros((min_keypoints, 128), dtype=np.float32)
        else:
            # Sort keypoints by response (strength of the feature)
            keypoints = sorted(keypoints, key=lambda x: -x.response)
        
            # Extract the first 128 descriptors to create a 128-dimensional feature vector
            descriptors = descriptors[:128]
    
    return descriptors


In [50]:
# Define the root directory where your "images" folder is located
root_directory = "dtd-r1.0.1/dtd/images"

# Create a list of all image file extensions you want to consider (e.g., '.jpg', '.png')
image_extensions = ['.jpg', '.png', '.jpeg']

In [51]:
SIFT_features = []

In [None]:
SIFT_features

[('banded_0008.jpg',
  array([[ 0.,  0.,  0., ...,  1.,  0.,  0.],
         [ 0.,  0.,  0., ...,  1.,  0.,  0.],
         [ 0.,  0.,  0., ...,  1.,  0.,  0.],
         ...,
         [79., 28., 57., ..., 27., 40., 21.],
         [81., 28., 44., ..., 29., 54., 26.],
         [92., 28., 52., ..., 25., 42., 20.]], dtype=float32)),
 ('banded_0011.jpg',
  array([[ 22.,   1.,   0., ...,   0.,   0.,   0.],
         [ 26.,   1.,   0., ...,   0.,   0.,   0.],
         [ 26.,   1.,   0., ...,   0.,   0.,   0.],
         ...,
         [143.,   3.,   0., ...,   0.,   0.,   0.],
         [143.,   5.,   0., ...,   0.,   0.,   1.],
         [143.,   2.,   0., ...,   1.,   0.,   0.]], dtype=float32)),
 ('banded_0016.jpg',
  array([[  0.,   0.,   0., ...,   2.,   0.,   0.],
         [135.,   1.,   0., ...,   1.,   0.,   1.],
         [  0.,   0.,   0., ...,   0.,   0.,   0.],
         ...,
         [  9.,   9.,   2., ...,   2.,   0.,   9.],
         [  1.,   0.,   0., ...,   1.,  10.,  14.],
         [ 

In [48]:
import csv

csv_file_path = "SIFT_FEATURES.csv"

with open(csv_file_path, 'w', newline='') as csvfile:
    csv_writer = csv.writer(csvfile)
    csv_writer.writerow(['ImageName', 'Vector'])  

    for image_name, vector in SIFT_features:
        csv_writer.writerow([image_name, ','.join(map(str, vector))])

In [None]:
def euclidean(a, b):
    return np.linalg.norm(a - b)
def cosine(a, b):
    return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))

In [None]:
import csv
def calculate_distances(image_name1, image_name2, csv_file_path="HOG_features.csv"):
    vector1 = None
    vector2 = None
    
    # Read the CSV file to get the vectors for the given image names.
    with open(csv_file_path, 'r', newline='') as csvfile:
        csv_reader = csv.reader(csvfile)
        next(csv_reader)  # Skip the header row
        for row in csv_reader:
            if row[0] == image_name1:
                vector1 = np.array(list(map(float, row[1].split(','))))
            if row[0] == image_name2:
                vector2 = np.array(list(map(float, row[1].split(','))))

    if vector1 is None or vector2 is None:
        print("One or both of the images were not found in the CSV file.")
        return

    # Calculate Euclidean distance and cosine similarity between the vectors.
    euclidean_distance = euclidean(vector1, vector2)
    cosine_similarity = cosine(vector1, vector2)

    return euclidean_distance, cosine_similarity


val = calculate_distances("banded_0002.jpg","banded_0004.jpg")

print("euclidean Distance:",val[0])
print("Cosine Similarity :",val[1])

euclidean Distance: 2.3246063080821777
Cosine Similarity : 0.5428065544653734


In [None]:

# Define a function to search for the most similar images.
def search(query_image_path, csv_file_path, top_k=5):
    # Open the query image using PIL.


    # Apply the get_vector() function to the query image.
    query_vector = extract_sift_features(query_image_path)

    # Read the CSV file to get image vectors.
    image_data_list = []
    with open(csv_file_path, 'r', newline='') as csvfile:
        csv_reader = csv.reader(csvfile)
        next(csv_reader)  # Skip the header row
        for row in csv_reader:
            image_name = row[0]
            vector = np.array(list(map(float, row[1].split(','))))
            image_data_list.append((image_name, vector))

    # Calculate cosine and Euclidean distances between the query vector and all other vectors.
    cosine_similarities = []
    euclidean_distances = []
    for image_name, vector in image_data_list:
        cosine_similarity = cosine(query_vector, vector)
        euclidean_distance = euclidean(query_vector , vector)
        cosine_similarities.append((image_name, cosine_similarity))
        euclidean_distances.append((image_name, euclidean_distance))

    # Sort the images by similarity/distance in descending order.
    top_cosine_similar_images = sorted(cosine_similarities, key=lambda x: x[1], reverse=True)[:top_k]
    top_euclidean_similar_images = sorted(euclidean_distances, key=lambda x: x[1])[:top_k]

    return top_cosine_similar_images, top_euclidean_similar_images, query_vector



In [None]:
query_image_path = "dtd-r1.0.1/dtd/images/braided/braided_0007.jpg"
cosine_results, euclidean_results,query_vector  = search(query_image_path, 'HOG_features.csv', top_k=5)


print("\nTop 5 Cosine Similar Images:")
for i, (image_name, similarity) in enumerate(cosine_results, 1):
    print(f"Top {i}: {image_name} (Similarity: {similarity:.4f})")

print("\nTop 5 Euclidean Distance Images:")
for i, (image_name, distance) in enumerate(euclidean_results, 1):
    print(f"Top {i}: {image_name} (Euclidean Distance: {distance:.4f})")




ValueError: shapes (128,128) and (256,) not aligned: 128 (dim 1) != 256 (dim 0)