In [6]:
import cv2
import numpy as np
import os

In [7]:
def create_histogram(image, bins=(8, 12, 3)):
    """
    Creates a 3D color histogram in the HSV color space.

    Args:
        image: Image array in BGR channel order (OpenCV's default, e.g. the
            value returned by cv2.imread).
        bins: Number of histogram bins for the (Hue, Saturation, Value)
            channels. The default (8, 12, 3) gives 8*12*3 = 288 bins.

    Returns:
        The min-max normalized histogram, flattened to a 1D array with
        bins[0] * bins[1] * bins[2] entries.
    """
    # Convert the image from BGR (OpenCV default) to HSV
    hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

    # Histogram over all three channels at once. The upper bounds are
    # exclusive: OpenCV stores 8-bit Hue as 0..179 and Saturation/Value
    # as 0..255.
    hist = cv2.calcHist(
        [hsv_image],
        [0, 1, 2],
        None,
        list(bins),
        [0, 180, 0, 256, 0, 256],
    )

    # Rescale in place so the smallest bin maps to 0 and the largest to 255,
    # which keeps histograms of differently sized images comparable
    cv2.normalize(hist, hist, 0, 255, cv2.NORM_MINMAX)

    # Return the flattened 1D histogram
    return hist.flatten()

In [8]:
# --- 1. Indexing our "database" ---
print("Indexing images in the 'dataset' folder...")

# This dictionary will hold our "fingerprints"
# Key: image_filename, Value: histogram
index = {}

# Folder containing our images
dataset_folder = "dataset"

# Entries OpenCV could not decode (sub-folders, non-image files, ...)
skipped = []

# os.listdir returns entries in arbitrary order; sorting keeps the index
# (and therefore the order of equally-distant search results) reproducible
for filename in sorted(os.listdir(dataset_folder)):
    filepath = os.path.join(dataset_folder, filename)

    # cv2.imread returns None instead of raising when a file cannot be read
    image = cv2.imread(filepath)
    if image is None:
        skipped.append(filename)
        continue

    # Create its "fingerprint" and add it to the index
    index[filename] = create_histogram(image)

print(f"Done. Indexed {len(index)} images.")
if skipped:
    # Surface what was ignored so a missing match is easy to diagnose
    print(f"Skipped {len(skipped)} unreadable entries: {skipped}")

Indexing images in the 'dataset' folder...
Done. Indexed 17 images.


In [9]:
# --- 2. The Search ---
print("Loading query image...")
query_image = cv2.imread("query.jpg")
if query_image is None:
    # Raise rather than call exit(): in a notebook exit() asks the kernel to
    # shut down, losing all state, instead of just failing this cell.
    raise FileNotFoundError(
        "Error: Could not load query.jpg. Make sure it's in the right folder."
    )

# Create the fingerprint for our query image
query_hist = create_histogram(query_image)

print("Comparing query image to index...")
# Compare the query histogram to every indexed histogram using the
# Chi-Squared distance (a good choice for histograms).
# A *smaller* distance means *more* similar.
# Note: HISTCMP_CHISQR is not symmetric, so the query must stay the first
# argument for distances to be comparable with each other.
# Each entry is a (distance, filename) tuple.
results = [
    (cv2.compareHist(query_hist, hist, cv2.HISTCMP_CHISQR), filename)
    for filename, hist in index.items()
]


Loading query image...
Comparing query image to index...


In [10]:
# --- 3. Display Results ---

# Sort the results so the smallest distances (best matches) are first
results.sort(key=lambda x: x[0])

print("\n--- Top 5 Matches ---")

# Show the query image in a window
cv2.imshow("Query Image", query_image)

# Slice instead of indexing 0..4 so an index with fewer than five images
# shows what is available rather than raising IndexError
for rank, (distance, filename) in enumerate(results[:5], start=1):
    print(f"{rank}. {filename} (Distance: {distance:.2f})")

    # Load and show the result image
    result_image = cv2.imread(os.path.join(dataset_folder, filename))
    if result_image is None:
        # The file may have been moved or deleted since indexing;
        # cv2.imshow would fail on None, so skip the window only
        print(f"   (could not reload {filename} for display)")
        continue
    cv2.imshow(f"Result {rank}", result_image)

print("\nPress any key to close all image windows.")
cv2.waitKey(0)  # Wait for the user to press a key
cv2.destroyAllWindows()


--- Top 5 Matches ---
1. beach (5).jpg (Distance: 4157.56)
2. forest5.jpg (Distance: 12736.88)
3. beach(2).jpg (Distance: 30597.14)
4. building2.jpg (Distance: 33560.82)
5. building3.jpg (Distance: 58251.41)

Press any key to close all image windows.
