In [49]:
import zipfile
import os

zip_file_path = '/content/archive (1).zip'
unzip_dir = '/content/archive(1)'

# Create the directory if it doesn't exist
if not os.path.exists(unzip_dir):
    os.makedirs(unzip_dir)

# Unzip the file
with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
    zip_ref.extractall(unzip_dir)

In [50]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.metrics import normalized_mutual_info_score, adjusted_rand_score, rand_score
from skimage.io import imread
from skimage.feature import local_binary_pattern, hog
from skimage.color import rgb2gray
import cv2

# Function to load COIL-20 images
def load_coil20_images():
       coil20_data = []
       for i in range(1, 21):
           for j in range(0, 71):
               image = imread(f'/content/archive(1)/coil-20/coil-20-proc/obj{i}__{j}.png')
               coil20_data.append(image)
       print("Number of images loaded:", len(coil20_data))  # Add this line
       return np.array(coil20_data)

# Load images
images = load_coil20_images()


Number of images loaded: 1420


In [51]:
# Convert to grayscale
# Convert images to grayscale if they are RGB
def convert_to_grayscale(images):
    gray_images = []
    for img in images:
        if img.ndim == 3 and img.shape[-1] == 3:  # Check if image is RGB
            gray_img = rgb2gray(img)
        else:
            gray_img = img  # Image is already grayscale
        gray_images.append(gray_img)
    return np.array(gray_images)

gray_images = convert_to_grayscale(images)

# Extract LBP, HOG, and SIFT features
def extract_lbp_features(images):
    radius = 3
    n_points = 8 * radius
    return np.array([local_binary_pattern(img, n_points, radius, method='uniform').flatten() for img in images])

def extract_hog_features(images):
    return np.array([hog(img, pixels_per_cell=(8, 8), cells_per_block=(2, 2), feature_vector=True) for img in images])

# Extract SIFT features (modified)
def extract_sift_features(images, max_features=128):
    sift = cv2.SIFT_create()
    features = []
    for img in images:
        if img.ndim == 3 and img.shape[-1] == 3:  # If image is RGB
            gray_img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
        else:
            gray_img = img  # Image is already grayscale
        kp, des = sift.detectAndCompute(gray_img, None)

        # Handle cases where no keypoints are detected
        if des is None:
            des = np.zeros((max_features, 128)) # Pad with zeros if no keypoints are found

        if des.shape[0] < max_features:
            des = np.pad(des, ((0, max_features - des.shape[0]), (0, 0)), mode='constant')
        else:
            des = des[:max_features, :]

        features.append(des.flatten())

    return np.array(features)

# Evaluate clustering performance (modified)
n_clusters = 20  # Number of objects in COIL-20
true_labels = np.repeat(np.arange(20), 72)  # True labels for each object

metrics = {}
clustered_images = {}

for name, features in combined_features.items():
    # Ensure features and labels have the same number of samples
    num_samples = min(features.shape[0], true_labels.shape[0])
    features = features[:num_samples]
    true_labels_subset = true_labels[:num_samples]

    kmeans = KMeans(n_clusters=n_clusters, random_state=42).fit(features)
    clusters = kmeans.labels_

    nmi = normalized_mutual_info_score(true_labels_subset, clusters)
    rand_idx = rand_score(true_labels_subset, clusters)
    adj_rand_idx = adjusted_rand_score(true_labels_subset, clusters)

    metrics[name] = (nmi, rand_idx, adj_rand_idx)

    # Store images in clusters
    clustered_images[name] = {i: np.where(clusters == i)[0].tolist() for i in range(n_clusters)}

# Print metrics
for name, (nmi, rand_idx, adj_rand_idx) in metrics.items():
    print(f"{name}: NMI={nmi:.4f}, Rand Index={rand_idx:.4f}, Adjusted Rand Index={adj_rand_idx:.4f}")


lbp_features = extract_lbp_features(gray_images)
hog_features = extract_hog_features(gray_images)
sift_features = extract_sift_features(images)


  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


LBP: NMI=0.6799, Rand Index=0.9405, Adjusted Rand Index=0.4524
HOG: NMI=0.7051, Rand Index=0.9447, Adjusted Rand Index=0.4815
SIFT: NMI=0.3481, Rand Index=0.8803, Adjusted Rand Index=0.1337
LBP_HOG: NMI=0.6878, Rand Index=0.9420, Adjusted Rand Index=0.4669
LBP_SIFT: NMI=0.4066, Rand Index=0.9031, Adjusted Rand Index=0.1874
HOG_SIFT: NMI=0.3481, Rand Index=0.8803, Adjusted Rand Index=0.1337
LBP_HOG_SIFT: NMI=0.4185, Rand Index=0.8925, Adjusted Rand Index=0.1951


In [52]:
# Combine features
combined_features = {
    'LBP': lbp_features,
    'HOG': hog_features,
    'SIFT': sift_features,
    'LBP_HOG': np.hstack([lbp_features, hog_features]),
    'LBP_SIFT': np.hstack([lbp_features, sift_features]),
    'HOG_SIFT': np.hstack([hog_features, sift_features]),
    'LBP_HOG_SIFT': np.hstack([lbp_features, hog_features, sift_features])
}


In [53]:
print(f"LBP Features Shape: {lbp_features.shape}")
print(f"HOG Features Shape: {hog_features.shape}")
print(f"SIFT Features Shape: {sift_features.shape}")
assert lbp_features.shape[0] == hog_features.shape[0] == sift_features.shape[0]


LBP Features Shape: (1420, 16384)
HOG Features Shape: (1420, 8100)
SIFT Features Shape: (1420, 16384)


In [54]:
# Evaluate clustering performance
n_clusters = 20  # Number of objects in COIL-20
true_labels = np.repeat(np.arange(20), 71)  # True labels for each object

metrics = {}
clustered_images = {}

for name, features in combined_features.items():
    kmeans = KMeans(n_clusters=n_clusters, random_state=42).fit(features)
    clusters = kmeans.labels_

    nmi = normalized_mutual_info_score(true_labels, clusters)
    rand_idx = rand_score(true_labels, clusters)
    adj_rand_idx = adjusted_rand_score(true_labels, clusters)

    metrics[name] = (nmi, rand_idx, adj_rand_idx)

    # Store images in clusters
    clustered_images[name] = {i: np.where(clusters == i)[0].tolist() for i in range(n_clusters)}

# Print metrics
for name, (nmi, rand_idx, adj_rand_idx) in metrics.items():
    print(f"{name}: NMI={nmi:.4f}, Rand Index={rand_idx:.4f}, Adjusted Rand Index={adj_rand_idx:.4f}")


  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


LBP: NMI=0.7914, Rand Index=0.9573, Adjusted Rand Index=0.5959
HOG: NMI=0.8091, Rand Index=0.9659, Adjusted Rand Index=0.6500
SIFT: NMI=0.3714, Rand Index=0.8717, Adjusted Rand Index=0.1468
LBP_HOG: NMI=0.8209, Rand Index=0.9657, Adjusted Rand Index=0.6574
LBP_SIFT: NMI=0.4364, Rand Index=0.8840, Adjusted Rand Index=0.1914
HOG_SIFT: NMI=0.3714, Rand Index=0.8720, Adjusted Rand Index=0.1467
LBP_HOG_SIFT: NMI=0.4364, Rand Index=0.8840, Adjusted Rand Index=0.1914


In [55]:

for name, clusters in clustered_images.items():
    print(f"\n{name} clusters:")

    max_cluster_id_length = max(len(f"Cluster {cluster_id}") for cluster_id in clusters.keys())

    for cluster_id, image_indices in clusters.items():
        formatted_cluster_id = f"Cluster {cluster_id}".ljust(max_cluster_id_length)

        print(f"{formatted_cluster_id}: {image_indices}")



LBP clusters:
Cluster 0 : [308, 343, 344, 345]
Cluster 1 : [158, 159, 160, 194, 195, 196, 197, 298, 299, 300, 335, 336, 338, 339, 340, 372, 373, 407, 408, 409, 410, 583, 639, 640, 641, 642, 643, 644, 645, 646, 668, 669, 670, 671, 672, 673, 674, 675, 676, 677, 678, 679, 680, 681, 682, 683, 704, 705, 706, 707, 708, 709, 923, 924, 925, 938, 939, 940, 941, 942, 943, 956, 957, 958, 959, 960, 961, 974, 975, 976, 977, 978, 979, 992, 993, 1294, 1295, 1296, 1297, 1330, 1331, 1332, 1333]
Cluster 2 : [620, 621, 661, 662, 663, 664, 665, 666, 667, 697, 698, 699, 700, 701, 702, 703]
Cluster 3 : [71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141]
Cluster 4 : [163, 164, 165, 166, 167, 199, 200, 201, 202, 203, 204, 305, 306, 307, 3

In [57]:
def compare_clusters_in_views(clustered_images, view1, view2):
    """
    Compares clusters with the same ID in two different views, printing the items,
    and unique and common components.

    Args:
        clustered_images: A dictionary of clustered images.
        view1: The name of the first view.
        view2: The name of the second view.
    """
    clusters1 = clustered_images[view1]
    clusters2 = clustered_images[view2]

    for cluster_id in clusters1.keys():
        items1 = clusters1[cluster_id]
        items2 = clusters2[cluster_id]
        common_items = set(items1).intersection(items2)
        unique_items1 = set(items1).difference(items2)
        unique_items2 = set(items2).difference(items1)

        print(f"Cluster {cluster_id} in {view1} vs {view2}:")
        print(f"  Items in {view1}: {items1}")
        print(f"  Items in {view2}: {items2}")
        print(f"  Common items: {list(common_items)}")
        print(f"  Unique to {view1}: {list(unique_items1)}")
        print(f"  Unique to {view2}: {list(unique_items2)}")
        print()

# Example usage (add this after your existing code):
compare_clusters_in_views(clustered_images, 'LBP_SIFT', 'HOG_SIFT')

Cluster 0 in LBP_SIFT vs HOG_SIFT:
  Items in LBP_SIFT: [164, 322, 323, 325, 326, 327, 355, 358, 359, 360, 361, 362, 378, 381, 383, 384, 385, 386, 387, 388, 390, 391, 393, 395, 396, 397, 398, 401, 402, 403, 415, 417, 418, 419, 421, 422, 423, 424, 425, 1278, 1281, 1282, 1283, 1284, 1285, 1286, 1287, 1288, 1289, 1290, 1291, 1303, 1305, 1306, 1309, 1310, 1312, 1313, 1314, 1315, 1317, 1318, 1325, 1326, 1327, 1344, 1347, 1348, 1371, 1383, 1386, 1387, 1403, 1404, 1414]
  Items in HOG_SIFT: [290, 293, 1282, 1283]
  Common items: [1282, 1283]
  Unique to LBP_SIFT: [1281, 1284, 1285, 1286, 1287, 1288, 1289, 1290, 1291, 1303, 1305, 1306, 1309, 1310, 1312, 1313, 1314, 1315, 1317, 1318, 1325, 1326, 1327, 1344, 322, 323, 1347, 325, 326, 327, 1348, 1371, 355, 358, 359, 360, 361, 362, 1383, 1386, 1387, 378, 1403, 1404, 381, 383, 384, 385, 386, 387, 388, 390, 391, 1414, 393, 395, 396, 397, 398, 401, 402, 403, 415, 417, 418, 419, 164, 421, 422, 423, 424, 425, 1278]
  Unique to HOG_SIFT: [290, 293]

Clu