Explanation
    Directory Traversal:

        1. The script traverses the root directory (dataset_path) and lists all subdirectories, which correspond to class names.
        2. For each subdirectory (class), it lists all files (images).
    Label Assignment:

        1. For each image file, it constructs the full path and assigns the class name (subdirectory name) as the label.

    Label Storage:

        1. The labels are stored in a dictionary with the image path as the key and the class name as the value.
        2. The dictionary is then saved to a JSON file (image_labels.json) for easy access later.
Example Usage
Dataset Path: Set the dataset_path variable to the path of your dataset.
Run the Script: Execute the script to generate the labels and save them to image_labels.json in the current working directory.

In [None]:
import os
import json

# Root folder of the dataset. Each immediate subdirectory is treated as one
# class, and the subdirectory name is used as the label.
dataset_path = 'path/to/your/dataset'

# Maps each image file path to the name of the class directory containing it.
image_labels = {}

# os.listdir returns entries in arbitrary order, so both listings are sorted
# to make the generated JSON identical from run to run.
for class_name in sorted(os.listdir(dataset_path)):
    class_dir = os.path.join(dataset_path, class_name)

    # Entries in the dataset root that are not directories are not classes.
    if not os.path.isdir(class_dir):
        continue

    for img_name in sorted(os.listdir(class_dir)):
        img_path = os.path.join(class_dir, img_name)

        # Only files receive a label; nested directories are skipped.
        if os.path.isfile(img_path):
            image_labels[img_path] = class_name

# Save the labels to a JSON file for future use. The explicit encoding keeps
# the output independent of the platform's default text encoding.
with open('image_labels.json', 'w', encoding='utf-8') as json_file:
    json.dump(image_labels, json_file, indent=4)

print("Labels generated and saved to image_labels.json")

Clustering code: extract VGG16 features from each image and group the images with K-Means.


In [None]:
import os
import numpy as np
from tensorflow.keras.applications import VGG16
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt

# Parameters
image_dir = 'path_to_your_image_directory'  # directory that holds the images
img_size = (224, 224)  # (height, width) every image is resized to on load
num_clusters = 4  # number of clusters requested from K-Means

# Load VGG16 with ImageNet weights as a feature extractor. include_top=False
# leaves out the fully connected classification layers, so the model outputs
# convolutional feature maps rather than class scores. The input shape is
# derived from img_size so the two settings cannot drift apart.
base_model = VGG16(weights='imagenet', include_top=False, input_shape=img_size + (3,))

# Function to preprocess images
def preprocess_image(image_path):
    """Load one image and convert it into a single-image batch for VGG16.

    The image is resized to the module-level ``img_size`` while loading.

    Args:
        image_path: Path of the image file to load.

    Returns:
        A float array with a leading batch axis of length 1, i.e. shape
        (1, height, width, 3) under Keras' default channels-last layout,
        preprocessed the way the ImageNet VGG16 weights expect.
    """
    # Imported here so the function carries its own dependency and the
    # file's top-level import block does not need to change.
    from tensorflow.keras.applications.vgg16 import preprocess_input

    img = load_img(image_path, target_size=img_size)
    img = img_to_array(img)
    img = np.expand_dims(img, axis=0)  # add the batch axis
    # The ImageNet weights were trained on BGR inputs with the per-channel
    # ImageNet mean subtracted and no rescaling. Dividing by 255 instead
    # feeds the network values far from what it was trained on, which
    # degrades the extracted features.
    return preprocess_input(img)

# Collect the JPEG files in image_dir. The extension check is
# case-insensitive and also accepts ".jpeg", so files such as "IMG_1.JPG"
# are not silently ignored. os.listdir returns names in arbitrary order, so
# the paths are sorted to keep the K-Means result reproducible across runs.
image_paths = sorted(
    os.path.join(image_dir, f)
    for f in os.listdir(image_dir)
    if f.lower().endswith(('.jpg', '.jpeg'))
)

# Fail with a clear message instead of the opaque error np.vstack raises
# when it is handed an empty list.
if not image_paths:
    raise ValueError(f"No .jpg/.jpeg images found in {image_dir!r}")

# Stack the single-image batches into one array with one row per image.
images = np.vstack([preprocess_image(img_path) for img_path in image_paths])

# Extract features using VGG16
features = base_model.predict(images)
features = features.reshape((features.shape[0], -1))  # one flat vector per image

# Apply K-Means clustering; the fixed random_state makes the initialisation
# repeatable.
kmeans = KMeans(n_clusters=num_clusters, random_state=42)
kmeans.fit(features)
labels = kmeans.labels_

# Visualize the clusters: one figure per cluster, showing at most the first
# 25 images assigned to it in a 5x5 grid.
path_array = np.array(image_paths)  # built once so it can be boolean-masked
for cluster in range(num_clusters):
    plt.figure(figsize=(20, 20))
    cluster_images = path_array[labels == cluster]
    for i, img_path in enumerate(cluster_images[:25]):
        plt.subplot(5, 5, i + 1)
        img = load_img(img_path, target_size=img_size)
        plt.imshow(img)
        plt.axis('off')
    # Clusters are numbered from 1 in the title for readability.
    plt.suptitle(f'Cluster {cluster + 1}', size=20)
    plt.show()
