In [None]:
import numpy as np
import matplotlib.pyplot as plt
from skimage.feature import hog
from skimage import exposure
import os
from skimage import io, color, feature
from PIL import Image
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.utils import shuffle
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA

In [None]:
# Function to extract HOG features from an image and visualize it
def extract_hog_features(image):
    """Compute a HOG descriptor and a contrast-stretched HOG visualization.

    Parameters
    ----------
    image : array-like or PIL.Image.Image
        Grayscale (H, W), RGB (H, W, 3) or RGBA (H, W, 4) image. Anything
        ``np.asarray`` can convert is accepted.

    Returns
    -------
    hog_features : numpy.ndarray
        Flattened HOG descriptor computed with ``feature.hog`` defaults.
    hog_image_rescaled : numpy.ndarray
        HOG visualization with intensities rescaled from the input range
        (0, 10) for display.
    """
    # Local import: the notebook's import cell does not bring this name in.
    from skimage.util import img_as_float

    pixels = np.asarray(image)

    if pixels.ndim == 2:
        # Already single-channel: rgb2gray would reject it, so only bring the
        # values onto the same float scale that rgb2gray produces.
        gray_image = img_as_float(pixels)
    elif pixels.ndim == 3 and pixels.shape[-1] == 4:
        # Drop the alpha channel before the luminance conversion.
        gray_image = color.rgb2gray(color.rgba2rgb(pixels))
    else:
        # Convert the image to grayscale
        gray_image = color.rgb2gray(pixels)

    # Calculate HOG features
    hog_features, hog_image = feature.hog(gray_image, visualize=True)

    # Enhance the contrast of the HOG image for better visualization
    hog_image_rescaled = exposure.rescale_intensity(hog_image, in_range=(0, 10))

    return hog_features, hog_image_rescaled


In [None]:
# Path to the root folder of your dataset
dataset_path = 'Training-1'

# Only files with these extensions are loaded; anything else found in a class
# folder (e.g. .DS_Store, Thumbs.db, nested directories) is skipped instead of
# crashing io.imread.
image_extensions = {'.png', '.jpg', '.jpeg', '.bmp', '.gif', '.tif', '.tiff',
                    '.webp', '.ppm', '.pgm'}

# Set to an int to cap the number of preview figures per class.
# None keeps the original behaviour of showing a figure for every image.
max_previews_per_class = None

# List all subdirectories (assuming each subdirectory corresponds to a class).
# Sorted so the traversal order does not depend on the filesystem.
class_folders = sorted(f.path for f in os.scandir(dataset_path) if f.is_dir())
features_list = []
labels_list = []

# Loop through each class folder
for class_folder in class_folders:
    class_name = os.path.basename(class_folder)
    previews_shown = 0

    # Loop through each image in the class folder (sorted for reproducibility)
    for image_filename in sorted(os.listdir(class_folder)):
        extension = os.path.splitext(image_filename)[1].lower()
        if extension not in image_extensions:
            continue

        image_path = os.path.join(class_folder, image_filename)

        # Load the image and bring it to a fixed 128x128 size so every HOG
        # descriptor has the same length
        image = io.imread(image_path)
        img_pil = Image.fromarray(image)
        img_resized = img_pil.resize((128, 128))

        # Extract HOG features and the HOG visualization
        hog_features, hog_image = extract_hog_features(img_resized)

        # Display the original image and the HOG features
        if max_previews_per_class is None or previews_shown < max_previews_per_class:
            fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 5), sharex=True, sharey=True)

            ax1.axis('off')
            ax1.imshow(img_resized, cmap=plt.cm.gray)
            ax1.set_title('Original Image')

            ax2.axis('off')
            ax2.imshow(hog_image, cmap=plt.cm.gray)
            ax2.set_title('HOG Features')

            plt.show()
            # Release the figure; otherwise one open figure per image
            # accumulates in memory for the whole dataset.
            plt.close(fig)
            previews_shown += 1

        # Append HOG features to the features list
        features_list.append(hog_features)

        # Append the label to the labels list
        labels_list.append(class_name)


In [None]:
# Turn the collected class names into an array and map them to integer ids
labels_array = np.array(labels_list)
label_encoder = LabelEncoder().fit(labels_array)
numeric_labels = label_encoder.transform(labels_array)

# Stack the per-image HOG vectors into one array
features_array = np.array(features_list)

print(features_array)
print(numeric_labels)

In [None]:
# Fit a scaler on the HOG features, then replace them with their
# standardized version
sc = StandardScaler()
sc.fit(features_array)
features_array = sc.transform(features_array)
print(features_array)

In [None]:
# Project the standardized HOG features onto 64 principal components; the
# reduced vectors are what gets clustered further down.
# random_state keeps the projection reproducible across runs when
# scikit-learn selects a randomized SVD solver.
pca = PCA(n_components=64, random_state=42)
features_pca = pca.fit_transform(features_array)

In [None]:
# Shuffle the data.
# features_pca must be permuted together with the labels: it was computed
# before this cell, and everything downstream (clustering input, accuracy,
# scatter plots) indexes it with numeric_labels. Shuffling only
# features_array and numeric_labels would leave the PCA rows paired with the
# wrong labels.
features_array, features_pca, numeric_labels = shuffle(
    features_array, features_pca, numeric_labels, random_state=42
)

In [None]:
# Make sure the PCA output is a 2-D (n_samples, n_components) array; for the
# array returned by PCA this reshape leaves the layout unchanged.
data = features_pca.reshape((len(features_pca), -1))

# Standardize the PCA components with a dedicated scaler.
# The scaler `sc` fitted on the HOG features is deliberately left untouched:
# refitting it here would overwrite the statistics needed to transform new
# samples, and features_array is already standardized, so scaling it again
# would change nothing.
pca_scaler = StandardScaler()
data = pca_scaler.fit_transform(data)
print(data[0])

In [None]:
# Apply k-means clustering on the standardized PCA components
n_clusters = len(class_folders)  # Number of clusters based on the number of classes

# n_init is set explicitly so the number of restarts does not depend on the
# installed scikit-learn version and matches the elbow analysis below.
kmeans = KMeans(n_clusters=n_clusters, n_init=10, random_state=42)

# Only one fit is needed: a second fit() discards the previous one, so fitting
# on features_array first was wasted work.
kmeans.fit(data)

In [None]:
# Visualize the cluster centers.
# The centroids live in the space k-means was fitted on, not in pixel space,
# so each panel is a heat map of the centroid's coordinates rather than a
# representative image.
# squeeze=False keeps the axes indexable even when n_clusters == 1.
fig, ax = plt.subplots(1, n_clusters, figsize=(15, 3), squeeze=False)
ax = ax[0]

for i in range(n_clusters):
    center = kmeans.cluster_centers_[i]
    side = int(round(np.sqrt(center.size)))
    if side * side == center.size:
        # Square layout (8x8 for 64 components)
        ax[i].imshow(center.reshape(side, side), cmap='gray')
    else:
        # Dimensionality is not a perfect square: show the centroid as a strip
        ax[i].imshow(center.reshape(1, -1), cmap='gray', aspect='auto')
    ax[i].axis('off')
    ax[i].set_title(f'Cluster {i}')

plt.show()

In [None]:

# Predict clusters for the data.
# The model was fitted on `data` (the standardized PCA components), so
# prediction must use the same representation; feeding the unscaled
# features_pca would measure distances on a different scale than the
# centroids were learned on.
predicted_clusters = kmeans.predict(data)

In [None]:
# Calculate accuracy.
# K-means cluster ids are arbitrary (cluster 0 need not correspond to class 0),
# so comparing them with the class ids directly gives a meaningless score.
# Clusters are first matched one-to-one to classes so that the number of
# agreeing samples is maximal (Hungarian algorithm), then accuracy is computed
# on the relabelled predictions.
from scipy.optimize import linear_sum_assignment
from sklearn.metrics import accuracy_score, confusion_matrix

n_ids = int(max(np.max(numeric_labels), np.max(predicted_clusters))) + 1
# Rows: true classes, columns: predicted clusters
contingency = confusion_matrix(numeric_labels, predicted_clusters, labels=np.arange(n_ids))

# linear_sum_assignment minimises cost, so negate the counts to maximise agreement
class_ids, cluster_ids = linear_sum_assignment(-contingency)
cluster_to_class = np.empty(n_ids, dtype=int)
cluster_to_class[cluster_ids] = class_ids

aligned_predictions = cluster_to_class[predicted_clusters]
accuracy = accuracy_score(numeric_labels, aligned_predictions)
print(f'Accuracy: {accuracy}')

In [None]:
# Create a scatter plot of the first two principal components
plt.figure(figsize=(8, 6))
for cluster in range(n_clusters):
    # Plot points belonging to the cluster
    in_cluster = predicted_clusters == cluster
    plt.scatter(features_pca[in_cluster, 0],
                features_pca[in_cluster, 1],
                label=f'Cluster {cluster}', alpha=0.7)

# Plot actual classes with different markers.
# No edgecolors here: 'x' is an unfilled marker, for which matplotlib ignores
# the edge colour and emits a warning.
for class_label in range(len(class_folders)):
    in_class = numeric_labels == class_label
    plt.scatter(features_pca[in_class, 0],
                features_pca[in_class, 1],
                label=f'Class {class_label}', marker='x', s=100)

plt.title('K-means Predicted Clusters vs Actual Classes')
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.legend()
plt.show()

In [None]:

# Plot actual classes only, on the first two principal components.
# No edgecolors here: 'x' is an unfilled marker, for which matplotlib ignores
# the edge colour and emits a warning.
plt.figure(figsize=(8, 6))
for class_label in range(len(class_folders)):
    in_class = numeric_labels == class_label
    plt.scatter(features_pca[in_class, 0],
                features_pca[in_class, 1],
                label=f'Class {class_label}', marker='x', s=100)

plt.title('Actual Classes')
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.legend()
plt.show()


In [None]:
# Elbow analysis: fit k-means on the standardized HOG features for a range of
# k and record the inertia of each fit.
k_values = range(1, 7)
inertia_values = []

for k in k_values:
    # A separate name is used so the fitted `kmeans` model from the clustering
    # step is not overwritten; re-running the prediction cells afterwards would
    # otherwise silently use the last model of this sweep.
    elbow_model = KMeans(n_clusters=k, n_init=10, random_state=42)
    elbow_model.fit(features_array)
    inertia_values.append(elbow_model.inertia_)

In [None]:
# Elbow curve: inertia recorded for each candidate number of clusters
fig, ax = plt.subplots()
ax.plot(k_values, inertia_values, marker='o')
ax.set_xlabel('Number of Clusters (K)')
ax.set_ylabel('Inertia')
ax.set_title('K-means Inertia vs. Number of Clusters')
plt.show()
