In [None]:
import zipfile
import os

# Extract the dataset archive into /content.
# The context manager closes the archive even if extraction raises.
with zipfile.ZipFile('/content/drive/MyDrive/Dataset.zip', 'r') as zip_ref:
    zip_ref.extractall('/content')

In [None]:
# Directory whose regular files are read as input images by the feature-extraction loop
dataset_dir = '/content/Dataset'

In [None]:
#Importing Modules
import os
import cv2
import numpy as np
import pandas as pd

from sklearn.cluster import KMeans
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from tensorflow.keras.applications import VGG16
from tensorflow.keras.applications.vgg16 import preprocess_input

In [None]:
# Image size used for resizing and for the VGG16 input shape, plus the seed
# shared by KMeans, the train/test split and the random forest
image_size = (224, 224)
random_state = 42


In [None]:
# Number of clusters KMeans is asked to find; the cluster ids are later used as class labels
n_clusters = 2  # assumed number of classes in the dataset



In [None]:
# Pre-trained VGG16 (ImageNet weights) without its top layers, used as a feature extractor
model = VGG16(
    weights='imagenet',
    include_top=False,
    input_shape=(*image_size, 3),
)



In [None]:
# Function to extract features from an image
def extract_features(image_path):
    """Return the flattened VGG16 feature vector for one image file.

    Args:
        image_path: Path to an image file that OpenCV can decode.

    Returns:
        Array of shape (1, n_features): the output of ``model`` for the
        image, with every non-batch dimension flattened into one axis.

    Raises:
        ValueError: If OpenCV cannot read or decode ``image_path``.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports unreadable / non-image files by returning None
        raise ValueError(f'Could not read image: {image_path}')

    # OpenCV decodes to BGR, whereas the VGG16 preprocess_input expects RGB
    # input (it performs the RGB -> BGR swap itself).
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    # cv2.resize takes (width, height); image_size is used as (height, width)
    # when building the model's input_shape.
    image = cv2.resize(image, (image_size[1], image_size[0]))
    image = np.expand_dims(image, axis=0)
    image = preprocess_input(image)

    # verbose=0 keeps the per-image progress output from flooding the notebook
    features = model.predict(image, verbose=0)
    return np.reshape(features, (features.shape[0], -1))



In [None]:
# Load the dataset and extract features
features = []
filenames = []
# os.listdir returns entries in arbitrary order; sorting keeps the row order
# (and therefore the seeded train/test split downstream) the same on every run.
for filename in sorted(os.listdir(dataset_dir)):
    image_path = os.path.join(dataset_dir, filename)
    if os.path.isfile(image_path):
        # Extract features from the image; rows of `features` and entries of
        # `filenames` stay aligned because both are appended together
        image_features = extract_features(image_path)
        features.append(image_features)
        filenames.append(filename)





In [None]:
# Convert the features list to a numpy array
if len(features) == 0:
    raise ValueError('No image features were extracted; nothing to cluster')
# np.vstack yields the same (n_images, n_features) matrix whether `features`
# is still the list of per-image rows or already the stacked array, so
# re-running this cell does not corrupt its shape.
features = np.vstack(features)

# Perform clustering on the features
kmeans = KMeans(n_clusters=n_clusters, random_state=random_state)
cluster_labels = kmeans.fit_predict(features)

# Create a DataFrame with filenames and cluster labels; it keeps the default
# integer index, so row i of df corresponds to row i of `features`
df = pd.DataFrame({'filename': filenames, 'cluster_label': cluster_labels})





In [None]:
# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable
train_df, test_df = train_test_split(
    df,
    test_size=0.2,
    random_state=random_state,
)



In [None]:
# Training split: cluster labels as targets, and the matching rows of the
# feature matrix (the split's index values are used as row positions in `features`)
train_labels = train_df['cluster_label']
train_features = features[train_df.index.to_numpy()]



In [None]:
# Fit a random forest that predicts the KMeans cluster label from the VGG16 features
classifier = RandomForestClassifier(random_state=random_state)
classifier.fit(X=train_features, y=train_labels)



In [None]:
# Testing split: cluster labels as targets, and the matching rows of the
# feature matrix (the split's index values are used as row positions in `features`)
test_labels = test_df['cluster_label']
test_features = features[test_df.index.to_numpy()]



In [None]:
# Predict a cluster label for every held-out feature row
predictions = classifier.predict(X=test_features)



In [None]:
# Fraction of held-out rows where the classifier reproduces the KMeans cluster
# label; the reference labels are cluster assignments, not ground-truth classes
accuracy = accuracy_score(y_true=test_labels, y_pred=predictions)
print('Accuracy:', accuracy)

Accuracy: 0.9938650306748467
