<a href="https://colab.research.google.com/github/abdulrehman1215/Image-Classification-using-SIFT-And-Machine-Learning/blob/main/Flower_dataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Import Libraries

In [None]:
import os
import cv2
import numpy as np
from sklearn.svm import SVC
from sklearn.cluster import KMeans
from sklearn.metrics import accuracy_score, f1_score, ConfusionMatrixDisplay, confusion_matrix
from sklearn.preprocessing import normalize
from sklearn.ensemble import RandomForestClassifier
from matplotlib import pyplot as plt


# Load Dataset

In [None]:
# Load the dataset
# NOTE: `dir` shadows the builtin of the same name; the name is kept because
# later cells reference it.
dir = '/content/drive/MyDrive/flower_photos/flower_photos/'

images = []
labels = []
# Keep only sub-directories (one per class) and sort them: os.listdir returns
# entries in arbitrary order and also lists stray files, while scikit-learn
# orders confusion-matrix rows by sorted label.
classes = sorted(
    entry for entry in os.listdir(dir)
    if os.path.isdir(os.path.join(dir, entry))
)

In [None]:
# Walk every class folder and collect (image, label) pairs.
# Entries are sorted because os.listdir order is arbitrary; a stable order is
# needed for the seeded train/test split to be reproducible.
for label in sorted(os.listdir(dir)):
    class_dir = os.path.join(dir, label)
    # Stray files sitting next to the class folders are not classes.
    if not os.path.isdir(class_dir):
        continue
    for img_file in sorted(os.listdir(class_dir)):
        img = cv2.imread(os.path.join(class_dir, img_file))
        # cv2.imread returns None (it does not raise) when a file cannot be
        # decoded; skip it so later colour conversion does not fail.
        if img is None:
            continue
        images.append(img)
        labels.append(label)

In [None]:
# train_test_split does not appear among the notebook's imports, so bring it
# into scope here; without this the cell fails with a NameError.
from sklearn.model_selection import train_test_split

# Hold out 20% of the images for testing; the fixed seed keeps the split stable.
train_images, test_images, train_labels, test_labels = train_test_split(
    images, labels, test_size=0.2, random_state=42, shuffle=True
)

# Extract SIFT Features

In [None]:
# Extract SIFT features
# SIFT is exposed as cv2.SIFT_create in the main OpenCV module since 4.4;
# older builds only provide it through the contrib xfeatures2d module.
if hasattr(cv2, "SIFT_create"):
    sift = cv2.SIFT_create()
else:
    sift = cv2.xfeatures2d.SIFT_create()

In [None]:
# Compute SIFT descriptors for every training image (one array per image).
train_descriptors = []
for img in train_images:
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    keypoints, descriptors = sift.detectAndCompute(gray, None)
    # detectAndCompute may return None when an image yields no keypoints.
    # Store an empty descriptor array instead so the list stays index-aligned
    # with train_labels and can still be stacked.
    if descriptors is None:
        descriptors = np.empty((0, sift.descriptorSize()), dtype=np.float32)
    train_descriptors.append(descriptors)
# Displayed by the notebook: (total number of descriptors, descriptor length).
np.vstack(train_descriptors).shape

# Build Visual Vocabulary

In [None]:
# Cluster SIFT descriptors into visual words
# A fixed seed makes the learned vocabulary reproducible, matching the seeded
# split and classifiers elsewhere in the notebook.
kmeans = KMeans(n_clusters=200, random_state=42)
# Images without keypoints contribute no descriptors (None or empty entries),
# so they are left out of the stacked training matrix.
kmeans.fit(np.vstack([d for d in train_descriptors if d is not None and len(d) > 0]))

In [None]:
# Generate histograms for each image using the visual words
train_histograms = []
for descriptors in train_descriptors:
    if descriptors is None or len(descriptors) == 0:
        # No keypoints means no visual words; the histogram stays all-zero.
        visual_words = np.empty(0, dtype=np.intp)
    else:
        visual_words = kmeans.predict(descriptors)
    # One bin per cluster. The bin count must follow the vocabulary size:
    # a fixed 100-bin histogram would discard the upper half of a 200-word
    # vocabulary.
    histogram = np.bincount(visual_words, minlength=kmeans.n_clusters)
    # L2-normalise so images with different keypoint counts are comparable.
    train_histograms.append(normalize(histogram.reshape(1, -1)))

In [None]:
# Compute SIFT descriptors for every test image (one array per image).
test_descriptors = []
for img in test_images:
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    keypoints, descriptors = sift.detectAndCompute(gray, None)
    # detectAndCompute may return None when an image yields no keypoints.
    # Store an empty descriptor array instead so the list stays index-aligned
    # with test_labels.
    if descriptors is None:
        descriptors = np.empty((0, sift.descriptorSize()), dtype=np.float32)
    test_descriptors.append(descriptors)

In [None]:
# Build the bag-of-visual-words histogram for each test image.
test_histograms = []
for descriptors in test_descriptors:
    if descriptors is None or len(descriptors) == 0:
        # No keypoints means no visual words; the histogram stays all-zero.
        visual_words = np.empty(0, dtype=np.intp)
    else:
        visual_words = kmeans.predict(descriptors)
    # One bin per cluster. The bin count must follow the vocabulary size:
    # a fixed 100-bin histogram would discard the upper half of a 200-word
    # vocabulary.
    histogram = np.bincount(visual_words, minlength=kmeans.n_clusters)
    # L2-normalise so images with different keypoint counts are comparable.
    test_histograms.append(normalize(histogram.reshape(1, -1)))

# Random Forest Classifier

In [None]:
# Fit a 100-tree random forest on the stacked training histograms
rf = RandomForestClassifier(random_state=42, n_estimators=100).fit(
    np.vstack(train_histograms), train_labels
)

# Classify the held-out images and score the predictions
rf_predictions = rf.predict(np.vstack(test_histograms))
rf_accuracy = accuracy_score(test_labels, rf_predictions)
rf_f1 = f1_score(test_labels, rf_predictions, average='weighted')

print("Random Forest Accuracy:", rf_accuracy)
print("Weighted Random Forest F-1 Score:", rf_f1)

In [None]:
fig, ax = plt.subplots(figsize=(6, 6))

# Create confusion matrix
# Pin the row/column order to rf.classes_ and reuse the same array for the
# tick labels. A directory listing has arbitrary order, so using it as
# display labels can attach the wrong class name to a row.
cm_rf = confusion_matrix(test_labels, rf_predictions, labels=rf.classes_)
cm_display_rf = ConfusionMatrixDisplay(confusion_matrix=cm_rf, display_labels=rf.classes_)

cm_display_rf.plot(ax=ax)
plt.title("Flowers Dataset: Random Forest Classifier")
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.show()

In [None]:
# Per-class one-vs-rest counts derived from the Random Forest confusion matrix
# (rows are true labels, columns are predicted labels).
TP_rf = np.diag(cm_rf)                          # correctly predicted, per class
FP_rf = cm_rf.sum(axis=0) - np.diag(cm_rf)      # column total minus the diagonal
FN_rf = cm_rf.sum(axis=1) - np.diag(cm_rf)      # row total minus the diagonal
TN_rf = cm_rf.sum() - (FP_rf + FN_rf + TP_rf)   # everything else

for caption, counts in (
    ('Random Forest False Positive:', FP_rf),
    ('Random Forest False Negative:', FN_rf),
    ('Random Forest True Positive:', TP_rf),
    ('Random Forest True Negative:', TN_rf),
):
    print(caption, counts)

In [None]:
# Per-class one-vs-rest rates for the Random Forest classifier.
# True positive rate (recall): TP / (TP + FN), i.e. over all actual positives.
TPR_rf = TP_rf / (TP_rf + FN_rf)
# False positive rate: FP / (FP + TN), i.e. over all actual negatives.
# Dividing by (FN + TP) would normalise by the positives instead.
FPR_rf = FP_rf / (FP_rf + TN_rf)
print('Random Forest False Positive Rate:', FPR_rf)
print('Random Forest True Positive Rate:', TPR_rf)

# SVM Classifier

In [None]:
# Fit a linear-kernel SVM on the stacked training histograms
svm = SVC(kernel='linear').fit(np.vstack(train_histograms), train_labels)

# Classify the held-out images and score the predictions
svm_predictions = svm.predict(np.vstack(test_histograms))
svm_f1 = f1_score(test_labels, svm_predictions, average='weighted')
svm_accuracy = accuracy_score(test_labels, svm_predictions)

print("SVM Accuracy:", svm_accuracy)
print("Weighted SVM F-1 Score:", svm_f1)

In [None]:
fig, ax = plt.subplots(figsize=(6, 6))

# Create confusion matrix
# Pin the row/column order to svm.classes_ and reuse the same array for the
# tick labels. A directory listing has arbitrary order, so using it as
# display labels can attach the wrong class name to a row.
cm = confusion_matrix(test_labels, svm_predictions, labels=svm.classes_)
cm_display = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=svm.classes_)

cm_display.plot(ax=ax)
plt.title("Flower DataSet: SVM Classifier")
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.show()

In [None]:
# Per-class one-vs-rest counts derived from the SVM confusion matrix
# (rows are true labels, columns are predicted labels).
TP = np.diag(cm)                    # correctly predicted, per class
FP = cm.sum(axis=0) - np.diag(cm)   # column total minus the diagonal
FN = cm.sum(axis=1) - np.diag(cm)   # row total minus the diagonal
TN = cm.sum() - (FP + FN + TP)      # everything else

for caption, counts in (
    ('SVM False Positive:', FP),
    ('SVM False Negative:', FN),
    ('SVM True Positive:', TP),
    ('SVM True Negative:', TN),
):
    print(caption, counts)

In [None]:
# Per-class one-vs-rest rates for the SVM classifier.
# True positive rate (recall): TP / (TP + FN), i.e. over all actual positives.
TPR = TP / (TP + FN)
# False positive rate: FP / (FP + TN), i.e. over all actual negatives.
# Dividing by (FN + TP) would normalise by the positives instead.
FPR = FP / (FP + TN)
print('SVM False Positive Rate:', FPR)
print('SVM True Positive Rate:', TPR)

# Saving Model (Optional)

In [None]:
# import pickle

# with open("/content/drive/MyDrive/kmeans_200.pkl", "rb") as f:
#     model = pickle.load(f)

In [None]:
# with open("/content/drive/MyDrive/kmeans_200.pkl", "wb") as f:
#     pickle.dump(kmeans, f)