In [1]:
#imported libs
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import cv2
from tensorflow.keras.utils import get_file
import os
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from collections import defaultdict


In [2]:
# Parameters
dataset_dir = r"C:\Users\princ\Downloads\datasets\caltech-101\caltech-101\101_ObjectCategories"
output_dir = r"C:\Users\princ\Downloads\datasets\caltech-101\processed"  # Output directory for processed images
kernel_size = (13, 13)  # Kernel size for Gaussian blur
sigma = 2.0  # Standard deviation for Gaussian kernel

# Function to apply Gaussian filter to an image
def apply_gaussian_filter(image_path, kernel_size, sigma):
    image = cv2.imread(image_path, cv2.IMREAD_COLOR)  # Read image in color
    if image is None:
        raise ValueError(f"Image at {image_path} could not be loaded.")
    blurred = cv2.GaussianBlur(image, kernel_size, sigma)
    return blurred

# Process all images in the dataset
def process_dataset(input_dir, output_dir, kernel_size, sigma):
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    for class_name in os.listdir(input_dir):
        class_path = os.path.join(input_dir, class_name)
        if os.path.isdir(class_path):
            # Create corresponding output directory for this class
            output_class_dir = os.path.join(output_dir, class_name)
            os.makedirs(output_class_dir, exist_ok=True)

            for image_name in os.listdir(class_path):
                input_image_path = os.path.join(class_path, image_name)
                output_image_path = os.path.join(output_class_dir, image_name)

                try:
                    # Apply Gaussian filter
                    processed_image = apply_gaussian_filter(input_image_path, kernel_size, sigma)

                    # Save the processed image
                    cv2.imwrite(output_image_path, processed_image)
                except Exception as e:
                    print(f"Error processing {input_image_path}: {e}")

# Apply Gaussian filter to the dataset
process_dataset(dataset_dir, output_dir, kernel_size, sigma)

print("Processing complete. Filtered images saved to:", output_dir)


Processing complete. Filtered images saved to: C:\Users\princ\Downloads\datasets\caltech-101\processed


In [3]:
dataset_dir = r"C:\Users\princ\Downloads\datasets\caltech-101\processed"


# Parameters
batch_size = 32
image_size = (200, 200)

# Load the training and validation datasets
train_dataset = tf.keras.preprocessing.image_dataset_from_directory(
    dataset_dir,
    validation_split=0.2,
    subset="training",
    seed=123,
    image_size=image_size,
    batch_size=batch_size
)

val_dataset = tf.keras.preprocessing.image_dataset_from_directory(
    dataset_dir,
    validation_split=0.2,
    subset="validation",
    seed=123,
    image_size=image_size,
    batch_size=batch_size
)


def dataset_to_numpy(dataset):
    """
    Convert a tf.data.Dataset into NumPy arrays for features and labels.
    Args:
        dataset: A tf.data.Dataset object.
    Returns:
        X: Numpy array of features (images).
        y: Numpy array of labels.
    """
    X = []
    y = []
    for images, labels in dataset:
        X.append(images.numpy())
        y.append(labels.numpy())
    return np.concatenate(X, axis=0), np.concatenate(y, axis=0)

# Convert the train and validation datasets to NumPy arrays
X_train, y_train = dataset_to_numpy(train_dataset)
X_test, y_test = dataset_to_numpy(val_dataset)

#print(f"X_train shape: {X_train.shape}")
#print(f"y_train shape: {y_train.shape}")
#print(f"X_test shape: {X_test.shape}")
#print(f"y_test shape: {y_test.shape}")

Found 8677 files belonging to 100 classes.
Using 6942 files for training.
Found 8677 files belonging to 100 classes.
Using 1735 files for validation.


In [4]:
def count_images_per_class(dataset):
    class_counts = defaultdict(int)
    class_names = dataset.class_names  # Get class names
    for images, labels in dataset:
        for label in labels.numpy():
            class_counts[class_names[label]] += 1
    return class_counts

# Get class-wise counts
train_class_counts = count_images_per_class(train_dataset)
val_class_counts = count_images_per_class(val_dataset)

# Display the results
print(f"{'Class':<20} {'Train':<10} {'Validation':<10}")
print("-" * 50)
all_classes = sorted(set(train_class_counts.keys()).union(val_class_counts.keys()))
for class_name in all_classes:
    train_count = train_class_counts.get(class_name, 0)
    val_count = val_class_counts.get(class_name, 0)
    print(f"{class_name:<20} {train_count:<10} {val_count:<10}")

Class                Train      Validation
--------------------------------------------------
Faces                697        173       
Leopards             172        28        
Motorbikes           622        176       
accordion            40         15        
airplanes            637        163       
anchor               35         7         
ant                  37         5         
barrel               36         11        
bass                 40         14        
beaver               30         16        
binocular            31         2         
bonsai               98         30        
brain                81         17        
brontosaurus         39         4         
buddha               62         23        
butterfly            73         18        
camera               36         14        
cannon               35         8         
car_side             98         25        
ceiling_fan          41         6         
cellphone            50         9         
cha

In [5]:
#Accuracy
def accuracy(y_test1, y_pred1):
    y_pred1 = np.array(y_pred1)
    counter = 0
    for i in range(len(y_pred1)):
      if (y_pred1[i] == y_test1[i]):
        counter += 1
    accuracy = counter / len(y_pred1)
    accuracy *= 100
    return accuracy

In [None]:
#Color Histogran Extraction def
def extract_color_histogram(image, bins=(8, 8, 8)):
    """
    Extract a 3D color histogram from an RGB image.
    Args:
        image (numpy array): Input image in RGB format.
        bins (tuple): Number of bins for each channel (R, G, B).
    Returns:
        numpy array: Flattened color histogram feature vector.
    """
    # Calculate the 3D histogram for the HSV channels
    hist = cv2.calcHist([image], [0, 1, 2], None, bins, [0, 255, 0, 256, 0, 256])
    # Normalize the histogram to ensure invariance to lighting changes
    hist = cv2.normalize(hist, hist).flatten()

    return hist

In [7]:
#HOG def
def extract_hog_features(image):
    normalized_image = image /255.0
    channels = cv2.split(normalized_image)
    # HOG parameters
    winSize = (32, 32)
    blockSize = (8, 8)
    blockStride = (8, 8)
    cellSize = (8, 8)
    nbins = 9
    hog = cv2.HOGDescriptor(winSize, blockSize, blockStride, cellSize, nbins)
    # Initialize a list to hold HOG features for each channel
    concatenated_hog_features = []
    
    for channel in channels:
        # Ensure the channel is grayscale and of type uint8
        channel = (channel * 255).astype(np.uint8) if channel.dtype == np.float32 else channel
        hog_features = hog.compute(channel)
        concatenated_hog_features.append(hog_features.flatten())

    # Concatenate features from all channels into a single feature vector
    concatenated_hog_features = np.concatenate(concatenated_hog_features)
    return concatenated_hog_features

In [8]:
#LBP def
from skimage.feature import local_binary_pattern

def extract_lbp_features(image, num_points=32, radius=8):
    #normalized_image = image /255.0
    # Split the image into Red, Green, and Blue channels
    channels = cv2.split(image)
    # Initialize a list to store concatenated LBP histograms for all channels
    concatenated_hist = []
    
    # Loop through each channel
    for channel in channels:
        # Compute LBP representation for the channel
        lbp = local_binary_pattern(channel, num_points, radius, method='uniform')
        # Calculate the histogram of LBP
        (hist, _) = np.histogram(lbp.ravel(), bins=np.arange(0, num_points + 3), range=(0, num_points + 2))
        # Normalize the histogram
        hist = hist.astype("float")
        hist /= (hist.sum() + 1e-7)
        
        # Append the histogram to the concatenated list
        concatenated_hist.extend(hist)
    
    # Convert the concatenated list to a numpy array and return
    return np.array(concatenated_hist)

In [9]:
# Step 1: Extract LBP features for train and test
lbp_features_train = np.array([extract_lbp_features(image) for image in X_train])
lbp_features_test  = np.array([extract_lbp_features(image) for image in X_test])



In [10]:
# Step 2: Extract HOG features for train and test
hog_features_train = np.array([extract_hog_features(image) for image in X_train])
hog_features_test  = np.array([extract_hog_features(image) for image in X_test])
#reduce the features
from sklearn.feature_selection import SelectKBest, f_classif 
selector = SelectKBest(score_func=f_classif, k=400)
hog_features_train = selector.fit_transform(hog_features_train, y_train)
hog_features_test = selector.transform(hog_features_test)

In [11]:
# Step 3: Extract Color Histogram features for train and test
clhg_features_train = np.array([extract_color_histogram(image) for image in X_train])
clhg_features_test  = np.array([extract_color_histogram(image) for image in X_test])
#reduce the features
from sklearn.feature_selection import SelectKBest, f_classif 
selector2 = SelectKBest(score_func=f_classif, k=180)
hog_features_train = selector2.fit_transform(hog_features_train, y_train)
hog_features_test = selector2.transform(hog_features_test)

In [15]:
class KNNClassifier:
    def __init__(self, k = 3):
        self.k = k

    def fit(self, X_train, y_train):
        self.X_train = X_train
        self.y_train = y_train
        
    def count_occurrences(self,input_array,distances):
        output_array = []
        for i in range(len(input_array)):
            count = sum(
                np.array_equal(input_array[i], other)
                for other in input_array
            )
            output_array.append(count)
        return output_array.index(max(output_array))  
            #output_array.append((1/count)*distances[i])
        #return output_array.index(min(output_array))  # return index of minimum count (1/count * distance)'''
    


    def predict(self, image_test):
        distances = np.linalg.norm(self.X_train - image_test.reshape(1,-1), axis=1)
        k_nearest = np.argsort(distances)[:self.k]
        #print(k_nearest)
        k_nearest_labels = self.y_train[k_nearest]
        #print(k_nearest_labels)
        prediction = self.count_occurrences(k_nearest_labels,k_nearest)
        
        
        return np.array(k_nearest_labels[prediction])

In [16]:
#compare with pre-built KNN from sklearn

from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split

knn = KNeighborsClassifier(n_neighbors=15)
knn.fit(lbp_features_train, y_train)
predictions = knn.predict(lbp_features_test)
accuracy(y_test, predictions)


34.178674351585016

In [17]:
model = KNNClassifier(9)
model.fit(lbp_features_train,y_train)

y_pred = np.array([model.predict( i.reshape(1, -1)) for i in  lbp_features_test])

In [18]:
accuracy(y_test, y_pred)

34.5821325648415

In [19]:
model2 = KNNClassifier(9)
model2.fit(hog_features_train,y_train)

y_pred2 = np.array([model2.predict( i.reshape(1, -1)) for i in  hog_features_test])

In [20]:
accuracy(y_test, y_pred2)

24.322766570605186

In [21]:
model3 = KNNClassifier(9)
model3.fit(clhg_features_train,y_train)
  
y_pred3 = np.array([model3.predict( i.reshape(1, -1)) for i in clhg_features_test])


In [22]:
accuracy(y_test, y_pred3)

32.680115273775215

In [23]:
#KNN for the three features concatenated together , gives higher accuracy
from sklearn.feature_selection import SelectKBest, f_classif
combined_features = np.concatenate     ([clhg_features_train*0.3, lbp_features_train*1.1], axis=1)
combined_features_test = np.concatenate([clhg_features_test*0.3, lbp_features_test *1.1], axis=1)

selector = SelectKBest(score_func=f_classif, k=450)
selected_features = selector.fit_transform(combined_features, y_train)
selected_features_test = selector.transform(combined_features_test)


knn2 = KNeighborsClassifier(n_neighbors=9,weights = "distance")
knn2.fit(selected_features, y_train)
predictionss = knn2.predict(selected_features_test)
accuracy(y_test, predictionss)

  f = msb / msw


38.27089337175792

In [24]:
class KMeans:
    def __init__(self, n_clusters=101, max_iter=300, tol=1e-6):
        self.n_clusters = n_clusters
        self.max_iter = max_iter
        self.tol = tol  # Tolerance for convergence

    def fit(self, X):
        # Step 1: Randomly initialize centroids
        np.random.seed(42)
        self.centroids = X[np.random.choice(X.shape[0], self.n_clusters, replace=False)]
        print(X.shape[0])
        print (self.centroids.shape)
        for iteration in range(self.max_iter):
            # Step 2: Assign points to the nearest cluster
            self.labels = self._assign_clusters(X)

            # Step 3: Compute new centroids
            new_centroids = np.array([  X[self.labels == i].mean(axis=0) for i in range(self.n_clusters)  ])

            # Check for convergence
            if np.all(np.abs(new_centroids - self.centroids) < self.tol):
                print(f"Converged at iteration {iteration}")
                break

            self.centroids = new_centroids

    def _assign_clusters(self, X):
        '''My implementation , faster implementation than chatgpt implementation
        distances=[]
        for i in range (len(X) ):
            distances.append(     np.linalg.norm(self.centroids - X[i] , axis=1)    ) 
        return np.argmin(  np.array(distances) , axis = 1  )'''
    
        # Compute distances from each point to each centroid
        distances = np.linalg.norm(X[:, np.newaxis] - self.centroids, axis=2) #(distances.shape) 7316 x 100 , 100 euclidean distances for each img in the data set
        return np.argmin(distances, axis=1)  # Assign each point to the nearest centroid

    def predict(self, X): 
        X = np.array(X).flatten()
        # Compute the Euclidean distances between the test case and all centroids
        distances = np.linalg.norm(self.centroids - X, axis=1)   # (100 x no. of features) - (1 X no.of features)   , broadcasting occurs = (100 X ,)
        return np.argmin(distances)

In [25]:
model_K_mean = KMeans()
model_K_mean.fit(hog_features_train)

6942
(101, 180)
Converged at iteration 42


In [None]:
from scipy.stats import mode
import numpy as np

def test_accuracy(model, X_test, y_test):
    # Step 1: Predict the cluster for each test data point
    predicted_clusters = np.array([model.predict(x.reshape(1, -1)) for x in X_test])

    # Step 2: Map clusters to true labels (majority class in each cluster)
    cluster_labels = np.zeros(model.n_clusters)  # Array to hold the true label for each cluster
    
    for cluster in range(model.n_clusters): # 0 ---- 101
        # Get the indices of all points assigned to this cluster
        cluster_points = y_test[predicted_clusters == cluster]  
        
        # Assign the most common true label in this cluster
        if len(cluster_points) > 0:
            most_common_label = mode(cluster_points)[0]
            cluster_labels[cluster] = most_common_label

    # Step 3: Predict labels for each test case
    predicted_labels = np.array([cluster_labels[cluster] for cluster in predicted_clusters])
    
    # Step 4: Calculate accuracy
    accuracy = np.sum(predicted_labels == y_test) / len(y_test) * 100
    return accuracy

# Example usage:
'''accuracy = test_accuracy(model_K_mean, hog_features_test , y_test)
print(f"Clustering Accuracy: {accuracy:.2f}%")'''


'accuracy = test_accuracy(model_K_mean, hog_features_test , y_test)\nprint(f"Clustering Accuracy: {accuracy:.2f}%")'

In [None]:
from sklearn.cluster import KMeans
# Step 4: Perform K-means clustering
kmeanss = KMeans(n_clusters=101, random_state=42)  # Choose number of clusters
kmeanss.fit(features_3d)
labels = kmeanss.labels_


def test_accuracy_sklearn_kmeans(model, X_test, y_test):
    # Step 1: Predict the cluster for each test data point
    predicted_clusters = model.predict(X_test)

    # Step 2: Map clusters to true labels (majority class in each cluster)
    cluster_labels = np.zeros(model.n_clusters)  # Array to hold the true label for each cluster
    
    for cluster in range(model.n_clusters):
        # Get the indices of all points assigned to this cluster
        cluster_points = y_test[predicted_clusters == cluster]
        
        # Assign the most common true label in this cluster
        if len(cluster_points) > 0:
            most_common_label = mode(cluster_points)[0]  # Get the most common label in this cluster
            cluster_labels[cluster] = most_common_label

    # Step 3: Predict labels for each test case
    predicted_labels = np.array([cluster_labels[cluster] for cluster in predicted_clusters])
    
    # Step 4: Calculate accuracy
    accuracy_test = np.sum(predicted_labels == y_test) / len(y_test) * 100
    return accuracy_test
accuracy_test2 = test_accuracy_sklearn_kmeans(kmeanss, Test_features_3d_X , y_test)
print(f"Clustering Accuracy with sklearn's KMeans: {accuracy_test2:.2f}%")


Clustering Accuracy with sklearn's KMeans: 27.61%


In [29]:
class SVMClassifier:
    def __init__(self, param_grid=None, cv=3):
        """
        Initialize the SVM classifier with hyperparameter tuning.
        Args:
            param_grid (dict): Dictionary with parameters names (`str`) as keys and lists of parameter settings to try as values.
            cv (int): Number of cross-validation folds for `GridSearchCV`.
        """
        # Default parameter grid if none is provided
        if param_grid is None:
            self.param_grid = {
                'kernel': ['linear', 'rbf', 'poly'],  # Kernel types to try
                'C': [0.1, 1, 10],                  # Regularization strength values
                'gamma': ['scale', 'auto']          # Gamma values (only for rbf, poly, sigmoid)
            }
        else:
            self.param_grid = param_grid

        self.cv = cv
        self.grid_search = None

    def fit(self, X_train, y_train):
        """
        Fit the SVM model on the training data using GridSearchCV.
        Args:
            X_train (numpy array): Feature vectors for training data.
            y_train (numpy array): Labels for training data.
        """
        # Perform grid search to find the best hyperparameters
        self.grid_search = GridSearchCV(
            SVC(),
            self.param_grid,
            cv=self.cv,
            scoring='accuracy',
            verbose=1  # Display progress
        )
        self.grid_search.fit(X_train, y_train)

        # Print the best parameters and score
        print("Best parameters:", self.grid_search.best_params_)
        print("Best cross-validation accuracy:", self.grid_search.best_score_)

    def predict(self, X_test):
        """
        Predict the labels for the test data.
        Args:
            X_test (numpy array): Feature vectors for test data.
        Returns:
            numpy array: Predicted labels.
        """
        if self.grid_search is None:
            raise ValueError("Model is not trained yet. Call `fit` before prediction.")
        
        best_model = self.grid_search.best_estimator_  # Get the best model from the grid search
        return best_model.predict(X_test)

    def evaluate(self, X_test, y_test):
        """
        Evaluate the model on test data and print a classification report.
        Args:
            X_test (numpy array): Feature vectors for test data.
            y_test (numpy array): True labels for test data.
        """
        y_pred = self.predict(X_test)
        print("Classification Report:")
        print(classification_report(y_test, y_pred))

In [30]:
modelx = SVMClassifier()
modelx.fit(lbp_features_train,y_train)

y_predx = np.array([modelx.predict( i.reshape(1, -1)) for i in  lbp_features_test])

Fitting 3 folds for each of 18 candidates, totalling 54 fits
Best parameters: {'C': 10, 'gamma': 'scale', 'kernel': 'poly'}
Best cross-validation accuracy: 0.35177182368193605


In [31]:
accuracy(y_test, y_predx)

36.65706051873199

In [32]:
modely = SVMClassifier()
modely.fit(hog_features_train,y_train)

y_predy = np.array([modely.predict( i.reshape(1, -1)) for i in  hog_features_test])

Fitting 3 folds for each of 18 candidates, totalling 54 fits
Best parameters: {'C': 1, 'gamma': 'scale', 'kernel': 'rbf'}
Best cross-validation accuracy: 0.2582829155862864


In [33]:
accuracy(y_test, y_predy)

27.37752161383285

In [34]:
modelz = SVMClassifier()
modelz.fit(clhg_features_train,y_train)

y_predz = np.array([modelz.predict( i.reshape(1, -1)) for i in clhg_features_test])

Fitting 3 folds for each of 18 candidates, totalling 54 fits
Best parameters: {'C': 10, 'gamma': 'scale', 'kernel': 'rbf'}
Best cross-validation accuracy: 0.3573898012100259


In [None]:
accuracy(y_test, y_predz)

37.694524495677236