# **Import Libraries**

In [269]:
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import time
import pickle

In [270]:
# Registry of fitted pipeline stages, keyed by name. Later cells store the
# 'kmeans', 'scaler' and 'logistic_regression' objects here, and the final
# cell pickles the whole dict to disk.
models = {}

# **Load Data**

In [271]:
def Data_Preprocessing_2(root_folder, folder):
    """Collect image paths and integer class labels for one dataset split.

    Expected layout: ``root_folder/<product_id>/<folder>/<image files>``,
    where the name of each product directory parses as an integer label.

    Parameters
    ----------
    root_folder : str
        Directory that contains one sub-directory per product.
    folder : str
        Name of the split sub-directory to read inside every product
        directory (the notebook passes 'Train' and 'Validation').

    Returns
    -------
    tuple of (list of str, list of int)
        Two index-aligned lists: the path of every file found, and the
        label (product directory name converted with ``int``) of that file.
        Products and files are visited in sorted name order.

    Raises
    ------
    ValueError
        If a product directory that contains the split folder has a name
        that is not an integer.
    """
    path_images = []
    labels = []

    # os.listdir returns entries in arbitrary order; sorting makes the sample
    # order (and everything fitted on it downstream) reproducible.
    for product_folder in sorted(os.listdir(root_folder)):
        product_path = os.path.join(root_folder, product_folder)

        # Stray files next to the product directories are not products.
        if not os.path.isdir(product_path):
            continue

        current_folder = os.path.join(product_path, folder)

        # A product may legitimately lack one of the splits: report and skip.
        if not os.path.isdir(current_folder):
            print(f"{folder} folder not found for product {product_folder}")
            continue

        label = int(product_folder)

        # Keep regular files only; a nested directory is not an image.
        current_images = [
            os.path.join(current_folder, file_name)
            for file_name in sorted(os.listdir(current_folder))
            if os.path.isfile(os.path.join(current_folder, file_name))
        ]

        path_images.extend(current_images)
        labels.extend([label] * len(current_images))

    return path_images, labels

In [272]:
# Both splits live under the same dataset root; only the sub-folder differs.
dataset_root = '/kaggle/input/product-classification/Product Classification/'
image_paths, y_train = Data_Preprocessing_2(dataset_root, 'Train')
val_image_paths, y_val = Data_Preprocessing_2(dataset_root, 'Validation')

Validation folder not found for product 6


# **Extract SIFT Descriptors**

In [273]:
def extract_sift_features(image_paths):
    """Compute SIFT descriptors for every image in ``image_paths``.

    Parameters
    ----------
    image_paths : iterable of str
        Paths of the images to process.

    Returns
    -------
    list
        One entry per input path, in the same order. Each entry is the
        descriptor value returned by ``detectAndCompute`` for that image, or
        None when the image could not be read. Descriptor entries themselves
        may also be None (the validation preprocessing cell guards for that),
        so callers must handle None entries.
    """
    sift = cv2.SIFT_create()
    sift_features = []

    for path in image_paths:
        # IMREAD_GRAYSCALE is the imread flag for single-channel loading;
        # COLOR_BGR2GRAY is a cvtColor conversion code and must not be passed
        # to imread, where its numeric value means something else.
        img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)

        if img is None:
            # imread reports an unreadable file by returning None. Keep a
            # placeholder so the result stays index-aligned with the labels.
            print(f"Could not read image {path}")
            sift_features.append(None)
            continue

        _, descriptors = sift.detectAndCompute(img, None)
        sift_features.append(descriptors)

    return sift_features

# **Create BOW for an image**

In [274]:
def BOW(image_descriptors, kmeans):
    """Build the bag-of-visual-words histogram for one image.

    Parameters
    ----------
    image_descriptors : array-like of shape (n_descriptors, n_features) or None
        Local descriptors of a single image. None or an empty array means the
        image has no descriptors.
    kmeans : fitted clustering model
        Must expose ``n_clusters`` and ``predict``; each cluster is one
        visual word.

    Returns
    -------
    numpy.ndarray
        Integer array of length ``kmeans.n_clusters``; element ``i`` counts
        the descriptors assigned to cluster ``i``. All zeros when the image
        has no descriptors.
    """
    n_words = kmeans.n_clusters

    # An image without descriptors contains no visual words at all; predict()
    # cannot be called on None or on an empty array.
    if image_descriptors is None or len(image_descriptors) == 0:
        return np.zeros(n_words, dtype=np.intp)

    # The notebook fits the vocabulary on float32 descriptors, so normalise
    # the dtype here regardless of what the caller passes in.
    descriptors = np.asarray(image_descriptors, dtype=np.float32)

    # Assign each descriptor to the nearest cluster
    cluster_assignments = kmeans.predict(descriptors)

    # One unit-width bin per cluster index: 0 .. n_words - 1
    bow_representation, _ = np.histogram(cluster_assignments, bins=range(n_words + 1))
    return bow_representation

# **Use k-means for vocabulary**

In [275]:
# Size of the visual vocabulary: one k-means cluster per visual word.
vocabulary_size = 1500
# Fixed random_state so repeated runs start from the same initialisation.
kmeans = KMeans(random_state=42, n_clusters=vocabulary_size)

In [276]:
# SIFT descriptors for the training images first, then for the validation
# images. Each resulting list is index-aligned with its list of paths.
sift_features, sift_features_val = (
    extract_sift_features(split_paths)
    for split_paths in (image_paths, val_image_paths)
)




In [277]:
# For Train
# Convert descriptors to 'float32'. An entry that is None (no descriptors for
# that image) is kept as None so the list stays index-aligned with y_train,
# the same way the validation list below is handled.
sift_features_float = [descriptors.astype('float32') if descriptors is not None else None for descriptors in sift_features]

# Stack all training descriptors into a single 2-D array for k-means.
# Images without descriptors contribute no rows.
flat_descriptors = np.vstack([descriptors for descriptors in sift_features_float if descriptors is not None])

#****************************************************************************************************************************************
# For Validation
# Convert descriptors to 'float32', keeping None placeholders in position
sift_features_float_val = [descriptors.astype('float32') if descriptors is not None else None for descriptors in sift_features_val]


# **Fit Kmeans**

In [278]:
# Learn the visual vocabulary from the stacked training descriptors. fit()
# returns the estimator itself, so it is registered under 'kmeans' directly.
models['kmeans'] = kmeans.fit(flat_descriptors)



# Create BoW representations for train images

In [279]:
# One visual-word histogram per training image, in the same order as y_train.
bow_representations = [
    BOW(image_descriptors, kmeans)
    for image_descriptors in sift_features_float
]

# Create BoW representations for validation images

In [280]:
# One visual-word histogram per validation image, in the same order as y_val.
# Built from the float32-converted descriptors so validation images go through
# the same preprocessing as the training images.
bow_representations_val = [
    BOW(image_descriptors, kmeans)
    for image_descriptors in sift_features_float_val
]

# StandardScaler for bow_representation train & validation

In [281]:
# Standardisation statistics are learned from the training histograms only and
# then re-used unchanged for the validation histograms.
scaler = StandardScaler()
bow_representations = scaler.fit_transform(bow_representations)
bow_representations_val = scaler.transform(bow_representations_val)
models['scaler'] = scaler

# **Train Classifier**

In [282]:
# Train a Logistic Regression classifier
lr_classifier = LogisticRegression(max_iter=1000,solver='liblinear')
# perf_counter is a monotonic, high-resolution clock intended for measuring
# elapsed time (time.time can jump when the system clock is adjusted). Only
# the difference between the two readings is meaningful.
start_train_lr = time.perf_counter()
lr_classifier.fit(bow_representations, y_train)
end_train_lr = time.perf_counter()
models['logistic_regression'] = lr_classifier

In [283]:
# Elapsed seconds between the two clock readings taken around the fit call.
training_time_lr = end_train_lr - start_train_lr
print('Training time of LogisticRegression',training_time_lr)

Training time of LogisticRegression 2.7530322074890137


# Accuracy Train

In [284]:
# Accuracy on the same samples the classifier was fitted on: this shows how
# well the model fits the training set, not how well it generalises.
lr_accuracy_train = accuracy_score(y_train, lr_classifier.predict(bow_representations))

In [285]:
# Report the training-set accuracy computed in the previous cell.
print(f"Logistic Regression Accuracy Train: {lr_accuracy_train}")

Logistic Regression Accuracy Train: 1.0


# Prediction Validation

In [286]:
# Time only the predict call. perf_counter is a monotonic, high-resolution
# clock, which matters for an interval this short; only the difference
# between the two readings is meaningful.
start_val_lr = time.perf_counter()
lr_predictions = lr_classifier.predict(bow_representations_val)
end_val_lr = time.perf_counter()

In [287]:
# Elapsed seconds between the two clock readings taken around the predict call.
Val_time_lr = end_val_lr - start_val_lr
print('Validation time of LogisticRegression',Val_time_lr)

Validation time of LogisticRegression 0.0015330314636230469


# Accuracy Validation

In [288]:
# Compare the validation predictions with the true validation labels.
lr_accuracy_val = accuracy_score(y_val, lr_predictions)
print(f"Logistic Regression Accuracy Validation: {lr_accuracy_val}")

Logistic Regression Accuracy Validation: 0.9705882352941176


In [289]:
# Persist every fitted stage collected in `models` as a single pickle file,
# written to the current working directory.
model_pkl_file = "Sift_Classification_model.pkl"
with open(model_pkl_file, mode='wb') as pkl_handle:
    pickle.dump(models, pkl_handle)