# Spatial Pyramid Pooling

In [1]:
pip install scikit-image


Note: you may need to restart the kernel to use updated packages.


In [1]:
import cv2
import numpy as np
from skimage import transform




In [2]:
def spatial_pyramid_pooling(img, levels):
    height, width, _ = img.shape
    pooled_features = []
    for level in levels:
        grid_size = (2 ** level, 2 ** level)
        cell_size = (height // grid_size[0], width // grid_size[1])
        for i in range(grid_size[0]):
            for j in range(grid_size[1]):
                cell = img[i * cell_size[0]:(i + 1) * cell_size[0], j * cell_size[1]:(j + 1) * cell_size[1]]
                pooled_features.append(np.mean(cell, axis=(0, 1)))
    return np.array(pooled_features)

In [None]:
import os
import cv2
import numpy as np

def load_dataset(dataset_path):
    images = []
    labels = []
    for root, dirs, files in os.walk(dataset_path):
        for file in files:
            if file.endswith(".png") or file.endswith(".jpg"):
                image_path = os.path.join(root, file)
                label = os.path.basename(root)  # Assuming each subdirectory represents a class
                # Read the image
                image = cv2.imread(image_path)
                # Apply median filter
                image = cv2.medianBlur(image, 3)  # Adjust kernel size as needed
                # Resize image to a fixed size
                image = cv2.resize(image, (32, 32))
                # You can perform additional preprocessing steps here if needed
                images.append(image)
                labels.append(label)
    return images, labels


In [None]:
dataset_path = 'datSet'
images, labels = load_dataset(dataset_path)

In [16]:
images = np.array(images)

In [17]:
levels = [1, 2, 4]  # Define the levels of the pyramid (e.g., 1x1, 2x2, 4x4)
pooled_features = [spatial_pyramid_pooling(img, levels) for img in images]

In [18]:
data = np.array(pooled_features)


In [19]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

In [20]:
X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.2, random_state=42)


In [21]:
svm_classifier = SVC()


In [22]:
# Flatten the images before applying SPP
X_train_flattened = X_train.reshape(X_train.shape[0], -1)

# Train the SVM classifier
svm_classifier.fit(X_train_flattened, y_train)


SVC()

In [23]:
X_test_flattened = X_test.reshape(X_test.shape[0], -1)

y_pred = svm_classifier.predict(X_test_flattened)


In [24]:
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.49238578680203043


In [19]:
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

    elefante       0.63      0.59      0.61        75
    farfalla       0.47      0.60      0.52        72
       gatto       0.48      0.64      0.55        88
      pecora       0.66      0.47      0.55        93
       ragno       0.42      0.30      0.35        66

    accuracy                           0.53       394
   macro avg       0.53      0.52      0.52       394
weighted avg       0.54      0.53      0.52       394



In [15]:
import cv2
import numpy as np

In [16]:
new_image_path = 'far.jpg'
new_image = cv2.imread(new_image_path)


In [17]:
preprocessed_image = cv2.resize(new_image, (32, 32))  # Resize to match training images size

# Apply Spatial Pyramid Pooling to the preprocessed image
levels = [1, 2, 4]  # Define the levels of the pyramid (e.g., 1x1, 2x2, 4x4)
pooled_features = spatial_pyramid_pooling(preprocessed_image, levels)

# Reshape the feature vector to match the format expected by the model
pooled_features = pooled_features.reshape(1, -1)

# Predict the label for the new image
predicted_label = svm_classifier.predict(pooled_features)

In [18]:
predicted_label

array(['farfalla'], dtype='<U8')