# Support Vector Machine (SVM) for Classifying Images of Cats and Dogs

In [1]:
# Importing libraries
import os
import numpy as np
from tensorflow.keras.preprocessing.image import load_img, img_to_array, ImageDataGenerator
from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2, preprocess_input
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm

In [2]:
# Dataset locations: the training and test image directories.
train_dir, test_dir = "datasets/train", "datasets/test"

In [4]:
# Function for loading the images
def load_images(folder, label, img_size=(64,64), limit=None):
    """Load every readable image in `folder` and pair it with `label`.

    Parameters
    ----------
    folder : str
        Directory whose entries are passed to `load_img` one by one.
    label : any
        Value stored alongside every image loaded from this folder.
    img_size : tuple
        Forwarded to `load_img` as `target_size`.
    limit : int or None
        Stop after this many images have been loaded successfully.
        `None` (or 0) means no limit.

    Returns
    -------
    list of (array, label) tuples, where `array` is whatever
    `img_to_array` returns for the loaded image.

    Entries that fail to load are skipped; a single warning reports how
    many were skipped so that silent data loss is visible.
    """
    import warnings  # local import keeps this cell self-contained

    data = []
    skipped = 0
    # os.listdir returns entries in arbitrary order; sorting makes the subset
    # selected by `limit` the same on every run and every machine.
    for fname in sorted(os.listdir(folder)):
        img_path = os.path.join(folder, fname)
        try:
            img = load_img(img_path, target_size=img_size)
            img_array = img_to_array(img)
        except Exception:
            # Best-effort loading: skip unreadable entries, but do NOT catch
            # KeyboardInterrupt/SystemExit the way a bare `except:` would.
            skipped += 1
            continue
        data.append((img_array, label))
        if limit and len(data) >= limit:
            break
    if skipped:
        warnings.warn(f"load_images: skipped {skipped} unreadable file(s) in {folder!r}")
    return data

# Loading the dataset

In [6]:
# Cap on the number of images loaded per class (keeps memory use modest).
LIMIT = 5000

# Class folders live directly under the training directory.
cats_dir = os.path.join(train_dir, "cats")
dogs_dir = os.path.join(train_dir, "dogs")

# Label convention: 0 = cat, 1 = dog.
cats = load_images(cats_dir, label=0, limit=LIMIT)
dogs = load_images(dogs_dir, label=1, limit=LIMIT)

# Combined dataset: all cats first, then all dogs.
data = [*cats, *dogs]
print("Total samples loaded:", len(data))

Total samples loaded: 10000


# Data Augmentation

In [7]:
# Mild geometric augmentation: small rotations, shifts and zooms plus
# horizontal flips.
datagen = ImageDataGenerator(
    rotation_range=15,
    width_shift_range=0.05,
    height_shift_range=0.05,
    horizontal_flip=True,
    zoom_range=0.05
)

# ImageDataGenerator draws its transform parameters from NumPy's global RNG;
# seeding it here makes the augmented set reproducible across runs.
np.random.seed(42)

# Always start from the freshly loaded originals instead of appending to
# `data` in place. That makes this cell idempotent: re-running it no longer
# augments already-augmented images or doubles the dataset again.
original_data = cats + dogs

# One randomly transformed copy per original image, in the same order as the
# originals (so sample i and sample i + len(original_data) are a pair).
augmented_data = [
    (datagen.random_transform(img_array), label)
    for img_array, label in tqdm(original_data, desc="Augmenting (1 per image)")
]

data = original_data + augmented_data
print("Total samples after augmentation:", len(data))

Augmenting (1 per image): 100%|███████████████████████████████████████| 10000/10000 [00:30<00:00, 328.59it/s]

Total samples after augmentation: 20000





# Extracting Images and Labels

In [9]:
# Split the list of (image, label) pairs into two parallel NumPy arrays.
X_images = np.array([image for image, _ in data])
y_labels = np.array([target for _, target in data])

# Sanity check: both arrays should have the same leading dimension.
print("Images array shape:", X_images.shape)
print("Labels array shape:", y_labels.shape)

Images array shape: (20000, 64, 64, 3)
Labels array shape: (20000,)


# Feature Extraction 

In [11]:
# MobileNetV2 without its classification head; pooling="avg" collapses the
# final feature map into one feature vector per image.
# NOTE(review): Keras only ships ImageNet weights for square inputs of
# 96-224 px, so with 64x64 inputs it warns and loads the 224 px weights.
# Consider loading images at 96x96 or larger.
base_model = MobileNetV2(weights="imagenet", include_top=False, input_shape=(64,64,3), pooling="avg")

# preprocess_input overwrites a float NumPy array IN PLACE. Pass a copy so
# X_images keeps its original pixel values (it is reused later as
# img_train/img_test) and so re-running this cell does not preprocess the
# same data twice.
features = base_model.predict(preprocess_input(X_images.copy()), batch_size=32, verbose=1)
print("Extracted feature shape:", features.shape)

  base_model = MobileNetV2(weights="imagenet", include_top=False, input_shape=(64,64,3), pooling="avg")


[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m132s[0m 206ms/step
Extracted feature shape: (20000, 1280)


# Train-Test Split

In [12]:
# The dataset is laid out as [originals..., augmented copies...] with the
# augmented copy of original i stored at index i + n_orig. A plain random
# split would put an original in train and its near-duplicate in test (data
# leakage), so we split the ORIGINAL images and keep each augmented copy on
# the same side as its source.
n_orig = len(y_labels) // 2
if len(y_labels) != 2 * n_orig or not np.array_equal(y_labels[:n_orig], y_labels[n_orig:]):
    raise ValueError(
        "Expected the second half of the dataset to be augmented copies of the first half"
    )

# Stratified 80/20 split over the indices of the original images only.
train_orig, test_orig = train_test_split(
    np.arange(n_orig), test_size=0.2, random_state=42, stratify=y_labels[:n_orig]
)

# Attach each augmented copy to its source's side, then shuffle so originals
# and copies are interleaved (train_test_split also returns shuffled data).
rng = np.random.default_rng(42)
train_idx = rng.permutation(np.concatenate([train_orig, train_orig + n_orig]))
test_idx = rng.permutation(np.concatenate([test_orig, test_orig + n_orig]))

X_train, X_test = features[train_idx], features[test_idx]
y_train, y_test = y_labels[train_idx], y_labels[test_idx]
img_train, img_test = X_images[train_idx], X_images[test_idx]

print("Training samples:", X_train.shape[0])
print("Testing samples:", X_test.shape[0])

Training samples: 16000
Testing samples: 4000
