In [1]:
import os
import cv2
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt


In [2]:
DATA_DIR = "data/train"  # adjust path if needed
IMG_SIZE = 64     # Resize to 64x64
LIMIT = 3000    # Max images per class

categories = ["cat", "dog"]


In [3]:
data = []
labels = []

for label, category in enumerate(categories):
    path = os.path.join(DATA_DIR)
    count = 0

    for file in tqdm(os.listdir(path)):
        if file.startswith(category) and count < LIMIT:
            try:
                img_path = os.path.join(path, file)
                img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)  # Grayscale
                img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))       # Resize
                data.append(img.flatten())                        # Flatten
                labels.append(label)                              # 0 = cat, 1 = dog
                count += 1
            except Exception as e:
                print("Error:", e)


100%|██████████████████████████████████████████████████████████████████████████| 25000/25000 [00:08<00:00, 2959.33it/s]
100%|██████████████████████████████████████████████████████████████████████████| 25000/25000 [00:07<00:00, 3170.03it/s]


In [4]:
X = np.array(data)
y = np.array(labels)

print("Data shape:", X.shape)
print("Labels shape:", y.shape)

# Optional: save as .npy for later
np.save("model/X.npy", X)
np.save("model/y.npy", y)


Data shape: (6000, 4096)
Labels shape: (6000,)


In [5]:
import numpy as np

X = np.load("model/X.npy")
y = np.load("model/y.npy")

print("X shape:", X.shape)
print("y shape:", y.shape)
print("Sample y values:", y[:10])


X shape: (6000, 4096)
y shape: (6000,)
Sample y values: [0 0 0 0 0 0 0 0 0 0]


In [6]:
import numpy as np

X = np.load("model/X.npy")
y = np.load("model/y.npy")


In [7]:
X_flattened = X.reshape(len(X), -1)


In [8]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X_flattened, y, test_size=0.2, random_state=42)


In [9]:
from sklearn.svm import SVC

model = SVC(kernel='linear')  # You can also try 'rbf'
model.fit(X_train, y_train)


In [10]:
accuracy = model.score(X_test, y_test)
print(f"Test Accuracy: {accuracy * 100:.2f}%")


Test Accuracy: 52.33%


In [11]:
import joblib
joblib.dump(model, "model/svm_model.pkl")


['model/svm_model.pkl']