## SECTION 1 - Imports

In [1]:
import cv2
import numpy as np
import glob
import os

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression, Perceptron
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

import joblib


## SECTION 2 - Load Dataset

In [3]:
def load_images_from_folder(folder_path):
    data = []
    labels = []
    
    for label_name in ["Cat", "Dog"]:
        folder = os.path.join(folder_path, label_name)
        print("Looking inside:", folder)

        path = os.path.join(folder, "*")
        files_found = glob.glob(path)
        print("Files found:", len(files_found))   # DEBUG PRINT
        
        label = 0 if label_name == "Cat" else 1
        
        for file in files_found:
            img = cv2.imread(file)
            if img is None:
                continue
                
            img = cv2.resize(img, (32, 32))
            img = img / 255.0
            img = img.flatten()
            
            data.append(img)
            labels.append(label)
    
    return np.array(data), np.array(labels)


X_train, y_train = load_images_from_folder("./train")
X_test_real, y_test_real = load_images_from_folder("./test")


Looking inside: ./train\Cat
Files found: 1000
Looking inside: ./train\Dog
Files found: 1000
Looking inside: ./test\Cat
Files found: 5
Looking inside: ./test\Dog
Files found: 5


## SECTION 3 - Train/Test Split for Algorithms

In [4]:
X_train_split, X_val_split, y_train_split, y_val_split = train_test_split(
    X_train, y_train, test_size=0.2, shuffle=True
)


## SECTION 4 - Preprocessing (StandardScaler)

In [5]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_split)
X_val_scaled = scaler.transform(X_val_split)
X_test_scaled = scaler.transform(X_test_real)


## SECTION 5 - Train All ML Models

### 5.1 - KNN

In [6]:
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train_scaled, y_train_split)

knn_preds = knn.predict(X_val_scaled)
knn_acc = accuracy_score(y_val_split, knn_preds)
print("KNN Accuracy:", knn_acc)


KNN Accuracy: 0.535


### 5.2 - Logistic Regression

In [7]:
log_reg = LogisticRegression(max_iter=1000)
log_reg.fit(X_train_scaled, y_train_split)

log_preds = log_reg.predict(X_val_scaled)
log_acc = accuracy_score(y_val_split, log_preds)
print("Logistic Regression Accuracy:", log_acc)


Logistic Regression Accuracy: 0.5625


### 5.3 - Perceptron

In [8]:
percep = Perceptron()
percep.fit(X_train_scaled, y_train_split)

perc_preds = percep.predict(X_val_scaled)
perc_acc = accuracy_score(y_val_split, perc_preds)
print("Perceptron Accuracy:", perc_acc)


Perceptron Accuracy: 0.5475


### 5.4 - MLP Classifier (Multi-Layer Perceptron)

In [9]:
mlp = MLPClassifier(hidden_layer_sizes=(64, 32),
                    activation='logistic',
                    max_iter=500)
mlp.fit(X_train_scaled, y_train_split)

mlp_preds = mlp.predict(X_val_scaled)
mlp_acc = accuracy_score(y_val_split, mlp_preds)
print("MLP Accuracy:", mlp_acc)


MLP Accuracy: 0.575


## SECTION 6 - Compare Accuracies

In [10]:
results = {
    "KNN": knn_acc,
    "Logistic Regression": log_acc,
    "Perceptron": perc_acc,
    "MLP": mlp_acc
}

print("\nModel Comparison:")
for m, a in results.items():
    print(f"{m}: {a}")



Model Comparison:
KNN: 0.535
Logistic Regression: 0.5625
Perceptron: 0.5475
MLP: 0.575


## SECTION 7 - Save the Best Model

In [11]:
best_model_name = max(results, key=results.get)
print("\nBest Model:", best_model_name)

best_model = {
    "KNN": knn,
    "Logistic Regression": log_reg,
    "Perceptron": percep,
    "MLP": mlp
}[best_model_name]

joblib.dump(best_model, "best_cat_dog_model.r")
joblib.dump(scaler, "scaler.r")



Best Model: MLP


['scaler.r']

## SECTION 8 - Test on Internet Images

In [16]:
model = joblib.load("best_cat_dog_model.r")
scaler = joblib.load("scaler.r")

for file in glob.glob("internet_test/*"):
    img = cv2.imread(file)
    img_resized = cv2.resize(img, (32, 32))
    img_norm = img_resized / 255.0
    img_flat = img_norm.flatten().reshape(1, -1)

    img_scaled = scaler.transform(img_flat)
    pred = model.predict(img_scaled)[0]

    label = "Cat" if pred == 0 else "Dog"
    print(file, "->", label)


internet_test\pet1.jpg -> Dog
internet_test\pet2.jpeg -> Dog
internet_test\pet3.jpg -> Cat
internet_test\pet4.jpg -> Dog
internet_test\pet5.jpg -> Cat


## Was the model able to correctly predict the images?
When I tested the final model on a set of images downloaded from the internet, it was able to classify 3 out of 5 correctly. So the model worked reasonably well, but it wasnâ€™t perfect. It handled clear, front-facing images of cats and dogs well, but it struggled with images that had unusual angles, lighting, or backgrounds. This makes sense because the model is based on basic ML algorithms (KNN, Logistic Regression, Perceptron, MLP) rather than a deep CNN, so it has limited ability to understand complex features. Overall, it did correctly predict the majority of the test images, but there is still room for improvement.