Now we are going to standardise these images: each one is resized to a fixed size and its colour channels are normalised with fixed mean and standard-deviation values.

Let's import all of our required modules.


In [None]:
# standard

import random
import time
import os
from collections import Counter


# numerical

import numpy as np
import pandas as pd


# torch


import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
from torchvision import models, datasets, transforms

# resnet
from torchvision.models import ResNet50_Weights

# PIL
from PIL import ImageFile
# Allow PIL to load truncated image files; without this flag, loading
# occasionally failed with an error.
ImageFile.LOAD_TRUNCATED_IMAGES = True # got a weird error sometimes if I didn't have this

# scikit-learn

from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.cluster import KMeans
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    confusion_matrix,
    davies_bouldin_score,
    silhouette_score,
    normalized_mutual_info_score,
    adjusted_rand_score,
    precision_recall_fscore_support,
    roc_curve,
    auc
)



from sklearn.model_selection import train_test_split, StratifiedKFold, cross_validate
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

import umap
from tqdm import tqdm
import joblib
import matplotlib.pyplot as plt

Next, we set our random seed. This makes our experiments reproducible.

In [None]:
# Single seed value used to initialise every random number generator below.
SEED: int = 67
random.seed(SEED)        # Python's built-in `random` module
np.random.seed(SEED)     # NumPy's global random state
torch.manual_seed(SEED)  # PyTorch random number generator

This is the transform we use to normalise the data. We scale the images to 224x224 (as this is the ResNet input size).

In [None]:
# Preprocessing applied to every image before it is fed to the network.
# `transforms` is already imported with the other torchvision modules above.

# Target (height, width) of every image after resizing.
_INPUT_SIZE = (224, 224)

# Per-channel mean and standard deviation used for normalisation; these are
# the standard ImageNet statistics used with torchvision's pretrained models.
_CHANNEL_MEAN = [0.485, 0.456, 0.406]
_CHANNEL_STD = [0.229, 0.224, 0.225]

transform = transforms.Compose(
    [
        transforms.Resize(_INPUT_SIZE),                      # fixed spatial size
        transforms.ToTensor(),                               # PIL image -> float tensor
        transforms.Normalize(_CHANNEL_MEAN, _CHANNEL_STD),   # (x - mean) / std per channel
    ]
)

Now we check whether CUDA is available, load the pretrained ResNet-50, and define our `extract_features` function.

In [None]:


# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print("Using:", device)

# Build the feature extractor: a pretrained ResNet-50 with its last child
# module dropped, so the network emits features instead of class scores.
resnet = models.resnet50(weights=ResNet50_Weights.DEFAULT)
backbone_layers = list(resnet.children())[:-1]

# outputs [batch, 2048, 1, 1]; switched to eval mode and moved to `device`
model = nn.Sequential(*backbone_layers).eval().to(device)

def extract_features(batch_images):
    """Run a batch of images through ``model`` and return flat feature rows.

    Args:
        batch_images: Tensor holding a batch of preprocessed images; it is
            moved to ``device`` before the forward pass.

    Returns:
        A NumPy array with one row per image, obtained by flattening every
        dimension after the batch dimension of the model output.
    """
    # Gradients are never needed for feature extraction, so skip tracking them.
    with torch.no_grad():
        inputs = batch_images.to(device)
        pooled = model(inputs)                         # [batch, 2048, 1, 1]
        flat = pooled.reshape(pooled.size(0), -1)      # [batch, 2048]
        # NumPy needs the data on the CPU.
        return flat.cpu().numpy()


Now we check that the image folder exists, index the images, and count how many there are per class.
Later on, the features will be loaded from disk if the feature extraction has already been done.

In [None]:
# Fail early with a clear message when the image folder is missing. isdir()
# also rejects a regular file that happens to sit at this path.
if not os.path.isdir("data/cats-and-dogs"):
    raise FileNotFoundError("cats-and-dogs image folder not found")

# ImageFolder derives one class per sub-directory; `samples` is a list of
# (path, class_index) pairs.
dataset_info = datasets.ImageFolder("data/cats-and-dogs", transform=transform)

# class index -> number of images (Counter is a dict subclass, so .get() still works)
counts = Counter(label for _, label in dataset_info.samples)

# class name -> number of images; a Counter yields 0 for a class with no images
name_counts = {
    class_name: counts[class_idx]
    for class_idx, class_name in enumerate(dataset_info.classes)
}



We now have to extract the features.
We check for a cached file first, as this is an expensive computation that doesn't need to be repeated needlessly.
We use tqdm to give us a progress bar, which is essential for long-running operations such as this one.

In [None]:

FORCE_EXTRACT = False       # change to true if you have made changes to the feature extraction

# Locations of the cached feature matrix and label vector.
FEATURES_DIR = "data/features"
FEATURES_PATH = "data/features/cats-and-dogs_features.npy"
LABELS_PATH = "data/features/cats-and-dogs_labels.npy"

dataset = datasets.ImageFolder("data/cats-and-dogs", transform=transform)
# shuffle=False keeps features in dataset order, so row i of the feature
# matrix always corresponds to dataset.samples[i].
# Pinned memory only helps host-to-GPU copies, so enable it only when CUDA is present.
loader = DataLoader(
    dataset,
    batch_size=32,
    shuffle=False,
    num_workers=4,
    pin_memory=torch.cuda.is_available(),
)

train_features = None
train_labels = None

cache_present = os.path.exists(FEATURES_PATH) and os.path.exists(LABELS_PATH)
if cache_present and not FORCE_EXTRACT:
    cached_features = np.load(FEATURES_PATH)
    cached_labels = np.load(LABELS_PATH)

    # Only trust the cache if it still describes the images currently on disk:
    # one feature row per label, and labels identical to the dataset's targets.
    cache_is_current = (
        len(cached_features) == len(cached_labels)
        and np.array_equal(cached_labels, np.asarray(dataset.targets))
    )
    if cache_is_current:
        train_features = cached_features
        train_labels = cached_labels
        print("Loaded features from file.")
    else:
        print("Cached features do not match the current dataset; re-extracting.")

if train_features is None:
    feature_batches = []
    label_batches = []

    for imgs, labels in tqdm(loader, desc="Extracting features"):
        feature_batches.append(extract_features(imgs))
        label_batches.append(labels.numpy())

    # Stack the per-batch arrays into one (n_images, n_features) matrix and
    # one (n_images,) label vector.
    train_features = np.concatenate(feature_batches, axis=0)
    train_labels = np.concatenate(label_batches, axis=0)

    print("Feature vectors shape:", train_features.shape)
    print("Labels shape:", train_labels.shape)

    # Cache the result so later runs can skip the expensive forward passes.
    os.makedirs(FEATURES_DIR, exist_ok=True)
    np.save(FEATURES_PATH, train_features)
    np.save(LABELS_PATH, train_labels)


Now we will try a few methods of dimensionality reduction.
We will use UMAP, PCA and t-SNE and compare the results.

In [None]:
# Embed the feature vectors into two dimensions with UMAP (seeded so the
# layout is repeatable).
reducer = umap.UMAP(n_components=2, random_state=67)
features_2D_UMAP = reducer.fit_transform(train_features)

# Scatter plot of the embedding, one point per image, coloured by true label.
umap_fig, umap_ax = plt.subplots(figsize=(8,6))
umap_ax.scatter(
    features_2D_UMAP[:,0],
    features_2D_UMAP[:,1],
    c=train_labels,
    cmap='tab10',
    s=10,
    alpha=0.7,
)
umap_ax.set_xlabel("UMAP 1")
umap_ax.set_ylabel("UMAP 2")
umap_ax.set_title("Feature space projected to 2D with UMAP")
plt.show()



In [None]:

# A float n_components keeps as many principal components as are needed to
# explain 95% of the variance. NOTE: despite its name, features_2D_PCA
# therefore usually has more than two columns; only the first two are drawn.
reducer = PCA(n_components=0.95)
features_2D_PCA = reducer.fit_transform(train_features)

# Report how much of the variance the plotted axes actually capture, so the
# 2-D picture is not mistaken for the full 95% representation.
explained = reducer.explained_variance_ratio_
print(
    f"PCA kept {reducer.n_components_} components; "
    f"the first two explain {explained[:2].sum():.1%} of the variance"
)

plt.figure(figsize=(8,6))
plt.scatter(features_2D_PCA[:,0], features_2D_PCA[:,1], c=train_labels, cmap='tab10', s=10, alpha=0.7)
plt.xlabel(f"PCA 1 ({explained[0]:.1%} variance)")
plt.ylabel(f"PCA 2 ({explained[1]:.1%} variance)")
plt.title("Feature space projected to 2D with PCA")
plt.show()

In [None]:
# t-SNE: embed the feature vectors into two dimensions (seeded so the layout
# is repeatable).
reducer = TSNE(n_components=2, random_state=67)
features_2D_TSNE = reducer.fit_transform(train_features)

# Scatter plot of the embedding, one point per image, coloured by true label.
tsne_fig, tsne_ax = plt.subplots(figsize=(8,6))
tsne_ax.scatter(
    features_2D_TSNE[:,0],
    features_2D_TSNE[:,1],
    c=train_labels,
    cmap='tab10',
    s=10,
    alpha=0.7,
)
tsne_ax.set_xlabel("T-SNE 1")
tsne_ax.set_ylabel("T-SNE 2")
tsne_ax.set_title("Feature space projected to 2D with T-SNE")
plt.show()

In [None]:


# Use one cluster per ground-truth class (or experiment with other values of k).
n_clusters = len(np.unique(train_labels))

# n_init is set explicitly because its default differs between scikit-learn
# releases; pinning it keeps results comparable across versions and avoids
# the FutureWarning some versions emit.
# NOTE(review): this seed differs from the 67 used elsewhere in the notebook;
# left unchanged to preserve existing results - confirm whether intentional.
kmeans = KMeans(n_clusters=n_clusters, n_init=10, random_state=42)

# Clustering is done on the 2-D UMAP embedding, not on the raw feature vectors.
cluster_labels = kmeans.fit_predict(features_2D_UMAP)

# Visualize clusters: same UMAP layout as before, coloured by assigned cluster.
plt.figure(figsize=(8,6))
plt.scatter(features_2D_UMAP[:,0], features_2D_UMAP[:,1], c=cluster_labels, cmap='tab20', s=12, alpha=0.8)
plt.title('K-Means Clusters in UMAP Space')
plt.show()

In [None]:

# Internal cluster-quality scores for the K-Means assignment, computed on the
# same 2-D UMAP embedding the clustering was fitted on.
sil_score = silhouette_score(features_2D_UMAP, cluster_labels)    # higher is better
db_score = davies_bouldin_score(features_2D_UMAP, cluster_labels)  # lower is better

for score_label, score_value in (
    ("Davies-Bouldin Index:", db_score),
    ("Silhouette Score:", sil_score),
):
    print(score_label, score_value)

In [None]:

# Hold out 20% of the images for testing; stratify so both splits keep the
# same class proportions as the full dataset.
x_train, x_test, y_train, y_test = train_test_split(
    train_features, train_labels, test_size=0.2, random_state=67, stratify=train_labels
)

# Standardise the features, then fit a logistic-regression classifier.
# The `multi_class` argument is deprecated in recent scikit-learn releases and
# is therefore not passed; the lbfgs solver already fits a multinomial model
# when there are more than two classes.
clf = make_pipeline(
    StandardScaler(),
    LogisticRegression(max_iter=2000, solver="lbfgs", random_state=67),
)

clf.fit(x_train, y_train)

# Persist the whole pipeline (scaler + classifier) for later reuse.
joblib.dump(clf, "catsanddogsmodel1.joblib")
print("Saved classifier")


y_test_pred = clf.predict(x_test)
print("Test accuracy:", accuracy_score(y_test, y_test_pred))

# Passing the label ids explicitly keeps rows/columns aligned with
# dataset.classes even if some class is absent from the test split.
class_ids = np.arange(len(dataset.classes))
print(classification_report(y_test, y_test_pred, labels=class_ids, target_names=dataset.classes))


# Create confusion matrix (rows: true class, columns: predicted class)
cm = confusion_matrix(y_test, y_test_pred, labels=class_ids)

fig, ax = plt.subplots(figsize=(5,4))
im = ax.imshow(cm, cmap='Blues')
ax.set_xticks(class_ids)
ax.set_yticks(class_ids)
ax.set_xticklabels(dataset.classes)
ax.set_yticklabels(dataset.classes)
ax.set_xlabel('Predicted')
ax.set_ylabel('True')
ax.set_title('Confusion Matrix (test)')

# annotate cells; switch to white text on dark cells so counts stay readable
dark_threshold = cm.max() / 2
for i in range(cm.shape[0]):
    for j in range(cm.shape[1]):
        text_color = 'white' if cm[i, j] > dark_threshold else 'black'
        ax.text(j, i, f"{cm[i,j]:d}", ha='center', va='center', color=text_color)
fig.colorbar(im, ax=ax)
plt.show()