In [1]:
# !conda install -y torchvision

import cv2
from PIL import Image, ImageOps
import math
import time

# PyTorch Library
import torch


# PyTorch Neural Network
import torch.nn as nn

# Allows us to transform data
import torchvision.transforms as transforms

# Allows us to get the digit dataset
import torchvision.datasets as dsets

# Creating graphs
import torchvision.models as models
import matplotlib.pylab as plt
from transformers import pipeline

from tqdm import tqdm


# Allows us to use arrays to manipulate and store data
import numpy as np
import kagglehub
import os

path = kagglehub.dataset_download("karakaggle/kaggle-cat-vs-dog-dataset")


print("Path to dataset files:", path)

Path to dataset files: C:\Users\amann\.cache\kagglehub\datasets\karakaggle\kaggle-cat-vs-dog-dataset\versions\1


In [2]:
cat = os.path.join(path, "kagglecatsanddogs_3367a\PetImages\Cat")
dog = os.path.join(path, "kagglecatsanddogs_3367a\PetImages\Dog")


if os.path.exists(cat):
    print("Cat folder exists")
if os.path.exists(dog):
    print("Dog folder exists")

is_cuda = torch.cuda.is_available()

if is_cuda:
    device = torch.device("cuda")
    print("GPU is available")
else:
    device = torch.device("cpu")
    print("GPU not available, CPU used")

print(device)

Cat folder exists
Dog folder exists
GPU is available
cuda


In [3]:
class CatDogDataset(torch.utils.data.Dataset):
    def __init__(self, cat_dir, dog_dir, transform=None, input_size=128 * 128):
        self.cat_dir = cat_dir
        self.dog_dir = dog_dir
        self.inputSize = int(math.sqrt(input_size))
        self.transform = transform

        # Get all cat and dog image paths
        self.cat_images = [
            os.path.join(cat_dir, f)
            for f in os.listdir(cat_dir)
            if f.lower().endswith((".png", ".jpg", ".jpeg"))
        ]
        self.dog_images = [
            os.path.join(dog_dir, f)
            for f in os.listdir(dog_dir)
            if f.lower().endswith((".png", ".jpg", ".jpeg"))
        ]

        # Combine all images and create labels
        self.all_images = self.cat_images + self.dog_images
        self.labels = [0] * len(self.cat_images) + [1] * len(
            self.dog_images
        )  # 0 for cat, 1 for dog

    def gray_scale(self, image):
        return image.convert("L")

    def resize(self, image):

        return image.resize((self.inputSize, self.inputSize))

    def to_tensor(self, image):
        return torch.from_numpy(np.array(image)).float()

    def __len__(self):
        return len(self.all_images)

    def __getitem__(self, idx):
        try:

            image_path = self.all_images[idx]
            label = self.labels[idx]

            image = Image.open(image_path)

            if self.transform:
                image = self.transform(image)
            else:
                print(f"not transformed")
                image = self.gray_scale(image)

                image = self.resize(image)
                image = self.to_tensor(image)

            # Add channel dimension: (128, 128) -> (1, 128, 128)
            # image = image.unsqueeze(0)
            # print(f"image.shape from class: {image.shape}

            return image, label
        except Exception as e:
            print(f"Error loading image {idx}: {e}")

            # Return a dummy image and label if there's an error
            return torch.zeros(1, self.inputSize, self.inputSize), 0

In [4]:
cat_dir = os.listdir(cat)
dog_dir = os.listdir(dog)

# Load a sample image for display
cat_image = Image.open(os.path.join(cat, cat_dir[3]))
cat_image_gray = cat_image.convert("L")  # Convert to grayscale
width, height = cat_image.size
print(f"Image dimensions: {width} x {height}")

# Create the combined dataset


input_size = 224 * 224
output_size = 2

mean = [0.485, 0.456, 0.406]

std = [0.229, 0.224, 0.225]

composed = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(degrees=5),
        transforms.ToTensor(),
        transforms.Normalize(mean, std),
    ]
)

full_dataset = CatDogDataset(cat, dog, transform=composed, input_size=input_size)
print(f"Total images: {len(full_dataset)}")
print(f"Cat images: {len(full_dataset.cat_images)}")
print(f"Dog images: {len(full_dataset.dog_images)}")
print(f"Dimensions: {full_dataset.inputSize}")

Image dimensions: 500 x 375
Total images: 24959
Cat images: 12490
Dog images: 12469
Dimensions: 224


In [5]:
def plot_image(image):
    plt.imshow(image, cmap="gray")
    plt.axis("off")
    plt.show()


def show_data(image, input_size):
    plt.imshow(image.resize((input_size, input_size)), cmap="gray")

In [6]:
image, label = full_dataset[1]

# plot_image(image)

In [7]:
def PlotParameters(model, input_size):
    W = model.state_dict()["linear.weight"].data
    w_min = W.min().item()
    w_max = W.max().item()
    fig, axes = plt.subplots(1, 2)
    fig.subplots_adjust(hspace=0.01, wspace=0.1)
    for i, ax in enumerate(axes.flat):
        if i < 10:

            # Set the label for the sub-plot.
            ax.set_xlabel("class: {0}".format(i))

            # Plot the image.
            ax.imshow(
                W[i, :].view(int(math.sqrt(input_size)), int(math.sqrt(input_size))),
                vmin=w_min,
                vmax=w_max,
                cmap="seismic",
            )

            ax.set_xticks([])
            ax.set_yticks([])

        # Ensure the plot is shown correctly with multiple plots
        # in a single Notebook cell.
    plt.show()

In [8]:
class linear_model(nn.Module):
    def __init__(self, inputSize, outputSize):
        super(linear_model, self).__init__()
        print("linear_model class initialized")
        self.linear = nn.Linear(inputSize, outputSize)

    def forward(self, x):
        return self.linear(x)

In [9]:
# Split dataset into train and validation
total_size = len(full_dataset)
train_size = int(0.8 * total_size)  # 80% for training
val_size = total_size - train_size  # 20% for validation

print(f"Total dataset size: {total_size}")
print(f"Training size: {train_size}")
print(f"Validation size: {val_size}")

# Create train and validation datasets
train_dataset, val_dataset = torch.utils.data.random_split(
    full_dataset, [train_size, val_size]
)

# Create data loaders
batch_size = 32
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=batch_size, shuffle=True
)
val_loader = torch.utils.data.DataLoader(
    val_dataset, batch_size=batch_size, shuffle=False
)

print(f"Training batches: {len(train_loader)}")
print(f"Validation batches: {len(val_loader)}")


# Test the data loader
sample_batch = next(iter(train_loader))
images, labels = sample_batch

Total dataset size: 24959
Training size: 19967
Validation size: 4992
Training batches: 624
Validation batches: 156


In [10]:
print(input_size, output_size)

# model = linear_model(input_size, output_size)
model = models.resnet18(pretrained=True)
print(model)
model.fc = nn.Linear(224 * 224, 2)
model.to("cuda")
lr_scheduler = True

base_lr = 0.001

max_lr = 0.01

momentum = 0.9

model.to("cuda")

learning_rate = 5e-05

print(model.parameters())

optimizer_SGD = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum)

optimizer_Adam = torch.optim.Adam(model.parameters(), lr=learning_rate)

if lr_scheduler:
    scheduler = torch.optim.lr_scheduler.CyclicLR(
        optimizer_SGD,
        base_lr=base_lr,
        max_lr=max_lr,
        step_size_up=5,
        mode="triangular2",
    )

criterion = nn.CrossEntropyLoss()

epochs = 3


def train_model(
    model, train_loader, val_loader, criterion, optimizer, epochs, print_=True
):
    loss_list = []
    accuracy_list = []
    correct = 0
    # global:val_loader
    n_test = len(val_dataset)
    accuracy_best = 0

    for epoch in tqdm(range(epochs)):

        loss_sublist = []
        # Loop through the data in loader

        for x, y in train_loader:
            x, y = x.to("cuda"), y.to("cuda")

            model.train()

            z = model(x)
            loss = criterion(z, y)
            loss_sublist.append(loss.data.item())
            loss.backward()
            optimizer.step()

            optimizer.zero_grad()
        print("epoch {} done".format(epoch))

        scheduler.step()
        loss_list.append(np.mean(loss_sublist))
        correct = 0

        for x_test, y_test in val_loader:
            x_test, y_test = x_test.to("cuda"), y_test.to("cuda")
            model.eval()
            z = model(x_test)
            _, yhat = torch.max(z.data, 1)
            correct += (yhat == y_test).sum().item()
        accuracy = correct / n_test
        accuracy_list.append(accuracy)
        if accuracy > accuracy_best:
            accuracy_best = accuracy
            # best_model_wts = copy.deepcopy(model.state_dict())

        if print_:
            print("learning rate", optimizer.param_groups[0]["lr"])
            print(
                "The validaion  Cost for each epoch "
                + str(epoch + 1)
                + ": "
                + str(np.mean(loss_sublist))
            )
            print(
                "The validation accuracy for epoch "
                + str(epoch + 1)
                + ": "
                + str(accuracy)
            )
    # model.load_state_dict(best_model_wts)
    return accuracy_list, loss_list, model


accuracy_list, loss_list, model = train_model(
    model, train_loader, val_loader, criterion, optimizer_Adam, epochs
)
print(accuracy_list)
print(loss_list)

# PlotParameters(model, input_size)

50176 2
ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=



<generator object Module.parameters at 0x00000281E76925E0>


  0%|          | 0/3 [00:01<?, ?it/s]


RuntimeError: mat1 and mat2 shapes cannot be multiplied (32x512 and 50176x2)

In [32]:
class CatDogDatasetCNN(torch.utils.data.Dataset):
    def __init__(self, cat_dir, dog_dir, transform=None, input_size=128 * 128):
        self.cat_dir = cat_dir
        self.dog_dir = dog_dir
        self.inputSize = int(math.sqrt(input_size))
        self.transform = transform

        # Get all cat and dog image paths
        self.cat_images = [
            os.path.join(cat_dir, f)
            for f in os.listdir(cat_dir)
            if f.lower().endswith((".png", ".jpg", ".jpeg"))
        ]
        self.dog_images = [
            os.path.join(dog_dir, f)
            for f in os.listdir(dog_dir)
            if f.lower().endswith((".png", ".jpg", ".jpeg"))
        ]

        # Combine all images and create labels
        self.all_images = self.cat_images + self.dog_images
        self.labels = [0] * len(self.cat_images) + [1] * len(
            self.dog_images
        )  # 0 for cat, 1 for dog

    def gray_scale(self, image):
        return image.convert("L")

    def resize(self, image):
        return image.resize((self.inputSize, self.inputSize))

    def to_tensor(self, image):
        return torch.from_numpy(np.array(image)).float()

    def __len__(self):
        return len(self.all_images)

    def __getitem__(self, idx):
        try:
            image_path = self.all_images[idx]
            label = self.labels[idx]

            image = Image.open(image_path)
            image = self.gray_scale(image)

            image = self.resize(image)
            image = self.to_tensor(image)

            image = image.unsqueeze(0)

            # print(f"image.shape from class: {image.shape}  ")

            if self.transform:
                image = self.transform(image)

            return image, label
        except Exception as e:
            print(f"Error loading image {idx}: {e}")

            # Return a dummy image and label if there's an error
            return torch.zeros(1, self.inputSize, self.inputSize), 0

In [33]:
class CNN(nn.Module):
    def __init__(self, num_classes: int = 2):  # dog vs cat
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, (3, 3), 1, 0)  # Output: [batch, 32, 26, 26]
        self.conv2 = nn.Conv2d(32, 64, (3, 3), 1, 0)  # Output: [batch, 64, 24, 24]
        self.conv3 = nn.Conv2d(64, 128, (2, 2), 2, 0)  # Output: [batch, 128, 12, 12]
        self.relu = nn.ReLU(inplace=True)
        self.global_pool = nn.AdaptiveAvgPool2d((1, 1))  # Output: [batch, 128, 1, 1]
        self.fc = nn.Linear(128, num_classes)  # Output: [batch, 2]

    def forward(self, x):
        x = self.conv1(x)  # [batch, 32, 26, 26]
        x = self.relu(x)
        x = self.conv2(x)  # [batch, 64, 24, 24]
        x = self.relu(x)
        x = self.conv3(x)  # [batch, 128, 12, 12]
        x = self.relu(x)
        x = self.global_pool(x)  # [batch, 128, 1, 1]
        x = torch.flatten(x, 1)  # [batch, 128]
        logits = self.fc(x)  # [batch, 2]
        return logits

In [111]:
def train_model_CNN(model, train_loader, val_loader, criterion, optimizer, epochs):
    steps_loss = 2
    for epoch in range(epochs):
        start_time = time.time()
        running_loss = 0.0
        accuracy = []
        for i, (images, labels) in enumerate(train_loader):

            images, labels = images.to("cuda"), labels.to("cuda")
            # print(images.shape)
            # break
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            with torch.no_grad():
                running_loss += loss.item()
                if i % steps_loss == (steps_loss - 1):
                    print("Step_loss: ", steps_loss)
                    # print every steps_loss mini-batches
                    np_pred = np.argmax(outputs.cpu().numpy(), -1)
                    np_gt = labels.cpu().numpy()
                    acc = np.sum(np_pred == np_gt) / batch_size
                    accuracy.append(acc)
                    # print(
                    #     "[Epoch {:2d} - Iter {:3d}] loss: {:.3f} acc: {:.3f}".format(
                    #         epoch + 1, i + 1, running_loss / steps_loss, acc
                    #     )
                    # )
                    running_loss = 0.0
                elapsed_time = time.time() - start_time
        print(
            f"Epoch {epoch + 1} - Iter {i + 1} - Loss: {running_loss / steps_loss:.3f} - Accuracy: {np.mean(accuracy)*100:.3f}. Time taken: {elapsed_time:.3f} seconds"
        )

    return model

In [112]:
# Split dataset into train and validation
full_dataset_CNN = CatDogDatasetCNN(cat, dog, transform=None, input_size=input_size)

total_size = len(full_dataset_CNN)
train_size = int(0.8 * total_size)  # 80% for training
val_size = total_size - train_size  # 20% for validation

print(f"Total dataset size: {total_size}")
print(f"Training size: {train_size}")
print(f"Validation size: {val_size}")

# Create train and validation datasets
train_dataset_CNN, val_dataset_CNN = torch.utils.data.random_split(
    full_dataset_CNN, [train_size, val_size]
)

# Create data loaders
batch_size = 32

train_loader_CNN = torch.utils.data.DataLoader(
    train_dataset_CNN, batch_size=batch_size, shuffle=True
)
val_loader_CNN = torch.utils.data.DataLoader(
    val_dataset_CNN, batch_size=batch_size, shuffle=False
)

print(f"Training batches: {len(train_loader_CNN)}")
print(f"Validation batches: {len(val_loader_CNN)}")


# Test the data loader
sample_batch = next(iter(train_loader_CNN))
images, labels = sample_batch

Total dataset size: 24959
Training size: 19967
Validation size: 4992
Training batches: 624
Validation batches: 156


In [114]:
learning_rate_CNN = 5e-05
print(int(math.sqrt(input_size)), output_size)

momentum = 0.9

model_CNN = CNN(output_size)
model_CNN.to("cuda")

print(model_CNN.parameters())

optimizer_SGD_CNN = torch.optim.SGD(
    model.parameters(), lr=learning_rate_CNN, momentum=momentum
)

optimizer_Adam_CNN = torch.optim.Adam(model.parameters(), lr=learning_rate_CNN)

criterion_CNN = nn.CrossEntropyLoss()

epochs_CNN = 3


# train_model_CNN(
#     model_CNN,
#     train_loader_CNN,
#     val_loader_CNN,
#     criterion_CNN,
#     optimizer_Adam_CNN,
#     epochs_CNN,
# )

224 2
<generator object Module.parameters at 0x000001D09F1F5620>
