In [None]:
!pip install torch torchvision matplotlib

In [None]:
import os
import torch
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import matplotlib.pyplot as plt

In [None]:
# Root folder holding the "train", "val" and "test" sub-directories.
# Set the CAT_DOG_DATA_DIR environment variable to use a different location;
# when it is unset the original local path is used.
data_dir = os.environ.get("CAT_DOG_DATA_DIR", r"D:\AFC\Cat vs Dog\datasets\datasets")

train_dir = os.path.join(data_dir, "train")
val_dir = os.path.join(data_dir, "val")
test_dir = os.path.join(data_dir, "test")

BATCH_SIZE = 32  # samples per mini-batch handed to the DataLoaders
IMG_SIZE = 128   # images are resized to IMG_SIZE x IMG_SIZE pixels


In [None]:
# Validation pipeline: deterministic resize -> tensor -> (x - 0.5) / 0.5.
val_transform = transforms.Compose(
    [
        transforms.Resize(size=(IMG_SIZE, IMG_SIZE)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5], std=[0.5]),
    ]
)

# Training pipeline: the same steps, with a random horizontal flip and a
# random rotation of up to 10 degrees applied before the tensor conversion.
train_transform = transforms.Compose(
    [
        transforms.Resize(size=(IMG_SIZE, IMG_SIZE)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(degrees=10),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5], std=[0.5]),
    ]
)

In [None]:
# Folder-based datasets: one per split, each with its own transform pipeline.
train_dataset = datasets.ImageFolder(root=train_dir, transform=train_transform)
val_dataset = datasets.ImageFolder(root=val_dir, transform=val_transform)

# Only the training batches are shuffled; validation keeps a fixed order.
train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=BATCH_SIZE, shuffle=False)


In [None]:
from torchvision.datasets import ImageFolder
from torchvision.io import read_image
from torch.utils.data import Dataset
from PIL import Image

class TestDataset(Dataset):
    """Unlabelled image dataset read from a single flat directory.

    Every entry directly inside ``test_dir`` whose name ends with one of
    ``extensions`` (compared case-insensitively) is included, sorted by path.
    Each item is an ``(image, file_name)`` pair.

    Args:
        test_dir: Directory that holds the image files.
        transform: Optional callable applied to the RGB PIL image.
        extensions: File-name suffix, or iterable of suffixes, to accept.
            Defaults to ``(".jpg",)``.
    """

    def __init__(self, test_dir, transform=None, extensions=(".jpg",)):
        if isinstance(extensions, str):
            extensions = (extensions,)
        suffixes = tuple(ext.lower() for ext in extensions)
        # Lower-case the name before matching so ".JPG" files are not skipped.
        self.image_paths = sorted(
            os.path.join(test_dir, name)
            for name in os.listdir(test_dir)
            if name.lower().endswith(suffixes)
        )
        self.transform = transform

    def __len__(self):
        """Return the number of image files found."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return ``(image, file_name)`` for the image at position ``idx``."""
        img_path = self.image_paths[idx]
        # Image.open is lazy and keeps the file handle open; the context
        # manager closes it once the pixel data has been converted.
        with Image.open(img_path) as raw:
            img = raw.convert("RGB")
        if self.transform is not None:
            img = self.transform(img)
        img_id = os.path.basename(img_path)
        return img, img_id

# No `test_transform` is defined in the preceding cells; test images get the
# same deterministic preprocessing as the validation images.
test_dataset = TestDataset(test_dir, transform=val_transform)


In [None]:
# NOTE: this cell rebuilds the same datasets and loaders as the earlier
# dataset cell (same directories, transforms, batch size and shuffle flags).
train_dataset, val_dataset = (
    datasets.ImageFolder(folder, transform=pipeline)
    for folder, pipeline in ((train_dir, train_transform), (val_dir, val_transform))
)

train_loader = DataLoader(train_dataset, shuffle=True, batch_size=BATCH_SIZE)
val_loader = DataLoader(val_dataset, shuffle=False, batch_size=BATCH_SIZE)


In [None]:
import numpy as np

In [None]:
# Visualize one batch of training data
import numpy as np

def imshow(img):
    """Display an image tensor with matplotlib.

    The tensor is first mapped back with ``img / 2 + 0.5`` (the inverse of a
    mean-0.5 / std-0.5 normalisation), then its axes are reordered with
    ``(1, 2, 0)`` before plotting; the input is assumed to be laid out as
    (channels, height, width).
    """
    restored = img / 2 + 0.5
    pixels = restored.numpy().transpose(1, 2, 0)
    plt.imshow(pixels)
    plt.show()

# The earlier cells import only `datasets` and `transforms` from torchvision,
# so the bare name `torchvision` is unbound here; import make_grid directly.
from torchvision.utils import make_grid

dataiter = iter(train_loader)
images, labels = next(dataiter)
# Tile the first eight images of the batch into one grid image and show it.
imshow(make_grid(images[:8]))
print("Labels:", labels[:8].tolist())


Model Selection – Custom CNN

In [21]:
import torch.nn as nn
import torch.nn.functional as F

class CustomCNN(nn.Module):
    """Three-block convolutional classifier for square RGB images.

    Each block is ``Conv2d(3x3, padding=1) -> ReLU -> MaxPool2d(2)``, so the
    spatial size is halved three times before two fully connected layers.

    Args:
        num_classes: Number of output logits (default 2: cat, dog).
        img_size: Height and width of the input images (default 128).
    """

    def __init__(self, num_classes=2, img_size=128):
        super(CustomCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)    # 3 -> 32 channels
        self.pool = nn.MaxPool2d(2, 2)                             # halves height and width
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)   # 32 -> 64 channels
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)  # 64 -> 128 channels

        # Three 2x2 poolings shrink each side by a factor of 8 (128 -> 16).
        feature_side = img_size // 8

        # Fully connected layers
        self.fc1 = nn.Linear(128 * feature_side * feature_side, 512)
        self.fc2 = nn.Linear(512, num_classes)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)``."""
        x = self.pool(F.relu(self.conv1(x)))  # default size: -> 32x64x64
        x = self.pool(F.relu(self.conv2(x)))  # default size: -> 64x32x32
        x = self.pool(F.relu(self.conv3(x)))  # default size: -> 128x16x16
        # Flatten per sample. Unlike view(-1, N), this keeps the batch
        # dimension fixed, so an input of the wrong size fails in fc1 instead
        # of silently being reshaped into a different number of rows.
        x = x.flatten(1)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

Model Training

In [None]:
import torch.optim as optim

# Pick the compute device: CUDA when PyTorch reports it, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)

# Build the network and place its parameters on the chosen device.
model = CustomCNN()
model = model.to(device)

# Cross-entropy objective, optimised with Adam at a learning rate of 1e-3.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)


In [None]:
EPOCHS = 5

for epoch in range(EPOCHS):
    # ---- Training pass: gradients enabled, weights updated per batch ----
    model.train()
    running_loss = 0.0  # sum of per-sample losses seen this epoch
    correct = 0
    total = 0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # With nn.CrossEntropyLoss's default reduction, `loss` is the batch
        # mean. Weight it by the batch size so the epoch figure is a true
        # per-sample average rather than a sum that grows with the number of
        # batches.
        batch_count = labels.size(0)
        running_loss += loss.item() * batch_count
        _, predicted = torch.max(outputs, 1)
        total += batch_count
        correct += (predicted == labels).sum().item()

    train_loss = running_loss / total
    train_accuracy = 100 * correct / total
    print(f"Epoch [{epoch+1}/{EPOCHS}] - Train Loss: {train_loss:.3f} - Train Acc: {train_accuracy:.2f}%")

    # ---- Validation pass: eval mode, no gradient tracking ----
    model.eval()
    val_loss_sum = 0.0
    val_correct = 0
    val_total = 0
    with torch.no_grad():
        for val_images, val_labels in val_loader:
            val_images, val_labels = val_images.to(device), val_labels.to(device)
            val_outputs = model(val_images)
            loss = criterion(val_outputs, val_labels)

            val_batch_count = val_labels.size(0)
            val_loss_sum += loss.item() * val_batch_count

            _, val_predicted = torch.max(val_outputs, 1)
            val_total += val_batch_count
            val_correct += (val_predicted == val_labels).sum().item()

    # Per-sample mean, directly comparable with the training loss above.
    val_loss = val_loss_sum / val_total
    val_accuracy = 100 * val_correct / val_total
    print(f"           -> Val Loss: {val_loss:.3f} - Val Acc: {val_accuracy:.2f}%\n")


In [None]:
# Report how many samples each split contains, one line per split.
print(
    f"Training samples: {len(train_dataset)}",
    f"Validation samples: {len(val_dataset)}",
    sep="\n",
)