In [1]:
import os
import torch
from torch import nn, optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt

# Pick the compute device once: CUDA if PyTorch reports a usable GPU, otherwise
# the CPU. Later cells move the model and every batch onto this device.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print("Using device:", device)

Using device: cpu


In [2]:
# Preprocessing applied to every image, in this order:
#   1. resize to a fixed 224x224 so every tensor in a batch has the same shape;
#   2. convert the image to a PyTorch tensor;
#   3. normalize each RGB channel with the ImageNet mean and std.
transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)


In [3]:
# Root directory of the training images; change this if the data lives elsewhere.
data_path = "cats_dogs_dataset/images/train"

# ImageFolder builds the class list exposed as `dataset.classes` and applies
# `transform` to every image it loads.
dataset = datasets.ImageFolder(data_path, transform=transform)

# Quick summary of what was found on disk.
print(f"Number of images: {len(dataset)}")
print(f"Class names: {dataset.classes}")


Number of images: 3440
Class names: ['cats', 'dogs']


In [4]:
from torch.utils.data import random_split, DataLoader  # Import PyTorch utilities for splitting and loading data

# Number of samples per mini-batch; lower it if you run out of memory.
batch_size = 32

# 80% of the samples go to training. The validation size is computed by
# subtraction so the two parts always add up to len(dataset), whatever the
# rounding of the 80% figure.
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size

# Split with a dedicated, seeded generator so the same images land in the
# training and validation sets on every run. Without a fixed seed the split
# changes on each kernel restart, which makes validation metrics
# incomparable between runs.
split_generator = torch.Generator().manual_seed(42)
train_dataset, val_dataset = random_split(
    dataset, [train_size, val_size], generator=split_generator
)
assert len(train_dataset) + len(val_dataset) == len(dataset)

# Training loader: reshuffled at every epoch.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Validation loader: fixed order, so evaluation is consistent.
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

# (Optional) Pull one training batch to verify shapes and labels.
images, labels = next(iter(train_loader))
print(f"Image batch shape: {images.shape}")  # Should be [batch_size, 3, 224, 224]
print(f"Labels batch shape: {labels.shape}")  # Should be [batch_size]
print(f"Labels: {labels}")  # Tensor of class indices (0 for cats, 1 for dogs)


Image batch shape: torch.Size([32, 3, 224, 224])
Labels batch shape: torch.Size([32])
Labels: tensor([1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1,
        0, 1, 0, 1, 1, 1, 0, 0])


In [5]:
import torch
import torch.nn as nn
import torch.optim as optim

# 1. Define the CNN architecture
class SimpleCNN(nn.Module):
    """Small two-stage convolutional classifier for RGB images.

    Architecture: (3x3 conv -> ReLU -> 2x2 max-pool) applied twice, followed
    by a flatten and two fully connected layers. The convolutions use
    padding=1 and therefore keep the spatial size; each pooling stage halves
    it (rounding down), so an H x W input reaches the classifier head as a
    32 x (H // 4) x (W // 4) feature map.

    Args:
        num_classes: Number of output logits (one per class).
        input_size: Spatial size of the images the model will be fed, either
            a single int for square images or a ``(height, width)`` pair.
            Defaults to 224, which yields the original 32 * 56 * 56 input
            features for the first fully connected layer.

    Raises:
        ValueError: If ``input_size`` is so small that nothing is left after
            the two pooling stages.
    """

    def __init__(self, num_classes=2, input_size=224):
        super().__init__()

        # Accept either a scalar (square images) or a (height, width) pair.
        try:
            height, width = input_size
        except TypeError:
            height = width = input_size
        height, width = int(height), int(width)

        # Two 2x2 max-pools, each flooring the halved size.
        feat_h, feat_w = height // 2 // 2, width // 2 // 2
        if feat_h < 1 or feat_w < 1:
            raise ValueError(
                f"input_size {input_size!r} is too small: at least 4 pixels "
                "per side are required"
            )

        # First convolutional layer: 3 input channels (RGB) -> 16 feature maps
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
        # 2x2 max pooling with stride 2, shared by both stages
        self.pool = nn.MaxPool2d(2, 2)
        # Second convolutional layer: 16 -> 32 feature maps
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        # Fully connected layer sized from the pooled feature map
        self.fc1 = nn.Linear(32 * feat_h * feat_w, 64)
        # Output layer: one logit per class
        self.fc2 = nn.Linear(64, num_classes)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return raw class logits of shape (batch, num_classes) for ``x``.

        ``x`` must be a (batch, 3, H, W) tensor whose spatial size matches the
        ``input_size`` the model was built with.
        """
        # Stage 1: conv -> ReLU -> pool
        x = self.pool(self.relu(self.conv1(x)))
        # Stage 2: conv -> ReLU -> pool
        x = self.pool(self.relu(self.conv2(x)))
        # Flatten everything but the batch dimension. torch.flatten also
        # handles non-contiguous activations (e.g. channels_last inputs),
        # where .view() would raise.
        x = torch.flatten(x, 1)
        x = self.relu(self.fc1(x))
        # No activation on the output: the loss function consumes raw logits.
        return self.fc2(x)

# 2. Build the two-class network and place its parameters on `device`
model = SimpleCNN(num_classes=2).to(device)

# 3. Training objective and optimizer
# Cross-entropy is computed directly on the raw logits the model returns.
criterion = nn.CrossEntropyLoss()
# Adam over all model parameters with a learning rate of 1e-3.
optimizer = optim.Adam(model.parameters(), lr=1e-3)

# 4. Training loop: `num_epochs` full passes over the training set
num_epochs = 5  # Raise this for better results

for epoch in range(num_epochs):
    # Training mode for the whole epoch.
    model.train()
    running_loss = 0.0

    for images, labels in train_loader:
        # Batches must live on the same device as the model.
        images = images.to(device)
        labels = labels.to(device)

        # Clear gradients left over from the previous step, then run
        # forward -> loss -> backward -> parameter update.
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # `loss` is a per-batch mean, so weight it by the batch size; the
        # epoch figure below is then a true per-sample average even when the
        # last batch is smaller than the others.
        running_loss += loss.item() * images.size(0)

    epoch_loss = running_loss / len(train_loader.dataset)
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}")

print("Training complete.")


Epoch [1/5], Loss: 1.0460
Epoch [2/5], Loss: 0.6833
Epoch [3/5], Loss: 0.6455
Epoch [4/5], Loss: 0.5160
Epoch [5/5], Loss: 0.3131
Training complete.


In [6]:
# Evaluate on the validation loader: evaluation mode, gradients disabled.
model.eval()
val_loss = 0.0
correct = 0
total = 0

with torch.no_grad():
    for images, labels in val_loader:
        images = images.to(device)
        labels = labels.to(device)

        outputs = model(images)
        loss = criterion(outputs, labels)
        # Weight the per-batch mean loss by the batch size so the final
        # figure is a per-sample average.
        val_loss += loss.item() * images.size(0)

        # The predicted class is the index of the largest logit in each row.
        _, predicted = outputs.max(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

avg_val_loss = val_loss / len(val_loader.dataset)
accuracy = 100 * correct / total
print(f'Validation Loss: {avg_val_loss:.4f}, Accuracy: {accuracy:.2f}%')


Validation Loss: 0.5601, Accuracy: 71.22%
