In [8]:
import torch
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader, random_split
import torchvision.transforms as transforms

# 1. Define transformations
transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
])

# 2. Create a dataset from the folder
dataset = ImageFolder(root="Dataset", transform=transform)
print("Classes:", dataset.classes)  # Should print ['Cats', 'Dogs']

# 3. (Optional) Split into train and val sets
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

# 4. Create DataLoaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader   = DataLoader(val_dataset, batch_size=32, shuffle=False)


Classes: ['Cats', 'Dogs']


In [9]:
from torch.utils.data import random_split

train_size = int(0.8 * len(dataset))  # 80% for training
val_size   = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])


In [10]:
from torch.utils.data import DataLoader

batch_size = 32

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader   = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)


A Convolutional Neural Network (CNN) is very effective for image classification 
because it automatically learns to extract important features (or "embeddings") from images—such as edges, textures, and shapes—through convolutional layers.

This is the standard way to define a neural network in PyTorch. inherits from nn.Module


Fully Connected Layers

Flattening the output:
After three rounds of pooling on an image of size 128x128:
128 → 64 (after conv1 + pool)
64 → 32 (after conv2 + pool)
32 → 16 (after conv3 + pool)
The feature maps from the last convolution have dimensions 64 channels × 16 × 16.
self.fc1 = nn.Linear(64 * 16 * 16, 128) creates a fully connected layer that takes this flattened vector as input and outputs 128 features.
Output Layer:
self.fc2 = nn.Linear(128, 2) maps the 128 features to 2 output classes (Cats and Dogs).

Convolutional Layers: Learn features from images.

Pooling: Reduces image dimensions and helps capture important features.

Flattening: Prepares data for the fully connected layers.

Fully Connected Layers: Make the final prediction (classifying into two classes).

In [11]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class SimpleCNN(nn.Module): 
    def __init__(self):
        super(SimpleCNN, self).__init__() #This line calls the initializer of the parent class (nn.Module), which is necessary for PyTorch to manage your model correctly.
        # 1) Convolutional layers
        # Input channels: 3 (for RGB images).
        # Output channels: 16 (the number of filters that will learn features).
        # Kernel size: 3 (each filter is a 3x3 matrix).
        # Padding: 1 (keeps the spatial dimensions the same before pooling).
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool  = nn.MaxPool2d(2, 2)
        
        # 2) Fully connected layers
        # After 3 poolings on a 128x128 input, feature map is 16x16
        self.fc1   = nn.Linear(64 * 16 * 16, 128)
        self.fc2   = nn.Linear(128, 2)  # 2 classes: Cats and Dogs

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))  # 128 -> 64
        x = self.pool(F.relu(self.conv2(x)))  # 64  -> 32
        x = self.pool(F.relu(self.conv3(x)))  # 32  -> 16
        x = x.view(-1, 64 * 16 * 16)          # Flatten
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

model = SimpleCNN()
print(model)


SimpleCNN(
  (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=16384, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=2, bias=True)
)


In [13]:
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
print(optimizer)

Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: None
    lr: 0.001
    maximize: False
    weight_decay: 0
)


In [25]:
num_epochs = 10  # Start small for testing
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0

    for images, labels in train_loader:
        # 1) Zero the gradients
        optimizer.zero_grad()
        # 2) Forward pass
        outputs = model(images)
        # 3) Compute loss
        loss = criterion(outputs, labels)
        # 4) Backprop
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item() * images.size(0)

    epoch_loss = running_loss / len(train_loader.dataset)
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}")

    # Validation step
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in val_loader:
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    val_acc = 100 * correct / total
    print(f"Validation Accuracy: {val_acc:.2f}%")

Epoch [1/10], Loss: 0.6098
Validation Accuracy: 57.27%
Epoch [2/10], Loss: 0.6098
Validation Accuracy: 57.27%
Epoch [3/10], Loss: 0.6098
Validation Accuracy: 57.27%
Epoch [4/10], Loss: 0.6098
Validation Accuracy: 57.27%
Epoch [5/10], Loss: 0.6098
Validation Accuracy: 57.27%
Epoch [6/10], Loss: 0.6098
Validation Accuracy: 57.27%
Epoch [7/10], Loss: 0.6098
Validation Accuracy: 57.27%
Epoch [8/10], Loss: 0.6098
Validation Accuracy: 57.27%
Epoch [9/10], Loss: 0.6098
Validation Accuracy: 57.27%
Epoch [10/10], Loss: 0.6098
Validation Accuracy: 57.27%


In [15]:
torch.save(model.state_dict(), "cats_vs_dogs_model.pth")

In [18]:
model = SimpleCNN()
model.load_state_dict(torch.load("cats_vs_dogs_model.pth"))
model.eval()

SimpleCNN(
  (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=16384, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=2, bias=True)
)