# Cat Vs Dog Classification Model From Scratch

### Required Libraries

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import torchvision.datasets as datasets
import torchvision.transforms as transforms

### Check for CPU/GPU

In [3]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("Device",device)

Device cuda


### transformations for better result

In [4]:
train_transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],std=[0.229, 0.224, 0.225])
])

test_transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],std=[0.229, 0.224, 0.225])
])

### Load training and testing data

In [5]:
train_data = datasets.ImageFolder(root="/kaggle/input/dogs-vs-cats/train",transform=train_transform)
test_data = datasets.ImageFolder(root="/kaggle/input/dogs-vs-cats/test",transform=test_transform)

In [6]:
train_loader = DataLoader(train_data,batch_size=256,shuffle=True,pin_memory=True,num_workers=4,persistent_workers=True,prefetch_factor=4)
test_loader = DataLoader(test_data,batch_size=256,shuffle=False,pin_memory=True,num_workers=4,persistent_workers=True,prefetch_factor=4)

### CNN Model definition

In [7]:
class CNNModel(nn.Module):
    def __init__(self, in_channels=3, num_classes=1):  # binary classification
        super(CNNModel, self).__init__()

        self.features = nn.Sequential(
            # Block 1
            nn.Conv2d(in_channels, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),  # 112x112

            # Block 2
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),  # 56x56

            # Block 3
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.Conv2d(128, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),  # 28x28

            # Block 4
            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),  # 14x14
        )

        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(256*14*14, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(512, num_classes),
            nn.Sigmoid()  # since BCELoss
        )

    def forward(self, x):
        x = self.features(x)
        x = self.classifier(x)
        return x

### Important parameters

In [8]:
epochs = 50
lr = 1e-3
model = CNNModel(3)
optimizer = optim.Adam(model.parameters(),lr=lr)
criterion = nn.BCELoss()
if torch.cuda.device_count() > 1:
    print("Using",torch.cuda.device_count(),"GPUs")
    model = nn.DataParallel(model)
model = model.to(device)

Using 2 GPUs


### training loop

In [10]:
import copy

patience = 5   # stop after 5 epochs with no improvement
best_val_loss = float("inf")
epochs_no_improve = 0
best_model_wts = copy.deepcopy(model.state_dict())

model.train()
for epoch in range(epochs):
    total_epoch_loss = 0
    model.train()
    for images, labels in train_loader:
        images, labels = images.to(device, non_blocking=True), labels.to(device, non_blocking=True)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels.float().unsqueeze(1))
        loss.backward()
        optimizer.step()
        total_epoch_loss += loss.item()

    avg_train_loss = total_epoch_loss / len(train_loader)

    # ---- Validation Loop ----
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device, non_blocking=True), labels.to(device, non_blocking=True)
            outputs = model(images)
            loss = criterion(outputs, labels.float().unsqueeze(1))
            val_loss += loss.item()
    avg_val_loss = val_loss / len(test_loader)

    print(f"Epoch {epoch+1}/{epochs} | Train Loss: {avg_train_loss:.4f} | Val Loss: {avg_val_loss:.4f}")

    # ---- Early Stopping Check ----
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        best_model_wts = copy.deepcopy(model.state_dict())
        epochs_no_improve = 0
        print("Validation loss improved ✅ model saved")
    else:
        epochs_no_improve += 1
        print(f"No improvement for {epochs_no_improve} epochs")

    if epochs_no_improve >= patience:
        print("Early stopping triggered 🚨")
        break

# restore best weights
model.load_state_dict(best_model_wts)
print("Training complete. Best Val Loss:", best_val_loss)

Epoch 1/50 | Train Loss: 0.6233 | Val Loss: 0.6149
Validation loss improved ✅ model saved
Epoch 2/50 | Train Loss: 0.6108 | Val Loss: 0.6308
No improvement for 1 epochs
Epoch 3/50 | Train Loss: 0.5877 | Val Loss: 0.6270
No improvement for 2 epochs
Epoch 4/50 | Train Loss: 0.5769 | Val Loss: 0.5886
Validation loss improved ✅ model saved
Epoch 5/50 | Train Loss: 0.5611 | Val Loss: 0.5749
Validation loss improved ✅ model saved
Epoch 6/50 | Train Loss: 0.5442 | Val Loss: 0.5371
Validation loss improved ✅ model saved
Epoch 7/50 | Train Loss: 0.5171 | Val Loss: 0.5942
No improvement for 1 epochs
Epoch 8/50 | Train Loss: 0.5038 | Val Loss: 0.5628
No improvement for 2 epochs
Epoch 9/50 | Train Loss: 0.4816 | Val Loss: 0.8267
No improvement for 3 epochs
Epoch 10/50 | Train Loss: 0.4525 | Val Loss: 0.5973
No improvement for 4 epochs
Epoch 11/50 | Train Loss: 0.4259 | Val Loss: 0.4717
Validation loss improved ✅ model saved
Epoch 12/50 | Train Loss: 0.4176 | Val Loss: 0.8006
No improvement for 1 e

### Model evaluation

In [11]:
model.eval()
total,correct = 0,0
with torch.no_grad():
    for images,labels in test_loader:
        images,labels = images.to(device,non_blocking=True),labels.to(device,non_blocking=True)
        outputs = model(images)
        preds = (outputs > 0.5).long()  # threshold for binary
        correct += (preds.squeeze() == labels).sum().item()
        total += labels.size(0)
        
print("Testing accuracy:",correct/total)

Testing accuracy: 0.8594


In [12]:
model.eval()
total,correct = 0,0
with torch.no_grad():
    for images,labels in train_loader:
        images,labels = images.to(device,non_blocking=True),labels.to(device,non_blocking=True)
        outputs = model(images)
        preds = (outputs > 0.5).long()  # threshold for binary
        correct += (preds.squeeze() == labels).sum().item()
        total += labels.size(0)
        
print("Training accuracy:",correct/total)

Training accuracy: 0.86815


### Saving model with torchScript

In [13]:
if isinstance(model,torch.nn.DataParallel):
    model = model.module  # unwrap to the original model
m = torch.jit.script(model)
m.save("model.pt")

# Using transfer learning for improving accuracy

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models, transforms, datasets
from torch.utils.data import DataLoader

In [3]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("Device",device)

Device cuda


In [4]:
train_transform = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],std=[0.229, 0.224, 0.225])
])

test_transform = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],std=[0.229, 0.224, 0.225])
])

In [5]:
# Load training and testing data
train_data = datasets.ImageFolder(root="/kaggle/input/dogs-vs-cats/train",transform=train_transform)
test_data = datasets.ImageFolder(root="/kaggle/input/dogs-vs-cats/test",transform=test_transform)

In [6]:
train_loader = DataLoader(train_data,batch_size=128,shuffle=True,pin_memory=True,num_workers=4,persistent_workers=True,prefetch_factor=4)
test_loader = DataLoader(test_data,batch_size=128,shuffle=False,pin_memory=True,num_workers=4,persistent_workers=True,prefetch_factor=4)

In [7]:
resnet18 = models.resnet18(pretrained=True)
resnet18

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 90.0MB/s]


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [8]:
for param in resnet18.parameters():
    param.requires_grad = False

In [9]:
num_ftrs = resnet18.fc.in_features
resnet18.fc = nn.Linear(num_ftrs, 2)

In [10]:
resnet18

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [11]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(resnet18.fc.parameters(), lr=0.001)

In [12]:
if torch.cuda.device_count()>1:
    print("Using",torch.cuda.device_count(),"GPUs")
    resnet18 = nn.DataParallel(resnet18)
resnet18 = resnet18.to(device)

Using 2 GPUs


### training loop

In [16]:
epochs = 10
for epoch in range(epochs):
    resnet18.train()
    running_loss = 0.0
    for images, labels in train_loader:
        images, labels = images.to(device,non_blocking=True), labels.to(device,non_blocking=True)
        optimizer.zero_grad()
        outputs = resnet18(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    avg_loss = running_loss/len(train_loader)
    print(f"Epoch [{epoch+1}/{epochs}], Loss: {avg_loss:.4f}")

Epoch [1/10], Loss: 0.1660
Epoch [2/10], Loss: 0.0841
Epoch [3/10], Loss: 0.0723
Epoch [4/10], Loss: 0.0711
Epoch [5/10], Loss: 0.0651
Epoch [6/10], Loss: 0.0638
Epoch [7/10], Loss: 0.0651
Epoch [8/10], Loss: 0.0625
Epoch [9/10], Loss: 0.0596
Epoch [10/10], Loss: 0.0603


### Model evaluation

In [18]:
resnet18.eval()
total,correct = 0,0
with torch.no_grad():
    for images,labels in test_loader:
        images,labels = images.to(device,non_blocking=True),labels.to(device,non_blocking=True)
        outputs = resnet18(images)
        preds = torch.argmax(outputs,1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)
        
print("Testing accuracy:",correct/total)

Testing accuracy: 0.9794


In [19]:
resnet18.eval()
total,correct = 0,0
with torch.no_grad():
    for images,labels in train_loader:
        images,labels = images.to(device,non_blocking=True),labels.to(device,non_blocking=True)
        outputs = resnet18(images)
        preds = torch.argmax(outputs,1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)
        
print("Training accuracy:",correct/total)

Training accuracy: 0.98255


### Saving model with torchScript

In [20]:
if isinstance(resnet18,torch.nn.DataParallel):
    resnet18 = resnet18.module  # unwrap to the original model
m = torch.jit.script(resnet18)
m.save("cat_vs_dog_using_resnet18.pt")

### Prediction

In [26]:
import torch
from torchvision import transforms
from PIL import Image

resnet18.eval()

transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor()
])

img_path = "/kaggle/input/dogs-vs-cats/train/cats/cat.11712.jpg"
image = Image.open(img_path).convert("RGB")

image = transform(image).unsqueeze(0)  # add batch dimension [1, C, H, W]
image = image.to(device)
classes = train_data.classes

with torch.no_grad():
    outputs = resnet18(image)
    print("Raw outputs:", outputs)

    preds = torch.argmax(outputs,1)
    print("Predicted class:",classes[preds.item()])

Raw outputs: tensor([[ 2.3434, -4.2085]], device='cuda:0')
Predicted class: cats
