In [10]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import torch 
from torchmetrics import Accuracy
import torchvision
from torch import nn
from torch.utils.data import DataLoader
import pickle
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory



# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [11]:
class CNN(nn.Module):
    def __init__(self) -> None:
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, stride=1, padding=1, bias=True)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1, bias=True)
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1, bias=True)
        self.pool= nn.MaxPool2d(2,2)
        self.batchnorm1=nn.BatchNorm2d(32)
        self.batchnorm2=nn.BatchNorm2d(64)
        self.batchnorm3=nn.BatchNorm2d(128)
        self.batchnorm4=nn.BatchNorm1d(16)
        self.linear_layer1 = nn.Linear(in_features = 128 * 16 * 16, out_features = 16)
        self.linear_layer2 = nn.Linear(in_features = 16, out_features = 2)
        self.loss_fn = nn.CrossEntropyLoss()
        self.optimizer = torch.optim.Adam(self.parameters(), lr = 0.001)
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    def forward(self, x):
        x = nn.functional.relu(self.batchnorm1(self.conv1(x)))
        x = self.pool(x)
        
        x = nn.functional.relu(self.batchnorm2(self.conv2(x)))
        x = self.pool(x)

        x = nn.functional.relu(self.batchnorm3(self.conv3(x)))
        x = self.pool(x)
        x = torch.flatten(x, 1)

        x = nn.functional.relu(self.batchnorm4(self.linear_layer1(x)))
        x = nn.functional.relu(self.linear_layer2(x))
        
        return x

    def accuracy_fn(self,y_true, y_pred):
        return Accuracy(task="multiclass", num_classes=10).to(device)
    
    def fit(self, train_loader, epochs):
        torch.manual_seed(42)
        for epoch in range(epochs):
            self.train()
            total_loss=0 
            
            for batch_idx, (images, labels) in enumerate(train_loader):
                            
                images = images.to(model.device)
                labels = labels.to(model.device)
                
                outputs = model(images)
                loss = self.loss_fn(outputs,labels)
                
                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()

                total_loss += loss.item()
                
            print(f"Epoch [{epoch+1}/{epochs}], Loss: {total_loss/len(train_loader):.4f}")

    def predict(self, X):
        X= X.to(self.device)
        self.eval()
        with torch.no_grad():
            logits = self.forward(X)
            probs = torch.sigmoid(logits)
            preds = (probs > 0.35).int()
        return preds

### To resize and convert to tensors

In [12]:
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

train_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),           # Randomly flip images
    transforms.RandomRotation(10),               # Random rotation up to 10 degrees
    transforms.RandomResizedCrop(128, scale=(0.8, 1.0)),  # Random crop and resize
    transforms.ColorJitter(brightness=0.2, contrast=0.2), # Color variation
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5),         # Normalize for RGB
                         (0.5, 0.5, 0.5))
])

test_transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5),
                         (0.5, 0.5, 0.5))
])


### To apply the above transformation and also split the dataset into train and test

In [24]:
from torchvision.datasets import ImageFolder
from torch.utils.data import Dataset,random_split
import torch
from PIL import Image

class VerifiedImageFolder(ImageFolder):
    def __getitem__(self, index):
        # keep trying next images if current one fails
        while True:
            try:
                return super().__getitem__(index)
            except (IOError, OSError):
                index = (index + 1) % len(self.samples)
                
full_dataset = VerifiedImageFolder(
    root="/kaggle/input/microsoft-catsvsdogs-dataset/PetImages",
    transform=None
)

# Split into train and test
train_size = int(0.8 * len(full_dataset))
test_size = len(full_dataset) - train_size
train_dataset, test_dataset = random_split(full_dataset, [train_size, test_size])

# Assign transforms to each split
train_dataset.dataset.transform = train_transform
test_dataset.dataset.transform = test_transform

# Create DataLoaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=2)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=2)


In [29]:
model= CNN()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [30]:
model.to(model.device)
model.fit(train_loader, epochs=15)
torch.save(model.state_dict(), "model.pkl")



Epoch [1/15], Loss: 0.5247




Epoch [2/15], Loss: 0.4091




Epoch [3/15], Loss: 0.3428




Epoch [4/15], Loss: 0.2910




Epoch [5/15], Loss: 0.2375




Epoch [6/15], Loss: 0.1802




Epoch [7/15], Loss: 0.1403




Epoch [8/15], Loss: 0.1028




Epoch [9/15], Loss: 0.0770




Epoch [10/15], Loss: 0.0706




Epoch [11/15], Loss: 0.0520




Epoch [12/15], Loss: 0.0488




Epoch [13/15], Loss: 0.0499




Epoch [14/15], Loss: 0.0337




Epoch [15/15], Loss: 0.0365


In [31]:
model_instance = CNN()
model_instance.load_state_dict(torch.load("model.pkl"))
model_instance.to(model_instance.device)

  model_instance.load_state_dict(torch.load("model.pkl"))


CNN(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (batchnorm1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (batchnorm2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (batchnorm3): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (batchnorm4): BatchNorm1d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (linear_layer1): Linear(in_features=32768, out_features=16, bias=True)
  (linear_layer2): Linear(in_features=16, out_features=2, bias=True)
  (loss_fn): CrossEntropyLoss()
)

In [32]:
model_instance.eval()
test_loss = 0.0
test_correct = 0
total = 0

with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(model_instance.device), labels.to(model_instance.device)

        outputs = model(images)
        loss = model_instance.loss_fn(outputs, labels)
        test_loss += loss.item()

        preds = torch.argmax(outputs, dim=1)
        test_correct += (preds == labels).sum().item()
        total += labels.size(0)

avg_test_loss = test_loss / len(test_loader)
test_accuracy = test_correct / total

print(f"Test Loss: {avg_test_loss:.4f}, Test Accuracy: {test_accuracy*100:.2f}%")

Test Loss: 0.5816, Test Accuracy: 84.36%
