In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/dog-and-cat-detection/annotations/Cats_Test2888.xml
/kaggle/input/dog-and-cat-detection/annotations/Cats_Test1617.xml
/kaggle/input/dog-and-cat-detection/annotations/Cats_Test83.xml
/kaggle/input/dog-and-cat-detection/annotations/Cats_Test899.xml
/kaggle/input/dog-and-cat-detection/annotations/Cats_Test1204.xml
/kaggle/input/dog-and-cat-detection/annotations/Cats_Test603.xml
/kaggle/input/dog-and-cat-detection/annotations/Cats_Test2365.xml
/kaggle/input/dog-and-cat-detection/annotations/Cats_Test380.xml
/kaggle/input/dog-and-cat-detection/annotations/Cats_Test1300.xml
/kaggle/input/dog-and-cat-detection/annotations/Cats_Test3380.xml
/kaggle/input/dog-and-cat-detection/annotations/Cats_Test3246.xml
/kaggle/input/dog-and-cat-detection/annotations/Cats_Test1979.xml
/kaggle/input/dog-and-cat-detection/annotations/Cats_Test147.xml
/kaggle/input/dog-and-cat-detection/annotations/Cats_Test3153.xml
/kaggle/input/dog-and-cat-detection/annotations/Cats_Test985.xml
/kaggle/input/dog

In [2]:
# Python code extracted from the uploaded image
import kagglehub

# Download the latest dataset version
data_dir = kagglehub.dataset_download("andrewmvd/dog-and-cat-detection")
print("Path to dataset files:", data_dir)


Path to dataset files: /kaggle/input/dog-and-cat-detection


In [3]:
# Importing necessary libraries
import os
import torch
import numpy as np
import pandas as pd
import seaborn as sns
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
import xml.etree.ElementTree as ET

from PIL import Image
from torchvision import transforms, models
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from torchvision.models.resnet import ResNet18_Weights


In [4]:
# Dataset Class
class ImageDataset(Dataset):
    def __init__(self, annotations_dir, image_dir, transform=None):
        self.annotations_dir = annotations_dir
        self.image_dir = image_dir
        self.transform = transform
        self.image_files = self.filter_images_with_multiple_objects()

    def filter_images_with_multiple_objects(self):
        valid_image_files = []
        for f in os.listdir(self.image_dir):
            if os.path.isfile(os.path.join(self.image_dir, f)):
                img_name = f
                annotation_name = os.path.splitext(img_name)[0] + ".xml"
                annotation_path = os.path.join(self.annotations_dir, annotation_name)

                # Keep images that have a single object
                if self.count_objects_in_annotation(annotation_path) <= 1:
                    valid_image_files.append(img_name)
                else:
                    print(f"Image {img_name} has multiple objects and will be excluded from the dataset")
        return valid_image_files

    def count_objects_in_annotation(self, annotation_path):
        try:
            tree = ET.parse(annotation_path)
            root = tree.getroot()
            count = 0
            for obj in root.findall("object"):
                count += 1
            return count
        except FileNotFoundError:
            return 0

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, idx):
        # Image path
        img_name = self.image_files[idx]
        img_path = os.path.join(self.image_dir, img_name)

        # Load image
        image = Image.open(img_path).convert("RGB")

        # Annotation path
        annotation_name = os.path.splitext(img_name)[0] + ".xml"
        annotation_path = os.path.join(self.annotations_dir, annotation_name)

        # Parse annotation
        label = self.parse_annotation(annotation_path)

        if self.transform:
            image = self.transform(image)

        return image, label

    def parse_annotation(self, annotation_path):
        tree = ET.parse(annotation_path)
        root = tree.getroot()

        label = None
        for obj in root.findall("object"):
            name = obj.find("name").text
            if label is None:  # Take the first label for now (we work with 1 label per image)
                label = name

        # Convert label to numerical representation (0 for cat, 1 for dog)
        label_num = 0 if label == "cat" else 1 if label == "dog" else -1

        return label_num


In [5]:
# Data directory
annotations_dir = os.path.join(data_dir, 'annotations')
image_dir = os.path.join(data_dir, 'images')

# Get list of image files and create a dummy dataframe to split the data
image_files = [f for f in os.listdir(image_dir) if os.path.isfile(os.path.join(image_dir, f))]
df = pd.DataFrame({'image_name': image_files})

# Split data into training and validation sets
train_df, val_df = train_test_split(df, test_size=0.2, random_state=42)


In [6]:
# Define transforms
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize images to 224x224
    transforms.ToTensor(),  # Convert images to tensors
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize (ImageNet stats)
])

# Datasets
train_dataset = ImageDataset(annotations_dir, image_dir, transform=transform)
val_dataset = ImageDataset(annotations_dir, image_dir, transform=transform)

# Filter datasets based on train_df and val_df
train_dataset.image_files = [f for f in train_dataset.image_files if f in train_df['image_name'].values]
val_dataset.image_files = [f for f in val_dataset.image_files if f in val_df['image_name'].values]

# Dataloaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)  # Shuffle for training
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)  # No shuffle for validation


Image Cats_Test736.png has multiple objects and will be excluded from the dataset
Image Cats_Test736.png has multiple objects and will be excluded from the dataset


In [7]:
# Model
model = models.resnet18(weights=ResNet18_Weights.DEFAULT)  # Load pre-trained ResNet-18 model
num_ftrs = model.fc.in_features  # Get the number of input features to the fully connected layer
model.fc = nn.Linear(num_ftrs, 2)  # Modify the last layer for 2 classes: cat and dog

# Device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # Use GPU if available
model.to(device)  # Move the model to the selected device

# Loss and Optimizer
criterion = nn.CrossEntropyLoss()  # Use Cross Entropy Loss for classification
optimizer = optim.Adam(model.parameters(), lr=0.001)  # Adam optimizer with learning rate 0.001

# Show model summary
print(model)


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 196MB/s]


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [8]:
# Training Loop
num_epochs = 10
for epoch in range(num_epochs):
    # Training phase
    model.train()  
    for batch_idx, (data, targets) in enumerate(train_loader):
        data = data.to(device)
        targets = targets.to(device)

        scores = model(data)
        loss = criterion(scores, targets)
        
        optimizer.zero_grad()  
        loss.backward()  
        optimizer.step() 

    model.eval()  
    with torch.no_grad():  
        correct = 0
        total = 0
        for data, targets in val_loader:
            data = data.to(device)
            targets = targets.to(device)
            
            scores = model(data)
            _, predictions = scores.max(1)  
            correct += (predictions == targets).sum()  
            total += targets.size(0) 
        
        # Calculate validation accuracy
        accuracy = float(correct) / float(total) * 100
        print(f"Epoch {epoch + 1}/{num_epochs}, Validation Accuracy: {accuracy:.2f}%")


Epoch 1/10, Validation Accuracy: 78.32%
Epoch 2/10, Validation Accuracy: 92.55%
Epoch 3/10, Validation Accuracy: 94.17%
Epoch 4/10, Validation Accuracy: 92.55%
Epoch 5/10, Validation Accuracy: 90.24%
Epoch 6/10, Validation Accuracy: 95.66%
Epoch 7/10, Validation Accuracy: 89.43%
Epoch 8/10, Validation Accuracy: 89.43%
Epoch 9/10, Validation Accuracy: 94.58%
Epoch 10/10, Validation Accuracy: 93.90%
