In [5]:
import os
import zipfile

import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torchvision import datasets, models
from torch.utils.data import DataLoader


# Prerequisite for this notebook
Run the cell below if you have not yet downloaded the cat and dog dataset.

In [4]:
def download_kaggle_dataset(dataset_name, destination_folder):
    """
    Download a Kaggle dataset and organize its images into train/val folders.

    The archive is downloaded and unzipped into ``destination_folder``. The
    files found in ``<destination_folder>/cat_and_dog`` are then split 80/20
    per class (class is decided by the substring 'cat' / 'dog' in the file
    name) and copied into::

        <destination_folder>/train/cats    <destination_folder>/val/cats
        <destination_folder>/train/dogs    <destination_folder>/val/dogs

    :param dataset_name: str, the name of the Kaggle dataset
        (e.g., 'tongpython/cat-and-dog')
    :param destination_folder: str, the folder where the dataset will be
        extracted and where the train/val folders are created
    :raises FileNotFoundError: if the expected source folder is missing after
        the download
    :raises ValueError: if no cat files or no dog files are found
    """
    # Kept function-local so that merely defining this function does not
    # require kaggle / scikit-learn to be installed.
    import shutil
    from kaggle.api.kaggle_api_extended import KaggleApi
    from sklearn.model_selection import train_test_split

    def copy_files(files, src, dst):
        """Copy every file name in ``files`` from folder ``src`` to folder ``dst``."""
        for f in files:
            shutil.copy(os.path.join(src, f), os.path.join(dst, f))

    # Step 1: Download and unzip the requested dataset into destination_folder
    api = KaggleApi()
    api.authenticate()
    api.dataset_download_files(dataset_name, path=destination_folder, unzip=True)

    # Step 2: Organize into train/val folders
    # NOTE(review): the flat 'cat_and_dog' folder name is carried over from the
    # original code; confirm it matches what the archive actually unzips to.
    source_dir = os.path.join(destination_folder, "cat_and_dog")
    train_dir = os.path.join(destination_folder, "train")
    val_dir = os.path.join(destination_folder, "val")

    if not os.path.isdir(source_dir):
        raise FileNotFoundError(
            f"Expected extracted images in {source_dir}, but that folder does not exist"
        )

    for split_dir in (train_dir, val_dir):
        for class_name in ("cats", "dogs"):
            os.makedirs(os.path.join(split_dir, class_name), exist_ok=True)

    # os.listdir order is arbitrary; sorting makes the seeded split reproducible.
    # Sub-directories are skipped because shutil.copy only handles files.
    all_files = sorted(
        f for f in os.listdir(source_dir)
        if os.path.isfile(os.path.join(source_dir, f))
    )
    cat_files = [f for f in all_files if 'cat' in f]
    dog_files = [f for f in all_files if 'dog' in f]
    if not cat_files or not dog_files:
        raise ValueError(
            f"No cat and/or dog files found in {source_dir} "
            f"(cats: {len(cat_files)}, dogs: {len(dog_files)})"
        )

    train_cats, val_cats = train_test_split(cat_files, test_size=0.2, random_state=42)
    train_dogs, val_dogs = train_test_split(dog_files, test_size=0.2, random_state=42)

    copy_files(train_cats, source_dir, os.path.join(train_dir, "cats"))
    copy_files(val_cats, source_dir, os.path.join(val_dir, "cats"))
    copy_files(train_dogs, source_dir, os.path.join(train_dir, "dogs"))
    copy_files(val_dogs, source_dir, os.path.join(val_dir, "dogs"))

    print(f"Dataset downloaded and organized into {train_dir} and {val_dir}")

# download_kaggle_dataset('tongpython/cat-and-dog', 'kaggle_data')


# Check whether a CUDA GPU is available


In [None]:
# Device config: use the CUDA GPU when PyTorch reports one, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

print(f"Using device: {device}")

Using device: cuda


In [None]:
# Transformations applied to every image, in order:
#   1. resize to 224x224,
#   2. convert to a tensor,
#   3. normalize each of the three channels with the given mean / std.
# NOTE(review): the mean/std values are presumably the ImageNet channel
# statistics expected by the pretrained backbone — confirm against the
# torchvision weights documentation.
transform = transforms.Compose([
    transforms.Resize(size=(224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
])

In [None]:
# Image datasets read from the train/val folders; both use the same
# preprocessing pipeline.
train_dataset = datasets.ImageFolder(root='kaggle_data/train', transform=transform)
val_dataset = datasets.ImageFolder(root='kaggle_data/val', transform=transform)

# Batches of 32 images; only the training data is shuffled.
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=32, shuffle=False)

# Set up the Transfer Learning Model


In [None]:
# Model setup: start from a pretrained ResNet-18 and freeze all of its
# existing parameters so that only the new head below is trained.
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour
# of `weights=` — confirm against the installed version before switching.
model = models.resnet18(pretrained=True)
model.requires_grad_(False)

# Replace the final fully-connected layer with a fresh 2-output layer
# (created after the freeze, so its parameters are trainable), then move the
# whole model to the selected device.
model.fc = nn.Linear(in_features=model.fc.in_features, out_features=2)
model = model.to(device)

# Loss and optimizer: cross-entropy on the 2 outputs; Adam is given only the
# parameters of the new head.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.fc.parameters(), lr=0.001)

# Training the model

In [None]:
# Training loop: 5 passes over train_loader. For every epoch it accumulates
# the sum of the per-batch losses and the number of correct predictions, then
# prints both.
for epoch in range(5):
    model.train()
    running_loss, correct, total = 0, 0, 0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        # Clear gradients left over from the previous step before the new pass.
        optimizer.zero_grad()

        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Bookkeeping: the predicted class is the index of the largest output.
        running_loss += loss.item()
        predicted = torch.max(outputs, dim=1).indices
        total += labels.shape[0]
        correct += predicted.eq(labels).sum().item()

    print(f"Epoch {epoch+1}: Loss={running_loss:.4f}, Accuracy={100 * correct / total:.2f}%")

# Evaluate the Model

In [None]:
# Save the model: only the state dict (the weights) is written to disk.
torch.save(model.state_dict(), "resnet18_transfer.pth")

# Validation loop: switch the model to eval mode and count correct
# predictions over val_loader with gradient tracking disabled.
model.eval()
correct, total = 0, 0
with torch.no_grad():
    for images, labels in val_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        # The predicted class is the index of the largest output per sample.
        predicted = torch.max(outputs, dim=1).indices
        total += labels.shape[0]
        correct += predicted.eq(labels).sum().item()

print(f'Validation Accuracy: {100 * correct / total:.2f}%')