# Dogs vs. Cats


This notebook includes:
1.  Data preprocessing
2.  Load train, validation and test datasets
3.  Model definition and training
4.  Whole model Fine-Tuning
5.  Predict test dataset

### 1. Data preprocessing

Define the training transforms (with augmentation) and the validation/test transforms (without augmentation). Augmentation is applied only to training data because, during validation and testing, we want to measure the model's true performance on "original", consistent images rather than on randomly varied ones.

In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset
from torchvision.datasets import ImageFolder
from PIL import Image
import os
import glob

# Side length, in pixels, of the square images fed to the network
IMG_SIZE = 224

# Per-channel ImageNet statistics used for normalization
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

# Training pipeline: resize first, then light random augmentation,
# and finally tensor conversion plus normalization.
train_transform = transforms.Compose(
    [
        transforms.Resize(size=(IMG_SIZE, IMG_SIZE)),
        transforms.RandomHorizontalFlip(),       # random left/right mirror
        transforms.RandomRotation(degrees=10),   # random rotation within +/- 10 degrees
        transforms.ColorJitter(                  # slight color jitter
            brightness=0.1,
            contrast=0.1,
            saturation=0.1,
        ),
        transforms.ToTensor(),                   # to PyTorch tensor, values scaled to [0, 1]
        transforms.Normalize(mean=mean, std=std),
    ]
)

# Validation / test pipeline: same resize and normalization, but no
# random augmentation, so evaluation images are processed consistently.
test_transform = transforms.Compose(
    [
        transforms.Resize(size=(IMG_SIZE, IMG_SIZE)),
        transforms.ToTensor(),
        transforms.Normalize(mean=mean, std=std),
    ]
)

print("Data transforms defined successfully.")

  from .autonotebook import tqdm as notebook_tqdm


Data transforms defined successfully.


### 2. Load train, validation and test datasets

Create three DataLoaders: one for training, one for validation and one for testing.

In [2]:
from torch.utils.data import Subset, random_split

# 1. Define data path and loader settings
DATA_DIR = './dataset'

BATCH_SIZE = 64

# 2. Create Dataset Instances

# CIFAR-10 training split with the training transforms (with augmentation)
full_train_dataset = torchvision.datasets.CIFAR10(
    root=DATA_DIR,
    train=True,
    download=True,
    transform=train_transform
)

# A second instance of the same training split that carries the
# evaluation transforms (no augmentation). The validation subset is built
# on this instance, so validation images are un-augmented without having
# to mutate the transform of the dataset used for training.
# download=False: the files were already fetched by the instance above.
full_val_dataset = torchvision.datasets.CIFAR10(
    root=DATA_DIR,
    train=True,
    download=False,
    transform=test_transform
)

# CIFAR-10 test split (held out) with the evaluation transforms
test_dataset = torchvision.datasets.CIFAR10(
    root=DATA_DIR,
    train=False,
    download=True,
    transform=test_transform
)

# 3. Carve a validation set out of the training split
VAL_SPLIT_SIZE = 5000  # number of images reserved for validation
TRAIN_SPLIT_SIZE = len(full_train_dataset) - VAL_SPLIT_SIZE  # the rest is used for training
train_subset, val_split = random_split(
    full_train_dataset,
    [TRAIN_SPLIT_SIZE, VAL_SPLIT_SIZE],
    generator=torch.Generator().manual_seed(42)  # fixed seed keeps the split reproducible
)
# Same validation indices as chosen by random_split, but read through the
# evaluation-transform instance. train_subset and val_subset stay disjoint.
val_subset = Subset(full_val_dataset, val_split.indices)

# 4. Report split sizes
print(f"Total Cifar-10 train images: {len(full_train_dataset)}")
print(f"  -> Split into {len(train_subset)} for training")
print(f"  -> Split into {len(val_subset)} for validation")
print(f"Total Cifar-10 test images (holdout): {len(test_dataset)}")

# 5. Create DataLoaders
train_loader = DataLoader(
    dataset=train_subset,
    batch_size=BATCH_SIZE,
    shuffle=True,  # Shuffle the training data
    num_workers=4  # Use multiple processes to load data (set to 0 on Windows if errors occur)
)

val_loader = DataLoader(
    dataset=val_subset,
    batch_size=BATCH_SIZE,
    shuffle=False,  # Validation data does not need to be shuffled
    num_workers=4
)

test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=4
)

print(f"\nSuccessfully loaded {len(train_subset)} training images and {len(val_subset)} validation images and {len(test_dataset)} test images.")

# 6. Check one batch
print("\n  Checking DataLoader  ")
images, labels = next(iter(train_loader))
print(f"Image batch shape: {images.shape}") # [BATCH_SIZE, 3, 224, 224]
print(f"Label batch shape: {labels.shape}") # [BATCH_SIZE]
print(f"Label examples: {labels[:5]}")


Files already downloaded and verified
Files already downloaded and verified
Total Cifar-10 train images: 50000
  -> Split into 45000 for training
  -> Split into 5000 for validation
Total Cifar-10 test images (holdout): 10000

Successfully loaded 45000 training images and 5000 validation images and 10000 test images.

  Checking DataLoader  
Image batch shape: torch.Size([64, 3, 224, 224])
Label batch shape: torch.Size([64])
Label examples: tensor([5, 7, 8, 3, 3])


### 3. Model definition and training
During the model training phase, we use transfer learning: we load a ResNet-34 model pre-trained on ImageNet and train only its last (fully connected) layer to quickly adapt it to the 10-class CIFAR-10 classification task. The code trains for 10 epochs, evaluates the model on the validation set after each epoch, and saves the best-performing model.

In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models
import time
import copy

# 1. Set device (GPU or CPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# 2. Load pre-trained model (ResNet-34)
# ResNet-34 with weights pre-trained on ImageNet
model = models.resnet34(weights=models.ResNet34_Weights.IMAGENET1K_V1)

# 3. Freeze all pre-trained layers
# requires_grad = False means no gradients are computed for these parameters
for param in model.parameters():
    param.requires_grad = False

# 4. Replace the final layer (classifier)
# Class-name -> label-index mapping of the dataset (from the previous cell);
# its size determines how many outputs the new classifier needs.
class_to_idx = full_train_dataset.class_to_idx
num_classes = len(class_to_idx)

# Number of input features of the original final layer ('fc')
num_ftrs = model.fc.in_features

# New Linear head producing one logit per class. It is created after the
# freeze loop above, so its parameters are trainable.
model.fc = nn.Linear(num_ftrs, num_classes)

# Move the model to the selected device
model = model.to(device)

# 5. Define Loss Function and Optimizer
# Loss Function: CrossEntropyLoss (multi-class classification). It takes the
# raw logits and the integer class labels directly.
criterion = nn.CrossEntropyLoss()

# Optimizer: Adam
# IMPORTANT: only the parameters of the new final layer are passed to the
# optimizer, so only this layer is trained.
optimizer = optim.Adam(model.fc.parameters(), lr=0.001)

# Print the class mapping for reference
print(f"Class mapping: {class_to_idx}")


# 6. Training and Validation Loop
NUM_EPOCHS = 10 # Number of passes over the entire training subset
best_val_acc = 0.0 # Best validation accuracy achieved so far
best_model_wts = copy.deepcopy(model.state_dict()) # Weights of the best model so far

print("\n  Starting Training (Feature Extraction)  ")

for epoch in range(NUM_EPOCHS):
    start_time = time.time()

    #   Training Phase
    model.train() # Set model to training mode
    # Point the dataset behind val_subset at the augmenting transforms. When
    # train_subset shares that dataset object (as random_split subsets of one
    # dataset do), this is what makes the training batches augmented.
    val_subset.dataset.transform = train_transform

    running_loss = 0.0
    running_corrects = 0

    for inputs, labels in train_loader:
        # Move data and labels to the selected device
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Clear previous gradients
        optimizer.zero_grad()

        # Forward pass: raw model output (logits), one column per class
        outputs = model(inputs)

        # Calculate the loss
        loss = criterion(outputs, labels)

        # Predicted class = index of the largest logit in each row
        _, preds = torch.max(outputs, 1)

        # Backpropagation (calculate gradients)
        loss.backward()
        # Update the weights managed by the optimizer (model.fc only)
        optimizer.step()

        # Accumulate statistics for this epoch. The batch loss is multiplied
        # by the batch size so that dividing by the subset size below gives a
        # per-sample average.
        running_loss += loss.item() * inputs.size(0)
        running_corrects += torch.sum(preds == labels)

    epoch_loss = running_loss / len(train_subset)
    epoch_acc = running_corrects.double() / len(train_subset)


    #   Validation Phase
    model.eval() # Set model to evaluation mode
    # Validation images must not be augmented
    val_subset.dataset.transform = test_transform

    val_loss = 0.0
    val_corrects = 0

    with torch.no_grad(): # Disable gradient calculations for validation
        for inputs, labels in val_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            _, preds = torch.max(outputs, 1)

            val_loss += loss.item() * inputs.size(0)
            val_corrects += torch.sum(preds == labels)

    epoch_val_loss = val_loss / len(val_subset)
    epoch_val_acc = val_corrects.double() / len(val_subset)

    # Restore the training transforms after validation
    val_subset.dataset.transform = train_transform
    # Print epoch results
    elapsed_time = time.time() - start_time
    print(f'Epoch {epoch+1}/{NUM_EPOCHS} [{elapsed_time:.0f}s]')
    print(f'  Train Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')
    print(f'  Val   Loss: {epoch_val_loss:.4f} Acc: {epoch_val_acc:.4f}')

    # Save the best model
    if epoch_val_acc > best_val_acc:
        best_val_acc = epoch_val_acc
        # deepcopy: keep a snapshot that later training steps cannot modify
        best_model_wts = copy.deepcopy(model.state_dict())
        # Save checkpoint to disk
        torch.save(model.state_dict(), 'best_model_cifar10.pth')
        print(f'  New best model saved (Acc: {epoch_val_acc:.4f})')

print(f"\nTraining complete. Best validation accuracy: {best_val_acc:.4f}")

# Load the best model weights back into the model
model.load_state_dict(best_model_wts)
print("Best model weights loaded.")

Using device: cuda
Class mapping: {'airplane': 0, 'automobile': 1, 'bird': 2, 'cat': 3, 'deer': 4, 'dog': 5, 'frog': 6, 'horse': 7, 'ship': 8, 'truck': 9}

  Starting Training (Feature Extraction)  
Epoch 1/10 [160s]
  Train Loss: 0.9402 Acc: 0.6958
  Val   Loss: 0.5548 Acc: 0.8120
  New best model saved (Acc: 0.8120)
Epoch 2/10 [162s]
  Train Loss: 0.6906 Acc: 0.7645
  Val   Loss: 0.5121 Acc: 0.8242
  New best model saved (Acc: 0.8242)
Epoch 3/10 [163s]
  Train Loss: 0.6702 Acc: 0.7694
  Val   Loss: 0.5203 Acc: 0.8146
Epoch 4/10 [158s]
  Train Loss: 0.6490 Acc: 0.7768
  Val   Loss: 0.4896 Acc: 0.8298
  New best model saved (Acc: 0.8298)
Epoch 5/10 [157s]
  Train Loss: 0.6372 Acc: 0.7810
  Val   Loss: 0.4921 Acc: 0.8276
Epoch 6/10 [169s]
  Train Loss: 0.6312 Acc: 0.7830
  Val   Loss: 0.4733 Acc: 0.8348
  New best model saved (Acc: 0.8348)
Epoch 7/10 [191s]
  Train Loss: 0.6304 Acc: 0.7806
  Val   Loss: 0.4793 Acc: 0.8340
Epoch 8/10 [185s]
  Train Loss: 0.6252 Acc: 0.7854
  Val   Loss: 

### 4. Whole model Fine-Tuning

To improve our model, we now "unfreeze" the entire network and let all of its layers participate in training, using a very small learning rate to avoid destroying the pre-trained knowledge.

In [6]:
# Load the weights of the best model from the first phase.
# map_location places the loaded tensors on the device currently in use,
# so the checkpoint also loads when it was written on a different device.
model.load_state_dict(torch.load('best_model_cifar10.pth', map_location=device))
print("Best model weights loaded, preparing for fine-tuning...")

# Seed the fine-tuned checkpoint with these starting weights. The loop below
# only overwrites it when an epoch beats best_val_acc (carried over from the
# first phase), so without this the file would be missing or stale whenever
# fine-tuning brings no improvement.
torch.save(model.state_dict(), 'fine_tuned_best_model_cifar10.pth')


NUM_EPOCHS_FT = 5       # Train for a few more epochs
LEARNING_RATE_FT = 1e-5 # MUST use a very small learning rate


# 1. Unfreeze all layers
print("Unfreezing all model layers...")
for param in model.parameters():
    param.requires_grad = True

# 2. Create a new optimizer that manages all model parameters
optimizer_ft = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FT)

# 3. Run the training and validation loop again
print(f"  Starting Fine-Tuning (LR={LEARNING_RATE_FT})  ")

for epoch in range(NUM_EPOCHS_FT):
    start_time = time.time()

    #   Training Phase
    model.train()
    # Point the dataset behind val_subset at the augmenting transforms. When
    # train_subset shares that dataset object, this is what makes the
    # training batches augmented.
    val_subset.dataset.transform = train_transform

    running_loss = 0.0
    running_corrects = 0

    for inputs, labels in train_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer_ft.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        # Predicted class = index of the largest logit in each row
        _, preds = torch.max(outputs, 1)
        loss.backward() # Gradients are now calculated for all layers
        optimizer_ft.step() # All layers are updated

        # Weight the batch loss by batch size so the division below yields a
        # per-sample average.
        running_loss += loss.item() * inputs.size(0)
        running_corrects += torch.sum(preds == labels)

    epoch_loss = running_loss / len(train_subset)
    epoch_acc = running_corrects.double() / len(train_subset)

    #   Validation Phase
    model.eval()
    # Validation images must not be augmented
    val_subset.dataset.transform = test_transform

    val_loss = 0.0
    val_corrects = 0
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            _, preds = torch.max(outputs, 1)
            val_loss += loss.item() * inputs.size(0)
            val_corrects += torch.sum(preds == labels)

    epoch_val_loss = val_loss / len(val_subset)
    epoch_val_acc = val_corrects.double() / len(val_subset)

    # Restore the training transforms after validation
    val_subset.dataset.transform = train_transform

    elapsed_time = time.time() - start_time
    print(f'Fine-Tune Epoch {epoch+1}/{NUM_EPOCHS_FT} [{elapsed_time:.0f}s]')
    print(f'  Train Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')
    print(f'  Val   Loss: {epoch_val_loss:.4f} Acc: {epoch_val_acc:.4f}')

    # Overwrite the fine-tuned checkpoint whenever validation accuracy improves
    if epoch_val_acc > best_val_acc:
        best_val_acc = epoch_val_acc
        torch.save(model.state_dict(), 'fine_tuned_best_model_cifar10.pth')
        print(f'  New best fine-tuned model saved (Acc: {epoch_val_acc:.4f})')

print(f"\nFine-tuning complete. Final best validation accuracy: {best_val_acc:.4f}")

Best model weights loaded, preparing for fine-tuning...
Unfreezing all model layers...
  Starting Fine-Tuning (LR=1e-05)  
Fine-Tune Epoch 1/5 [298s]
  Train Loss: 0.3372 Acc: 0.8831
  Val   Loss: 0.1863 Acc: 0.9338
  New best fine-tuned model saved (Acc: 0.9338)
Fine-Tune Epoch 2/5 [310s]
  Train Loss: 0.1839 Acc: 0.9362
  Val   Loss: 0.1537 Acc: 0.9432
  New best fine-tuned model saved (Acc: 0.9432)
Fine-Tune Epoch 3/5 [309s]
  Train Loss: 0.1310 Acc: 0.9544
  Val   Loss: 0.1371 Acc: 0.9518
  New best fine-tuned model saved (Acc: 0.9518)
Fine-Tune Epoch 4/5 [313s]
  Train Loss: 0.0991 Acc: 0.9658
  Val   Loss: 0.1259 Acc: 0.9550
  New best fine-tuned model saved (Acc: 0.9550)
Fine-Tune Epoch 5/5 [304s]
  Train Loss: 0.0748 Acc: 0.9748
  Val   Loss: 0.1168 Acc: 0.9582
  New best fine-tuned model saved (Acc: 0.9582)

Fine-tuning complete. Final best validation accuracy: 0.9582



### 5. Predict test dataset
Using our previously trained and saved best model, run predictions on the test set (test_loader) and report the overall classification accuracy.

In [7]:
import pandas as pd

print("\n--- Starting Final Test Set Evaluation ---")

# 1. Load the best fine-tuned model
# map_location places the loaded tensors on the device currently in use.
model.load_state_dict(torch.load('fine_tuned_best_model_cifar10.pth', map_location=device))
model.eval()

# 2. Loop over the held-out test set (test_loader)
test_corrects = 0
total = 0

with torch.no_grad():  # no gradients are needed for evaluation
    for inputs, labels in test_loader:  # `test_loader` serves the CIFAR-10 test split
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(inputs)

        # Predicted class = index of the largest logit in each row
        _, preds = torch.max(outputs, 1)

        # Accumulate totals as plain Python ints
        total += labels.size(0)
        test_corrects += torch.sum(preds == labels).item()

# 3. Calculate final accuracy
final_accuracy = test_corrects / total
print("Evaluation complete.")
print(f"\nFinal Accuracy on Cifar-10 Test Set: {final_accuracy:.4f}")
print(f"Total Correct: {test_corrects} / {total}")


--- Starting Final Test Set Evaluation ---
Evaluation complete.

Final Accuracy on Cifar-10 Test Set: 0.9559
Total Correct: 9559 / 10000
