# Dogs vs. Cats


This notebook includes:
1.  Data preprocessing
2.  Load train, validation and test datasets
3.  Model define and training
4.  Whole model Fine-Tuning
5.  Predict test dataset

### 1. Data preprocessing

Define training transforms (with augmentation) and validation/test transforms (without augmentation). The evaluation transforms skip augmentation because, during validation and testing, we want to measure the model's true performance on images that are "original" and consistent, rather than on randomly varied images.

In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset, Subset # <--- 导入 Subset
from torch.utils.data import random_split 
from torchvision.datasets import CIFAR10
import os
import numpy as np

import torch.nn as nn
import torch.optim as optim
from torchvision import models
import time
import copy
# 1. DATA PREPROCESSING (unchanged)
# Every image is resized to IMG_SIZE x IMG_SIZE and normalised per channel
# with the `mean` / `std` values below (the standard ImageNet statistics).
IMG_SIZE = 224
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

# Training pipeline: geometric and colour augmentation on the PIL image,
# then tensor conversion, random erasing on the tensor, and normalisation.
train_transform = transforms.Compose(
    [
        transforms.Resize((IMG_SIZE, IMG_SIZE)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(10),
        transforms.ColorJitter(
            brightness=0.1,
            contrast=0.1,
            saturation=0.1,
        ),
        transforms.ToTensor(),
        transforms.RandomErasing(
            p=0.5,
            scale=(0.02, 0.2),
            ratio=(0.3, 3.3),
            value=0,
        ),
        transforms.Normalize(mean=mean, std=std),
    ]
)

# Evaluation pipeline: deterministic, no augmentation.
test_transform = transforms.Compose(
    [
        transforms.Resize((IMG_SIZE, IMG_SIZE)),
        transforms.ToTensor(),
        transforms.Normalize(mean=mean, std=std),
    ]
)

print("Data transforms defined successfully (with RandomErasing).")

  from .autonotebook import tqdm as notebook_tqdm


Data transforms defined successfully (with RandomErasing).


### 2. Load train, validation and test datasets

Create three DataLoaders: one for training (drawn through a weighted sampler to counter the simulated class imbalance), one for validation and one for testing.

In [2]:
# Build the CIFAR-10 datasets, carve out a validation split, simulate a class
# imbalance on the training split, and create the train/val/test DataLoaders.
from torch.utils.data import WeightedRandomSampler

BATCH_SIZE = 64
DATA_DIR = './dataset'

full_train_dataset = torchvision.datasets.CIFAR10(
    root=DATA_DIR, train=True, download=True, transform=train_transform)

test_dataset = torchvision.datasets.CIFAR10(
    root=DATA_DIR, train=False, download=True, transform=test_transform)

# Seeded split so the same images land in train / val on every run.
# NOTE: both subsets wrap the same `full_train_dataset` object, so
# `val_subset` yields `train_transform` outputs unless the shared
# `transform` attribute is switched before validation (the training loops
# later in this notebook do exactly that).
VAL_SPLIT_SIZE = 5000
TRAIN_SPLIT_SIZE = len(full_train_dataset) - VAL_SPLIT_SIZE
train_subset, val_subset = random_split(
    full_train_dataset,
    [TRAIN_SPLIT_SIZE, VAL_SPLIT_SIZE],
    generator=torch.Generator().manual_seed(42)
)

print(f"Original split: {len(train_subset)} train, {len(val_subset)} val")

# Simulate data imbalance
# Keep 5 categories (0-4) unchanged, but reduce the amount of data in the
# other 5 categories (5-9) by 90%
print("\n--- Simulating Data Imbalance for Task (h) ---")
# {0: 1.0, 1: 1.0, 2: 1.0, 3: 1.0, 4: 1.0,
#  5: 0.1, 6: 0.1, 7: 0.1, 8: 0.1, 9: 0.1}
imbalance_ratios = {i: 1.0 if i < 5 else 0.1 for i in range(10)}

all_train_targets = np.array(full_train_dataset.targets)
original_train_indices = np.array(train_subset.indices)
original_train_labels = all_train_targets[original_train_indices]

# Each sample is kept with the probability assigned to its class. A dedicated
# seeded generator is used (instead of the global, unseeded NumPy RNG) so the
# imbalanced subset is reproducible, just like the seeded split above.
imbalance_rng = np.random.default_rng(42)
keep_probability = np.array([imbalance_ratios[c] for c in range(10)])
keep_mask = (
    imbalance_rng.random(len(original_train_labels))
    < keep_probability[original_train_labels]
)
imbalanced_indices = original_train_indices[keep_mask].tolist()

imbalanced_train_dataset = Subset(full_train_dataset, imbalanced_indices)
print(f"Created imbalanced training set with {len(imbalanced_train_dataset)} images.")

# Address data imbalances
# Calculate the weights for `imbalanced_train_dataset`

# Get the labels of all the images in the new dataset
imbalanced_labels = all_train_targets[imbalanced_train_dataset.indices]

# Calculate the number of each category
class_counts = np.bincount(imbalanced_labels, minlength=10)
print(f"Imbalanced class counts: {class_counts}")

# Inverse-frequency class weights; the epsilon avoids division by zero if a
# class ends up with no samples.
class_weights = 1.0 / (class_counts + 1e-6)

# WeightedRandomSampler
print("\n--- Applying Solution 1: WeightedRandomSampler ---")
# Assign a weight to each image in the dataset for its category
sample_weights = class_weights[imbalanced_labels]

# Create a sampler: draws len(sample_weights) indices per epoch, with
# replacement, proportionally to the per-sample weights.
sampler = WeightedRandomSampler(
    weights=sample_weights,
    num_samples=len(sample_weights),
    replacement=True
)

# Create DataLoaders
# `shuffle` must stay False here because a sampler is supplied.
train_loader = DataLoader(
    dataset=imbalanced_train_dataset,
    batch_size=BATCH_SIZE,
    sampler=sampler,
    shuffle=False,
    num_workers=4
)

print("train_loader created using WeightedRandomSampler.")

# ---- WeightedLoss  ----
# class_weights_tensor = torch.tensor(class_weights, dtype=torch.float)
# print(f"\n--- Applying Solution 2: WeightedLoss (DISABLED) ---")
# train_loader = DataLoader(
#     dataset=imbalanced_train_dataset,
#     batch_size=BATCH_SIZE,
#     shuffle=True, # <--- use shuffle instead of a sampler
#     num_workers=4
# )
# print("train_loader created using shuffle=True (for WeightedLoss).")

val_loader = DataLoader(
    dataset=val_subset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=4
)
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=4
)


Files already downloaded and verified
Files already downloaded and verified
Original split: 45000 train, 5000 val

--- Simulating Data Imbalance for Task (h) ---
Created imbalanced training set with 24788 images.
Imbalanced class counts: [4512 4488 4468 4529 4529  458  488  439  434  443]

--- Applying Solution 1: WeightedRandomSampler ---
train_loader created using WeightedRandomSampler.


### 3. Model define and training
During the model training phase, we use transfer learning: we load a pre-trained ResNet-34 model, freeze its weights, and train only the newly added last (fully connected) layer so that the 10-class CIFAR-10 classification task can be learned quickly. The code loops for 10 epochs, evaluating the performance on the validation set after each epoch, and saves the best-performing model.

In [3]:
# Stage 1: feature extraction. All pretrained ResNet-34 weights are frozen and
# only a freshly initialised 10-way `fc` head is optimised.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"\nUsing device: {device}")

model = models.resnet34(weights=models.ResNet34_Weights.IMAGENET1K_V1)
model.requires_grad_(False)  # freeze every pretrained parameter
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, 10)  # new head; its parameters are trainable
model = model.to(device)

criterion = nn.CrossEntropyLoss()

NUM_EPOCHS = 10
optimizer = optim.Adam(model.fc.parameters(), lr=0.001)
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=NUM_EPOCHS)

# Best validation accuracy seen so far and the weights that produced it.
best_val_acc = 0.0
best_model_wts = copy.deepcopy(model.state_dict())

print("\n  Starting Training (Feature Extraction) on Imbalanced Data  ")

for epoch in range(NUM_EPOCHS):
    start_time = time.time()

    # ---- Training phase ----
    # The train and validation subsets share one underlying dataset object,
    # so its transform is switched to the augmenting pipeline here.
    model.train()
    val_subset.dataset.transform = train_transform

    running_loss, running_corrects = 0.0, 0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        preds = outputs.max(dim=1).indices
        # criterion returns a batch mean; weight it by the batch size so the
        # epoch figure is a per-sample average.
        running_loss += loss.item() * inputs.size(0)
        running_corrects += (preds == labels).sum()

    num_train_samples = len(train_loader.dataset)
    epoch_loss = running_loss / num_train_samples
    epoch_acc = running_corrects.double() / num_train_samples

    # ---- Validation phase ----
    # Switch the shared dataset to the deterministic evaluation pipeline.
    model.eval()
    val_subset.dataset.transform = test_transform

    val_loss, val_corrects = 0.0, 0
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            preds = outputs.max(dim=1).indices
            val_loss += loss.item() * inputs.size(0)
            val_corrects += (preds == labels).sum()

    num_val_samples = len(val_subset)
    epoch_val_loss = val_loss / num_val_samples
    epoch_val_acc = val_corrects.double() / num_val_samples

    # Restore the augmenting pipeline on the shared dataset.
    val_subset.dataset.transform = train_transform

    elapsed_time = time.time() - start_time
    print(f'Epoch {epoch+1}/{NUM_EPOCHS} [{elapsed_time:.0f}s] LR: {scheduler.get_last_lr()[0]:.1e}')
    print(f'  Train Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')
    print(f'  Val   Loss: {epoch_val_loss:.4f} Acc: {epoch_val_acc:.4f}')

    # Keep (in memory and on disk) the weights with the best validation accuracy.
    if epoch_val_acc > best_val_acc:
        best_val_acc = epoch_val_acc
        best_model_wts = copy.deepcopy(model.state_dict())
        torch.save(model.state_dict(), 'best_model_cifar10_unbalanced.pth')
        print(f'  New best model saved (Acc: {epoch_val_acc:.4f})')

    scheduler.step()

print(f"\nTraining complete. Best validation accuracy: {best_val_acc:.4f}")
model.load_state_dict(best_model_wts)
print("Best model weights loaded.")


Using device: cuda

  Starting Training (Feature Extraction) on Imbalanced Data  
Epoch 1/10 [123s] LR: 1.0e-03
  Train Loss: 1.2549 Acc: 0.5870
  Val   Loss: 0.7027 Acc: 0.7712
  New best model saved (Acc: 0.7712)
Epoch 2/10 [121s] LR: 9.8e-04
  Train Loss: 0.9263 Acc: 0.6883
  Val   Loss: 0.5951 Acc: 0.7990
  New best model saved (Acc: 0.7990)
Epoch 3/10 [123s] LR: 9.0e-04
  Train Loss: 0.8724 Acc: 0.7010
  Val   Loss: 0.5903 Acc: 0.7992
  New best model saved (Acc: 0.7992)
Epoch 4/10 [124s] LR: 7.9e-04
  Train Loss: 0.8384 Acc: 0.7086
  Val   Loss: 0.5868 Acc: 0.7938
Epoch 5/10 [122s] LR: 6.5e-04
  Train Loss: 0.8283 Acc: 0.7166
  Val   Loss: 0.5493 Acc: 0.8046
  New best model saved (Acc: 0.8046)
Epoch 6/10 [123s] LR: 5.0e-04
  Train Loss: 0.8194 Acc: 0.7150
  Val   Loss: 0.5618 Acc: 0.8010
Epoch 7/10 [121s] LR: 3.5e-04
  Train Loss: 0.7995 Acc: 0.7244
  Val   Loss: 0.5506 Acc: 0.8100
  New best model saved (Acc: 0.8100)
Epoch 8/10 [121s] LR: 2.1e-04
  Train Loss: 0.7858 Acc: 0.73

### 4. Whole model Fine-Tuning

To improve our model, we now "unfreeze" the entire network and let all 34 layers participate in training, but with a very small learning rate to avoid destroying the pre-trained knowledge.

In [4]:
# Stage 2: whole-model fine-tuning. Starts from the best feature-extraction
# checkpoint, unfreezes every parameter and trains with a very small LR.
FT_CHECKPOINT_PATH = 'fine_tuned_best_model_cifar10_unbalanced.pth'

# map_location keeps the load working when the checkpoint was written on a
# different device than the one currently in use.
model.load_state_dict(
    torch.load('best_model_cifar10_unbalanced.pth', map_location=device))
print("Best model weights loaded, preparing for fine-tuning...")

# `best_val_acc` carries over from the previous stage, so the checkpoint below
# is only overwritten when a fine-tune epoch beats it. Write the starting
# weights first so the file always exists and always belongs to this run,
# even if no fine-tune epoch improves on the starting point.
torch.save(model.state_dict(), FT_CHECKPOINT_PATH)

NUM_EPOCHS_FT = 5
LEARNING_RATE_FT = 1e-5

print("Unfreezing all model layers...")
for param in model.parameters():
    param.requires_grad = True

optimizer_ft = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FT)
scheduler_ft = optim.lr_scheduler.CosineAnnealingLR(optimizer_ft, T_max=NUM_EPOCHS_FT)

print(f"  Starting Fine-Tuning (LR={LEARNING_RATE_FT}, Epochs={NUM_EPOCHS_FT})  ")

for epoch in range(NUM_EPOCHS_FT):
    start_time = time.time()

    #   Training Phase
    # The train and validation subsets share one underlying dataset object,
    # so its transform is switched to the augmenting pipeline here.
    model.train()
    val_subset.dataset.transform = train_transform

    running_loss = 0.0
    running_corrects = 0

    for inputs, labels in train_loader:  # train_loader still uses the sampler
        inputs = inputs.to(device)
        labels = labels.to(device)
        optimizer_ft.zero_grad()
        outputs = model(inputs)
        # criterion is the one defined earlier (with or without class weights)
        loss = criterion(outputs, labels)
        _, preds = torch.max(outputs, 1)
        loss.backward()
        optimizer_ft.step()
        # criterion returns a batch mean; weight it by the batch size so the
        # epoch figure is a per-sample average.
        running_loss += loss.item() * inputs.size(0)
        running_corrects += torch.sum(preds == labels)

    epoch_loss = running_loss / len(train_loader.dataset)
    epoch_acc = running_corrects.double() / len(train_loader.dataset)

    #   Validation Phase (unchanged)
    # Switch the shared dataset to the deterministic evaluation pipeline.
    model.eval()
    val_subset.dataset.transform = test_transform

    val_loss = 0.0
    val_corrects = 0
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            _, preds = torch.max(outputs, 1)
            val_loss += loss.item() * inputs.size(0)
            val_corrects += torch.sum(preds == labels)

    epoch_val_loss = val_loss / len(val_subset)
    epoch_val_acc = val_corrects.double() / len(val_subset)

    # Restore the augmenting pipeline on the shared dataset.
    val_subset.dataset.transform = train_transform

    elapsed_time = time.time() - start_time
    print(f'Fine-Tune Epoch {epoch+1}/{NUM_EPOCHS_FT} [{elapsed_time:.0f}s] LR: {scheduler_ft.get_last_lr()[0]:.1e}')
    print(f'  Train Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')
    print(f'  Val   Loss: {epoch_val_loss:.4f} Acc: {epoch_val_acc:.4f}')

    # Overwrite the checkpoint only when validation accuracy improves.
    if epoch_val_acc > best_val_acc:
        best_val_acc = epoch_val_acc
        torch.save(model.state_dict(), FT_CHECKPOINT_PATH)
        print(f'  New best fine-tuned model saved (Acc: {epoch_val_acc:.4f})')

    scheduler_ft.step()

print(f"\nFine-tuning complete. Final best validation accuracy: {best_val_acc:.4f}")

Best model weights loaded, preparing for fine-tuning...
Unfreezing all model layers...
  Starting Fine-Tuning (LR=1e-05, Epochs=5)  
Fine-Tune Epoch 1/5 [176s] LR: 1.0e-05
  Train Loss: 0.4758 Acc: 0.8344
  Val   Loss: 0.2480 Acc: 0.9168
  New best fine-tuned model saved (Acc: 0.9168)
Fine-Tune Epoch 2/5 [184s] LR: 9.0e-06
  Train Loss: 0.2865 Acc: 0.9012
  Val   Loss: 0.1979 Acc: 0.9324
  New best fine-tuned model saved (Acc: 0.9324)
Fine-Tune Epoch 3/5 [192s] LR: 6.5e-06
  Train Loss: 0.2176 Acc: 0.9271
  Val   Loss: 0.1852 Acc: 0.9362
  New best fine-tuned model saved (Acc: 0.9362)
Fine-Tune Epoch 4/5 [193s] LR: 3.5e-06
  Train Loss: 0.1901 Acc: 0.9361
  Val   Loss: 0.1820 Acc: 0.9376
  New best fine-tuned model saved (Acc: 0.9376)
Fine-Tune Epoch 5/5 [192s] LR: 9.5e-07
  Train Loss: 0.1781 Acc: 0.9392
  Val   Loss: 0.1800 Acc: 0.9386
  New best fine-tuned model saved (Acc: 0.9386)

Fine-tuning complete. Final best validation accuracy: 0.9386



### 5. Predict test dataset
Using our previously trained and saved best fine-tuned model, run predictions on the held-out CIFAR-10 test set (test_loader) and report the overall accuracy (number of correct predictions out of the total).

In [5]:
# Final evaluation: load the fine-tuned checkpoint and measure top-1 accuracy
# over every batch of `test_loader`.
print("\n--- Starting Final Test Set Evaluation (on Holdout Set) ---")

fine_tuned_state = torch.load(
    'fine_tuned_best_model_cifar10_unbalanced.pth', map_location=device
)
model.load_state_dict(fine_tuned_state)
model.eval()

total = 0
test_corrects = 0

with torch.no_grad():
    for inputs, labels in test_loader:  # the real held-out test loader
        inputs, labels = inputs.to(device), labels.to(device)
        preds = model(inputs).max(dim=1).indices
        test_corrects += (preds == labels).sum()
        total += labels.size(0)

final_accuracy = test_corrects.double() / total
print("Evaluation complete.")
print(f"\nFinal UNBIASED Accuracy on Cifar-10 Test Set: {final_accuracy:.4f}")
print(f"Total Correct: {test_corrects} / {total}")


--- Starting Final Test Set Evaluation (on Holdout Set) ---
Evaluation complete.

Final UNBIASED Accuracy on Cifar-10 Test Set: 0.9334
Total Correct: 9334 / 10000
