# Dogs vs. Cats


This Notebook include:
1.  Data preprocessing
2.  Load train, validation and test datasets
3.  Model define and training
4.  Whole model Fine-Tuning
5.  Predict test dataset

### 1. Data preprocessing

Define training transforms(with augmentaion) and validation transforms. This is because during testing, we want to evaluate the model's true performance on images that are "original" and consistent, rather than on randomly varied images.

In [12]:
import torch
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset
from torchvision.datasets import ImageFolder
from PIL import Image
import os
import glob

# Define image size 
IMG_SIZE = 224

# Mean and std for ImageNet normalization
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

# 1. Define transforms for training data (with augmentation)
train_transform = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),      # 1. Resize the image
    transforms.RandomHorizontalFlip(),          # 2. Randomly flip the image horizontally
    transforms.RandomRotation(10),              # 3. Randomly rotate the image (+/- 10 degrees)
    transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1), # 4. Apply slight color jitter
    transforms.ToTensor(),                      # 5. Convert to PyTorch Tensor (scales to [0, 1])
    transforms.Normalize(mean=mean, std=std)    # 6. Normalize the tensor
])

# 2. Define transforms for validation & test data (no augmentation)
test_transform = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),      # 1. Resize the image
    transforms.ToTensor(),                      # 2. Convert to PyTorch Tensor
    transforms.Normalize(mean=mean, std=std)    # 3. Normalize the tensor
])

print("Data transforms defined successfully.")

Data transforms defined successfully.


### 2. Load train and validation datasets

Creat two dataLoaders: one for training and one for validation.

In [None]:
# 1. Define Data Paths 
TRAIN_DIR = 'datasets/train'
VAL_DIR = 'datasets/val'

BATCH_SIZE = 64 

# 2. Create Dataset Instances

# Load the training set and apply training transforms
train_dataset = ImageFolder(root=TRAIN_DIR, transform=train_transform)

# Load the validation set and apply test transforms (no augmentation)
val_dataset = ImageFolder(root=VAL_DIR, transform=test_transform)

# 3. Check Labels
# ImageFolder assigns labels alphabetically. 'cat' comes before 'dog'.
print(f"Training set class mapping: {train_dataset.class_to_idx}")
print(f"Validation set class mapping: {val_dataset.class_to_idx}")

# 4. Create DataLoaders
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,  # Shuffle the training data
    num_workers=4  # Use multiple processes to load data (set to 0 on Windows if errors occur)
)

val_loader = DataLoader(
    dataset=val_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False, # Validation data does not need to be shuffled
    num_workers=4
)

print(f"\nSuccessfully loaded {len(train_dataset)} training images and {len(val_dataset)} validation images.")

# 5. Check one batch
print("\n  Checking DataLoader  ")
images, labels = next(iter(train_loader))
print(f"Image batch shape: {images.shape}") # [BATCH_SIZE, 3, 224, 224]
print(f"Label batch shape: {labels.shape}") # [BATCH_SIZE]
print(f"Label examples (0=cat, 1=dog): {labels[:5]}")


Training set class mapping: {'cat': 0, 'dog': 1}
Validation set class mapping: {'cat': 0, 'dog': 1}

Successfully loaded 20000 training images and 5000 validation images.

--- Checking DataLoader ---
Image batch shape: torch.Size([64, 3, 224, 224])
Label batch shape: torch.Size([64])
Label examples (0=cat, 1=dog): tensor([0, 1, 1, 1, 1])


### 3. Load test datasets

Thus `test` folder is flat (e.g., `test/1.jpg`, `test/2.jpg`) and has no tags. For this case, we still need a simple custom `Dataset` to load the images and extract their IDs (for the final commit).

In [14]:
class TestImageDataset(Dataset):
    """
    Custom Dataset for loading the test set, which has a flat structure
    (e.g., test/1.jpg, test/2.jpg ...).
    """
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        # Assume all test images are .jpg format
        self.file_paths = glob.glob(os.path.join(self.root_dir, '*.jpg'))
        # Sort paths by ID (1.jpg, 2.jpg, ...), ensuring correct submission order
        self.file_paths.sort(key=lambda x: int(os.path.basename(x).split('.')[0]))

    def __len__(self):
        # Return the total number of test images
        return len(self.file_paths)

    def __getitem__(self, idx):
        # Get the image path for the given index
        img_path = self.file_paths[idx]
        
        # Load the image (using PIL) and convert to RGB
        image = Image.open(img_path).convert('RGB')
        
        # Apply the pre-processing transforms
        if self.transform:
            image = self.transform(image)
        
        # Extract the image ID from the filename (e.g., '.../test/123.jpg' -> 123)
        img_id = int(os.path.basename(img_path).split('.')[0])
        
        # Return the processed image and its ID
        return image, img_id

print("TestImageDataset class defined successfully.")

TestImageDataset class defined successfully.


In [15]:
#  1. Define Test Path 
# !!! Make sure this matches your folder name !!!
TEST_DIR = 'datasets/test'
# TEST_DIR = 'datasets/test12500'

try:
    #  2. Create Test Dataset 
    # Use the custom TestImageDataset defined earlier
    test_dataset = TestImageDataset(root_dir=TEST_DIR, transform=test_transform)
    
    if len(test_dataset) > 0:
        #   3. Create Test DataLoader  
        test_loader = DataLoader(
            dataset=test_dataset,
            batch_size=BATCH_SIZE, # Can re-use the BATCH_SIZE from training
            shuffle=False,      # CRITICAL: Never shuffle the test set!
            num_workers=4
        )
        
        print(f"\nSuccessfully loaded {len(test_dataset)} test images.")
        
        #   4. Check one batch  
        print("\n  Checking Test Loader  ")
        images, ids = next(iter(test_loader))
        print(f"Test image batch shape: {images.shape}")
        print(f"Test image ID shape: {ids.shape}")
        print(f"Test image ID examples: {ids[:5]}")
    else:
        print(f"Warning: No .jpg files found in '{TEST_DIR}'.")

except FileNotFoundError:
    print(f"Error: Test path '{TEST_DIR}' not found.")
except Exception as e:
    print(f"Error loading test data: {e}")


Successfully loaded 500 test images.

--- Checking Test Loader ---
Test image batch shape: torch.Size([64, 3, 224, 224])
Test image ID shape: torch.Size([64])
Test image ID examples: tensor([1, 2, 3, 4, 5])


### 4. Model define and training
During the model training phase, we used transfer learning, loading a pre-trained ResNet-34 model. We trained only the last layer of this pre-trained model (ResNet-34) to quickly complete the cat and dog classification task. The code loops 10 times, evaluating the performance with a validation set after each loop, and finally saving the best-performing model.

In [16]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models
import time
import copy

# 1. Set device (GPU or CPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# 2. Load pre-trained model (ResNet-34)
# Download ResNet-34 with weights pre-trained on ImageNet
model = models.resnet34(weights=models.ResNet34_Weights.IMAGENET1K_V1)

# 3. Freeze all pre-trained layers
# Set requires_grad = False to stop gradients from flowing to these layers
for param in model.parameters():
    param.requires_grad = False

# 4. Replace the final layer (classifier)
# Get the number of input features for the original final layer ('fc')
num_ftrs = model.fc.in_features 

# Replace it with a new Linear layer
# Output features = 1 (for binary classification: 0=cat, 1=dog)
model.fc = nn.Linear(num_ftrs, 1)

# Move the new model structure to the selected device (GPU)
model = model.to(device)

# 5. Define Loss Function and Optimizer
# Loss Function: BCEWithLogitsLoss
# This is ideal for binary classification. It combines a Sigmoid layer
# with Binary Cross Entropy loss for better numerical stability.
criterion = nn.BCEWithLogitsLoss()

# Optimizer: Adam
# IMPORTANT: We only pass the parameters of the new final layer
# to the optimizer. Only this layer will be trained.
optimizer = optim.Adam(model.fc.parameters(), lr=0.001)

# Get the class index mapping (from the previous cell) for reference
# We expect {'cat': 0, 'dog': 1}
class_to_idx = train_dataset.class_to_idx
print(f"Class mapping: {class_to_idx}")


# 6. Training and Validation Loop
NUM_EPOCHS = 10 # Number of times to loop through the entire training dataset
best_val_acc = 0.0 # Track the best validation accuracy achieved
best_model_wts = copy.deepcopy(model.state_dict()) # Track the weights of the best model

print("\n  Starting Training (Feature Extraction)  ")

for epoch in range(NUM_EPOCHS):
    start_time = time.time()
    
    #   Training Phase  
    model.train() # Set model to training mode (enables dropout, etc.)
    running_loss = 0.0
    running_corrects = 0
    
    for inputs, labels in train_loader:
        # Move data and labels to the GPU
        inputs = inputs.to(device)
        # Reshape labels to [BATCH_SIZE, 1] and type float for the loss function
        labels = labels.float().view(-1, 1).to(device) 
        
        # Clear previous gradients
        optimizer.zero_grad()
        
        # Forward pass: get raw model output (logits)
        outputs = model(inputs) 
        
        # Calculate the loss
        loss = criterion(outputs, labels)
        
        # Calculate predictions (0 or 1)
        # 1. Apply sigmoid to logits (0 to 1 probability)
        # 2. Threshold at 0.5 to get True/False
        # 3. Convert to float (0.0 or 1.0)
        preds = (torch.sigmoid(outputs) > 0.5).float() 
        
        # Backpropagation (calculate gradients)
        loss.backward()
        # Update optimizer (only for model.fc weights)
        optimizer.step()
        
        # Accumulate statistics for this epoch
        running_loss += loss.item() * inputs.size(0)
        running_corrects += torch.sum(preds == labels.data)
        
    epoch_loss = running_loss / len(train_dataset)
    epoch_acc = running_corrects.double() / len(train_dataset)
    
    
    #   Validation Phase  
    model.eval() # Set model to evaluation mode (disables dropout, etc.)
    val_loss = 0.0
    val_corrects = 0
    
    with torch.no_grad(): # Disable gradient calculations for validation
        for inputs, labels in val_loader:
            inputs = inputs.to(device)
            labels = labels.float().view(-1, 1).to(device)
            
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            
            preds = (torch.sigmoid(outputs) > 0.5).float()
            
            val_loss += loss.item() * inputs.size(0)
            val_corrects += torch.sum(preds == labels.data)
            
    epoch_val_loss = val_loss / len(val_dataset)
    epoch_val_acc = val_corrects.double() / len(val_dataset)
    
    # Print epoch results
    elapsed_time = time.time() - start_time
    print(f'Epoch {epoch+1}/{NUM_EPOCHS} [{elapsed_time:.0f}s]')
    print(f'  Train Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')
    print(f'  Val   Loss: {epoch_val_loss:.4f} Acc: {epoch_val_acc:.4f}')

    # Save the best model
    if epoch_val_acc > best_val_acc:
        best_val_acc = epoch_val_acc
        best_model_wts = copy.deepcopy(model.state_dict())
        # Save checkpoint to disk
        torch.save(model.state_dict(), 'best_model.pth')
        print(f'  New best model saved (Acc: {epoch_val_acc:.4f})')

print(f"\nTraining complete. Best validation accuracy: {best_val_acc:.4f}")

# Load the best model weights back into the model
model.load_state_dict(best_model_wts)
print("Best model weights loaded.")

Using device: cuda
Class mapping: {'cat': 0, 'dog': 1}

--- Starting Training (Feature Extraction) ---
Epoch 1/10 [24s]
  Train Loss: 0.1618 Acc: 0.9534
  Val   Loss: 0.0778 Acc: 0.9746
  New best model saved (Acc: 0.9746)
Epoch 2/10 [25s]
  Train Loss: 0.0807 Acc: 0.9722
  Val   Loss: 0.0610 Acc: 0.9796
  New best model saved (Acc: 0.9796)
Epoch 3/10 [25s]
  Train Loss: 0.0716 Acc: 0.9738
  Val   Loss: 0.0545 Acc: 0.9810
  New best model saved (Acc: 0.9810)
Epoch 4/10 [25s]
  Train Loss: 0.0654 Acc: 0.9761
  Val   Loss: 0.0508 Acc: 0.9824
  New best model saved (Acc: 0.9824)
Epoch 5/10 [25s]
  Train Loss: 0.0616 Acc: 0.9766
  Val   Loss: 0.0505 Acc: 0.9816
Epoch 6/10 [25s]
  Train Loss: 0.0588 Acc: 0.9787
  Val   Loss: 0.0487 Acc: 0.9826
  New best model saved (Acc: 0.9826)
Epoch 7/10 [24s]
  Train Loss: 0.0577 Acc: 0.9768
  Val   Loss: 0.0485 Acc: 0.9830
  New best model saved (Acc: 0.9830)
Epoch 8/10 [24s]
  Train Loss: 0.0586 Acc: 0.9777
  Val   Loss: 0.0479 Acc: 0.9844
  New best 

### 5. Whole model Fine-Tuning

To improve out model, we now need to "unfreeze" the entire model and let all 34 layers participate in training, but we will use a very small learning rate to avoid destroying the pre-trained knowledge.

In [18]:
# Load the weights of the best model from the first phase
model.load_state_dict(torch.load('best_model.pth'))
print("Best model weights loaded, preparing for fine-tuning...")


NUM_EPOCHS_FT = 5       # Train for a few more epochs
LEARNING_RATE_FT = 1e-5 # MUST use a very small learning rate


# 1. Unfreeze all layers
print("Unfreezing all model layers...")
for param in model.parameters():
    param.requires_grad = True

# 2. Create a new optimizer to manage all parameters
optimizer_ft = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FT)

# 3. Run the training and validation loop again
print(f"  Starting Fine-Tuning (LR={LEARNING_RATE_FT})  ")

for epoch in range(NUM_EPOCHS_FT):
    start_time = time.time()
    
    #   Training Phase  
    model.train()
    running_loss = 0.0
    running_corrects = 0
    
    for inputs, labels in train_loader:
        inputs = inputs.to(device)
        labels = labels.float().view(-1, 1).to(device) 
        
        optimizer_ft.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        preds = (torch.sigmoid(outputs) > 0.5).float() 
        loss.backward() # Gradients are now calculated for all layers
        optimizer_ft.step() # All layers are updated
        
        running_loss += loss.item() * inputs.size(0)
        running_corrects += torch.sum(preds == labels.data)
        
    epoch_loss = running_loss / len(train_dataset)
    epoch_acc = running_corrects.double() / len(train_dataset)
    
    #   Validation Phase  
    model.eval()
    val_loss = 0.0
    val_corrects = 0
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs = inputs.to(device)
            labels = labels.float().view(-1, 1).to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            preds = (torch.sigmoid(outputs) > 0.5).float()
            val_loss += loss.item() * inputs.size(0)
            val_corrects += torch.sum(preds == labels.data)
            
    epoch_val_loss = val_loss / len(val_dataset)
    epoch_val_acc = val_corrects.double() / len(val_dataset)
    
    elapsed_time = time.time() - start_time
    print(f'Fine-Tune Epoch {epoch+1}/{NUM_EPOCHS_FT} [{elapsed_time:.0f}s]')
    print(f'  Train Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')
    print(f'  Val   Loss: {epoch_val_loss:.4f} Acc: {epoch_val_acc:.4f}')

    # Save the final fine-tuned model
    if epoch_val_acc > best_val_acc: 
        best_val_acc = epoch_val_acc
        torch.save(model.state_dict(), 'fine_tuned_best_model.pth')
        print(f'  New best fine-tuned model saved (Acc: {epoch_val_acc:.4f})')

print(f"\nFine-tuning complete. Final best validation accuracy: {best_val_acc:.4f}")

Best model weights loaded, preparing for fine-tuning...
Unfreezing all model layers...
--- Starting Fine-Tuning (LR=1e-05) ---
Fine-Tune Epoch 1/5 [63s]
  Train Loss: 0.0427 Acc: 0.9841
  Val   Loss: 0.0285 Acc: 0.9906
Fine-Tune Epoch 2/5 [64s]
  Train Loss: 0.0235 Acc: 0.9916
  Val   Loss: 0.0271 Acc: 0.9912
  New best fine-tuned model saved (Acc: 0.9912)
Fine-Tune Epoch 3/5 [63s]
  Train Loss: 0.0140 Acc: 0.9953
  Val   Loss: 0.0267 Acc: 0.9908
Fine-Tune Epoch 4/5 [63s]
  Train Loss: 0.0103 Acc: 0.9964
  Val   Loss: 0.0271 Acc: 0.9908
Fine-Tune Epoch 5/5 [63s]
  Train Loss: 0.0098 Acc: 0.9970
  Val   Loss: 0.0257 Acc: 0.9916
  New best fine-tuned model saved (Acc: 0.9916)

Fine-tuning complete. Final best validation accuracy: 0.9916



### 6. Predict test dataset
Using our previously trained and saved best model, run predictions on the test set (test_loader) and format the results as labels of 0 and 1.

In [19]:
import pandas as pd

# 1. Set model to evaluation mode
model.eval()

results = [] # List to store (id, label) pairs
# Class indices { 'cat': 0, 'dog': 1 }
if class_to_idx.get('dog') != 1 or class_to_idx.get('cat') != 0:
    print("Warning: Class indices do not match expected (cat=0, dog=1)!")
    print(f"Current indices: {class_to_idx}")


print("\n--- Starting predictions on the test set ---")

# 2. Prediction Loop
with torch.no_grad():
    for images, ids in test_loader:
        # Move images to the GPU
        images = images.to(device)
        
        # Forward pass: get raw model outputs (logits)
        outputs = model(images) # Shape: [Batch, 1]
        
        # --- NOTE: This block generates 0/1 integer labels ---
        # --- This is BAD for LogLoss competitions ---
        
        # Convert logits to probabilities (0.0 to 1.0)
        probs = torch.sigmoid(outputs)
        
        # Convert probabilities to 0 or 1 predictions
        # (probs > 0.5) creates a boolean (True/False) tensor
        # .int() converts True -> 1 and False -> 0
        preds = (probs > 0.5).int() 
        
        # Move prediction tensor from GPU to CPU ( .cpu() ) and convert to a list
        preds_list = preds.view(-1).cpu().tolist()
        
        # Store the (ID, Label) pairs
        for img_id, label in zip(ids, preds_list):
            results.append({
                "id": int(img_id), # Ensure ID is an integer
                "label": label     # The predicted label (0 or 1)
            })

print("Predictions complete.")

# 3. Create Submission File
if len(results) > 0:
    # Convert the results list to a pandas DataFrame
    submission_df = pd.DataFrame(results)
    
    submission_df = submission_df.sort_values(by="id")
    
    # Save to a .csv file
    # index=False prevents pandas from writing an extra index column
    submission_df.to_csv("submission.csv", index=False)
    
    print(f"\nSuccessfully created 'submission.csv'.")
    print("File preview:")
    print(submission_df.head()) # Print the first 5 rows
    print(f"\nTotal rows: {len(submission_df)}")

else:
    print("No predictions were generated. Check your test_loader.")


--- Starting predictions on the test set ---
Predictions complete.

Successfully created 'submission.csv'.
File preview:
   id  label
0   1      0
1   2      0
2   3      0
3   4      1
4   5      1

Total rows: 500


### 7. Predict test dataset with probability

We need to modify cell 6 to output the raw probability instead of the 0/1 label. This is because the Kaggle competition requires submitting the "probability of being a dog," allowing us to assess the accuracy of our tests.

In [20]:
import pandas as pd

model.eval()
results = []


print("\n--- Starting predictions on test set (generating probabilities) ---")

# 1. Prediction Loop 
with torch.no_grad():
    for images, ids in test_loader:
        images = images.to(device)
        
        # Get raw model outputs (logits)
        outputs = model(images) # Shape: [Batch, 1]
        
        # --- CRITICAL CHANGE for LogLoss ---
        # 1. Do NOT convert to 0/1
        # 2. Submit the raw probability
        # Apply sigmoid to convert logits to probabilities (0.0 to 1.0)
        probs = torch.sigmoid(outputs) 
        
        # Move probabilities from GPU back to CPU
        probs_list = probs.view(-1).cpu().tolist()
        
        # Store (ID, Probability) pairs
        for img_id, probability in zip(ids, probs_list):
            results.append({
                "id": int(img_id),
                "label": probability  # The 'label' is now a float, e.g., 0.983 or 0.012
            })

print("Predictions complete.")

# 2. Create Submission File ---
submission_df = pd.DataFrame(results)
submission_df = submission_df.sort_values(by="id")

# --- CRITICAL: LogLoss Trap Protection! ---
# LogLoss is infinite at exactly 0 or 1.
# We must "clip" our probabilities to a safe range, e.g., [0.005, 0.995]
submission_df['label'] = submission_df['label'].clip(0.005, 0.995)

# 3.Save the final CSV file
submission_df.to_csv("submission_with_probs.csv", index=False)

print(f"\nSuccessfully created 'submission_with_probs.csv'.")
print("File preview (note 'label' column now contains probabilities):")
print(submission_df.head())


--- Starting predictions on test set (generating probabilities) ---
Predictions complete.

Successfully created 'submission_with_probs.csv'.
File preview (note 'label' column now contains probabilities):
   id  label
0   1  0.005
1   2  0.005
2   3  0.005
3   4  0.995
4   5  0.995
