# Transfer Learning with a Pre-trained VGG16 for Image Classification

This notebook demonstrates how to implement and train a pretrained VGG16 model using PyTorch for binary image classification (Dogs vs Cats). We'll break down the implementation into several key sections:
1. Setting up the environment and importing dependencies
2. Data preparation and loading
3. Model architecture implementation
4. Training utilities and visualization functions
5. Training loop and model evaluation

## 1. Setup and Dependencies

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms, models, datasets
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import numpy as np
import time
from PIL import Image
import os

In [None]:
# Select the compute device: the GPU when CUDA is available, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [None]:
# Constants
BATCH_SIZE = 256  # mini-batch size passed to the DataLoaders
IMG_SIZE = 224  # images are resized to IMG_SIZE x IMG_SIZE by the transforms
NUM_EPOCHS = 10  # number of epochs passed to the training call

## 2. Data Preparation

### 2.1 Data Augmentation and Transforms
We'll set up data augmentation for training and basic transforms for validation. Data augmentation helps prevent overfitting and improves model generalization.

In [None]:
# Per-channel statistics from EDA on the training set; both pipelines normalise with them
_channel_mean = [0.4863, 0.4532, 0.4155]
_channel_std = [0.2621, 0.2557, 0.2582]
_target_size = (IMG_SIZE, IMG_SIZE)

# Random augmentations, applied to training images only
_augmentations = [
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(20),
    transforms.RandomAffine(0, translate=(0.14, 0.14)),
    transforms.RandomResizedCrop(IMG_SIZE, scale=(0.8, 1.0)),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
]

# Training pipeline: resize -> augment -> tensor -> normalise
train_transform = transforms.Compose(
    [transforms.Resize(_target_size)]
    + _augmentations
    + [
        transforms.ToTensor(),
        transforms.Normalize(mean=_channel_mean, std=_channel_std),
    ]
)

# Validation pipeline: the same resize and normalisation, without augmentation
val_transform = transforms.Compose([
    transforms.Resize(_target_size),
    transforms.ToTensor(),
    transforms.Normalize(mean=_channel_mean, std=_channel_std),
])

### 2.2 Dataset Loading
Now we'll load our datasets using PyTorch's ImageFolder

In [None]:
# Build one ImageFolder dataset per split, each with its own transform pipeline
train_dataset = datasets.ImageFolder(root='datasets/datasets/train', transform=train_transform)
val_dataset = datasets.ImageFolder(root='datasets/datasets/val', transform=val_transform)

In [None]:
# Report how many images each split contains
n_train = len(train_dataset)
n_val = len(val_dataset)
print(f"Training samples: {n_train}")
print(f"Validation samples: {n_val}")

In [None]:
# Options common to both loaders
_loader_options = dict(batch_size=BATCH_SIZE, num_workers=4, pin_memory=True)

# Shuffle only the training data; validation order stays fixed
train_loader = DataLoader(train_dataset, shuffle=True, **_loader_options)
val_loader = DataLoader(val_dataset, shuffle=False, **_loader_options)

## 3. Model Architecture
We'll use a pretrained VGG16 as the base model, replace its original top classifier, and add a new classifier with batch normalization and dropout.

In [None]:
class VGG16Model(nn.Module):
    """Frozen pretrained VGG16 convolutional base followed by a new dense head.

    The forward pass runs the input through ``self.features`` (the VGG16
    convolutional stack) and then through ``self.classifier``. The head ends
    in ``nn.Softmax(dim=1)``, so the model returns per-class probabilities,
    not raw logits.

    Args:
        num_classes: Size of the final linear layer (default 2).
    """

    def __init__(self, num_classes=2):
        super().__init__()

        # Pretrained backbone with every parameter frozen
        backbone = models.vgg16(pretrained=True)
        self.vgg16 = backbone
        for weight in backbone.parameters():
            weight.requires_grad = False

        # Only the convolutional stack is used in forward(). It is the same
        # module object as self.vgg16.features; the backbone's own classifier
        # stays attached under self.vgg16 but is never called here.
        self.features = backbone.features

        # New head: Flatten + BatchNorm, four (Linear, Softplus, BatchNorm,
        # Dropout) stages, then the output Linear and Softmax.
        flat_dim = 512 * 7 * 7
        hidden_dim = 256
        head = [nn.Flatten(), nn.BatchNorm1d(flat_dim)]
        in_dim = flat_dim
        for _ in range(4):
            head.extend([
                nn.Linear(in_dim, hidden_dim),
                nn.Softplus(),
                nn.BatchNorm1d(hidden_dim),
                nn.Dropout(0.5),
            ])
            in_dim = hidden_dim
        head.extend([nn.Linear(hidden_dim, num_classes), nn.Softmax(dim=1)])
        self.classifier = nn.Sequential(*head)

    def forward(self, x):
        """Return class probabilities for the input batch ``x``."""
        return self.classifier(self.features(x))

In [None]:
# Create the model instance and move it to the selected device
model = VGG16Model()
model = model.to(device)

# Model summary: element count over every parameter tensor (frozen ones included)
total_params = sum(map(torch.numel, model.parameters()))
print(f'Total parameters: {total_params:,}')

## 4. Training Utilities
### 4.1 Visualization Function

In [None]:
def plot_training_history(train_losses, val_losses, train_accs, val_accs, title=''):
    """Plot training/validation loss and accuracy curves side by side.

    Args:
        train_losses: Training loss values, one per epoch.
        val_losses: Validation loss values, one per epoch.
        train_accs: Training accuracy values, one per epoch.
        val_accs: Validation accuracy values, one per epoch.
        title: Prefix prepended to each subplot title.
    """
    _, (loss_ax, acc_ax) = plt.subplots(nrows=1, ncols=2, figsize=(16, 6))

    # (axis, metric name, training curve, validation curve, legend position)
    panels = (
        (loss_ax, 'Loss', train_losses, val_losses, 'upper right'),
        (acc_ax, 'Accuracy', train_accs, val_accs, 'lower right'),
    )
    for axis, metric, train_curve, val_curve, legend_loc in panels:
        axis.plot(train_curve, label='Training ' + metric)
        axis.plot(val_curve, label='Validation ' + metric)
        axis.legend(loc=legend_loc)
        axis.set_title(title + ' ' + metric)

    plt.show()

### 4.2 Training Function

In [None]:
def _run_epoch(model, loader, criterion, device, optimizer=None):
    """Run one pass over ``loader`` and return ``(mean batch loss, accuracy %)``.

    When ``optimizer`` is given the model is put in train mode and updated
    after every batch; otherwise it is put in eval mode and gradients are
    disabled.
    """
    training = optimizer is not None
    model.train(training)
    running_loss = 0.0
    correct = 0
    total = 0

    with torch.set_grad_enabled(training):
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)

            if training:
                optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            if training:
                loss.backward()
                optimizer.step()

            running_loss += loss.item()
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()

    return running_loss / len(loader), 100. * correct / total


def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs,
                scheduler=None, device=None, checkpoint_path='ptVgg16.pth'):
    """Train ``model`` and validate it after every epoch.

    Whenever the validation loss improves, a checkpoint dict (epoch, model and
    optimizer state dicts, validation loss and accuracy) is written to
    ``checkpoint_path``.

    Args:
        model: Network to train.
        train_loader: Iterable of ``(inputs, labels)`` training batches.
        val_loader: Iterable of ``(inputs, labels)`` validation batches.
        criterion: Callable ``criterion(outputs, labels)`` returning the loss.
        optimizer: Optimizer updating the model's trainable parameters.
        num_epochs: Number of epochs to run.
        scheduler: Optional learning-rate scheduler, stepped once per epoch.
            A ``ReduceLROnPlateau`` instance is stepped with the validation loss.
        device: Device the batches are moved to. Defaults to the device of the
            model's first parameter (CPU if the model has no parameters).
        checkpoint_path: File the best checkpoint is saved to.

    Returns:
        Tuple ``(train_losses, val_losses, train_accs, val_accs)``; each is a
        list with one entry per epoch (accuracies are percentages).
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device('cpu')

    since = time.time()
    best_val_loss = float('inf')
    best_acc = 0
    train_losses, val_losses = [], []
    train_accs, val_accs = [], []

    for epoch in range(num_epochs):
        train_loss, train_acc = _run_epoch(model, train_loader, criterion, device, optimizer)
        val_loss, val_acc = _run_epoch(model, val_loader, criterion, device)

        # Track the best validation accuracy (reported at the end)
        if val_acc > best_acc:
            best_acc = val_acc

        # Save the best model based on validation loss
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'val_loss': val_loss,
                'val_acc': val_acc,
            }, checkpoint_path)

        # ReduceLROnPlateau needs the monitored metric; other schedulers do not
        if scheduler is not None:
            if isinstance(scheduler, optim.lr_scheduler.ReduceLROnPlateau):
                scheduler.step(val_loss)
            else:
                scheduler.step()

        # Store metrics
        train_losses.append(train_loss)
        val_losses.append(val_loss)
        train_accs.append(train_acc)
        val_accs.append(val_acc)

        print(f'Epoch {epoch+1}/{num_epochs}:')
        print(f'Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.2f}%')
        print(f'Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.2f}%')
        print('-' * 50)

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:.4f}'.format(best_acc))

    return train_losses, val_losses, train_accs, val_accs

## 5. Training and Evaluation

In [None]:
# Define loss function (criterion)
# The model's last layer is nn.Softmax, so its outputs are already probabilities.
# nn.CrossEntropyLoss applies log-softmax itself and expects raw logits, so it
# would apply softmax twice. Take the negative log-likelihood of the
# probabilities instead.
_nll_loss = nn.NLLLoss()


def criterion(probs, labels):
    """Return the mean negative log-likelihood of ``labels`` under ``probs``."""
    # Clamp before the log so a probability of exactly 0 cannot produce -inf
    return _nll_loss(torch.log(probs.clamp_min(1e-12)), labels)


# Define optimizer with initial learning rate
initial_lr = 0.001
# Only the parameters left trainable (the new classifier head) are optimised
params_to_update = [p for p in model.parameters() if p.requires_grad]
optimizer = optim.SGD(params=params_to_update, lr=initial_lr, momentum=0.9) # try SGD
# optimizer = optim.Adam(model.parameters(), lr=initial_lr, weight_decay=1e-4)  # Added weight decay for regularization
# `verbose` is omitted: it is deprecated in recent PyTorch releases
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=5)


In [None]:
# Train the model
# All arguments are passed by keyword, so train_model must accept `scheduler`
# and `device` keyword arguments for this call to succeed.
history = train_model(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    num_epochs=NUM_EPOCHS,
    device=device
)

In [None]:
# Unpack the history: train_model returns four per-epoch lists
train_losses, val_losses, train_accs, val_accs = history
# Best validation accuracy over all epochs (0 if no epoch was run)
best_acc = max(val_accs, default=0)

# Plot the results
plot_training_history(train_losses, val_losses, train_accs, val_accs)

# Inference on Test dataset and Analysis

This notebook performs:
1. Model loading and inference on test dataset
2. CSV generation for predictions
3. Visualization of results
4. Feature map extraction and visualization

## 1. Model loading and inference on test dataset
### 1.1 Test dataset transforms and processing

In [None]:
# Test-time preprocessing: resize, convert to tensor, normalise per channel
_test_mean = [0.4863, 0.4532, 0.4155]
_test_std = [0.2621, 0.2557, 0.2582]
_test_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=_test_mean, std=_test_std),
]
transform = transforms.Compose(_test_steps)

In [None]:
# Load test dataset directly from folder
# NOTE(review): ImageFolder expects images inside per-class sub-directories of
# `root` — confirm that "datasets/test" is laid out that way.
test_dataset = datasets.ImageFolder(
    root="datasets/test",
    transform=transform
)

# Keep dataset order (shuffle=False) so predictions line up with the file list
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False
)

### 1.2 Model loading and inference on processed test dataset

In [None]:
# Load the trained model
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# 'ptVgg16.pth' holds a checkpoint dict (epoch, model/optimizer state dicts,
# validation metrics), not a pickled model object. Rebuild the architecture
# and load the saved weights into it.
checkpoint = torch.load('ptVgg16.pth', map_location=device)
model = VGG16Model()
model.load_state_dict(checkpoint['model_state_dict'])
model.to(device)
model.eval()

In [None]:
# File name (directory stripped) of every test image, in dataset order
file_paths = [os.path.basename(path) for path, _ in test_dataset.imgs]

# Run the model batch by batch and collect the highest-scoring class index per image
predictions = []
with torch.no_grad():
    for images, _ in test_loader:
        class_scores = model(images.to(device))
        predicted = torch.max(class_scores.data, 1)[1]
        predictions.extend(predicted.cpu().numpy())

## 2. Export CSV for predictions

In [None]:
import pandas as pd

# Human-readable label for each class index.
# NOTE(review): assumes index 0 is cat and index 1 is dog, matching the
# "Cat"/"Dog" probability printout later in this notebook — confirm against
# the training dataset's class_to_idx mapping.
class_names = ['Cat', 'Dog']

# Create DataFrame with predictions
df_predictions = pd.DataFrame({
    'ImageId': range(1, len(predictions) + 1),
    'Label': predictions,
    'Prediction': [class_names[p] for p in predictions],
    'Filename': file_paths
})

# Save predictions to CSV
df_predictions.to_csv('predictions.csv', index=False)

## 3. Visualizations of Results

We will plot the test images together with their predicted labels to check which images are classified incorrectly.

In [None]:
# Visualize predictions for test images 0-99 (first group of 100)
# De-normalisation must invert the statistics used by the preprocessing transform
norm_mean = torch.tensor([0.4863, 0.4532, 0.4155])
norm_std = torch.tensor([0.2621, 0.2557, 0.2582])
start, stop = 0, 100

plt.figure(figsize=(20, 20))
# Stop early if the dataset holds fewer than `stop` images
for slot, i in enumerate(range(start, min(stop, len(test_dataset))), start=1):
    # Grid positions run 1..100 regardless of the image index
    ax = plt.subplot(10, 10, slot)

    # Get and display image
    image, _ = test_dataset[i]
    img = image.permute(1, 2, 0)  # channels-first -> channels-last for imshow
    img = img * norm_std + norm_mean
    img = img.numpy()
    img = np.clip(img, 0, 1)

    plt.imshow(img)
    plt.title(class_names[predictions[i]])
    plt.axis('off')

plt.tight_layout()
plt.show()

In [None]:
# Visualize predictions for test images 100-199 (second group of 100)
# De-normalisation must invert the statistics used by the preprocessing transform
norm_mean = torch.tensor([0.4863, 0.4532, 0.4155])
norm_std = torch.tensor([0.2621, 0.2557, 0.2582])
start, stop = 100, 200

plt.figure(figsize=(20, 20))
# Stop early if the dataset holds fewer than `stop` images
for slot, i in enumerate(range(start, min(stop, len(test_dataset))), start=1):
    # Grid positions run 1..100; using the image index here would overflow the 10x10 grid
    ax = plt.subplot(10, 10, slot)

    # Get and display image
    image, _ = test_dataset[i]
    img = image.permute(1, 2, 0)  # channels-first -> channels-last for imshow
    img = img * norm_std + norm_mean
    img = img.numpy()
    img = np.clip(img, 0, 1)

    plt.imshow(img)
    plt.title(class_names[predictions[i]])
    plt.axis('off')

plt.tight_layout()
plt.show()

In [None]:
# Visualize predictions for test images 200-299 (third group of 100)
# De-normalisation must invert the statistics used by the preprocessing transform
norm_mean = torch.tensor([0.4863, 0.4532, 0.4155])
norm_std = torch.tensor([0.2621, 0.2557, 0.2582])
start, stop = 200, 300

plt.figure(figsize=(20, 20))
# Stop early if the dataset holds fewer than `stop` images
for slot, i in enumerate(range(start, min(stop, len(test_dataset))), start=1):
    # Grid positions run 1..100; using the image index here would overflow the 10x10 grid
    ax = plt.subplot(10, 10, slot)

    # Get and display image
    image, _ = test_dataset[i]
    img = image.permute(1, 2, 0)  # channels-first -> channels-last for imshow
    img = img * norm_std + norm_mean
    img = img.numpy()
    img = np.clip(img, 0, 1)

    plt.imshow(img)
    plt.title(class_names[predictions[i]])
    plt.axis('off')

plt.tight_layout()
plt.show()

In [None]:
# Visualize predictions for test images 300-399 (fourth group of 100)
# De-normalisation must invert the statistics used by the preprocessing transform
norm_mean = torch.tensor([0.4863, 0.4532, 0.4155])
norm_std = torch.tensor([0.2621, 0.2557, 0.2582])
start, stop = 300, 400

plt.figure(figsize=(20, 20))
# Stop early if the dataset holds fewer than `stop` images
for slot, i in enumerate(range(start, min(stop, len(test_dataset))), start=1):
    # Grid positions run 1..100; using the image index here would overflow the 10x10 grid
    ax = plt.subplot(10, 10, slot)

    # Get and display image
    image, _ = test_dataset[i]
    img = image.permute(1, 2, 0)  # channels-first -> channels-last for imshow
    img = img * norm_std + norm_mean
    img = img.numpy()
    img = np.clip(img, 0, 1)

    plt.imshow(img)
    plt.title(class_names[predictions[i]])
    plt.axis('off')

plt.tight_layout()
plt.show()

In [None]:
# Visualize predictions for test images 400-499 (fifth group of 100)
# De-normalisation must invert the statistics used by the preprocessing transform
norm_mean = torch.tensor([0.4863, 0.4532, 0.4155])
norm_std = torch.tensor([0.2621, 0.2557, 0.2582])
start, stop = 400, 500

plt.figure(figsize=(20, 20))
# Stop early if the dataset holds fewer than `stop` images
for slot, i in enumerate(range(start, min(stop, len(test_dataset))), start=1):
    # Grid positions run 1..100; using the image index here would overflow the 10x10 grid
    ax = plt.subplot(10, 10, slot)

    # Get and display image
    image, _ = test_dataset[i]
    img = image.permute(1, 2, 0)  # channels-first -> channels-last for imshow
    img = img * norm_std + norm_mean
    img = img.numpy()
    img = np.clip(img, 0, 1)

    plt.imshow(img)
    plt.title(class_names[predictions[i]])
    plt.axis('off')

plt.tight_layout()
plt.show()

## 4. Feature Map Extraction and Visualization for Analysis

In [None]:
class FeatureExtractor():
    """Capture intermediate layer outputs of a model through forward hooks.

    On construction a forward hook is registered on every layer named in
    ``conv_layers`` and ``classifier_layers`` that exists in ``model``. After
    each forward pass, ``features`` maps those layer names to the detached
    outputs of the most recent call. Names that do not exist in the model are
    skipped.

    Attributes are documented inline in ``__init__``.
    """

    def __init__(self, model):
        self.model = model
        # layer name -> detached output of the latest forward pass
        self.features = {}
        # handles of the registered hooks; call .remove() on them to detach
        self.hook_handles = []

        # Convolution layers inside the `features` stack to capture
        self.conv_layers = [
            'features.2',
            'features.7',
            'features.14',
            'features.21',
            'features.28'
        ]

        # Layers inside the `classifier` stack to capture.
        # NOTE(review): for the VGG16Model head in this notebook these indices
        # are the four hidden nn.Linear layers (outputs before Softplus).
        self.classifier_layers = [
            'classifier.2',
            'classifier.6',
            'classifier.10',
            'classifier.14'
        ]

        # Register hooks for all target layers.
        # remove_duplicate=False matters: when the same module object is
        # reachable under two names (e.g. `features` aliasing
        # `vgg16.features`), the default traversal yields it only under the
        # first name, so the `features.N` targets would never be matched.
        targets = set(self.conv_layers) | set(self.classifier_layers)
        for name, layer in model.named_modules(remove_duplicate=False):
            if name in targets:
                handle = layer.register_forward_hook(self.save_feature_maps(name))
                self.hook_handles.append(handle)

    def save_feature_maps(self, name):
        """Return a forward hook that stores the layer output under ``name``."""
        def hook(module, input, output):
            # detach() shares storage with `output`; a following in-place
            # activation would therefore also alter the stored tensor.
            self.features[name] = output.detach()
        return hook

In [None]:
def visualize_feature_maps(feature_maps, layer_name, num_features=16):
    """Plot the activations captured for one layer.

    A 4-D input is drawn as a square grid of up to ``num_features`` channel
    images taken from the first sample; any other input is drawn as a line
    plot of the first sample's values.

    Args:
        feature_maps: Captured layer output (tensor with a leading batch axis).
        layer_name: Name shown in the figure title.
        num_features: Maximum number of channels drawn for 4-D inputs.
    """
    if feature_maps.ndim != 4:
        # Dense-layer output: one value per neuron
        plt.figure(figsize=(10, 5))
        plt.suptitle(f'Activation Values for {layer_name}', fontsize=16)
        first_sample = feature_maps[0].cpu().numpy()
        plt.plot(first_sample)
        plt.xlabel('Neuron Index')
        plt.ylabel('Activation Value')
    else:
        # Convolutional output: one image per channel of the first sample
        first_sample = feature_maps[0]
        shown = min(num_features, first_sample.shape[0])
        side = int(np.ceil(np.sqrt(shown)))

        plt.figure(figsize=(20, 20))
        plt.suptitle(f'Feature Maps for {layer_name}', fontsize=16)

        for channel in range(shown):
            plt.subplot(side, side, channel + 1)
            plt.imshow(first_sample[channel].cpu(), cmap='viridis')
            plt.axis('off')
            plt.title(f'Filter {channel}')

    plt.tight_layout()
    plt.show()


In [None]:
# Load and prepare image
image_path = 'datasets/test/500.jpg'
# Force three channels: the normalisation below is configured for 3-channel input
image = Image.open(image_path).convert('RGB')

# Normalise with the same per-channel statistics used for the training,
# validation and test pipelines, so the model sees consistently scaled input
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.4863, 0.4532, 0.4155], std=[0.2621, 0.2557, 0.2582])
])

In [None]:
# Load your custom model
# map_location lets a checkpoint saved on a GPU be loaded on a CPU-only machine
checkpoint = torch.load('ptVgg16.pth', map_location=device)
model = VGG16Model()
model.load_state_dict(checkpoint['model_state_dict'])
model.to(device)
model.eval()

In [None]:
# Create feature extractor
# Constructing it registers forward hooks on `model`; captured outputs appear
# in feature_extractor.features after a forward pass.
feature_extractor = FeatureExtractor(model)

In [None]:
# Display the unprocessed input image without axes
fig, ax = plt.subplots(figsize=(8, 8))
ax.imshow(image)
ax.set_title('Original Image')
ax.axis('off')
plt.show()

In [None]:
# Get prediction and features
# Preprocess the loaded image, add the batch dimension and move it to the
# model's device (convert to RGB so the 3-channel normalisation applies)
input_tensor = transform(image.convert('RGB')).unsqueeze(0).to(device)

with torch.no_grad():
    output = model(input_tensor)
    # The forward pass above filled the extractor's hook outputs
    features = feature_extractor.features
    _, predicted = torch.max(output.data, 1)
    prediction = class_names[predicted.item()]
    probabilities = output[0]  # Already softmax in model

print(f'Predicted class: {prediction}')
print(f'Probabilities: Cat: {probabilities[0]:.3f}, Dog: {probabilities[1]:.3f}')

In [None]:
# Plot the captured output of every convolutional target layer that was recorded
print("\nConvolutional Layer Feature Maps:")
captured_conv_layers = [name for name in feature_extractor.conv_layers if name in features]
for layer_name in captured_conv_layers:
    print(f"\nVisualizing {layer_name}:")
    visualize_feature_maps(features[layer_name], layer_name)

In [None]:
# Plot the captured output of every classifier target layer that was recorded
print("\nClassifier Layer Activations:")
captured_dense_layers = [name for name in feature_extractor.classifier_layers if name in features]
for layer_name in captured_dense_layers:
    print(f"\nVisualizing {layer_name}:")
    visualize_feature_maps(features[layer_name], layer_name)

In [None]:
# Summary statistics of each captured activation (first sample of the batch)
print("\nFeature Map Statistics:")
for layer_name, activations in features.items():
    first_sample = activations[0].cpu()
    positive_share = (first_sample > 0).float().mean().item() * 100  # % of values above zero
    summary = (
        ('mean', first_sample.mean().item()),
        ('std', first_sample.std().item()),
        ('min', first_sample.min().item()),
        ('max', first_sample.max().item()),
        ('active_neurons', positive_share),
    )
    print(f"\n{layer_name}:")
    for stat_name, value in summary:
        print(f"{stat_name}: {value:.3f}")