# Module 2: Implement and Test a PyTorch-Based Classifier
---

In [None]:
# Import necessary libraries
import os
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from tqdm import tqdm
from sklearn.metrics import classification_report, confusion_matrix

# Report the installed PyTorch build, then select the compute device:
# the GPU when CUDA is usable, otherwise the CPU.
print(f"PyTorch version: {torch.__version__}")
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Using device: {device}")

In [None]:
# Location of the image dataset on disk
dataset_path = './images_dataSAT/'

# Input geometry and label space
IMG_SIZE = 64        # images are resized to IMG_SIZE x IMG_SIZE
NUM_CLASSES = 2      # number of logits produced by the classifier

# Optimization hyperparameters
BATCH_SIZE = 32
EPOCHS = 20
LEARNING_RATE = 1e-3

## Task 1: Question: Why is random initialization useful for the model?

**Answer:**

Random initialization is useful for the model for several important reasons:

1. **Symmetry Breaking:** If all weights were initialized to the same value (e.g., zero), all neurons in a layer would compute the same output and receive the same gradient updates during backpropagation. This means they would never differentiate from each other. Random initialization breaks this symmetry, allowing each neuron to learn different features.

2. **Diverse Feature Learning:** Random initialization ensures that different neurons start with different weight values, enabling them to capture diverse patterns and features from the input data.

3. **Efficient Gradient Flow:** Proper random initialization (e.g., Xavier/Glorot or He initialization) helps maintain appropriate gradient magnitudes during training, preventing vanishing or exploding gradient problems.

4. **Exploration of Loss Landscape:** Random starting points allow the optimizer to explore different regions of the loss landscape, increasing the chance of finding a good local minimum.

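5. **Reproducibility with Seeds:** Random initialization does not sacrifice reproducibility: fixing the random seed (e.g., `torch.manual_seed(42)`) makes the "random" starting weights identical on every run, so experiments can be repeated exactly while still benefiting from the points above.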

In [None]:
# Pin the PyTorch and NumPy random number generators to a fixed seed so that
# repeated runs draw the same random numbers.
torch.random.manual_seed(42)
np.random.seed(seed=42)

## Task 2: Create the training transformation pipeline train_transform using transforms.Compose.

In [None]:
# Task 2: Training-time preprocessing with data augmentation.
# Each image is resized to IMG_SIZE x IMG_SIZE, randomly flipped, rotated and
# colour-jittered, then converted to a tensor and normalized per channel.
train_transform = transforms.Compose(
    [
        transforms.Resize(size=(IMG_SIZE, IMG_SIZE)),
        # Geometric augmentations
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomVerticalFlip(p=0.2),
        transforms.RandomRotation(degrees=45),
        # Photometric augmentation
        transforms.ColorJitter(
            brightness=0.2,
            contrast=0.2,
            saturation=0.2,
            hue=0.1,
        ),
        # PIL image -> float tensor, then per-channel standardization
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

print("Training transformation pipeline created:", train_transform, sep="\n")

## Task 3: Create the validation transformation pipeline val_transform.

In [None]:
# Task 3: Validation-time preprocessing.
# No random augmentation here: images are only resized, converted to tensors
# and normalized with the same per-channel statistics as the training pipeline.
val_transform = transforms.Compose(
    [
        transforms.Resize(size=(IMG_SIZE, IMG_SIZE)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

print("Validation transformation pipeline created:", val_transform, sep="\n")

In [None]:
# Load full dataset and split into train/val.
# This instance is only used for its size / class metadata and to draw the
# split, so it deliberately carries no transform of its own.
full_dataset = datasets.ImageFolder(root=dataset_path)

# Calculate split sizes (20% validation, remainder training)
total_size = len(full_dataset)
val_size = int(0.2 * total_size)
train_size = total_size - val_size

# Draw the split from an explicitly seeded generator so the partition is the
# same every time this cell is (re-)run, independent of the global RNG state.
train_split, val_split = torch.utils.data.random_split(
    full_dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(42),
)

# Both subsets returned by random_split wrap the *same* underlying dataset
# object, so assigning `.dataset.transform` through one of them would also
# apply the training augmentations to the validation images. Instead, give
# each split its own ImageFolder with the matching transform and reuse the
# indices drawn above.
train_dataset = torch.utils.data.Subset(
    datasets.ImageFolder(root=dataset_path, transform=train_transform),
    train_split.indices,
)
val_dataset = torch.utils.data.Subset(
    datasets.ImageFolder(root=dataset_path, transform=val_transform),
    val_split.indices,
)

# Create train_loader (batches are reshuffled every epoch)
train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=0
)

print(f"Total dataset size: {total_size}")
print(f"Training set size: {train_size}")
print(f"Validation set size: {val_size}")
print(f"Classes: {full_dataset.classes}")
print(f"Class to idx: {full_dataset.class_to_idx}")

## Task 4: Create val_loader for the validation dataset.

In [None]:
# Task 4: Create val_loader
# Each split needs its own preprocessing, so build two ImageFolder views of
# the same directory: one with the augmenting train_transform and one with
# the deterministic val_transform.
val_dataset_proper = datasets.ImageFolder(root=dataset_path, transform=val_transform)
train_dataset_proper = datasets.ImageFolder(root=dataset_path, transform=train_transform)

# Partition the index range 0..total_size-1 using a generator seeded with 42,
# so the train/validation membership is reproducible across runs.
split_rng = torch.Generator()
split_rng.manual_seed(42)
train_indices, val_indices = torch.utils.data.random_split(
    range(total_size),
    [train_size, val_size],
    generator=split_rng,
)

# Pair each index list with the dataset view that has the right transform
train_subset = torch.utils.data.Subset(train_dataset_proper, train_indices.indices)
val_subset = torch.utils.data.Subset(val_dataset_proper, val_indices.indices)

# Training batches are reshuffled every epoch; validation order stays fixed
train_loader = DataLoader(train_subset, batch_size=BATCH_SIZE, shuffle=True, num_workers=0)
val_loader = DataLoader(val_subset, batch_size=BATCH_SIZE, shuffle=False, num_workers=0)

print("val_loader created successfully!")
print(f"Number of training batches: {len(train_loader)}")
print(f"Number of validation batches: {len(val_loader)}")
print(f"Training samples: {len(train_subset)}")
print(f"Validation samples: {len(val_subset)}")

In [None]:
# Define the CNN model
class CNNClassifier(nn.Module):
    """Four-block convolutional classifier for 3-channel images.

    Each convolutional block is Conv2d(3x3, padding=1) -> BatchNorm2d -> ReLU
    -> MaxPool2d(2), with 32, 64, 128 and 256 output channels respectively.
    The resulting feature map is adaptively average-pooled to 4x4 and fed to
    a three-layer fully connected head with dropout.

    For 64x64 inputs the feature map is already 4x4 after the four pooling
    steps, so the adaptive pooling is an identity there. For other input
    sizes (at least 16 pixels per side, so the four 2x poolings leave a
    non-empty map) it keeps the flattened size at 256 * 4 * 4, so the first
    Linear layer still fits. The pooling layer has no parameters and is
    appended after the last block, so state_dict keys are unaffected.

    Args:
        num_classes: Number of output logits produced per image.
    """

    # Output channels of the successive convolutional blocks
    _BLOCK_CHANNELS = (32, 64, 128, 256)
    # Spatial side length the feature map is pooled to before flattening
    _POOLED_SIDE = 4

    def __init__(self, num_classes=2):
        super().__init__()

        # Convolutional layers: one Conv-BN-ReLU-MaxPool group per entry
        layers = []
        in_channels = 3
        for out_channels in self._BLOCK_CHANNELS:
            layers.extend([
                nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(2, 2),
            ])
            in_channels = out_channels
        # Decouple the classifier's input size from the image resolution
        layers.append(nn.AdaptiveAvgPool2d((self._POOLED_SIDE, self._POOLED_SIDE)))
        self.features = nn.Sequential(*layers)

        # Classifier layers
        flat_features = in_channels * self._POOLED_SIDE * self._POOLED_SIDE
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(flat_features, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(512, 256),
            nn.ReLU(inplace=True),
            nn.Dropout(0.3),
            nn.Linear(256, num_classes)
        )

    def forward(self, x):
        """Return raw (unnormalized) class logits, one row per input image."""
        x = self.features(x)
        x = self.classifier(x)
        return x

# Build the network and move its parameters to the selected device
model = CNNClassifier(num_classes=NUM_CLASSES)
model = model.to(device)

# Cross-entropy loss on the logits, optimized with Adam
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

# Show the architecture followed by the parameter counts
print(model)
total_params = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"\nTotal parameters: {total_params:,}")
print(f"Trainable parameters: {trainable_params:,}")

## Task 5: Question: What is tqdm used for?

**Answer:**

`tqdm` is a Python library used to display **progress bars** for loops and iterative processes. Its name comes from the Arabic word "taqaddum" (تقدّم) meaning "progress."

In the context of deep learning model training, `tqdm` is used for:

1. **Visual Progress Tracking:** It wraps around iterable objects (like data loaders) and displays a dynamic progress bar showing how many batches have been processed out of the total.

2. **Time Estimation:** It provides an estimated time of arrival (ETA) for the completion of the loop, helping users gauge how long training will take.

3. **Real-time Metrics Display:** When combined with `set_postfix()` or `set_description()`, it can display real-time training metrics such as loss and accuracy alongside the progress bar.

4. **Iteration Speed:** It shows the iterations per second (it/s), which helps monitor training throughput.

Example usage: `for batch in tqdm(train_loader, desc='Training'):` displays a progress bar like:
```
Training: 75%|███████▌  | 30/40 [00:15<00:05, 2.00it/s, loss=0.342, acc=0.891]
```

## Task 6: Question: Why are the train_loss, train_correct, and train_total set to 0 in every epoch?

**Answer:**

The variables `train_loss`, `train_correct`, and `train_total` are reset to 0 at the beginning of every epoch for the following reasons:

1. **Per-Epoch Metric Calculation:** These variables serve as accumulators to compute the average loss and accuracy **for each individual epoch**. If they were not reset, the metrics would accumulate across epochs and give incorrect (cumulative) values rather than per-epoch performance.

2. **Accurate Monitoring:** Resetting ensures that:
   - `train_loss` accumulates only the current epoch's total loss, which is then divided by the number of batches to get the average loss for that epoch.
   - `train_correct` counts only the correctly classified samples in the current epoch.
   - `train_total` counts only the total samples processed in the current epoch.
   - The accuracy is calculated as `train_correct / train_total` for each epoch independently.

3. **Fair Comparison:** This allows for a fair comparison of model performance across epochs, making it easy to observe whether the model is improving, overfitting, or plateauing.

4. **Clean State:** Starting fresh each epoch ensures no residual data from previous epochs contaminates the current epoch's metrics.

In [None]:
# Training loop with history tracking.
# `history` keeps one entry per epoch for each of the four tracked metrics.
history = {metric: [] for metric in ('train_loss', 'train_acc', 'val_loss', 'val_acc')}

best_val_acc = 0.0

for epoch in range(EPOCHS):
    epoch_label = f'Epoch {epoch+1}/{EPOCHS}'

    # ========== Training Phase ==========
    model.train()
    # Per-epoch accumulators, reset so metrics never mix across epochs
    train_loss, train_correct, train_total = 0.0, 0, 0

    train_bar = tqdm(train_loader, desc=f'{epoch_label} [Train]')
    for images, labels in train_bar:
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass (gradients from the previous step are cleared first)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass and parameter update
        loss.backward()
        optimizer.step()

        # Track metrics: summed batch losses and running correct/total counts
        batch_loss = loss.item()
        predicted = outputs.detach().max(dim=1).indices
        train_loss += batch_loss
        train_total += labels.size(0)
        train_correct += (predicted == labels).sum().item()

        train_bar.set_postfix(loss=batch_loss, acc=train_correct / train_total)

    # Loss is averaged over batches, accuracy over samples
    epoch_train_loss = train_loss / len(train_loader)
    epoch_train_acc = train_correct / train_total

    # ========== Validation Phase ==========
    model.eval()
    val_loss, val_correct, val_total = 0.0, 0, 0

    with torch.no_grad():
        val_bar = tqdm(val_loader, desc=f'{epoch_label} [Val]')
        for images, labels in val_bar:
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)
            loss = criterion(outputs, labels)

            batch_loss = loss.item()
            predicted = outputs.max(dim=1).indices
            val_loss += batch_loss
            val_total += labels.size(0)
            val_correct += (predicted == labels).sum().item()

            val_bar.set_postfix(loss=batch_loss, acc=val_correct / val_total)

    epoch_val_loss = val_loss / len(val_loader)
    epoch_val_acc = val_correct / val_total

    # Save history
    history['train_loss'].append(epoch_train_loss)
    history['train_acc'].append(epoch_train_acc)
    history['val_loss'].append(epoch_val_loss)
    history['val_acc'].append(epoch_val_acc)

    # Save best model: checkpoint only on a strict validation-accuracy improvement
    if epoch_val_acc > best_val_acc:
        best_val_acc = epoch_val_acc
        torch.save(model.state_dict(), 'best_pytorch_model.pth')
        print(f"  >> Best model saved with val_acc: {best_val_acc:.4f}")

    print(
        f'{epoch_label} - '
        f'Train Loss: {epoch_train_loss:.4f}, Train Acc: {epoch_train_acc:.4f} - '
        f'Val Loss: {epoch_val_loss:.4f}, Val Acc: {epoch_val_acc:.4f}'
    )
    print()

print(f"\nTraining completed! Best Validation Accuracy: {best_val_acc:.4f}")

## Task 7: Question: Why do you need to use torch.no_grad() in the validation loop?

**Answer:**

`torch.no_grad()` is used in the validation loop for several critical reasons:

1. **Disable Gradient Computation:** During validation, we only need to evaluate the model's performance — we do NOT want to update the model weights. `torch.no_grad()` disables gradient tracking, which means PyTorch will not build the computational graph needed for backpropagation.

2. **Memory Efficiency:** Gradient computation requires storing intermediate activations and building a computational graph, which consumes significant GPU/CPU memory. By disabling gradients, memory usage is substantially reduced, allowing larger batch sizes or preventing out-of-memory errors.

3. **Faster Computation:** Without the overhead of tracking gradients and building the computational graph, forward passes execute faster, making the validation phase more efficient.

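4. **Preventing Accidental Gradient Accumulation:** Weights are only ever changed by `optimizer.step()`, which the validation loop never calls. `torch.no_grad()` adds a safety net on top of that: because no computational graph is recorded, an accidental `loss.backward()` during validation raises an error instead of silently accumulating gradients that would leak into the next training step.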

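5. **Correct Evaluation:** `torch.no_grad()` does not change the model's outputs; it only stops gradient tracking, so the model is evaluated purely on its current state. Switching layers such as Dropout and BatchNorm to inference behaviour is the job of `model.eval()`, which is why the two are used together to obtain a consistent assessment of performance on unseen data.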

## Task 8: Question: What are two different metrics on which the model can be evaluated for best performance during training?

**Answer:**

Two different metrics on which the model can be evaluated for best performance during training are:

### 1. Validation Accuracy (`val_accuracy`)
- **What it measures:** The proportion of correctly classified samples out of all samples in the validation set.
- **Formula:** `Accuracy = Number of Correct Predictions / Total Number of Predictions`
- **When to use:** Best suited for **balanced datasets** where classes have roughly equal representation.
- **Monitoring:** Save the model checkpoint whenever `val_accuracy` exceeds its previous best (**maximum**) value — this is what the training loop above does with `best_val_acc`.

### 2. Validation Loss (`val_loss`)
- **What it measures:** The average loss (e.g., cross-entropy loss) computed on the validation set, indicating how well the model's predicted probability distribution matches the true labels.
- **Why it's useful:** Loss provides a more nuanced and continuous measure of model performance compared to accuracy. A model can have the same accuracy but different loss values — lower loss indicates higher confidence in correct predictions.
- **When to use:** Useful for both balanced and **imbalanced datasets**, and when you want to monitor the model's confidence.
- **Monitoring:** Save the model checkpoint whenever `val_loss` drops below its previous best (**minimum**) value.

**Other possible metrics** include F1-score, Precision, Recall, and AUC-ROC, but `val_accuracy` and `val_loss` are the two most commonly used during training.

## Task 9: Plot the Model Loss from the training history of the model.

In [None]:
# Task 9: Plot Model Loss
# The x-axis is derived from the recorded history rather than from EPOCHS, so
# the plot still works when training was interrupted early or EPOCHS was
# changed after training (matplotlib rejects x/y of different lengths).
num_epochs_run = len(history['train_loss'])
epochs_ran = range(1, num_epochs_run + 1)

fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Left panel: training vs validation loss; right panel: the same for accuracy.
# Each entry is (axis, history-key suffix, panel title, quantity name).
panels = (
    (axes[0], 'loss', 'Model Loss', 'Loss'),
    (axes[1], 'acc', 'Model Accuracy', 'Accuracy'),
)
for ax, key, title, quantity in panels:
    ax.plot(epochs_ran, history[f'train_{key}'], label=f'Training {quantity}',
            color='blue', linewidth=2, marker='o', markersize=4)
    ax.plot(epochs_ran, history[f'val_{key}'], label=f'Validation {quantity}',
            color='red', linewidth=2, marker='s', markersize=4)
    ax.set_title(title, fontsize=14, fontweight='bold')
    ax.set_xlabel('Epoch', fontsize=12)
    ax.set_ylabel(quantity, fontsize=12)
    ax.legend(fontsize=11)
    ax.grid(True, alpha=0.3)

plt.suptitle('PyTorch Model Training History', fontsize=16, fontweight='bold')
plt.tight_layout()
plt.show()

# Print final metrics (skipped when no epoch has been recorded, since the
# last-element / max / min lookups below are undefined on empty lists)
if num_epochs_run:
    print(f"Final Training Loss: {history['train_loss'][-1]:.4f}")
    print(f"Final Validation Loss: {history['val_loss'][-1]:.4f}")
    print(f"Final Training Accuracy: {history['train_acc'][-1]:.4f}")
    print(f"Final Validation Accuracy: {history['val_acc'][-1]:.4f}")
    print(f"Best Validation Accuracy: {max(history['val_acc']):.4f} (Epoch {np.argmax(history['val_acc'])+1})")
    print(f"Lowest Validation Loss: {min(history['val_loss']):.4f} (Epoch {np.argmin(history['val_loss'])+1})")

## Task 10: For the images from val_loader, get a list of all predictions (`all_preds`) and the ground-truth labels (`all_labels`).

In [None]:
# Task 10: Get all predictions and ground truth labels from val_loader

# Load the best model. map_location remaps the stored tensors onto the
# current device, so a checkpoint written on a GPU machine still loads when
# only a CPU is available.
model.load_state_dict(torch.load('best_pytorch_model.pth', map_location=device))
model.eval()

all_preds = []
all_labels = []

with torch.no_grad():
    for images, labels in tqdm(val_loader, desc='Getting predictions'):
        images, labels = images.to(device), labels.to(device)

        # Forward pass; the predicted class is the index of the largest logit
        outputs = model(images)
        predicted = outputs.argmax(dim=1)

        # Append to lists (moved to CPU first so NumPy can read them)
        all_preds.extend(predicted.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

# Convert to numpy arrays
all_preds = np.array(all_preds)
all_labels = np.array(all_labels)

print(f"Total predictions: {len(all_preds)}")
print(f"Total ground truth labels: {len(all_labels)}")
print(f"\nFirst 20 predictions:  {all_preds[:20]}")
print(f"First 20 true labels:  {all_labels[:20]}")

# Calculate accuracy
accuracy = np.mean(all_preds == all_labels)
print(f"\nOverall Accuracy: {accuracy:.4f}")

# Classification report.
# The label ids are passed explicitly so that the report rows and the
# confusion-matrix shape always cover every dataset class, even if one of
# them happens to be absent from the validation split or the predictions
# (without `labels`, target_names would no longer line up in that case).
class_names = full_dataset.classes
label_ids = list(range(len(class_names)))
print(f"\nClassification Report:")
print(classification_report(all_labels, all_preds, labels=label_ids, target_names=class_names))

# Confusion matrix (rows: true class, columns: predicted class)
cm = confusion_matrix(all_labels, all_preds, labels=label_ids)
print(f"Confusion Matrix:")
print(cm)

---
## All 10 tasks completed successfully.