In [None]:
# Comprehensive PyTorch CNN for CIFAR-10

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from tqdm import tqdm  # For progress bars

# 1. Preprocessing pipeline: tensor conversion followed by per-channel
#    normalization (no random augmentation is applied in this pipeline)
transform = transforms.Compose(
    [
        # Convert each image to a PyTorch tensor laid out as C x H x W
        transforms.ToTensor(),
        # For each channel (R, G, B): subtract mean 0.5, then divide by std 0.5
        transforms.Normalize(
            (0.5, 0.5, 0.5),
            (0.5, 0.5, 0.5),
        ),
    ]
)

# 2. Build the CIFAR-10 train/test datasets and wrap each in a batch loader
# Both splits are downloaded to ./data if missing and share the same transform.
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

# Settings common to both loaders: 64 samples per batch, 2 loader subprocesses
loader_kwargs = dict(batch_size=64, num_workers=2)

# Training batches are reshuffled; test batches keep the dataset order
trainloader = torch.utils.data.DataLoader(trainset, shuffle=True, **loader_kwargs)
testloader = torch.utils.data.DataLoader(testset, shuffle=False, **loader_kwargs)

# 3. Define the CNN model architecture
class CNN(nn.Module):
    """Three-stage convolutional classifier for 3 x 32 x 32 images.

    Each stage is Conv(3x3, stride 1, padding 1) -> ReLU -> 2x2 pooling with
    stride 2. The convolution keeps the spatial size and the pooling halves it,
    so a 32x32 input becomes 16x16, then 8x8, then 4x4. The final
    64 x 4 x 4 feature map is flattened to 1024 features and passed through two
    fully connected layers that produce 10 class logits.
    """

    def __init__(self):
        super().__init__()

        # Stage 1: 3 -> 32 channels, max pooling
        self.conv1 = nn.Conv2d(
            in_channels=3,
            out_channels=32,
            kernel_size=3,
            stride=1,
            padding=1
        )
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)

        # Stage 2: 32 -> 64 channels, average pooling
        self.conv2 = nn.Conv2d(
            in_channels=32,
            out_channels=64,
            kernel_size=3,
            stride=1,
            padding=1
        )
        self.pool2 = nn.AvgPool2d(kernel_size=2, stride=2, padding=0)

        # Stage 3: 64 -> 64 channels, max pooling
        self.conv3 = nn.Conv2d(
            in_channels=64,
            out_channels=64,
            kernel_size=3,
            stride=1,
            padding=1
        )
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)

        # Classifier head: 64 * 4 * 4 = 1024 flattened features -> 64 -> 10
        self.fc1 = nn.Linear(64 * 4 * 4, 64)
        self.fc2 = nn.Linear(64, 10)

        # Single stateless ReLU module reused after every conv layer and fc1
        self.relu = nn.ReLU()

    def forward(self, x):
        """Compute class logits for a batch of images.

        Args:
            x: Tensor of shape [batch_size, 3, 32, 32].

        Returns:
            Tensor of shape [batch_size, 10] holding unnormalized logits.

        Raises:
            RuntimeError: If the spatial size of ``x`` does not reduce to the
                64 * 4 * 4 features that ``fc1`` expects.
        """
        x = self.pool1(self.relu(self.conv1(x)))  # [batch_size, 32, 16, 16]
        x = self.pool2(self.relu(self.conv2(x)))  # [batch_size, 64, 8, 8]
        x = self.pool3(self.relu(self.conv3(x)))  # [batch_size, 64, 4, 4]

        # Flatten everything except the batch dimension. Keeping the batch
        # dimension fixed means a wrongly sized input fails in fc1 with a shape
        # error instead of being silently folded into extra batch rows.
        x = torch.flatten(x, start_dim=1)  # [batch_size, 1024]

        x = self.relu(self.fc1(x))  # [batch_size, 64]
        x = self.fc2(x)             # [batch_size, 10]

        return x

# 4. Choose the compute device, then create the model, loss and optimizer
if torch.cuda.is_available():
    device = torch.device("cuda")  # Use the GPU when one is available
else:
    device = torch.device("cpu")

model = CNN()
model = model.to(device)  # Place the model's parameters on the chosen device

# Cross-entropy loss for multi-class classification
criterion = nn.CrossEntropyLoss()
# Adam over all model parameters with learning rate 0.001
optimizer = optim.Adam(
    model.parameters(),
    lr=0.001,
)

# 5. Training Loop
num_epochs = 10  # Number of full passes over the training set

for epoch in range(num_epochs):
    running_loss = 0.0  # Sum of per-sample losses accumulated this epoch
    samples_seen = 0    # Number of training samples processed this epoch
    model.train()  # Set model to training mode

    # tqdm wraps the loader to display a progress bar for the epoch
    for inputs, labels in tqdm(trainloader, desc=f"Epoch {epoch+1}/{num_epochs}"):
        inputs, labels = inputs.to(device), labels.to(device)  # Move data to device

        optimizer.zero_grad()  # Clear gradients left over from the previous step

        outputs = model(inputs)            # Forward pass
        loss = criterion(outputs, labels)  # Mean loss over this batch
        loss.backward()                    # Backward pass
        optimizer.step()                   # Parameter update

        # The criterion returns a batch mean, so weight it by the batch size.
        # Averaging the batch means directly would over-weight a final batch
        # that is smaller than the others.
        n_batch = labels.size(0)
        running_loss += loss.item() * n_batch
        samples_seen += n_batch

    # Per-sample average loss for the epoch
    avg_loss = running_loss / samples_seen
    print(f"Epoch {epoch + 1}, Loss: {avg_loss:.4f}")

print('Finished Training')  # Indicate end of training

# 6. Measure classification accuracy on the test loader
model.eval()  # Switch the model to evaluation mode
total = 0
correct = 0

with torch.no_grad():  # Gradients are not needed while scoring
    for images, labels in testloader:
        images = images.to(device)
        labels = labels.to(device)

        outputs = model(images)
        # The index of the largest output along dim 1 is the predicted class
        _, predicted = torch.max(outputs.data, dim=1)

        hits = predicted == labels
        correct += hits.sum().item()  # Correct predictions in this batch
        total += labels.size(0)       # Samples in this batch

# Accuracy as a percentage of all evaluated samples
test_accuracy = 100 * correct / total
print(f"Test Accuracy: {test_accuracy:.2f}%")

# 7. Write the model's state dictionary to disk
checkpoint_path = 'cnn_checkpoint.pth'  # Destination file for the checkpoint
torch.save(obj=model.state_dict(), f=checkpoint_path)
print("Model checkpoint saved.")

# 8. Loading the Model Checkpoint (Optional)
# To load the model later:
# model = CNN()  # Initialize the model architecture
# model.load_state_dict(torch.load(checkpoint_path))  # Load saved parameters
# model.to(device)  # Move to device
# model.eval()  # Set to evaluation mode
# print("Model checkpoint loaded.")  # Confirmation message


# Line-by-Line Explanation of PyTorch CNN Code

## Imports

- **`torch`, `torch.nn`, `torch.optim`:**
  - **Purpose:** Core PyTorch libraries essential for building and optimizing neural network models.
  - **Usage:** 
    - `torch` provides the fundamental data structures and operations.
    - `torch.nn` offers modules and loss functions to construct neural networks.
    - `torch.optim` contains optimization algorithms like SGD and Adam for training models.

- **`torchvision`, `torchvision.transforms`:**
  - **Purpose:** Utilities for handling image datasets and applying transformations.
  - **Usage:**
    - `torchvision` provides popular datasets, model architectures, and image transformations.
    - `torchvision.transforms` allows for preprocessing steps such as resizing, cropping, normalization, and data augmentation.

- **`tqdm`:**
  - **Purpose:** Provides progress bars for visualizing training progress.
  - **Usage:** Wraps around iterable objects to display real-time progress during training loops, enhancing monitoring and user experience.

## Transformations

- **`transforms.ToTensor()`:**
  - **Function:** Converts images from PIL format to PyTorch tensors.
  - **Effect:** Scales pixel values from the range [0, 255] to [0.0, 1.0], preparing them for neural network processing.

- **`transforms.Normalize(...)`:**
  - **Function:** Normalizes the tensor data.
  - **Effect:** For each RGB channel, subtracts the mean (0.5) and divides by the standard deviation (0.5), mapping values from [0.0, 1.0] to [-1.0, 1.0]. Centering the inputs around zero typically helps training converge faster.

## Data Loading

- **`torchvision.datasets.CIFAR10(...)`:**
  - **Function:** Downloads and loads the CIFAR-10 dataset.
  - **Effect:** Applies the defined transformations to the dataset, preparing it for training and testing.

- **`torch.utils.data.DataLoader(...)`:**
  - **Function:** Creates iterators (`trainloader` and `testloader`) for the training and testing datasets.
  - **Parameters:**
    - **`batch_size`:** Specifies the number of samples processed in each batch.
    - **`shuffle`:** Randomizes the order of data, enhancing training by preventing the model from learning the order of the data.
    - **`num_workers`:** Determines the number of subprocesses for data loading, enabling parallel data loading for efficiency.

## Model Definition (CNN Class)

### `__init__` Method

- **Convolutional Layers (`conv1`, `conv2`, `conv3`):**
  - **`conv1`:**
    - **Input Channels:** 3 (RGB)
    - **Output Channels:** 32 (number of filters)
    - **Kernel Size:** 3x3
    - **Stride:** 1 (moves the filter one pixel at a time)
    - **Padding:** 1 (preserves the spatial dimensions of the input)
  
  - **`conv2`:**
    - **Input Channels:** 32
    - **Output Channels:** 64
    - **Kernel Size:** 3x3
    - **Stride:** 1
    - **Padding:** 1
  
  - **`conv3`:**
    - **Input Channels:** 64
    - **Output Channels:** 64
    - **Kernel Size:** 3x3
    - **Stride:** 1
    - **Padding:** 1

- **Pooling Layers (`pool1`, `pool2`, `pool3`):**
  - **`pool1` and `pool3`:**
    - **Type:** Max Pooling
    - **Kernel Size:** 2x2
    - **Stride:** 2
    - **Padding:** 0
    - **Effect:** Each halves the spatial dimensions (`pool1`: 32x32 → 16x16; `pool3`: 8x8 → 4x4).
  
  - **`pool2`:**
    - **Type:** Average Pooling
    - **Kernel Size:** 2x2
    - **Stride:** 2
    - **Padding:** 0
    - **Effect:** Further reduces spatial dimensions (e.g., 16x16 → 8x8).

- **Fully Connected Layers (`fc1`, `fc2`):**
  - **`fc1`:**
    - **Input Features:** 64 * 4 * 4 = 1024 (flattened from previous layers)
    - **Output Features:** 64
  
  - **`fc2`:**
    - **Input Features:** 64
    - **Output Features:** 10 (number of CIFAR-10 classes)

- **Activation Function (`relu`):**
  - **Function:** Applies the ReLU (Rectified Linear Unit) activation function.
  - **Purpose:** Introduces non-linearity after each convolutional layer and after the first fully connected layer (`fc1`), enabling the network to learn complex patterns.

### `forward` Method

- **`self.pool1(self.relu(self.conv1(x)))`:**
  - **Process:** 
    - **Convolution (`conv1`)**: Extracts features from the input.
    - **Activation (`ReLU`)**: Applies non-linearity.
    - **Pooling (`pool1`)**: Reduces spatial dimensions.
  - **Output Shape:** `[batch_size, 32, 16, 16]`

- **`self.pool2(self.relu(self.conv2(x)))`:**
  - **Process:** 
    - **Convolution (`conv2`)**: Extracts higher-level features.
    - **Activation (`ReLU`)**: Applies non-linearity.
    - **Pooling (`pool2`)**: Further reduces spatial dimensions.
  - **Output Shape:** `[batch_size, 64, 8, 8]`

- **`self.pool3(self.relu(self.conv3(x)))`:**
  - **Process:** 
    - **Convolution (`conv3`)**: Extracts complex features.
    - **Activation (`ReLU`)**: Applies non-linearity.
    - **Pooling (`pool3`)**: Final downsampling.
  - **Output Shape:** `[batch_size, 64, 4, 4]`

- **`x.view(-1, 64 * 4 * 4)`:**
  - **Process:** Reshapes the tensor from `[batch_size, 64, 4, 4]` to `[batch_size, 1024]`.
  - **Purpose:** Flattens the multi-dimensional tensor into a 2D tensor suitable for fully connected layers.

- **`self.relu(self.fc1(x))`:**
  - **Process:** 
    - **Fully Connected Layer 1 (`fc1`)**: Transforms the flattened tensor.
    - **Activation (`ReLU`)**: Applies non-linearity.
  - **Output Shape:** `[batch_size, 64]`

- **`self.fc2(x)`:**
  - **Process:** 
    - **Output Layer (`fc2`)**: Produces logits for each of the 10 classes.
  - **Output Shape:** `[batch_size, 10]`

- **`return x`:**
  - **Function:** Returns the final output logits for classification.

## Model Instantiation and Setup

- **`device`:**
  - **Function:** Checks if CUDA (GPU) is available.
  - **Effect:** Selects GPU for faster computations if available; otherwise, defaults to CPU.

- **`model = CNN().to(device)`:**
  - **Function:** Instantiates the CNN model and transfers it to the selected device (GPU or CPU).

- **`criterion = nn.CrossEntropyLoss()`:**
  - **Function:** Defines the loss function suitable for multi-class classification.
  - **Effect:** Combines `LogSoftmax` and `NLLLoss` in one single class, computing the loss between predicted logits and true labels.

- **`optimizer = optim.Adam(model.parameters(), lr=0.001)`:**
  - **Function:** Initializes the Adam optimizer with a learning rate of 0.001.
  - **Effect:** Updates model parameters based on computed gradients during training.

## Training Loop

- **`for epoch in range(num_epochs)`:**
  - **Function:** Iterates over the specified number of epochs, representing complete passes through the training dataset.

- **`running_loss = 0.0`:**
  - **Function:** Initializes a variable to accumulate the loss over the epoch for reporting purposes.

- **`model.train()`:**
  - **Function:** Sets the model to training mode.
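  - **Effect:** Puts mode-dependent layers such as dropout and batch normalization into their training behavior (dropout active, batch statistics used). This model contains neither, so the call does not change its outputs, but setting the mode explicitly is good practice.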

- **Batch Loop (`for inputs, labels in tqdm(trainloader, desc=f"Epoch {epoch+1}/{num_epochs}")`):**
  - **Function:** Iterates over batches of data from the `trainloader`.
  - **Effect:** Utilizes `tqdm` to display a progress bar for monitoring training progress.

- **Within Each Batch:**
  - **`inputs, labels = inputs.to(device), labels.to(device)`:**
    - **Function:** Transfers input data and labels to the selected device (GPU or CPU).
  
  - **`optimizer.zero_grad()`:**
    - **Function:** Clears existing gradients to prevent accumulation from previous iterations.
  
  - **`outputs = model(inputs)`:**
    - **Function:** Performs a forward pass through the model to obtain predictions.
  
  - **`loss = criterion(outputs, labels)`:**
    - **Function:** Computes the loss by comparing predictions with true labels.
  
  - **`loss.backward()`:**
    - **Function:** Performs backpropagation to compute gradients of the loss with respect to model parameters.
  
  - **`optimizer.step()`:**
    - **Function:** Updates the model's parameters based on the computed gradients and the optimizer's algorithm.
  
  - **`running_loss += loss.item()`:**
    - **Function:** Accumulates the loss for the current batch to calculate the average loss later.

- **After Each Epoch:**
  - **`avg_loss = running_loss / len(trainloader)`:**
    - **Function:** Calculates the average loss over all batches in the epoch.
  
  - **`print(f"Epoch {epoch + 1}, Loss: {avg_loss:.4f}")`:**
    - **Function:** Prints the average loss for the epoch, providing insight into training progress.

## Evaluation on Test Data

- **`model.eval()`:**
  - **Function:** Sets the model to evaluation mode.
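  - **Effect:** Switches mode-dependent layers to inference behavior: dropout is disabled and batch normalization uses its running statistics instead of per-batch statistics, ensuring consistent behavior during evaluation.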

- **Initialize Counters (`correct = 0`, `total = 0`):**
  - **Function:** Tracks the number of correct predictions and total samples for accuracy calculation.

- **`with torch.no_grad()`:**
  - **Function:** Disables gradient computation to optimize memory and computation during evaluation.

- **Test Loop (`for data in testloader`):**
  - **Function:** Iterates over batches from the `testloader`.

- **Within Each Test Batch:**
  - **`images, labels = data`:**
    - **Function:** Retrieves images and labels from the batch.
  
  - **`images, labels = images.to(device), labels.to(device)`:**
    - **Function:** Transfers data to the selected device.
  
  - **`outputs = model(images)`:**
    - **Function:** Performs a forward pass to obtain predictions.
  
  - **`_, predicted = torch.max(outputs.data, 1)`:**
    - **Function:** Identifies the class with the highest logit (equivalently, the highest softmax probability) as the predicted label.
  
  - **`total += labels.size(0)`:**
    - **Function:** Increments the total number of samples processed.
  
  - **`correct += (predicted == labels).sum().item()`:**
    - **Function:** Increments the count of correctly predicted samples.

- **After Test Loop:**
  - **`test_accuracy = 100 * correct / total`:**
    - **Function:** Calculates the overall test accuracy percentage.
  
  - **`print(f"Test Accuracy: {test_accuracy:.2f}%")`:**
    - **Function:** Prints the test accuracy, indicating the model's performance on unseen data.

## Saving the Model Checkpoint

- **`checkpoint_path = 'cnn_checkpoint.pth'`:**
  - **Function:** Defines the file path for saving the model's state dictionary.

- **`torch.save(model.state_dict(), checkpoint_path)`:**
  - **Function:** Saves the model's parameters to the specified file.
  - **Effect:** Serializes the state dictionary containing all learnable parameters (weights and biases).

- **`print("Model checkpoint saved.")`:**
  - **Function:** Confirms that the model has been successfully saved.

---


In [None]:
# File that will receive the checkpoint
checkpoint_path = 'cnn_checkpoint.pth'

# Take the model's state dictionary and write it to that file
state_dict = model.state_dict()
torch.save(state_dict, checkpoint_path)
print("Model checkpoint saved.")


In [None]:
# Initialize a new instance of the model
model = CNN()  # The architecture must match the one that produced the checkpoint
model.to(device)  # Move to device

# Load the saved state dictionary. map_location remaps the stored tensors onto
# `device`, so a checkpoint saved on a GPU can still be opened on a CPU-only
# machine.
state_dict = torch.load(checkpoint_path, map_location=device)
model.load_state_dict(state_dict)
model.eval()  # Set to evaluation mode

print("Model checkpoint loaded.")



## Loading the Model Checkpoint

- **`model = CNN()`:**
  - **Function:** Initializes a new instance of the CNN model.
  - **Purpose:** Creates a fresh model architecture to load the saved parameters into.

- **`model.to(device)`:**
  - **Function:** Transfers the newly initialized model to the selected device (GPU or CPU).

- **`model.load_state_dict(torch.load(checkpoint_path))`:**
  - **Function:** Loads the saved parameters into the model.
  - **Effect:** Populates the model's weights and biases with the values from the saved state dictionary.

- **`model.eval()`:**
  - **Function:** Sets the model to evaluation mode.
  - **Purpose:** Ensures that layers like dropout and batch normalization behave correctly during inference.

- **`print("Model checkpoint loaded.")`:**
  - **Function:** Confirms that the saved parameters have been loaded into the model.

In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models, optimizers, losses, callbacks
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.utils import to_categorical

# Load the CIFAR-10 train and test splits as (images, labels) pairs
(train_images, train_labels), (test_images, test_labels) = cifar10.load_data()

# Convert the images to float32 and divide by 255
train_images, test_images = (
    images.astype('float32') / 255.0 for images in (train_images, test_images)
)

# One-hot encode the labels over 10 classes (matches CategoricalCrossentropy)
train_labels, test_labels = (
    to_categorical(labels, 10) for labels in (train_labels, test_labels)
)

def create_cnn_model(out_channels1=32, out_channels2=64, out_channels3=128):
    """Build an uncompiled Sequential CNN for 32 x 32 x 3 inputs.

    The network has three Conv(3x3, 'same') -> ReLU -> 2x2 pooling stages
    (max, average, max), followed by Flatten, a 64-unit ReLU dense layer and a
    10-unit softmax output layer.

    Args:
        out_channels1: Number of filters in the first convolution.
        out_channels2: Number of filters in the second convolution.
        out_channels3: Number of filters in the third convolution.

    Returns:
        The assembled ``Sequential`` model; the caller compiles it.
    """
    # Settings shared by every convolution and every pooling layer
    conv_kwargs = dict(kernel_size=(3, 3), strides=1, padding='same')
    pool_kwargs = dict(pool_size=(2, 2), strides=2, padding='valid')

    return models.Sequential([
        # Stage 1
        layers.Conv2D(out_channels1, input_shape=(32, 32, 3), **conv_kwargs),
        layers.ReLU(),
        layers.MaxPooling2D(**pool_kwargs),
        # Stage 2
        layers.Conv2D(out_channels2, **conv_kwargs),
        layers.ReLU(),
        layers.AveragePooling2D(**pool_kwargs),
        # Stage 3
        layers.Conv2D(out_channels3, **conv_kwargs),
        layers.ReLU(),
        layers.MaxPooling2D(**pool_kwargs),
        # Classifier head
        layers.Flatten(),
        layers.Dense(64, activation='relu'),
        layers.Dense(10, activation='softmax'),
    ])

# Build the network and attach optimizer, loss and the accuracy metric
model = create_cnn_model()
model.compile(
    optimizer=optimizers.Adam(learning_rate=0.001),
    loss=losses.CategoricalCrossentropy(),
    metrics=['accuracy'],
)

# Save to checkpoint_path only when validation accuracy improves
checkpoint_path = 'cnn_checkpoint.h5'
checkpoint = callbacks.ModelCheckpoint(
    filepath=checkpoint_path,
    monitor='val_accuracy',
    save_best_only=True,
    verbose=1,
)

# NOTE(review): validation_data is the test split, so the checkpoint is
# selected using test data. Consider a separate validation split if an
# unbiased test accuracy is required.
fit_options = dict(
    epochs=10,
    batch_size=64,
    validation_data=(test_images, test_labels),
    callbacks=[checkpoint],
    verbose=2,
)
history = model.fit(train_images, train_labels, **fit_options)

# Accuracy of the model as it stands after the final training epoch
test_loss, test_accuracy = model.evaluate(test_images, test_labels, verbose=0)
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")

# Restore the checkpoint saved during training, then evaluate it and report
# the result so the second evaluation is actually used.
model.load_weights(checkpoint_path)
print("Model checkpoint loaded.")
best_loss, best_accuracy = model.evaluate(test_images, test_labels, verbose=0)
print(f"Checkpoint Test Accuracy: {best_accuracy * 100:.2f}%")


# Summary Table

| **Feature**           | **PyTorch**                                      | **TensorFlow (Keras)**                            |
|-----------------------|--------------------------------------------------|----------------------------------------------------|
| **Computation Graph** | Dynamic (Eager Execution)                        | Dynamic with optional static graph (via `@tf.function`) |
| **Model Definition**  | Imperative (Subclassing `nn.Module`)             | Declarative (Sequential and Functional APIs)        |
| **Training Loop**     | Custom loops                                     | Built-in `model.fit()` with options for custom loops |
| **Customization**     | Highly flexible and customizable                 | Flexible but more structured                        |
| **Data Handling**     | `DataLoader` and `Dataset`                       | `tf.data` API                                       |
| **Deployment**        | TorchScript, ONNX                                | TensorFlow Serving, TensorFlow Lite                  |
| **Community Focus**   | Research and prototyping                          | Industry and production                              |
