# Applying Dropout to Prevent Overfitting

## Step 1: Setup and Data Preparation

In [None]:
# Step 1: Setup and Data Preparation
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

# Run on the GPU when PyTorch reports one as available; otherwise use the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Preprocessing pipeline: convert each image to a tensor, then standardize it
# with the MNIST mean and std.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

# MNIST splits (downloaded into ./data when missing). The train=False split
# serves as the validation set in this notebook.
train_dataset = torchvision.datasets.MNIST(
    root='./data',
    train=True,
    transform=transform,
    download=True,
)
val_dataset = torchvision.datasets.MNIST(
    root='./data',
    train=False,
    transform=transform,
    download=True,
)

# Batch iterators: training batches are reshuffled every epoch, while the
# validation set is read in a fixed order using larger batches.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=1000, shuffle=False)

100%|██████████| 9.91M/9.91M [00:00<00:00, 17.9MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 481kB/s]
100%|██████████| 1.65M/1.65M [00:01<00:00, 1.23MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 3.15MB/s]


## Step 2: PyTorch Dropout Integration


In [None]:
# Step 2: PyTorch Dropout Integration
# Define a neural network with Dropout
class DropoutNetworkRefactored2(nn.Module):
    """Fully connected classifier with dropout after each hidden layer.

    Each input sample is flattened to a vector, passed through two
    ``Linear -> ReLU -> Dropout`` stages, and finally through a linear layer
    that returns raw (un-normalized) class scores.

    Args:
        input_dim: Number of features per sample after flattening.
        hidden_dim1: Width of the first hidden layer.
        hidden_dim2: Width of the second hidden layer.
        output_dim: Number of scores produced per sample.
        dropout_rate: Probability ``p`` given to both ``nn.Dropout`` layers.
    """

    def __init__(self, input_dim=784, hidden_dim1=256, hidden_dim2=128, output_dim=10, dropout_rate=0.5):
        super().__init__()

        # Layers are created in forward order; the attribute names are part
        # of the state_dict keys, so they must stay stable.
        self.input_to_hidden1 = nn.Linear(input_dim, hidden_dim1)
        self.dropout_after_hidden1 = nn.Dropout(p=dropout_rate)

        self.hidden1_to_hidden2 = nn.Linear(hidden_dim1, hidden_dim2)
        self.dropout_after_hidden2 = nn.Dropout(p=dropout_rate)

        self.hidden2_to_output = nn.Linear(hidden_dim2, output_dim)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Compute class scores for a batch.

        Args:
            x: Batch whose first dimension indexes samples; all remaining
                dimensions are flattened into one feature dimension.

        Returns:
            Tensor with one row of ``output_dim`` raw scores per sample.
        """
        # Flatten everything except the batch dimension. reshape (unlike
        # view) also accepts non-contiguous inputs such as transposed or
        # permuted batches, copying only when it has to.
        x = x.reshape(x.size(0), -1)

        x = torch.relu(self.input_to_hidden1(x))
        x = self.dropout_after_hidden1(x)

        x = torch.relu(self.hidden1_to_hidden2(x))
        x = self.dropout_after_hidden2(x)

        # No softmax here: the final layer's output is returned as-is.
        return self.hidden2_to_output(x)

## Step 3: Training the Model with Dropout


In [None]:
# Step 3: Training the Model with Dropout
# Build the network on the selected device and pair it with a cross-entropy
# loss and an Adam optimizer.
model = DropoutNetworkRefactored2(dropout_rate=0.5).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

num_epochs = 10 # Reduced for quicker demonstration

for epoch in range(num_epochs):
    # Training phase. Dropout is active here, so the training loss and
    # accuracy below are measured on the dropout-perturbed network.
    model.train()  # Enable dropout behavior
    running_loss = 0.0
    correct_train = 0
    total_train = 0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # criterion returns the batch mean (default reduction), so scale it
        # by the batch size; otherwise a smaller final batch would be
        # over-weighted in the epoch average.
        batch_size = labels.size(0)
        running_loss += loss.item() * batch_size
        predicted = outputs.argmax(dim=1)
        total_train += batch_size
        correct_train += (predicted == labels).sum().item()

    # Per-sample averages over the whole epoch.
    train_loss = running_loss / total_train
    train_acc = 100 * correct_train / total_train

    # Validation phase
    model.eval()  # Disable dropout for evaluation
    validation_loss = 0.0
    correct_val = 0
    total_val = 0
    with torch.no_grad(): # No gradients needed for validation
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            # Same sample-weighted accumulation as in the training phase.
            batch_size = labels.size(0)
            validation_loss += loss.item() * batch_size
            predicted = outputs.argmax(dim=1)
            total_val += batch_size
            correct_val += (predicted == labels).sum().item()

    val_loss = validation_loss / total_val
    val_acc = 100 * correct_val / total_val

    print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%, Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%')

Epoch [1/10], Train Loss: 0.4178, Train Acc: 87.09%, Val Loss: 0.1491, Val Acc: 95.48%
Epoch [2/10], Train Loss: 0.2305, Train Acc: 93.31%, Val Loss: 0.1185, Val Acc: 96.57%
Epoch [3/10], Train Loss: 0.1962, Train Acc: 94.32%, Val Loss: 0.1040, Val Acc: 96.95%
Epoch [4/10], Train Loss: 0.1737, Train Acc: 94.92%, Val Loss: 0.0898, Val Acc: 97.11%
Epoch [5/10], Train Loss: 0.1645, Train Acc: 95.17%, Val Loss: 0.0880, Val Acc: 97.44%
Epoch [6/10], Train Loss: 0.1514, Train Acc: 95.59%, Val Loss: 0.0891, Val Acc: 97.30%
Epoch [7/10], Train Loss: 0.1461, Train Acc: 95.75%, Val Loss: 0.0814, Val Acc: 97.50%
Epoch [8/10], Train Loss: 0.1353, Train Acc: 96.07%, Val Loss: 0.0845, Val Acc: 97.57%
Epoch [9/10], Train Loss: 0.1364, Train Acc: 95.92%, Val Loss: 0.0819, Val Acc: 97.56%
Epoch [10/10], Train Loss: 0.1293, Train Acc: 96.09%, Val Loss: 0.0789, Val Acc: 97.65%
