Let's investigate the use of dropout in neural networks by comparing the performance of the same network trained with and without dropout.

In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [None]:
# Number of features per sample; used both to generate the synthetic data
# and as the input width of the network's first linear layer.
INPUT_DIM = 128

In [None]:
# Synthetic classification data: 300 samples in which every one of the
# INPUT_DIM features is informative (none redundant). The fixed random_state
# makes the generated dataset repeatable.
X, y = make_classification(
    n_samples=300,
    random_state=7,
    n_features=INPUT_DIM,
    n_informative=INPUT_DIM,
    n_redundant=0,
)

In [None]:
# Hold out 20% of the samples as a test set; the fixed random_state keeps the
# split identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Display the shapes of the training and test feature matrices as a sanity check.
X_train.shape, X_test.shape

((240, 128), (60, 128))

In [None]:
# Convert the NumPy arrays to float32 PyTorch tensors. The labels are stored
# as floats too, because they are later used as targets for nn.BCELoss.
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32)

In [None]:
class NeuralNetwork(nn.Module):
    """Small fully connected binary classifier with optional dropout.

    Layer widths are INPUT_DIM -> 64 -> 16 -> 1. Both hidden layers are
    followed by ReLU, and the single output is squashed by a sigmoid.
    """

    def __init__(self, use_dropout):
        """Build the layers.

        Args:
            use_dropout: When truthy, ``forward`` routes each hidden
                activation through its dropout layer; otherwise the dropout
                layers exist but are never applied.
        """
        super().__init__()
        self.fc1 = nn.Linear(INPUT_DIM, 64)
        self.fc2 = nn.Linear(64, 16)
        self.fc3 = nn.Linear(16, 1)
        # p=0.2: each activation is zeroed with 20% probability in training mode.
        self.dropout1 = nn.Dropout(0.2)
        self.dropout2 = nn.Dropout(0.2)
        self.act1 = nn.ReLU()
        self.act2 = nn.ReLU()
        self.use_dropout = use_dropout

    def forward(self, x):
        """Return the sigmoid output of the network for the input batch ``x``."""
        hidden = self.act1(self.fc1(x))
        if self.use_dropout:
            hidden = self.dropout1(hidden)

        hidden = self.act2(self.fc2(hidden))
        if self.use_dropout:
            hidden = self.dropout2(hidden)

        logits = self.fc3(hidden)
        return torch.sigmoid(logits)

In [None]:
# Function to train and evaluate the model
def train_and_evaluate(model, *, num_epochs=100, lr=0.001):
    """Train ``model`` on the training tensors and return its test accuracy.

    Training is full-batch: every epoch performs one Adam step on the whole
    of ``X_train_tensor`` / ``y_train_tensor`` using binary cross-entropy.
    The loss tensor is printed every 5th epoch (starting with epoch 0).

    Args:
        model: Network whose output is a probability per sample (it is fed
            directly to ``nn.BCELoss`` and thresholded at 0.5).
        num_epochs: Number of full-batch optimisation steps.
        lr: Learning rate passed to Adam.

    Returns:
        Accuracy on ``X_test_tensor`` / ``y_test_tensor`` as computed by
        ``accuracy_score``.

    Note:
        The model is left in eval mode when this function returns.
    """
    criterion = nn.BCELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    # The model emits one column per sample, so the targets need shape (N, 1).
    train_targets = y_train_tensor.view(-1, 1)

    # Explicitly enter training mode so dropout is active even if the model
    # was previously switched to eval mode (e.g. by an earlier call to this
    # function, which ends with model.eval()).
    model.train()

    # Training loop
    for epoch in range(num_epochs):
        optimizer.zero_grad()
        outputs = model(X_train_tensor)
        loss = criterion(outputs, train_targets)
        if epoch % 5 == 0:
            print(loss)
        loss.backward()
        optimizer.step()

    # Evaluate the model on the test set: eval mode disables dropout and
    # no_grad avoids building the autograd graph.
    model.eval()
    with torch.no_grad():
        pred = model(X_test_tensor)
        predictions = (pred > 0.5).float().numpy()
        accuracy = accuracy_score(y_test_tensor, predictions)

    return accuracy

In [None]:
# Baseline: create the network with dropout disabled, train it, and keep its
# test accuracy for the comparison below.
model_without_dropout = NeuralNetwork(use_dropout=False)
accuracy_without_dropout = train_and_evaluate(model_without_dropout)

tensor(0.7545, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.6060, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.4941, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.3908, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.2944, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.2088, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.1380, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.0855, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.0512, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.0300, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.0179, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.0113, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.0076, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.0055, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.0042, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.0034, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.0028, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.0024, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.0021, grad_fn=<Bina

In [None]:
# Create and train the model with dropout
model_with_dropout = NeuralNetwork(use_dropout=True)
accuracy_with_dropout = train_and_evaluate(model_with_dropout)

tensor(0.7664, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.6336, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.5635, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.5094, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.4458, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.3597, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.2605, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.2278, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.1905, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.1346, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.0949, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.0804, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.0653, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.0487, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.0351, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.0259, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.0211, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.0192, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.0161, grad_fn=<Bina

In [None]:
# Report the test-set accuracy of both models side by side.
print("Accuracy without dropout :", accuracy_without_dropout)
print("Accuracy with dropout:", accuracy_with_dropout)

Accuracy without dropout : 0.7
Accuracy with dropout: 0.7333333333333333


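Overfitting is likely to occur in the following cases:
- The architecture is complex relative to the problem (many parameters)
- There are too few training samples (here: only 240 samples with 128 features each)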

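Both models drive the training loss close to zero while test accuracy stays around 0.70–0.73, which is a sign of overfitting. Adding dropout can help mitigate this: randomly zeroing activations during training discourages the network from relying on any single unit. Note that the gain observed here (0.70 vs. 0.73) corresponds to just two of the 60 test samples in a single unseeded run, so the experiment should be repeated over several random seeds before drawing a firm conclusion.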