## Reducing Overfitting with Weight Regularization
One strategy to combat overfitting neural networks is by penalizing the parameters
(i.e., weights) of the neural network such that they are driven to be small values,
creating a simpler model less prone to overfit. This method is called weight regulari‐
zation or weight decay

In [1]:
# Import libraries
import torch
import torch.nn as nn
import numpy as np
from torch.utils.data import DataLoader, TensorDataset
from torch.optim import RMSprop
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

In [2]:
#Create training and test sets
features, target = make_classification(n_classes=2, n_features=10,
n_samples=1000)
features_train, features_test, target_train, target_test = train_test_split(
features, target, test_size=0.1, random_state=1)
# Set random seed
torch.manual_seed(0)
np.random.seed(0)
# Convert data to PyTorch tensors
x_train = torch.from_numpy(features_train).float()
y_train = torch.from_numpy(target_train).float().view(-1, 1)
x_test = torch.from_numpy(features_test).float()
y_test = torch.from_numpy(target_test).float().view(-1, 1)

In [3]:
# Define a neural network using `Sequential`
class SimpleNeuralNet(nn.Module):
    def __init__(self):
        super(SimpleNeuralNet, self).__init__()
        self.sequential = torch.nn.Sequential(
        torch.nn.Linear(10, 16),
        torch.nn.ReLU(),
        torch.nn.Linear(16,16),
        torch.nn.ReLU(),
        torch.nn.Linear(16, 1),
        torch.nn.Sigmoid()
        )
    def forward(self, x):
        x = self.sequential(x)
        return x
# Initialize neural network
network = SimpleNeuralNet()
# Define loss function, optimizer
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(network.parameters(), lr=1e-4, weight_decay=1e-5)
# Define data loader
train_data = TensorDataset(x_train, y_train)
train_loader = DataLoader(train_data, batch_size=100, shuffle=True)
# Compile the model using torch 2.0's optimizer
network = torch.compile(network)
# Train neural network
epochs = 100
for epoch in range(epochs):
    for batch_idx, (data, target) in enumerate(train_loader):
        optimizer.zero_grad()
        output = network(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
# Evaluate neural network
with torch.no_grad():
    output = network(x_test)
    test_loss = criterion(output, y_test)
    test_accuracy = (output.round() == y_test).float().mean()
    print("Test Loss:", test_loss.item(), "\tTest Accuracy:",
    test_accuracy.item())

Test Loss: 0.4532115161418915 	Test Accuracy: 0.8199999928474426


## Reducing Overfitting with Early Stopping
one of the most common and very effective methods to counter overfitting is to monitor the training process and stop training when the test error starts to increase. This strategy is called early stopping.

here we use the popular library lightning (known as PyTorch Lightning) to use an out-of-the-box one. PyTorch Lightning is a high-level library for PyTorch that provides a lot of useful features. In our solution, we included PyTorch Lightning’s
EarlyStopping(monitor="val_loss", mode="min", patience=3) to define that we wanted to monitor the test (validation) loss at each epoch, and if the test loss has not improved after three epochs (the default), training is interrupted.

In [4]:
# Import libraries
import torch
import torch.nn as nn
import numpy as np
from torch.utils.data import DataLoader, TensorDataset
from torch.optim import RMSprop
import lightning as pl
from lightning.pytorch.callbacks.early_stopping import EarlyStopping
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

# Create training and test sets
features, target = make_classification(n_classes=2, n_features=10,
n_samples=1000)
features_train, features_test, target_train, target_test = train_test_split(
features, target, test_size=0.1, random_state=1)
# Set random seed
torch.manual_seed(0)
np.random.seed(0)
# Convert data to PyTorch tensors
x_train = torch.from_numpy(features_train).float()
y_train = torch.from_numpy(target_train).float().view(-1, 1)
x_test = torch.from_numpy(features_test).float()
y_test = torch.from_numpy(target_test).float().view(-1, 1)

In [5]:
class SimpleNeuralNet(nn.Module):
    def __init__(self):
        super(SimpleNeuralNet, self).__init__()
        self.sequential = torch.nn.Sequential(
        torch.nn.Linear(10, 16),
        torch.nn.ReLU(),
        torch.nn.Linear(16,16),
        torch.nn.ReLU(),
        torch.nn.Linear(16, 1),
        torch.nn.Sigmoid()
        )
    def forward(self, x):
        x = self.sequential(x)
        return x

In [6]:
class LightningNetwork(pl.LightningModule):
    def __init__(self, network):
        super().__init__()
        self.network = network
        self.criterion = nn.BCELoss()
        self.metric = nn.functional.binary_cross_entropy

    def training_step(self, batch, batch_idx):
        # training_step defines the train loop.
        data, target = batch
        output = self.network(data)
        loss = self.criterion(output, target)
        self.log("val_loss", loss)
        return loss
    
    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=1e-3)

In [7]:
# Define data loader
train_data = TensorDataset(x_train, y_train)
train_loader = DataLoader(train_data, batch_size=100, shuffle=True)
# Initialize neural network
network = LightningNetwork(SimpleNeuralNet())
# Train network
trainer = pl.Trainer(callbacks=[EarlyStopping(monitor="val_loss", mode="min",
patience=3)], max_epochs=1000)
trainer.fit(model=network, train_dataloaders=train_loader)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Missing logger folder: /home/isabella/Documents/notes_ml/lightning_logs
2023-12-27 23:53:15.558546: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-12-27 23:53:15.602277: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2023-12-27 23:53:15.837255: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-12-27 23:53:15.837492: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempt

Training: 0it [00:00, ?it/s]

## Reducing Overfitting with Dropout

Dropout is a fairly common method for regularizing smaller neural networks. In
dropout, every time a batch of observations is created for training, a proportion of
the units in one or more layers is multiplied by zero (i.e., dropped). In this setting,
every batch is trained on the same network (e.g., the same parameters), but each
batch is confronted by a slightly different version of that network’s architecture. they learn to be robust to disruptions
(i.e., noise) in the other hidden units, and this prevents the network from simply
memorizing the training data

In [8]:
import torch
import torch.nn as nn
import numpy as np
from torch.utils.data import DataLoader, TensorDataset
from torch.optim import RMSprop
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

In [9]:
# Create training and test sets
features, target = make_classification(n_classes=2, n_features=10,
n_samples=1000)
features_train, features_test, target_train, target_test = train_test_split(
features, target, test_size=0.1, random_state=1)

In [10]:
# Set random seed
torch.manual_seed(0)
np.random.seed(0)
# Convert data to PyTorch tensors
x_train = torch.from_numpy(features_train).float()
y_train = torch.from_numpy(target_train).float().view(-1, 1)
x_test = torch.from_numpy(features_test).float()
y_test = torch.from_numpy(target_test).float().view(-1, 1)

In [11]:
# Define a neural network using `Sequential`
class SimpleNeuralNet(nn.Module):
    def __init__(self):
        super(SimpleNeuralNet, self).__init__()
        self.sequential = torch.nn.Sequential(
        torch.nn.Linear(10, 16),
        torch.nn.ReLU(),
        torch.nn.Linear(16,16),
        torch.nn.ReLU(),
        torch.nn.Linear(16, 1),
        torch.nn.Dropout(0.1), # Drop 10% of neurons
        torch.nn.Sigmoid(),
    )
    def forward(self, x):
        x = self.sequential(x)
        return x

In [12]:
# Initialize neural network
network = SimpleNeuralNet()
# Define loss function, optimizer
criterion = nn.BCELoss()
optimizer = RMSprop(network.parameters())
# Define data loader
train_data = TensorDataset(x_train, y_train)
train_loader = DataLoader(train_data, batch_size=100, shuffle=True)
# Compile the model using torch 2.0's optimizer
network = torch.compile(network)
# Train neural network
epochs = 3
for epoch in range(epochs):
    for batch_idx, (data, target) in enumerate(train_loader):
        optimizer.zero_grad()
        output = network(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
    print("Epoch:", epoch+1, "\tLoss:", loss.item())

Epoch: 1 	Loss: 0.20735645294189453
Epoch: 2 	Loss: 0.20834866166114807
Epoch: 3 	Loss: 0.25975286960601807


In [13]:
with torch.no_grad():
    output = network(x_test)
    test_loss = criterion(output, y_test)
    test_accuracy = (output.round() == y_test).float().mean()
    print("Test Loss:", test_loss.item(), "\tTest Accuracy:",
    test_accuracy.item())

Test Loss: 0.1384032666683197 	Test Accuracy: 0.8999999761581421
