This notebook compares two otherwise identical neural networks: one that applies a ReLU activation to its hidden layer and one that applies no hidden-layer activation. Both networks keep the sigmoid on the output layer.

In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [None]:
# Feature count of the synthetic dataset; the network's first layer uses it as its input width.
INPUT_DIM = 100

In [None]:
# Build the synthetic classification dataset. Every feature is informative
# (n_informative == n_features, no redundant columns) and the fixed
# random_state makes the generated samples reproducible.
X, y = make_classification(
    n_samples=5000, random_state=7,
    n_features=INPUT_DIM, n_informative=INPUT_DIM, n_redundant=0,
)

In [None]:
# Sanity check before displaying: one label per row and INPUT_DIM features per sample.
assert X.shape == (len(y), INPUT_DIM), "features/labels are misaligned"
X.shape, y.shape

((5000, 100), (5000,))

In [None]:
# Hold out 20% of the samples as a test set; the fixed random_state keeps
# the partition identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# The split must not lose samples, and features/labels must stay aligned in both parts.
assert len(X_train) + len(X_test) == len(X), "split dropped or duplicated samples"
assert len(X_train) == len(y_train) and len(X_test) == len(y_test), "features/labels are misaligned"
X_train.shape, X_test.shape

((4000, 100), (1000, 100))

In [None]:
# Convert data to PyTorch tensors.
# torch.tensor(..., dtype=torch.float32) is the recommended way to build a tensor
# from existing data; it replaces the legacy torch.FloatTensor(ndarray) constructor
# and yields the same float32 copies. Labels are stored as floats because
# nn.BCELoss expects floating-point targets.
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32)

In [None]:
class NeuralNetwork(nn.Module):
    """Two-layer classifier whose hidden-layer ReLU can be switched off.

    The network is ``fc1 -> (optional ReLU) -> fc2 -> sigmoid``. With the ReLU
    disabled, the two linear layers compose into a single affine map, so the
    model can only represent a linear decision boundary.

    Args:
        use_activation: If truthy, apply ReLU to the hidden layer's output.
        input_dim: Number of input features. Defaults to the notebook-level
            ``INPUT_DIM`` when omitted.
        hidden_dim: Width of the hidden layer (16 by default).
    """

    def __init__(self, use_activation, input_dim=None, hidden_dim=16):
        super().__init__()
        # Resolve the default lazily so the class definition itself does not
        # depend on INPUT_DIM being defined.
        if input_dim is None:
            input_dim = INPUT_DIM
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, 1)
        self.use_activation = use_activation

    def forward(self, x):
        """Return sigmoid outputs with a trailing dimension of size 1."""
        hidden = self.fc1(x)
        if self.use_activation:
            hidden = F.relu(hidden)
        # The output sigmoid is always applied; only the hidden non-linearity is optional.
        return torch.sigmoid(self.fc2(hidden))

In [None]:
def train_and_evaluate(model, X_train=None, y_train=None, X_test=None, y_test=None,
                       epochs=100, lr=0.001, log_every=5):
    """Train ``model`` with full-batch Adam on BCE loss and return its test accuracy.

    Args:
        model: Module mapping a float feature tensor to probabilities of shape (N, 1).
        X_train: Training features. Defaults to the notebook-level ``X_train_tensor``.
        y_train: Training labels (0/1 floats). Defaults to ``y_train_tensor``.
        X_test: Test features. Defaults to ``X_test_tensor``.
        y_test: Test labels (0/1 floats). Defaults to ``y_test_tensor``.
        epochs: Number of full-batch gradient steps.
        lr: Learning rate passed to Adam.
        log_every: Print the loss every ``log_every`` epochs; 0 or None disables logging.

    Returns:
        float: Fraction of test samples whose thresholded (> 0.5) prediction
        matches the label.

    Raises:
        ValueError: If the test set is empty.
    """
    # Fall back to the notebook-level tensors so existing one-argument calls keep working.
    if X_train is None:
        X_train = X_train_tensor
    if y_train is None:
        y_train = y_train_tensor
    if X_test is None:
        X_test = X_test_tensor
    if y_test is None:
        y_test = y_test_tensor

    criterion = nn.BCELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    # BCELoss needs targets shaped like the model's (N, 1) output.
    targets = y_train.reshape(-1, 1)

    # Explicitly enter training mode: this function leaves the model in eval mode,
    # so a second call on the same model would otherwise train in eval mode.
    model.train()
    for epoch in range(epochs):
        optimizer.zero_grad()
        outputs = model(X_train)
        loss = criterion(outputs, targets)
        if log_every and epoch % log_every == 0:
            # .item() prints a plain number instead of the tensor repr with grad_fn.
            print(f"epoch {epoch:3d}  loss {loss.item():.4f}")
        loss.backward()
        optimizer.step()

    # Evaluate the model on the test set.
    model.eval()
    with torch.no_grad():
        probabilities = model(X_test)
        # Flatten both sides so the comparison is element-wise, not broadcast (N, 1) vs (N,).
        predicted = (probabilities > 0.5).reshape(-1)
        actual = y_test.reshape(-1) > 0.5
        total = actual.numel()
        if total == 0:
            raise ValueError("cannot compute accuracy on an empty test set")
        # Integer count / total gives an exact Python float (no float32 rounding).
        correct = int((predicted == actual).sum().item())

    return correct / total

In [None]:
# Create and train the model without activation functions.
# Seed PyTorch's RNG first so the weight initialisation, and therefore the
# reported accuracy, is reproducible across runs.
torch.manual_seed(42)
model_without_activation = NeuralNetwork(use_activation=False)
accuracy_without_activation = train_and_evaluate(model_without_activation)

tensor(0.9071, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.7036, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.5720, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.4863, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.4303, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.3936, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.3688, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.3513, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.3386, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.3291, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.3219, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.3162, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.3118, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.3082, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.3053, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.3030, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.3011, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.2996, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.2984, grad_fn=<Bina

In [None]:
# Create and train the model with activation functions.
# Seed PyTorch's RNG first so the weight initialisation, and therefore the
# reported accuracy, is reproducible across runs.
torch.manual_seed(42)
model_with_activation = NeuralNetwork(use_activation=True)
accuracy_with_activation = train_and_evaluate(model_with_activation)

tensor(1.0072, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.8534, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.7328, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.6412, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.5724, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.5201, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.4793, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.4465, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.4188, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.3950, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.3742, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.3557, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.3388, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.3231, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.3085, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.2947, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.2817, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.2695, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.2578, grad_fn=<Bina

In [None]:
# Report the two test accuracies, one line per model variant.
for label, accuracy in (
    ("Accuracy without Activation Functions:", accuracy_without_activation),
    ("Accuracy with Activation Functions:", accuracy_with_activation),
):
    print(label, accuracy)

Accuracy without Activation Functions: 0.864
Accuracy with Activation Functions: 0.897


Note that there may be situations where adding an activation function decreases performance. One possible reason is that the added non-linearity lets the model overfit the training data; in that case, adding dropout may help.