In [47]:
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_breast_cancer
from sklearn.preprocessing import StandardScaler



In [48]:
data = load_breast_cancer()
X,y = data.data,data.target
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [49]:
X_train,X_test,Y_train,Y_test = train_test_split(torch.tensor(X,dtype=torch.float32),
                                                torch.tensor(y,dtype=torch.float32),
                                                test_size = 0.2,
                                                random_state = 42)

In [50]:
class BinaryClassifier(nn.Module):
    def __init__(self,activation):
        super().__init__()
        self.fc1 = nn.Linear(30,16)
        self.fc2 = nn.Linear(16,1)
        self.activation = activation
    def forward(self,x):
        x = self.activation(self.fc1(x))
        return torch.sigmoid(self.fc2(x))

In [51]:
models = {
    "sigmoid Hidden Layer" : BinaryClassifier(nn.Sigmoid()),
    "tanh Hidden Layer" : BinaryClassifier(nn.Tanh()),
    "relu Hidden Layer" : BinaryClassifier(nn.ReLU())
}

In [52]:
def train(model):
    criterion = nn.BCELoss()
    optimizer = torch.optim.Adam(model.parameters(),lr=0.001)
    for epoch in range(200):
        y_pred = model(X_train).squeeze()
        loss = criterion(y_pred,Y_train)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    with torch.no_grad():
        preds = model(X_test).squeeze().round()
        accuracy = (preds==Y_test).float().mean().item()
    return accuracy


results = {name:train(model) for name,model in models.items()}

In [53]:
print("\nActivation Function Performance Comparison:\n")
print("Model | Accuracy")
print("-" * 25)
for name, acc in results.items():
    print(f"{name} | {acc:.4f}")


Activation Function Performance Comparison:

Model | Accuracy
-------------------------
sigmoid Hidden Layer | 0.9825
tanh Hidden Layer | 0.9912
relu Hidden Layer | 0.9737


In [54]:
class DeepNetwork(nn.Module):
    def __init__(self, activation):
        super().__init__()
        self.layers = nn.ModuleList()
        input_dim = 30
        for _ in range(5):
            self.layers.append(nn.Linear(input_dim, input_dim))
        
        self.activation = activation
        self.output = nn.Linear(input_dim, 1)

    def forward(self, x):
        for layer in self.layers:
            x = self.activation(layer(x))
        return torch.sigmoid(self.output(x))

In [55]:
def test_activation(activation, name):
    model = DeepNetwork(activation)
    x = torch.randn(64, 30)
    y = torch.randint(0, 2, (64, 1)).float()

    criterion = nn.BCELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    output = model(x)
    loss = criterion(output, y)
    
    optimizer.zero_grad()
    loss.backward()
    grads = []
    for layer in model.layers:
        grads.append(layer.weight.grad.abs().mean().item())

    return name, grads

In [56]:
results = []
results.append(test_activation(nn.Sigmoid(), "Sigmoid"))
results.append(test_activation(nn.Tanh(), "Tanh"))
results.append(test_activation(nn.ReLU(), "ReLU"))

In [57]:
print("\nVanishing Gradient Demonstration (Deep Network)\n")
print("Layer | Sigmoid | Tanh | ReLU")
print("------------------------------------------")

for i in range(5):
    print(f"{i+1:>2}    | "
          f"{results[0][1][i]:.6f} | "
          f"{results[1][1][i]:.6f} | "
          f"{results[2][1][i]:.6f}")


Vanishing Gradient Demonstration (Deep Network)

Layer | Sigmoid | Tanh | ReLU
------------------------------------------
 1    | 0.000000 | 0.000151 | 0.000107
 2    | 0.000003 | 0.000176 | 0.000091
 3    | 0.000025 | 0.000216 | 0.000100
 4    | 0.000147 | 0.000244 | 0.000082
 5    | 0.001196 | 0.000462 | 0.000264
