# Activation Function

In [3]:
import torch
import torch.nn as nn
import numpy as np

# Generate toy circular dataset
def generate_circular_data(n_samples=1000):
    X = np.random.uniform(-1.5, 1.5, (n_samples, 2))
    y = (X[:, 0]**2 + X[:, 1]**2 < 1).astype(int)  # 1 for inside, 0 for outside
    return torch.tensor(X, dtype=torch.float32), torch.tensor(y, dtype=torch.long)

X, y = generate_circular_data()

# Linear Model (No activation)
class LinearNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(2, 10)
        self.fc2 = nn.Linear(10, 2)  # 2 classes
    def forward(self, x):
        x = self.fc1(x)
        x = self.fc2(x)  # No non-linear activation
        return x

# Non-Linear Model (ReLU)
class NonLinearNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(2, 10)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(10, 2)
    def forward(self, x):
        x = self.relu(self.fc1(x))  # Non-linear activation
        x = self.fc2(x)
        return x

# Training function (simplified)
def train_model(model, X, y, epochs=100):
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
    for _ in range(epochs):
        optimizer.zero_grad()
        outputs = model(X)
        loss = criterion(outputs, y)
        loss.backward()
        optimizer.step()
    return model

# Train both models
linear_model = LinearNN()
non_linear_model = NonLinearNN()
train_model(linear_model, X, y)
train_model(non_linear_model, X, y)

# Evaluate accuracy (simplified)
def get_accuracy(model, X, y):
    with torch.no_grad():
        outputs = model(X)
        _, predicted = torch.max(outputs, 1)
        accuracy = (predicted == y).float().mean().item()
    return accuracy

print("Linear Model Accuracy:", get_accuracy(linear_model, X, y))    # Likely ~50-60%
print("Non-Linear Model Accuracy:", get_accuracy(non_linear_model, X, y))  # Likely ~90%+

Linear Model Accuracy: 0.6420000195503235
Non-Linear Model Accuracy: 0.9020000100135803


Available Activations in PyTorch

In [5]:
import torch
import torch.nn as nn

# Example inputs
x = torch.tensor([-1.0, 0.0, 3.0])

# Common activation functions
sigmoid = nn.Sigmoid()(x)  # [0.269, 0.5, 0.731]
relu = nn.ReLU()(x)        # [0.0, 0.0, 1.0]
tanh = nn.Tanh()(x)        # [-0.761, 0.0, 0.761]
gelu = nn.GELU()(x)        # [-0.159, 0.0, 0.841]
leaky_relu = nn.LeakyReLU(0.1)(x)  # [-0.1, 0.0, 1.0]

print("Sigmoid:", sigmoid)
print("ReLU:", relu)
print("Tanh:", tanh)
print("GELU:", gelu)
print("Leaky ReLU:", leaky_relu)

Sigmoid: tensor([0.2689, 0.5000, 0.9526])
ReLU: tensor([0., 0., 3.])
Tanh: tensor([-0.7616,  0.0000,  0.9951])
GELU: tensor([-0.1587,  0.0000,  2.9960])
Leaky ReLU: tensor([-0.1000,  0.0000,  3.0000])


## ReLU Activation Function Code

In [1]:
inputs = [0, 2, -1, 3.3, -2.7, 1.1, 2.2, -100]

In [2]:
outputs = []
for i in inputs:
    if i > 0:
        outputs.append(i)
    else:
        outputs.append(0)

outputs

[0, 2, 0, 3.3, 0, 1.1, 2.2, 0]

In [3]:
inputs = [0, 2, -1, 3.3, -2.7, 1.1, 2.2, -100]

In [4]:
outputs = []
for i in inputs:
    outputs.append(max(i,0))

outputs

[0, 2, 0, 3.3, 0, 1.1, 2.2, 0]

https://numpy.org/devdocs/reference/generated/numpy.maximum.html

In [8]:
import numpy as np
output = np.maximum(0, inputs)
output

array([0. , 2. , 0. , 3.3, 0. , 1.1, 2.2, 0. ])

In [14]:
class Activation_ReLU:

    # Forward pass
    def forward(self,inputs):
        # Calulator output values from 
        self.outputs =  np.maximum(0,inputs)

In [13]:
class Layer_Dense:

    def __init__(self,n_inputs,n_neurons):
        self.weights =  0.01 * np.random.rand(n_inputs,n_neurons)
        self.biases = np.zeros((1, n_neurons))
    
    def forward(self,inputs):
        self.outputs = np.dot(inputs, self.weights) + self.biases
    

In [29]:
from nnfs.datasets import spiral_data


In [30]:
# Create dataset
X, y = spiral_data(samples=100, classes=3)

In [31]:
# Create Dense layer with 2 input features and 3 output values
dense1 = Layer_Dense(2, 3)

(2, 3)


In [21]:
X.shape

(300, 2)

In [32]:
# Create ReLU activation (to be used with Dense layer):
activation1 = Activation_ReLU()

In [33]:
dense1.forward(X)

(300, 2)
(2, 3)
(1, 3)


In [18]:
activation1.forward(dense1.outputs)

In [20]:
activation1.outputs[:5]

array([[0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [0.00000000e+00, 2.43705921e-06, 4.92539899e-05],
       [8.20536938e-06, 2.86095750e-05, 1.79837876e-04],
       [0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [1.99606761e-04, 9.11498665e-05, 4.22687713e-04]])

# The Softmax Activation Function

<img src='./assets/formula_softmax.png' width=400  />

In [34]:
layer_outputs = [4.8, 1.21, 2.385]

In [35]:
# e - mathematical constant, we use E here to match a common coding
# style where constants are uppercased
E = 2.71828182846 # you can also use math.e

In [37]:
# For each value in a vector, calculate the exponential value
exp_values = []
for output in layer_outputs:
    exp_values.append(E ** output) # ** - power operator in Python


In [38]:
print('exponentiated values:')
print(exp_values)

exponentiated values:
[121.51041751893969, 3.3534846525504487, 10.85906266492961]


In [40]:
norm_base = sum(exp_values)

In [42]:
norm_values = []
for value in exp_values:
    norm_values.append(value / norm_base)


In [44]:
print(f"Normalized exponentiated values: {norm_values}")

Normalized exponentiated values: [0.8952826639573506, 0.024708306782070668, 0.08000902926057876]


In [50]:
print(f"Sum of normalized values: {sum(norm_values)}")

Sum of normalized values: 1.0


In [51]:
import numpy as np

In [52]:
layer_outputs = [4.8, 1.21, 2.385]

https://www.geeksforgeeks.org/numpy-exp-python/

In [53]:
# For each value in a vector, calculate the exponential values
exp_values = np.exp(layer_outputs)

In [54]:
print(f"Exponentiated values: {exp_values}")

Exponentiated values: [121.51041752   3.35348465  10.85906266]


In [57]:
# Normalized value
norm_values = exp_values / np.sum(exp_values)
print(f'normalized exponentiated values: {norm_values}')
print('sum of normalized values:', np.sum(norm_values))

normalized exponentiated values: [0.89528266 0.02470831 0.08000903]
sum of normalized values: 0.9999999999999999


https://www.geeksforgeeks.org/numpy-sum-in-python/

In [69]:
layer_outputs = np.array([[4.8, 1.21, 2.385],
                            [8.9, -1.81, 0.2],
                            [1.41, 1.051, 0.026]])

In [72]:
print('Sum without axis', np.sum(layer_outputs))

Sum without axis 18.172


In [73]:
print('This will be identical to the above since default is None:',np.sum(layer_outputs, axis=None))

This will be identical to the above since default is None: 18.172


In [74]:
print('Another way to think of it w/ a matrix == axis 0: columns:', np.sum(layer_outputs, axis=0))

Another way to think of it w/ a matrix == axis 0: columns: [15.11   0.451  2.611]


In [75]:
print("But we want to sum the rows instead, like this w/ raw py", np.sum(layer_outputs, axis=1))

But we want to sum the rows instead, like this w/ raw py [8.395 7.29  2.487]


In [76]:
print('Sum axis 1, but keep the same dimension as input ',np.sum(layer_outputs, axis=1, keepdims=True))

Sum axis 1, but keep the same dimension as input  [[8.395]
 [7.29 ]
 [2.487]]


In [1]:
import numpy as np

In [2]:
class Acitvation_Softmax:

    # Forward pass
    def forward(self, inputs):
        # Get unnormalized probabilities
        exp_values = np.exp(inputs - np.max(inputs,axis=1,keepdims=True))
        print(inputs - np.max(inputs,axis=1,keepdims=True))
        print(f"exp_values == {exp_values}")
        print(np.max(inputs,axis=1,keepdims=True))
        # Normalized them for each example
        probabilities = exp_values / np.sum(exp_values, axis=1, keepdims=True)

        self.output = probabilities


In [3]:
softmax = Acitvation_Softmax()

In [4]:
softmax.forward([[1,2,3]])

[[-2 -1  0]]
exp_values == [[0.13533528 0.36787944 1.        ]]
[[3]]


In [5]:
softmax.output

array([[0.09003057, 0.24472847, 0.66524096]])

In [7]:
softmax.forward([[-2,-1,0]])


[[-2 -1  0]]
exp_values == [[0.13533528 0.36787944 1.        ]]
[[0]]


In [8]:
softmax.output

array([[0.09003057, 0.24472847, 0.66524096]])

In [9]:
softmax.forward([[0.5, 1, 1.5]])

[[-1.  -0.5  0. ]]
exp_values == [[0.36787944 0.60653066 1.        ]]
[[1.5]]


In [10]:
softmax.output

array([[0.18632372, 0.30719589, 0.50648039]])

In [11]:
from nnfs.datasets import  spiral_data


In [16]:
X, y = spiral_data(samples=100, classes=3)
dense1 = Layer_Dense(2, 3)
activation1 = Activation_ReLU()
dense2 = Layer_Dense(3, 3)
activation2 = Acitvation_Softmax()
dense1.forward(X)
activation1.forward(dense1.outputs)
dense2.forward(activation1.outputs)
activation2.forward(dense2.outputs)


[[ 0.00000000e+00  0.00000000e+00  0.00000000e+00]
 [ 0.00000000e+00 -4.41134674e-07 -4.23640278e-07]
 [ 0.00000000e+00 -1.29240077e-06 -1.26872340e-06]
 [ 0.00000000e+00 -2.48479969e-06 -2.49803682e-06]
 [ 0.00000000e+00 -3.30416730e-06 -3.32469101e-06]
 [ 0.00000000e+00 -3.90115059e-06 -3.87650504e-06]
 [ 0.00000000e+00 -4.38345275e-06 -4.33483322e-06]
 [ 0.00000000e+00 -4.71518122e-06 -4.63979984e-06]
 [ 0.00000000e+00 -6.37234193e-06 -6.44541518e-06]
 [ 0.00000000e+00 -7.19301792e-06 -7.27303244e-06]
 [ 0.00000000e+00 -5.62481991e-06 -5.80999368e-06]
 [ 0.00000000e+00 -7.02200444e-06 -7.20484108e-06]
 [ 0.00000000e+00 -9.73756106e-06 -9.82871399e-06]
 [ 0.00000000e+00 -1.61860805e-06 -1.91331541e-06]
 [ 0.00000000e+00 -3.76446545e-06 -4.07554898e-06]
 [ 0.00000000e+00 -3.70158671e-07 -7.08598874e-07]
 [ 0.00000000e+00 -1.11584779e-07 -1.54258105e-07]
 [ 0.00000000e+00 -3.14402958e-06 -3.52775050e-06]
 [ 0.00000000e+00 -6.22667493e-06 -6.61652628e-06]
 [ 0.00000000e+00 -5.30321574e-

In [17]:
activation2.output[:5]

array([[0.33333333, 0.33333333, 0.33333333],
       [0.33333343, 0.33333328, 0.33333329],
       [0.33333362, 0.33333319, 0.33333319],
       [0.33333389, 0.33333306, 0.33333305],
       [0.33333407, 0.33333297, 0.33333296]])

<img src='./assets/explain-substract.png' width=800 />