Activation Functions - Ex Relu, sigmoid 

In [31]:
import torch
import torch.nn as nn
import torch.optim as optim


**Explanation of the Code in next cell:**

1. **Tensor Creation:**

The first line creates a 3x3 tensor (matrix) using PyTorch with values defined as raw scores (logits). These logits represent unnormalized scores, and the tensor looks like this:

$$
\text{logits} = \begin{bmatrix} 2.5 & 0.4 & 1.1 \\ 0.1 & 2.2 & 0.6 \\ 0.3 & 0.3 & 3.0 \end{bmatrix}
$$

2. **Softmax Application:**

The second line applies the softmax function along each row of the tensor (along `dim=1`). Softmax normalizes these logits into probabilities by computing exponentials of the values, followed by normalizing them so that the sum of each row equals 1.

### Calculation for the First Row:

For the first row of logits \([2.5, 0.4, 1.1]\), the softmax probabilities are calculated as follows:

1. **Exponentiate each element:**

$$
e^{2.5} = 12.1825, \quad e^{0.4} = 1.49182, \quad e^{1.1} = 3.00417
$$

2. **Sum the exponentials:**

$$
12.1825 + 1.49182 + 3.00417 = 16.67849
$$

3. **Calculate softmax probabilities by dividing each exponential by the sum:**

$$
\text{softmax}(2.5) = \frac{12.1825}{16.67849} = 0.7306, \quad \text{softmax}(0.4) = \frac{1.49182}{16.67849} = 0.0895, \quad \text{softmax}(1.1) = \frac{3.00417}{16.67849} = 0.1800
$$

Thus, the softmax probabilities for the first row are approximately:

$$
[0.7306, 0.0895, 0.1800]
$$



In [32]:
logits = torch.tensor([[2.5,0.4,1.1], [0.1,2.2,0.6],[0.3,0.3,3.0]]) #creates a 3D matrix tensor
softmax_prob = torch.softmax(logits, dim=1)
print(softmax_prob)


tensor([[0.7304, 0.0894, 0.1801],
        [0.0925, 0.7551, 0.1524],
        [0.0592, 0.0592, 0.8815]])


Example Class 1

In [33]:
class SimpleNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(SimpleNN, self).__init__()
        self.layer1 = nn.Linear(input_size, hidden_size)
        self.LeakyReLU = nn.LeakyReLU(negative_slope=0.01) #use leaky relu activation fucntionn
        self.layer2 = nn.Linear(hidden_size, output_size)
    
    def forward(self, x):
        x = self.layer1(x)
        x = self.LeakyReLU(x)
        x = self.layer2(x)

        return x
    
input_size = 10
hidden_size =  20
output_size = 1

model = SimpleNN(input_size, hidden_size, output_size)

input_data = torch.randn(5, input_size) #crreate a 5,10 tensor
print(input_data.shape)

output_data = model(input_data)

print(input_data)
print(output_data)

torch.Size([5, 10])
tensor([[ 1.4940, -1.1026,  0.3225, -1.3345,  0.9836, -1.0878, -1.5907,  0.5868,
          0.3288,  0.6612],
        [ 0.2787,  0.5131, -1.3601,  0.5727,  1.9966, -1.4488, -1.5884, -0.0986,
          1.1919, -0.7555],
        [-1.1330, -1.9908,  0.6333,  0.4332,  1.1353,  0.4069, -1.2508, -0.1780,
          0.3243, -2.2465],
        [-1.1098,  2.1682,  1.1992,  0.7613,  0.0981,  1.3720,  0.4172,  0.0177,
         -0.4255, -0.1528],
        [-1.2229,  1.1538, -0.4244, -0.7226,  1.7763,  0.6990, -1.7814, -0.8643,
          0.2658,  0.6488]])
tensor([[0.5391],
        [0.2320],
        [0.5487],
        [0.2900],
        [0.4224]], grad_fn=<AddmmBackward0>)


Example class 2 - use activation function within forward pass

In [34]:
class SimpleNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(SimpleNN, self).__init__() #call constrictor of parent class, and inherit from the parent class
        self.layer1 = nn.Linear(input_size, hidden_size)
        self.layer2 = nn.Linear(hidden_size, output_size)

    def forward(self,x):
        x_relu = torch.relu(self.layer1(x))
        x_selu = torch.selu(self.layer2(x_relu))
        return  x_relu, x_selu

input_size = 10
hidden_size =  20
output_size = 1

model = SimpleNN(input_size, hidden_size, output_size)

input_data = torch.randn(5, input_size) #crreate a 5,10 tensor
print(input_data.shape)

output_relu, output_selu = model(input_data)

print(input_data)
print(output_relu)
print(output_selu)

torch.Size([5, 10])
tensor([[ 1.5656,  1.0194,  0.5143,  1.4034,  0.2577,  1.5063,  0.9214,  1.2059,
          0.2321, -0.5806],
        [ 2.0721,  0.9875,  2.2715,  0.1837, -0.7467, -1.1497, -0.2317, -0.2960,
          0.8824, -0.4550],
        [ 1.2285, -0.9806, -1.9417, -1.4106,  0.2618,  0.1571, -1.2504,  0.2217,
         -0.3036,  2.1354],
        [ 0.7615,  0.0564, -1.6237,  0.8006, -1.0985, -1.5476,  0.1067, -0.9702,
         -0.0517, -1.7567],
        [-0.9281,  1.3078, -0.3212, -0.3281,  1.3185, -2.0431,  1.3319, -1.5082,
         -2.0685, -0.1016]])
tensor([[1.3799, 0.0000, 0.1703, 0.0000, 0.0000, 0.4438, 0.4112, 0.5125, 0.2169,
         0.0000, 0.0311, 0.0000, 0.7615, 0.8759, 0.0000, 0.4059, 0.0000, 0.0000,
         0.1900, 0.0000],
        [0.0000, 0.4798, 1.2891, 0.0000, 0.0000, 0.0000, 0.1439, 1.2765, 1.1154,
         0.3278, 0.3918, 0.9152, 0.8188, 0.5572, 1.0659, 0.0000, 0.0000, 0.8190,
         0.5116, 0.0000],
        [0.0000, 0.4218, 0.2870, 0.0770, 0.3344, 0.0000, 0