### Implement softmax and compare it to torch.softmax for torch.nn

**Softmax** is S(y_i) = exp(y_i) / sum(exp(y_j))

In [1]:
import numpy as np

def softmaks(x):
    """Numerically stable softmax taken along axis 0.

    Computes ``exp(x_i) / sum_j exp(x_j)``. For a 1-D input the whole vector
    is normalised; for a 2-D input every column is normalised independently.

    Args:
        x: array-like of raw scores.

    Returns:
        np.ndarray with the same shape as ``x`` whose entries along axis 0
        are non-negative and sum to 1.
    """
    x = np.asarray(x)
    if x.size == 0:
        # Nothing to normalise; np.max would raise on an empty reduction.
        return np.exp(x)
    # Softmax is invariant to adding a constant, so shift the largest score
    # to 0. This keeps exp() from overflowing to inf (and inf/inf -> nan).
    shifted = x - np.max(x, axis=0, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=0, keepdims=True)

In [2]:
# Raw scores for three classes; the largest score should receive most of the probability mass.
test = np.array((3.0, 0.5, 0.1), dtype=np.float64)
print("softmaks:", softmaks(x=test))

softmaks: [0.87942377 0.0721875  0.04838873]


In [3]:
import torch

# Wrap the same NumPy scores in a tensor and normalise along its only dimension.
test_tensor = torch.tensor(test)
print("torch.softmax: ", test_tensor.softmax(dim=0))

torch.softmax:  tensor([0.8794, 0.0722, 0.0484], dtype=torch.float64)


Implement **cross-entropy** and compare it to `torch.nn.CrossEntropyLoss`

$$D(\hat{Y}, Y) = -\frac{1}{N} \sum_j Y_j \log(\hat{Y}_j)$$

Loss increases if predicted probability diverges from actual label
Better prediction (less divergence) -> lower loss

One-hot encoded label, e.g. Y=[[1,0,0]], Y_hat=[[0.7,0.2,0.1]]

In [4]:
def kryss_entropi(y, y_hat):
    """Cross-entropy between target weights ``y`` and predicted probabilities ``y_hat``.

    Computes ``-sum(y * log(y_hat))`` using the usual convention
    ``0 * log(0) = 0``, so classes with zero target weight never contribute,
    even when their predicted probability is exactly 0.

    Args:
        y: array-like of target weights (e.g. a one-hot label).
        y_hat: array-like of predicted probabilities, broadcastable with ``y``.

    Returns:
        The summed loss as a NumPy float. The sum is NOT divided by the
        number of samples. It is ``inf`` when a class with non-zero target
        weight has predicted probability 0.
    """
    y, y_hat = np.broadcast_arrays(np.asarray(y), np.asarray(y_hat))
    # Only evaluate log() where the target is non-zero: a plain
    # y * np.log(y_hat) yields 0 * -inf = nan for y == 0 and y_hat == 0.
    active = y != 0
    tap = -np.sum(y[active] * np.log(y_hat[active]))
    return tap

In [5]:
# One-hot ground truth: the sample belongs to class 0.
Y = np.array([1, 0, 0])

# Two candidate probability vectors for that sample.
Y_pred1, Y_pred2 = (
    np.array([0.7, 0.2, 0.1]),
    np.array([0.1, 0.3, 0.6]),
)

# Y_pred1 puts most mass on the true class (low loss expected);
# Y_pred2 puts most mass on a wrong class (high loss expected).

print(f'Good prediction: {kryss_entropi(Y,Y_pred1):.2f}')
print(f'Bad prediction: {kryss_entropi(Y,Y_pred2):.2f}')

Good prediction: 0.36
Bad prediction: 2.30


In [6]:
# Cross-entropy with pytorch
import torch.nn as nn

# Targets are class indices (0, 1, 2 -- e.g. Red, Green, Blue), not one-hot vectors as above.
Y = torch.tensor([2, 0])  # two samples with true classes 2 and 0

# Raw scores of shape n_samples x m_classes = 2x3.
# No softmax is applied here: the PyTorch loss function includes it.
Y_pred1 = torch.tensor([
    [0.1, 0.2, 2.1],
    [3.1, 0.08, 0.2],
])  # good prediction
Y_pred2 = torch.tensor([
    [0.5, 3.1, 0.3],
    [0.1, 0.2, 1.8],
])  # bad prediction

loss = nn.CrossEntropyLoss()  # the loss function to evaluate

# Evaluate both predictions against the same targets.
loss1, loss2 = loss(Y_pred1, Y), loss(Y_pred2, Y)

print(f'Correct class labels: {Y}')

# torch.max(..., 1) returns (values, indices); the indices are the predicted classes.
print(f'Predicted class (good pred): {torch.max(Y_pred1, 1)[1]}')
print(f'Predicted class (bad pred): {torch.max(Y_pred2, 1)[1]}')

print(f'Good prediction: {loss1:.2f}')
print(f'Bad prediction: {loss2:.2f}')



Correct class labels: tensor([2, 0])
Predicted class (good pred): tensor([2, 0])
Predicted class (bad pred): tensor([1, 2])
Good prediction: 0.17
Bad prediction: 2.48


In [7]:
# Example (multiclass) network

class MyNet(nn.Module):
    """Two-layer fully connected classifier that returns raw class scores.

    ``forward`` applies no softmax, so the output is meant to be paired with
    a loss that normalises the scores itself (e.g. ``nn.CrossEntropyLoss``).
    """

    def __init__(self, input_size, hidden_size, n_classes):
        super().__init__()
        # input features -> hidden units
        self.lin1 = nn.Linear(input_size, hidden_size)
        # hidden units -> one score per class
        self.lin2 = nn.Linear(hidden_size, n_classes)

    def forward(self, x):
        """Return the unnormalised class scores for ``x``."""
        hidden = torch.relu(self.lin1(x))
        return self.lin2(hidden)

# Build the multiclass example: flattened 28x28 inputs, 5 hidden units, 3 classes.
model = MyNet(
    input_size=28 * 28,
    hidden_size=5,
    n_classes=3,
)
# The loss includes the softmax, so the network itself outputs raw scores.
crit = nn.CrossEntropyLoss()

In [10]:
# Example (binary classification) network

class BinNet(nn.Module):
    """Two-layer fully connected binary classifier.

    ``forward`` ends with a sigmoid, so the single output per sample lies
    in (0, 1).
    """

    def __init__(self, input_size, hidden_size):
        # There is no n_classes argument: the output layer always has width 1.
        super().__init__()
        self.lin1 = nn.Linear(input_size, hidden_size)  # input -> hidden
        self.relu = nn.ReLU()                           # non-linearity after lin1
        self.lin2 = nn.Linear(hidden_size, 1)           # hidden -> single score

    def forward(self, x):
        """Return the sigmoid of the single output score for ``x``."""
        hidden = self.relu(self.lin1(x))
        score = self.lin2(hidden)
        return torch.sigmoid(score)

# Build the binary example: flattened 28x28 inputs, 5 hidden units (no class count needed).
bin_Model = BinNet(
    input_size=28 * 28,
    hidden_size=5,
)
# Binary cross-entropy loss for the sigmoid output of BinNet.
crit = nn.BCELoss()