In [None]:
# Softmax
# Normalizes the predicted values by converting them to probabilities that add up to 1.

# Softmax formula
# x = [x1, x2, x3, ... xn]
# s(xi) = exp(xi)/sum(exp(x))

import numpy as np
import torch

def softmax(x):
    """Convert raw scores into probabilities that sum to 1 along axis 0.

    Computes s(x_i) = exp(x_i) / sum_j exp(x_j), reducing over axis 0.

    Args:
        x: array-like of scores. A 1-D input is normalized as a whole; for
            higher-rank input each slice along axis 0 is normalized
            independently.

    Returns:
        numpy.ndarray with the same shape as ``x``.
    """
    scores = np.asarray(x)
    if scores.size == 0:
        # Nothing to normalize; np.max would raise on an empty reduction.
        return np.exp(scores)
    # Softmax is invariant to adding a constant to every score, so shifting by
    # the maximum leaves the result unchanged while keeping exp() from
    # overflowing to inf (which would turn the ratio into nan).
    shifted = scores - np.max(scores, axis=0, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=0, keepdims=True)

# Demo 1: push a NumPy vector of scores through the hand-written softmax.
x = np.array([5.4, 3.6, 7.1, 1.9, 4.4])
print(f"Numpy array: {x}")
x_sm = softmax(x)
print(f"Softmaxed numpy array: {x_sm}")

# Demo 2: push the same scores through PyTorch's built-in softmax.
tx = torch.from_numpy(x)
print(f"Tensor: {tx}")
# dim=0 normalizes over the only axis of this 1-D tensor.
tx_sm = tx.softmax(dim=0)
print(f"Softmaxed Tensor: {tx_sm}")


In [None]:
# Cross entropy

# Formula
# cross_entropy D(y_predicted, y_label) = -1/N * sum(y_label_i * log(y_predicted_i))
# The less accurate the prediction, the higher the cross-entropy loss.

import numpy as np
import torch
import torch.nn as nn

# cross entropy with numpy arrays

# Note assumptions for custom cross_entropy loss method
# Label values must be one hot encoded labels e.g. [0, 0, 1, 0, 0]
# Predicted values must be softmaxed probabilities
def cross_entropy(y_predicted, y_label):
    """Cross-entropy between one-hot labels and predicted probabilities.

    Returns -sum(y_label * log(y_predicted)).

    The 1/N factor in the textbook formula averages over N samples. This
    function returns the un-averaged sum, which is already the loss for a
    single sample (N = 1); divide by the number of samples yourself when
    passing a batch.

    Args:
        y_predicted: array-like of predicted probabilities (already
            softmaxed), same shape as ``y_label``.
        y_label: array-like of one-hot encoded labels, e.g. [0, 0, 1, 0, 0].

    Returns:
        The loss as a NumPy float scalar.
    """
    # Floor the probabilities so log() never sees 0: log(0) is -inf, and
    # 0 * -inf is nan, which would poison the sum even for entries whose
    # label is 0.
    eps = 1e-15
    probs = np.maximum(np.asarray(y_predicted, dtype=float), eps)
    labels = np.asarray(y_label, dtype=float)
    return -1 * np.sum(labels * np.log(probs))

# Three possible classes [0, 1, 2]; the one-hot label marks class 0 as correct.
y_label = [1, 0, 0]

# Example predictions. The loss depends only on the probability given to the
# true class (index 0), so the examples are chosen to make that probability
# fall from "very good" to "very bad".
y_pred_good = [0.4, 0.3, 0.3]
y_pred_very_good = [0.8, 0.1, 0.1]
y_pred_bad = [0.1, 0.6, 0.3]
# Previously [0.4, 0.5, 0.1], which put 0.4 on the true class and therefore
# scored exactly like the "good" example; use a clearly worse prediction.
y_pred_very_bad = [0.05, 0.9, 0.05]

for quality, y_pred in (
    ("good", y_pred_good),
    ("very good", y_pred_very_good),
    ("bad", y_pred_bad),
    ("very bad", y_pred_very_bad),
):
    print(f"Numpy Cross entropy for {quality} prediction: {cross_entropy(y_pred, y_label)}")


# cross entropy with nn built in functions
# No softmax in last layer of neural networks because
# nn.CrossEntropyLoss applies (log softmax + negative log likelihood loss)
# y_label has class labels (integer class indices), not One-Hot encoding
# y_predicted has raw scores, no softmax

# Loss module applied below to raw scores and an integer class index.
loss = nn.CrossEntropyLoss()

# single sample
# Three possible classes [0, 1, 2]; the correct class index is 1.
y_label = torch.tensor([1])
# Raw scores with shape n_samples * n_classes = 1*3.
y_prediction_good = torch.tensor([[1.0, 2.0, 0.1]])  # highest score at index 1
y_prediction_bad = torch.tensor([[7.5, 4.2, 0.9]])   # highest score at index 0

# .item() pulls the scalar loss out of the tensor as a Python float.
loss_good = loss(y_prediction_good, y_label).item()
loss_bad = loss(y_prediction_bad, y_label).item()

# ".3f" prints three decimals; the earlier ":3f" only set a minimum width of 3
# and left the default six decimals in place.
print(f"Single sample: Pytorch Cross entropy loss for good prediction: {loss_good:.3f}")
print(f"Single sample: Pytorch Cross entropy loss for bad prediction: {loss_bad:.3f}")

# multiple samples
