In [2]:
# softmax(x_i) = e^(x_i) / sum_j e^(x_j)
# Softmax applies the exponential function to each element, and normalizes
# by dividing by the sum of all these exponentials
# -> squashes the output to be between 0 and 1 = probability
# sum of all probabilities is 1
import torch 
import torchvision
import numpy as np

In [3]:
def softmax(x, axis=0):
    """Return the softmax of ``x`` along ``axis``.

    Each element is exponentiated and divided by the sum of the exponentials
    along ``axis``, so the values along that axis lie in [0, 1] and sum to 1.

    Args:
        x: array-like of raw scores.
        axis: axis along which to normalize. Defaults to 0, which is the
            axis the original implementation always used.

    Returns:
        numpy.ndarray with the same shape as ``x``.
    """
    x = np.asarray(x)
    # Subtracting the per-axis maximum does not change the result
    # mathematically, but keeps np.exp from overflowing to inf (which
    # would otherwise yield inf / inf = nan for large scores).
    shifted = x - np.max(x, axis=axis, keepdims=True)
    exps = np.exp(shifted)
    # keepdims so the denominator broadcasts correctly for any axis.
    return exps / np.sum(exps, axis=axis, keepdims=True)

In [4]:
# Raw scores for four classes, turned into probabilities by softmax.
x = np.array([1, 2, 3, 4])
prob = softmax(x)
print(prob)
# softmax converts the raw scores into probabilities, which tell us which output is most likely

[0.0320586  0.08714432 0.23688282 0.64391426]


In [8]:
# Same computation using PyTorch's built-in softmax.
new_tensor = torch.tensor([1.0, 2.0, 3.0, 4.0], dtype=torch.float32)
# dim=0 normalizes along axis 0, the only axis of this 1-D tensor.
prob_new = torch.softmax(input=new_tensor, dim=0)
print(prob_new)

tensor([0.0321, 0.0871, 0.2369, 0.6439])


In [9]:
# as we can see similar values are generated

![cross_entropy.png](attachment:cross_entropy.png)

In [10]:
# softmax is usually paired with cross-entropy 

In [11]:
# implementing cross-entropy:
# Cross entropy
# Cross-entropy loss, or log loss, measures the performance of a classification model 
# whose output is a probability value between 0 and 1. 
# -> loss increases as the predicted probability diverges from the actual label
def cross_entropy(actual, predicted, eps=1e-15):
    """Return the summed cross-entropy loss between ``actual`` and ``predicted``.

    Computes ``-sum(actual * log(predicted))`` over all entries. The result
    is a sum, not a mean: it is not divided by the number of entries.

    Args:
        actual: array-like of target values (e.g. a one-hot vector).
        predicted: array-like of predicted probabilities, same shape as
            ``actual``.
        eps: lower bound applied to ``predicted`` before taking the log.

    Returns:
        The loss as a numpy float scalar.
    """
    actual = np.asarray(actual)
    # Clip so log(0) = -inf never appears: without this, 0 * -inf evaluates
    # to nan and poisons the whole sum, even for a perfect prediction.
    predicted = np.clip(np.asarray(predicted, dtype=float), eps, 1.0)
    loss = -np.sum(actual * np.log(predicted))
    return loss # / float(predicted.shape[0])
# we are not dividing by N

In [12]:

# The target must be one-hot encoded:
#   class 0 -> [1 0 0]
#   class 1 -> [0 1 0]
#   class 2 -> [0 0 1]
Y = np.array([1, 0, 0])

# Two candidate probability vectors: one favours class 0, the other class 2.
Y_pred_good = np.array([0.7, 0.2, 0.1])
Y_pred_bad = np.array([0.1, 0.3, 0.6])

l1, l2 = (cross_entropy(Y, pred) for pred in (Y_pred_good, Y_pred_bad))
print(f'Loss1 numpy: {l1:.4f}')
print(f'Loss2 numpy: {l2:.4f}')

Loss1 numpy: 0.3567
Loss2 numpy: 2.3026


In [13]:
# In the first example the true class (index 0) gets probability 0.7, the highest of all, so the loss is low.
# In the second example it gets only 0.1, which is not the highest, so the loss is high.


In [14]:
# now we'll apply cross entropy loss which is given in pytorch

![nn.crossentropy.png](attachment:nn.crossentropy.png)

In [15]:
# We don't need to apply softmax separately when using nn.CrossEntropyLoss,
# as it is already applied inside the loss.
# The actual labels y should be class indices, not one-hot encoded vectors.
# y_pred should be the raw scores (logits), with no softmax applied beforehand.

In [16]:
import torch.nn as nn


In [19]:
loss = nn.CrossEntropyLoss()

# Target given as a class index: the correct class here is 0.
y = torch.tensor([0])

# One sample with three classes, so each prediction holds three raw scores
# (shape 1 x 3); no softmax is applied before passing them to the loss.
y_pred_good = torch.tensor([[2.0, 1.0, 0.1]])  # highest score on class 0
y_pred_bad = torch.tensor([[0.5, 2.0, 0.3]])   # highest score on class 1

l1, l2 = loss(y_pred_good, y), loss(y_pred_bad, y)
print("good prediciton", l1.item())
print("bad prediciton", l2.item())

good prediciton 0.4170299470424652
bad prediciton 1.840616226196289
