<a href="https://colab.research.google.com/github/aravindskumar98/DL/blob/main/SoftMax_and_Cross_Entropy.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [19]:
import torch
import numpy as np
import torch.nn as nn

##in numpy

def softmax(x):
  """Return the softmax of ``x`` computed along axis 0.

  Args:
    x: array-like of scores (logits). For a 2-D input every column is
      normalised independently, because the reduction runs over axis 0.

  Returns:
    numpy array with the same shape as ``x`` whose entries along axis 0
    are non-negative and sum to 1.
  """
  x = np.asarray(x)
  if x.size == 0:
    ## nothing to normalise; keep the empty shape (np.max would raise here)
    return np.exp(x)
  ## softmax is invariant to adding a constant to every score, so subtract
  ## the maximum first: the largest exponent becomes 0 and np.exp cannot
  ## overflow to inf (which would turn the result into nan)
  shifted = x - np.max(x, axis = 0, keepdims = True)
  exps = np.exp(shifted)
  return exps / np.sum(exps, axis = 0, keepdims = True)

## raw scores (logits) for one sample with three classes
x = np.array([2.0, 1.0, 0.1])

## turn the scores into probabilities
outputs = softmax(x)
print("softmax np:", outputs)



softmax np: [0.65900114 0.24243297 0.09856589]


In [13]:
## in torch
x = torch.tensor([2.0, 1.0, 0.1])
## the dimension must be specified; dim = 0 normalises over the only axis of this 1-D tensor
outputs = torch.softmax(x, dim = 0)
## label the result as coming from torch (it previously said "np")
print("softmax torch:", outputs)

softmax np: tensor([0.6590, 0.2424, 0.0986])


In [14]:
## Cross Entropy Loss = -1/n * sum(yi * log(yi_pred))
'''
This measures the performance of a classifier whose output is a probability between 0 and 1. It can also be used in multiclass problems.
The loss increases as the predicted probability diverges from the actual label.
'''

In [18]:
## cross entropy loss in numpy

def cross_entropy(actual, predicted, eps = 1e-15):
  """Cross-entropy between a one-hot target and predicted probabilities.

  Computes ``-sum(actual * log(predicted))`` over all elements; the sum is
  not divided by the number of samples.

  Args:
    actual: array-like one-hot encoded target(s).
    predicted: array-like of predicted probabilities, same shape as ``actual``.
    eps: lower bound applied to ``predicted`` before taking the log.

  Returns:
    The summed loss as a numpy float.
  """
  actual = np.asarray(actual, dtype = float)
  ## log(0) is -inf and 0 * -inf is nan, so an exact 0 probability would
  ## poison the whole sum; clip into [eps, 1] to keep every term finite
  predicted = np.clip(np.asarray(predicted, dtype = float), eps, 1.0)
  loss = -np.sum(actual * np.log(predicted))
  return loss

## the target has to be one hot encoded,
## e.g. class 0 --> [1 0 0]
Y = np.array([1, 0, 0])

## predicted probabilities: the first puts most mass on class 0, the second does not
y_pred_good = np.array([0.7, 0.2, 0.1])
y_pred_bad = np.array([0.1, 0.3, 0.6])

## evaluate both predictions against the same target
l1, l2 = (cross_entropy(Y, y_pred) for y_pred in (y_pred_good, y_pred_bad))
print("Good", l1, "Bad", l2)

Good 0.35667494393873245 Bad 2.3025850929940455


In [20]:
## Same in Pytorch
'''
nn.CrossEntropyLoss already applies nn.LogSoftmax followed by the negative log likelihood loss (nn.NLLLoss).
So we should not implement the softmax layer ourselves.
Y must not be one-hot encoded; it must contain the correct class labels (class indices).
Y_pred has raw scores (logits), no softmax!

'''

loss = nn.CrossEntropyLoss()

## one sample whose correct class is index 0
Y = torch.tensor([0])

## raw scores laid out as nsamples X nclasses --> 1 x 3, hence the nested list
y_pred_good = torch.tensor([[2.0, 1.0, 0.1]])
y_pred_bad = torch.tensor([[0.5, 2.0, 0.3]])

## score both predictions against the same target
l1, l2 = (loss(scores, Y) for scores in (y_pred_good, y_pred_bad))

print(l1, l2)  ## good has lower cross entropy loss

tensor(0.4170) tensor(1.8406)


In [22]:
## predicted class = index of the largest score in each row.
## dim = 1 is the class axis (the second dimension of an nsamples x nclasses tensor).
## argmax returns the indices directly, so there is no max value to discard into `_`.
predictions1 = torch.argmax(y_pred_good, dim = 1)
predictions2 = torch.argmax(y_pred_bad, dim = 1)
print(predictions1.item(), predictions2)

0 tensor([1])


In [27]:
##increase number of samples

loss = nn.CrossEntropyLoss()

## 3 samples: the correct class index of each one
Y = torch.tensor([2, 0, 1])
## raw scores, nsamples X nclasses --> 3 x 3 (one row per sample)
y_pred_good = torch.tensor([[2.0, 1.0, 4.0], [2.0, 1.0, 0.1], [2.0, 3.0, 0.1]])
y_pred_bad = torch.tensor([[5.5, 2.0, 0.3], [2.0, 1.0, 0.1], [2.0, 1.0, 0.1]])

l1 = loss(y_pred_good, Y)
l2 = loss(y_pred_bad, Y)

print(l1, l2)  ## good has lower cross entropy loss

## predicted class = index of the largest score in each row.
## dim = 1 is the class axis (the second dimension), not the first.
## argmax returns the indices directly, so there is no max value to discard into `_`.
predictions1 = torch.argmax(y_pred_good, dim = 1)
predictions2 = torch.argmax(y_pred_bad, dim = 1)
print(predictions1, predictions2)

tensor(0.3132) tensor(2.3564)
tensor([2, 0, 1]) tensor([0, 0, 0])


In [28]:
## NN with Softmax

##since we use cross entropy loss, do not use softmax at the end!!

## MULTI CLASS CLASSIFICATION

class NeuralNet2(nn.Module):
  """Fully connected network for multi-class classification: Linear -> ReLU -> Linear.

  forward() returns the raw output of the last linear layer; no softmax is
  applied inside the model.

  Args:
    input_size: number of input features of the first linear layer.
    hidden_size: number of units in the hidden layer.
    num_classes: number of output scores produced per sample.
  """

  def __init__(self, input_size, hidden_size, num_classes):
    super().__init__()
    self.linear1 = nn.Linear(input_size, hidden_size)
    self.relu = nn.ReLU()
    self.linear2 = nn.Linear(hidden_size, num_classes)

  def forward(self, x):
    """Return one raw score per class for each row of ``x``."""
    hidden = self.relu(self.linear1(x))
    ## deliberately no softmax here: the scores are returned as they are
    return self.linear2(hidden)

## the model outputs raw scores; CrossEntropyLoss applies the (log-)softmax itself
model = NeuralNet2(
  input_size = 28*28,
  hidden_size = 5,
  num_classes = 3,
)
criterion = nn.CrossEntropyLoss()



In [None]:
## BINARY CLASSIFICATION

class NeuralNet1(nn.Module):
  """Fully connected network for binary classification: Linear -> ReLU -> Linear -> sigmoid.

  The last linear layer has a single output unit and forward() passes it
  through a sigmoid, so the model returns one value in [0, 1] per sample.

  Args:
    input_size: number of input features of the first linear layer.
    hidden_size: number of units in the hidden layer.
  """

  def __init__(self,input_size,hidden_size):
    super().__init__()
    self.linear1 = nn.Linear(input_size,hidden_size)
    self.relu = nn.ReLU()
    self.linear2 = nn.Linear(hidden_size,1)

  def forward(self,x):
    """Return the sigmoid of the single output score for each row of ``x``."""
    out = self.linear1(x)
    out = self.relu(out)
    out = self.linear2(out)
    ## sigmoid at the end!! ---> important, BCELoss needs probabilities
    y_pred = torch.sigmoid(out)
    return y_pred

## instantiate the binary model defined in this cell (NeuralNet2 would need num_classes)
model = NeuralNet1(input_size = 28*28, hidden_size = 5)
## BCELoss applies no activation itself; it expects the sigmoid output produced by forward()
criterion = nn.BCELoss()
