In [1]:
# softmax and cross-entropy loss

In [2]:
# softmax

import torch
import torch.nn as nn
import numpy as np

def softmax(x):
    """Compute the softmax of ``x`` along axis 0.

    Parameters
    ----------
    x : array_like
        Raw scores. For a 1-D input the result sums to 1; for
        higher-dimensional input the normalisation runs over axis 0.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as ``x`` holding the softmax values.
    """
    x = np.asarray(x)
    if x.size == 0:
        # Nothing to normalise; keep the empty-in / empty-out behaviour
        # instead of letting the max reduction fail on an empty axis.
        return np.exp(x)
    # Subtracting the maximum along axis 0 leaves the result mathematically
    # unchanged but keeps np.exp from overflowing to inf (which would turn
    # the ratio into nan) when the scores are large.
    exps = np.exp(x - np.max(x, axis=0))
    return exps / np.sum(exps, axis=0)

# Example raw scores fed to the NumPy softmax defined above.
x = np.array([2.0, 1.0, 0.1])
outputs = softmax(x)

# Show the normalised values (they sum to 1 for this 1-D input).
print('softmax numpy : ', outputs)

softmax numpy :  [0.65900114 0.24243297 0.09856589]


In [4]:
# Same computation using PyTorch's built-in softmax.

x = torch.tensor([2.0, 1.0, 0.1])
outputs = x.softmax(dim=0)  # normalise along the tensor's only axis
print('sofmax pytorch : ', outputs)

sofmax pytorch :  tensor([0.6590, 0.2424, 0.0986])


In [5]:
# Cross-entropy is a loss that compares the one-hot encoded true labels (actual y) with the predicted class probabilities (predicted y).


In [8]:
def cross_entropy(actual, predicted, eps=1e-15):
    """Cross-entropy between one-hot labels and predicted probabilities.

    Parameters
    ----------
    actual : array_like
        One-hot encoded true labels.
    predicted : array_like
        Predicted probabilities, multiplied element-wise with ``actual``.
    eps : float, optional
        Lower bound applied to ``predicted`` before taking the logarithm.

    Returns
    -------
    float
        ``-sum(actual * log(predicted))``. The sum is not divided by the
        number of entries.
    """
    actual = np.asarray(actual)
    # log(0) is -inf and 0 * -inf is nan, so a single exact zero in the
    # predictions would turn the whole sum into nan (or inf). Flooring the
    # probabilities at eps keeps the loss finite.
    safe_predicted = np.maximum(predicted, eps)
    return -np.sum(actual * np.log(safe_predicted))

# True label as a one-hot vector (required by cross_entropy): class 0.
Y = np.array([1, 0, 0])

# Two candidate probability vectors: the first puts most of its mass on the
# true class, the second on a different class.
Y_pred_good = np.array([0.7, 0.2, 0.1])
Y_pred_bad = np.array([0.1, 0.3, 0.6])

# Score both predictions against the same label.
l1, l2 = (cross_entropy(Y, probs) for probs in (Y_pred_good, Y_pred_bad))

print(f'loss1 numpy :  {l1:.4f}')
print(f'loss2 numpy :  {l2:.4f}')

loss1 numpy :  0.3567
loss2 numpy :  2.3026


In [9]:
# Cross-entropy using PyTorch

loss = nn.CrossEntropyLoss()  # applies both log-softmax and negative log-likelihood loss

# The targets (y) passed to this loss should be class indices, not one-hot
# vectors, and the predictions should be raw scores, not softmax outputs.


In [11]:
# The target holds the class index of each sample (not one-hot).
Y = torch.tensor([0])
# Predictions have size n_samples x n_classes -- here 1 sample and 3 classes.
Y_pred_good = torch.tensor([[2.0, 1.0, 0.1]])
Y_pred_bad = torch.tensor([[0.5, 2.0, 0.3]])

l1 = loss(Y_pred_good, Y)
l2 = loss(Y_pred_bad, Y)
# These losses come from the PyTorch criterion, so the labels say "pytorch"
# rather than "numpy".
print(f'loss1 pytorch :  {l1.item():.4f}')
print(f'loss2 pytorch :  {l2.item():.4f}')

loss1 numpy :  0.4170
loss2 numpy :  1.8406


In [13]:
# Predicted class = index of the largest raw score in each row.

predictions = torch.max(Y_pred_good, dim=1).indices
predictions_bad = torch.max(Y_pred_bad, dim=1).indices
print(predictions)
print(predictions_bad)

tensor([0])
tensor([1])


In [14]:
# Batch of three samples: the target holds one class index per sample
# (not one-hot), and each row of the predictions holds that sample's scores.
Y = torch.tensor([2, 0, 1])
Y_pred_good = torch.tensor([
    [0.1, 1.0, 2.1],
    [2.0, 1.0, 0.1],
    [0.2, 2.0, 0.1],
])

l1 = loss(Y_pred_good, Y)

# Index of the largest score per row gives the predicted class.
predictions = torch.max(Y_pred_good, dim=1).indices
print(predictions)

tensor([2, 0, 1])


In [None]:
# Binary Classification Neural Network

In [19]:
class NeuralNet1(nn.Module):
    """Two-layer network with a single sigmoid-activated output.

    Pipeline: Linear(input_size, hidden_size) -> ReLU ->
    Linear(hidden_size, 1) -> sigmoid.
    """

    def __init__(self, input_size, hidden_size):
        """Create the two linear layers and the ReLU between them."""
        super().__init__()
        self.linear1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(in_features=hidden_size, out_features=1)

    def forward(self, x):
        """Run ``x`` through both layers and return the sigmoid of the result."""
        hidden = self.relu(self.linear1(x))
        score = self.linear2(hidden)
        # The sigmoid is applied inside the model, so callers receive values
        # between 0 and 1 rather than raw scores.
        return torch.sigmoid(score)
        

In [20]:
# 28*28 input features and 5 hidden units.
model = NeuralNet1(input_size = 28*28, hidden_size=5)
# BCELoss is paired with this model because NeuralNet1.forward already
# applies the sigmoid to its output.
criterion = nn.BCELoss()  

In [21]:
# Multi class Neural Network

In [22]:
class NeuralNet2(nn.Module):
    """Two-layer network that returns one raw score per class.

    Pipeline: Linear(input_size, hidden_size) -> ReLU ->
    Linear(hidden_size, num_classes). No softmax is applied.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        """Create the two linear layers and the ReLU between them."""
        super().__init__()
        self.linear1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(in_features=hidden_size, out_features=num_classes)

    def forward(self, x):
        """Run ``x`` through both layers and return the raw class scores."""
        hidden = self.relu(self.linear1(x))
        # Deliberately no softmax here: the scores are returned as-is.
        return self.linear2(hidden)
        

In [23]:
# 28*28 input features, 5 hidden units and 3 output classes.
model = NeuralNet2(input_size = 28*28, hidden_size=5, num_classes=3)
# NeuralNet2.forward returns raw scores without a softmax.
criterion = nn.CrossEntropyLoss()  # applies the softmax itself