# Softmax output + Cross entropy loss 

In [99]:
import numpy.testing as npt
import numpy as np 
import torch

import pdb

In [100]:
# Doesn't work with jupyterlab
from IPython.core.display import HTML
def css_styling():
    styles = open('/Users/hjsong/Downloads/custom.css', 'r').read()
    return HTML(styles)

css_styling()

## Numpy implementation

In [127]:
class Softmax():
    def __init__(self):
        self.eVec = None
        self.qVec = None
        self.s = None
        
    def forward(self, inVec):
        self.eVec = np.exp(inVec)
        self.s = self.eVec.sum()
        self.qVec = self.eVec / self.s #todo: add a small epsilon?
        self.check_qVec()
        
    def loss(self, y):
        if self.qVec is None: 
            raise ValueError("Model must be trained before calling loss function")#todo: unTrained model. 
        return np.sum( y*np.log(self.qVec)) 

    def backward(self, y):
        if self.qVec is None:
            raise ValueError("Model must be trained before backprop")#todo: unTrained model. 

        dldqVec = y / self.qVec #todo: division by zero
        
        #D is a square mtx for gradient of qVector wrt eVector
        D = self.build_dqde()
        dldinVec = D.dot(dldqVec)
        return dldinVec
        
    def build_dqde(self):
        if self.eVec is None:
            raise ValueError("Model must be trained before building D (=dq/de)") #todo: unTrained model. 
            
        dim = len(self.eVec)
        D = np.ones(dim) * -self.eVec.reshape((dim,-1))
        for i in range(dim):
            D[i,i] = self.s - self.eVec[i]
        return D / self.s**2
    
    def check_qVec(self):
        
        flag = math.isclose(np.sum(self.qVec),1)
        if not flag:
            raise ValueError("qVec does not add upto 1!")
        return flag

    def reset(self):
        self.eVec = None
        self.qVec = None
        self.s = None
        
    def print_state(self):
        print("eVec: \n", self.eVec,
              "\ns: ", self.s,
             "\nqVec: \n", self.qVec)
        

### Test

1. Test basic constructor

In [102]:
softmax = Softmax()
x = np.array([1,5,7]).reshape((3,-1))
softmax.forward(x)
print("in:\n", x,
      "\nevec:\n", softmax.eVec,
      "\ns:", softmax.s, 
      "\nqvec:\n", softmax.qVec)
# print(np.sum(softmax.qVec))

in:
 [[1]
 [5]
 [7]] 
evec:
 [[   2.71828183]
 [ 148.4131591 ]
 [1096.63315843]] 
s: 1247.7645993594942 
qvec:
 [[0.00217852]
 [0.11894324]
 [0.87887824]]


In [103]:
print(softmax.check_qVec())

True


In [104]:
softmax.print_state()
softmax.reset()
softmax.print_state()

eVec: 
 [[   2.71828183]
 [ 148.4131591 ]
 [1096.63315843]] 
qVec: 
 [[0.00217852]
 [0.11894324]
 [0.87887824]] 
s:  1247.7645993594942
eVec: 
 None 
qVec: 
 None 
s:  None


2. Test forward and backprop

In [105]:
y_hat = np.array([10,0,0]).reshape((3,1))
softmax.forward(y_hat)
softmax.print_state()

eVec: 
 [[2.20264658e+04]
 [1.00000000e+00]
 [1.00000000e+00]] 
qVec: 
 [[9.99909208e-01]
 [4.53958078e-05]
 [4.53958078e-05]] 
s:  22028.465794806718


In [106]:
y = np.array([1,0,0]).reshape((3,1))

In [107]:
softmax.loss(y)

-9.079573746728087e-05

In [108]:
D = softmax.build_dqde(); print(D); print(D.shape)

[[ 4.12155874e-09 -4.53916863e-05 -4.53916863e-05]
 [-2.06077937e-09  4.53937471e-05 -2.06077937e-09]
 [-2.06077937e-09 -2.06077937e-09  4.53937471e-05]]
(3, 3)


In [109]:
softmax.print_state()

eVec: 
 [[2.20264658e+04]
 [1.00000000e+00]
 [1.00000000e+00]] 
qVec: 
 [[9.99909208e-01]
 [4.53958078e-05]
 [4.53958078e-05]] 
s:  22028.465794806718


## PyTorch implementation (autograd)

In [110]:
y_hat = torch.tensor(x, dtype=torch.float); print(y_hat.size())

torch.Size([3, 1])


In [165]:
# autograd version 
class Softmax2(Softmax):
    def __init__(self):
        super(Softmax2, self).__init__()
        
    def forward(self, inVec):
        #todo: use torch to calculate the forward pass. 
        # torch.tensor that requires a grad tracking cannot call .numpy
        if not isinstance(inVec, torch.Tensor) or inVec.dtype != torch.float:
            inVec = torch.tensor(inVec, dtype=torch.float)
        if len(inVec.size()) == 1:
            inVec = inVec.reshape(-1,t1.size()[0])
        print('inVec:\n', inVec)
        self.eVec = torch.exp(inVec)
        self.s = self.eVec.sum().item()
        self.qVec = self.eVec / self.s
        if self.check_qVec():
            return self.qVec
        
    def check_qVec(self):
        if self.qVec is None:
            raise ValueError("Model is not trained yet.")#todo: unTrained model. 
                
        flag = math.isclose(self.qVec.sum().item(),1)
        if not flag:
            raise ValueError("qVec does not add upto 1!")
        return flag
        
        
def cross_entropy_loss(y_hat, y):
    """Computes the cross entropy loss between two probability 
    distribution, y_hat and y"""
    if not isinstance(y_hat, torch.Tensor): y_hat = torch.tensor(y_hat, dtype=torch.float)
    if not isinstance(y, torch.Tensor): y = torch.tensor(y, dtype=torch.float)
        
    return -torch.sum(y*torch.log(y_hat)) #must return a tensor object for autograd
        

In [166]:
# Simple tests
def test_softmax2():
    model = Softmax2()
    model.print_state()
    
    inVec = torch.tensor([1,2,3], dtype=torch.float)
    qVec = model.forward(inVec)
    model.print_state()
    
    print('output of forward:\n', qVec)
    
def test_cross_entropy_loss():
    y = torch.tensor([1,0,0], dtype=torch.float).reshape(3,1)
    
    y_perfect = torch.tensor([0.99999,0.000005,0.000005], dtype=torch.float).reshape(y.shape)
    y_wrong = torch.tensor([0.00005,0.00005, 0.9999], dtype=torch.float).reshape(y.shape)
    
    print("Right: ", cross_entropy_loss(y_perfect, y).item() )
    print("Wrong: ", cross_entropy_loss(y_wrong, y).item() )

In [167]:
test_softmax2()

eVec: 
 None 
s:  None 
qVec: 
 None
inVec:
 tensor([[ 1.,  2.,  3.]])
eVec: 
 tensor([[  2.7183,   7.3891,  20.0855]]) 
s:  30.192874908447266 
qVec: 
 tensor([[ 0.0900,  0.2447,  0.6652]])
output of forward:
 tensor([[ 0.0900,  0.2447,  0.6652]])


In [161]:
#Test `cross_entropy_loss`
test_cross_entropy_loss()

Right:  1.0013630344474223e-05
Wrong:  9.903487205505371


Helper: Normalize an array-type object 

In [143]:
import numpy.linalg as LA
import math
def normalize(x, eps=1e-6):
    """x: torch.tensor, numpy.array, or python list object.
    Returns a normalized array as torch.tensor with dtype of torch.float
    Note: Don't use it if x is a tensor that requires gradient backprop."""
    if isinstance(x, torch.Tensor):
        x = x.numpy()
    s = np.sum(x)
    if math.isclose(s,0): s += eps
    normed = x / s
    assert(math.isclose(np.sum(normed),1))
    return torch.tensor(normed, dtype=torch.float) # tensor (dtype=torch.float32) can't be divided by np.float32? strange!!

In [None]:
# Now use the forward from the model and loss function as in lec 06 to use autograd for
# autodiff. Then compare the two implementations

In [168]:
softmax = Softmax2()
inVec = torch.tensor([1,2,3], dtype=torch.float, requires_grad=True )
qVec = softmax.forward(inVec)
l = cross_entropy_loss(qVec, np.array([1,0,0]))
print("Before backprop")
print(inVec.grad)

inVec:
 tensor([[ 1.,  2.,  3.]])
Before backprop
None


In [169]:
l

tensor(2.4076)

In [170]:
l.backward()
print("After backprop")
print(inVec.grad)

After backprop
tensor([-1., -0., -0.])


In [171]:
cross_entropy_criterion = torch.nn.CrossEntropyLoss()