# NLL loss

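The current mlpack implementation of the negative log-likelihood (NLL) loss is probably not correct: given the same input and targets as PyTorch's `NLLLoss`, it produces a different loss and gradient (see the comparison below). One thing still to rule out is a difference in conventions: the mlpack code loops over the input's columns and subtracts 1 from each target, which suggests it may expect one sample per column and 1-based labels.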


A basic NumPy implementation is included below.
```python
import torch
import numpy as np

# Log-probabilities (one row per sample, one column per class) and the
# target class of each sample.
x = torch.tensor([[-0.1689, -2.0033, -3.8886],
                  [-0.2862, -1.9392, -2.2532],
                  [-1.0543, -0.6196, -2.1769],
                  [-1.2865, -1.4797, -0.7011]])
y = torch.tensor([2, 2, 1, 2])

# Continue with plain NumPy arrays.
x, y = x.numpy(), y.numpy()

# Per-sample loss: the negated log-probability stored at the sample's target
# class. `label` plays the role of the currentTarget variable in mlpack.
lst = [-row[label] for row, label in zip(x, y)]

print(lst)           # none reduction
print(np.mean(lst))  # mean reduction
```

### Imports and installation of mlpack

In [1]:
%%capture
!sudo apt-get install libmlpack-dev 
import torch
import torch.nn as nn

### PyTorch

#### Input generation with fixed seeds

In [2]:
import random
import os
import numpy as np

def fix_seeds(seed=0):
  """Seed every random number generator used in this notebook.

  Seeds Python's `random` module, NumPy and PyTorch, exports the seed as
  PYTHONHASHSEED, and sets the cuDNN flags to deterministic / non-benchmark.
  When CUDA is available, `torch.cuda.manual_seed` is called as well.

  Args:
    seed: Integer seed handed to every generator. Defaults to 0.
  """
  # NOTE(review): the running interpreter has presumably already fixed its
  # hash seed at start-up, so this likely only affects child processes.
  os.environ['PYTHONHASHSEED'] = str(seed)

  for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
    seed_fn(seed)

  cudnn = torch.backends.cudnn
  cudnn.deterministic = True
  cudnn.benchmark = False

  if torch.cuda.is_available():
    torch.cuda.manual_seed(seed)


fix_seeds()

In [3]:
# Raw scores for 4 samples and 3 classes.
x0 = torch.randn(4, 3)
# NLL is a multi-class loss that takes log-probabilities, so normalise each
# row (sample) with a log-softmax over the class dimension.
x = torch.log_softmax(x0, dim=1)
# Draw 4 class indices from {0, 1, 2}: PyTorch requires
# 0 <= targets[i] <= num_classes - 1.
y = torch.empty(4, dtype=torch.long).random_(3)

print('Input : ', x, sep='\n')
print('Target : ', y, sep='\n')

Input : 
tensor([[-0.1689, -2.0033, -3.8886],
        [-0.2862, -1.9392, -2.2532],
        [-1.0543, -0.6196, -2.1769],
        [-1.2865, -1.4797, -0.7011]])
Target : 
tensor([2, 2, 1, 2])


#### None Reduction


In [4]:
# Log-probabilities and targets, hard-coded from the generation cell's output
# so this cell does not depend on the random state.
log_probs = torch.tensor([[-0.1689, -2.0033, -3.8886],
                          [-0.2862, -1.9392, -2.2532],
                          [-1.0543, -0.6196, -2.1769],
                          [-1.2865, -1.4797, -0.7011]], requires_grad=True)
target = torch.tensor([2, 2, 1, 2])

# reduction='none' keeps one loss value per sample.
criterion = nn.NLLLoss(reduction='none')
output = criterion(log_probs, target)

# The loss is not a scalar here, so backward() needs an explicit upstream
# gradient; all-ones weights every sample equally.
output.backward(gradient=torch.ones_like(output))

print("Input : ", log_probs, sep="\n")
print("Target : ", target, sep="\n")
print("FORWARD : ", "Loss : ", output, sep="\n")
print("BACKWARD : ", log_probs.grad, sep="\n")

Input : 
tensor([[-0.1689, -2.0033, -3.8886],
        [-0.2862, -1.9392, -2.2532],
        [-1.0543, -0.6196, -2.1769],
        [-1.2865, -1.4797, -0.7011]], requires_grad=True)
Target : 
tensor([2, 2, 1, 2])
FORWARD : 
Loss : 
tensor([3.8886, 2.2532, 0.6196, 0.7011], grad_fn=<NllLossBackward>)
BACKWARD : 
tensor([[ 0.,  0., -1.],
        [ 0.,  0., -1.],
        [ 0., -1.,  0.],
        [ 0.,  0., -1.]])


#### Sum Reduction

In [5]:
# Same hard-coded log-probabilities and targets as the other reduction cells.
log_probs = torch.tensor([[-0.1689, -2.0033, -3.8886],
                          [-0.2862, -1.9392, -2.2532],
                          [-1.0543, -0.6196, -2.1769],
                          [-1.2865, -1.4797, -0.7011]], requires_grad=True)
target = torch.tensor([2, 2, 1, 2])

# reduction='sum' adds the per-sample losses into a single scalar.
criterion = nn.NLLLoss(reduction='sum')
output = criterion(log_probs, target)

# A scalar loss can be back-propagated without an explicit gradient argument.
output.backward()

print("Input : ", log_probs, sep="\n")
print("Target : ", target, sep="\n")
print("FORWARD : ", "Loss : ", output, sep="\n")
print("BACKWARD : ", log_probs.grad, sep="\n")

Input : 
tensor([[-0.1689, -2.0033, -3.8886],
        [-0.2862, -1.9392, -2.2532],
        [-1.0543, -0.6196, -2.1769],
        [-1.2865, -1.4797, -0.7011]], requires_grad=True)
Target : 
tensor([2, 2, 1, 2])
FORWARD : 
Loss : 
tensor(7.4625, grad_fn=<NllLossBackward>)
BACKWARD : 
tensor([[ 0.,  0., -1.],
        [ 0.,  0., -1.],
        [ 0., -1.,  0.],
        [ 0.,  0., -1.]])


#### Mean reduction

In [6]:
# Same hard-coded log-probabilities and targets as the other reduction cells.
log_probs = torch.tensor([[-0.1689, -2.0033, -3.8886],
                          [-0.2862, -1.9392, -2.2532],
                          [-1.0543, -0.6196, -2.1769],
                          [-1.2865, -1.4797, -0.7011]], requires_grad=True)
target = torch.tensor([2, 2, 1, 2])

# reduction='mean' averages the per-sample losses into a single scalar.
criterion = nn.NLLLoss(reduction='mean')
output = criterion(log_probs, target)

# A scalar loss can be back-propagated without an explicit gradient argument.
output.backward()

print("Input : ", log_probs, sep="\n")
print("Target : ", target, sep="\n")
print("FORWARD : ", "Loss : ", output, sep="\n")
print("BACKWARD : ", log_probs.grad, sep="\n")

Input : 
tensor([[-0.1689, -2.0033, -3.8886],
        [-0.2862, -1.9392, -2.2532],
        [-1.0543, -0.6196, -2.1769],
        [-1.2865, -1.4797, -0.7011]], requires_grad=True)
Target : 
tensor([2, 2, 1, 2])
FORWARD : 
Loss : 
tensor(1.8656, grad_fn=<NllLossBackward>)
BACKWARD : 
tensor([[ 0.0000,  0.0000, -0.2500],
        [ 0.0000,  0.0000, -0.2500],
        [ 0.0000, -0.2500,  0.0000],
        [ 0.0000,  0.0000, -0.2500]])


### mlpack


##### CURRENT IMPLEMENTATION - implements only the sum reduction, and probably incorrectly (results don't match the PyTorch output)

In [7]:
%%capture
%%writefile test.cpp  

#include <iostream>
#include <armadillo>

using namespace std;
using namespace arma;

int main()
{
  // Constructor
  arma::mat x,y;
 
  // PyTorch : input of shape N x C = 4 x 3, i.e N = 4, C = 3, target of shape N  = 4 
  // mlpack : input.n_rows = 4, input.n_cols = 3, target.n_rows = 1, target.n_cols = 4
 
  x << -0.1689 << -2.0033 << -3.8886 << endr
    << -0.2862 << -1.9392 << -2.2532 << endr
    << -1.0543 << -0.6196 << -2.1769 << endr
    << -1.2865 << -1.4797 << -0.7011 << endr;
 
  // Disparity I observed for bounds checking in target vector. Not sure why (?)
  // PyTorch : 0 <= target[i] < C , i.e. 0 <= target[i] < 3
  // mlpack : 0 <= currentTarget < input.n_rows, i.e 0 <= target[i] < 4
 
  y << 2 << 2 << 1 << 2 << endr;
 

  // Forward
 
  double loss_sum = 0;
  for (size_t i = 0; i < x.n_cols; ++i)
  {
    size_t currentTarget = y(i) - 1;
    loss_sum -= x(currentTarget, i);
  }

  // Backward
 
  arma::mat output;
  output = arma::zeros<arma::mat>(x.n_rows, x.n_cols);
  for (size_t i = 0; i < x.n_cols; ++i)
  {
    size_t currentTarget = y(i) - 1;
    output(currentTarget, i) = -1;
  }

  // Display
  cout << "------------------------------------------------------------------" << endl;
  cout << "USER-PROVIDED MATRICES : " << endl;
  cout << "------------------------------------------------------------------" << endl;
  cout << "Input shape : "<< x.n_rows << " " << x.n_cols << endl;
  cout << "Input : " << endl << x << endl;
  cout << "Target shape : "<< y.n_rows << " " << y.n_cols << endl;
  cout << "Target : " << endl << y << endl;
  cout << "Loss : " << endl << loss_sum << endl;
  cout << "Backward output : " << endl << output << endl;
  cout << "------------------------------------------------------------------" << endl;
  return 0;
}

In [8]:
%%script bash
# Compile test.cpp, linking against Armadillo, and run the binary only if the build succeeds.
g++ test.cpp -o test -larmadillo && ./test

------------------------------------------------------------------
USER-PROVIDED MATRICES : 
------------------------------------------------------------------
Input shape : 4 3
Input : 
  -0.1689  -2.0033  -3.8886
  -0.2862  -1.9392  -2.2532
  -1.0543  -0.6196  -2.1769
  -1.2865  -1.4797  -0.7011

Target shape : 1 4
Target : 
   2.0000   2.0000   1.0000   2.0000

Loss : 
6.114
Backward output : 
        0        0  -1.0000
  -1.0000  -1.0000        0
        0        0        0
        0        0        0

------------------------------------------------------------------


##### NEW IMPLEMENTATION - matches PyTorch exactly and implements all the reductions (none, sum, mean)

In [9]:
%%capture
%%writefile test.cpp  

#include <iostream>
#include <armadillo>

using namespace std;
using namespace arma;

int main()
{
  // Constructor
  arma::mat x,y;
 
  // PyTorch : input of shape N x C = 4 x 3, i.e N = 4, C = 3, target of shape N = 4  
  // PyTorch : 0 <= target[i] < C , i.e. 0 <= target[i] < 3 for all i
  // PyTorch : -> Forward : l[n] = -x[n][y[n]] where  0 <= n <= N-1,  i.e l[0] = -x[0][2], l[1] = -x[1][2], l[2] = -x[2][1], l[3] = -x[3][2] , loss_none = [ l[0], l[1], l[2], l[3] ]
 
  x << -0.1689 << -2.0033 << -3.8886 << endr
    << -0.2862 << -1.9392 << -2.2532 << endr
    << -1.0543 << -0.6196 << -2.1769 << endr
    << -1.2865 << -1.4797 << -0.7011 << endr;
   
  y << 2 << 2 << 1 << 2 << endr;
 
  // Forward
  arma::mat loss_none;
  loss_none.zeros(size(y));
  for (size_t i = 0; i < y.n_cols; ++i)
  {
    size_t currentTarget = y(i);
    loss_none(i) = -x(i, currentTarget);
  }
 
  double loss_sum = arma::accu(loss_none);
  double loss_mean = loss_sum / y.n_elem;


  // Backward
  arma::mat output;
  output.zeros(size(x));
  for (size_t i = 0; i < y.n_cols; ++i)
  {
    size_t currentTarget = y(i);
    output(i, currentTarget) = -1;
  }

  // Display
  cout << "------------------------------------------------------------------" << endl;
  cout << "USER-PROVIDED MATRICES : " << endl;
  cout << "------------------------------------------------------------------" << endl;
  cout << "Input shape : "<< x.n_rows << " " << x.n_cols << endl;
  cout << "Input : " << endl << x << endl;
  cout << "Target shape : "<< y.n_rows << " " << y.n_cols << endl;
  cout << "Target : " << endl << y << endl;
  cout << "------------------------------------------------------------------" << endl;
  cout << "NONE " << endl;
  cout << "------------------------------------------------------------------" << endl;
  cout << "Loss (none):\n" << loss_none << '\n';
  cout << "------------------------------------------------------------------" << endl;
  cout << "SUM " << endl;
  cout << "------------------------------------------------------------------" << endl;
  cout << "FORWARD : " << endl;
  cout << "Loss (sum):\n" << loss_sum << '\n';
  cout << "BACKWARD : " << endl;
  cout << "Output shape : "<< output.n_rows << " " << output.n_cols << endl;
  cout << "Output (sum) : " << endl << output << endl;
  cout << "Sum of all values in this matrix : " << arma::as_scalar(arma::accu(output)) << endl;
  cout << "------------------------------------------------------------------" << endl;
  cout << "MEAN " << endl;
  cout << "------------------------------------------------------------------" << endl;
  cout << "FORWARD : " << endl;
  cout << "Loss (mean):\n" << loss_mean << '\n';
  cout << "BACKWARD : " << endl;
  cout << "Output shape : "<< output.n_rows << " " << output.n_cols << endl;
  cout << "Output (mean) : " << endl << output / y.n_elem << endl;
  cout << "Sum of all values in this matrix : " << arma::as_scalar(arma::accu(output / y.n_elem)) << endl;
  cout << "------------------------------------------------------------------" << endl;

  return 0;
}

In [10]:
%%script bash
# Compile test.cpp, linking against Armadillo, and run the binary only if the build succeeds.
g++ test.cpp -o test -larmadillo && ./test

------------------------------------------------------------------
USER-PROVIDED MATRICES : 
------------------------------------------------------------------
Input shape : 4 3
Input : 
  -0.1689  -2.0033  -3.8886
  -0.2862  -1.9392  -2.2532
  -1.0543  -0.6196  -2.1769
  -1.2865  -1.4797  -0.7011

Target shape : 1 4
Target : 
   2.0000   2.0000   1.0000   2.0000

------------------------------------------------------------------
NONE 
------------------------------------------------------------------
Loss (none):
   3.8886   2.2532   0.6196   0.7011

------------------------------------------------------------------
SUM 
------------------------------------------------------------------
FORWARD : 
Loss (sum):
7.4625
BACKWARD : 
Output shape : 4 3
Output (sum) : 
        0        0  -1.0000
        0        0  -1.0000
        0  -1.0000        0
        0        0  -1.0000

Sum of all values in this matrix : -4
------------------------------------------------------------------
MEAN 
--