## July 25 - Understanding loss functions

In [1]:
# Imports
import math
import os
import sys
import pandas as pd
import numpy as np

import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm
from mpl_toolkits.mplot3d import Axes3D

# Make the parent of the current working directory importable, so that
# project-local modules living one level up can be found by `import`.
parent_of_cwd = os.path.join(os.getcwd(), os.pardir)
par_dir = os.path.abspath(parent_of_cwd)
already_on_path = par_dir in sys.path
if not already_on_path:
    sys.path.append(par_dir)
    
# Import the custom plotting module
from plot_utils import plot_utils
import random
import torch

## Goal : Understand and interpret the scale of the two loss functions used ( MSE loss + KL loss ) using randomly sampled events from the dataset

## 1. Use validation samples from a recent training dump

In [34]:
# Load one validation dump from disk and pick two events to compare.
run_id = "20190726_030809"
dump_dir = "/home/akajal/WatChMaL/VAE/dumps/" + run_id + "/"
np_arr_path = dump_dir + "iteration_" + str(28000) + ".npz"

# Load the numpy archive and pull out the arrays stored under each key
np_arr = np.load(np_arr_path)
np_event, np_recon, np_labels, np_energies = np_arr["events"], np_arr["prediction"], np_arr["labels"], np_arr["energies"]

# Randomly sample two *distinct* events from the validation batch.
# Two independent randint() draws can return the same index, which would make
# the event-vs-event difference identically zero; sample() cannot repeat.
# A locally seeded generator keeps the pick reproducible on Restart & Run All
# without touching the global `random` state.
sample_seed = 42
n_events = np_labels.shape[0]
if n_events < 2:
    raise ValueError("Need at least two events in the dump to compare, got " + str(n_events))
i, j = random.Random(sample_seed).sample(range(n_events), 2)

event_i, label_i, energy_i = np_event[i], np_labels[i], np_energies[i]
event_j, label_j, energy_j = np_event[j], np_labels[j], np_energies[j]

In [35]:
# Summarise each sampled event on its own line: tensor shape, label, energy
for _event, _label, _energy in (
    (event_i, label_i, energy_i),
    (event_j, label_j, energy_j),
):
    print(_event.shape, _label, _energy)

(16, 40, 19) 2.0 [1164.5364]
(16, 40, 19) 2.0 [572.9689]


## Ok. So both events are $\mu$ events but with different true energies.

## Let us calculate the MSE loss by hand and confirm that it matches `torch.nn.MSELoss`.

## Calculate the difference b/w two 3-d tensors

In [36]:
# Element-wise difference between the two sampled events
event_diff = event_i - event_j
print(event_diff)

[[[ 0.          0.          0.         ...  0.          0.
    0.        ]
  [ 0.          0.          0.         ...  0.          0.
    0.        ]
  [ 0.          0.          0.         ...  0.          0.
    0.        ]
  ...
  [ 0.          0.          0.         ...  0.          0.
    0.        ]
  [ 0.          0.          0.         ...  0.          0.
    0.        ]
  [ 0.          0.          0.         ...  0.          0.
    0.        ]]

 [[ 0.          0.          0.         ...  0.          0.
   -0.79956084]
  [ 0.          0.          0.         ...  0.          0.
    0.        ]
  [ 0.          0.69699216 -0.5719074  ...  0.          0.
    1.0353982 ]
  ...
  [ 0.          0.          0.         ...  0.          0.
    0.        ]
  [ 0.          0.          0.         ...  0.          0.
    0.        ]
  [ 0.          0.          0.         ...  0.          0.
    0.        ]]

 [[ 0.          0.          0.         ...  0.          0.
    0.        ]
  [ 0.   

## Square the difference b/w two tensors

In [37]:
# Element-wise squared difference between the two sampled events
squared_diff = np.power(event_i - event_j, 2)
print(squared_diff)

[[[0.         0.         0.         ... 0.         0.         0.        ]
  [0.         0.         0.         ... 0.         0.         0.        ]
  [0.         0.         0.         ... 0.         0.         0.        ]
  ...
  [0.         0.         0.         ... 0.         0.         0.        ]
  [0.         0.         0.         ... 0.         0.         0.        ]
  [0.         0.         0.         ... 0.         0.         0.        ]]

 [[0.         0.         0.         ... 0.         0.         0.63929754]
  [0.         0.         0.         ... 0.         0.         0.        ]
  [0.         0.48579806 0.32707807 ... 0.         0.         1.0720495 ]
  ...
  [0.         0.         0.         ... 0.         0.         0.        ]
  [0.         0.         0.         ... 0.         0.         0.        ]
  [0.         0.         0.         ... 0.         0.         0.        ]]

 [[0.         0.         0.         ... 0.         0.         0.        ]
  [0.         0.      

## Get the scalar sum of the 3-d tensor to compare with `torch.nn.MSELoss`

In [38]:
# Collapse the element-wise squared differences into a single scalar
sum_squared_diff = np.power(event_i - event_j, 2).sum()
print(sum_squared_diff)

115975.88


## Therefore, the MSE loss should act on each element of the 3-d tensor; the scalar above is the sum of those per-element squared errors.

## Use the torch MSEloss

In [39]:
# Build the MSE criterion with reduction="none" so that it returns one loss
# value per input element instead of a single reduced scalar.
import torch.nn as nn

mse_loss = nn.MSELoss(reduction="none")

In [40]:
# Print the built-in documentation of the criterion object to check how the
# `reduction` argument is described.
help(mse_loss)

Help on MSELoss in module torch.nn.modules.loss object:

class MSELoss(_Loss)
 |  Creates a criterion that measures the mean squared error (squared L2 norm) between
 |  each element in the input :math:`x` and target :math:`y`.
 |  
 |  The unreduced (i.e. with :attr:`reduction` set to ``'none'``) loss can be described as:
 |  
 |  .. math::
 |      \ell(x, y) = L = \{l_1,\dots,l_N\}^\top, \quad
 |      l_n = \left( x_n - y_n \right)^2,
 |  
 |  where :math:`N` is the batch size. If :attr:`reduction` is not ``'none'``
 |  (default ``'mean'``), then:
 |  
 |  .. math::
 |      \ell(x, y) =
 |      \begin{cases}
 |          \operatorname{mean}(L), &  \text{if reduction} = \text{'mean';}\\
 |          \operatorname{sum}(L),  &  \text{if reduction} = \text{'sum'.}
 |      \end{cases}
 |  
 |  :math:`x` and :math:`y` are tensors of arbitrary shapes with a total
 |  of :math:`n` elements each.
 |  
 |  The sum operation still operates over all the elements, and divides by :math:`n`.
 |  
 |  

In [41]:
# Rebind both events as torch tensors (same names) so they can be passed to
# the torch criterion.
event_i, event_j = torch.Tensor(event_i), torch.Tensor(event_j)

# With reduction="none" this prints one squared error per element
elementwise_mse = mse_loss(event_i, event_j)
print(elementwise_mse)

tensor([[[0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         ...,
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000]],

        [[0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.6393],
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.4858, 0.3271,  ..., 0.0000, 0.0000, 1.0720],
         ...,
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000]],

        [[0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 1.8375],
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.

In [42]:
# Sum the element-wise torch losses into one scalar, to compare with the
# numpy sum of squared differences computed earlier
total_mse = mse_loss(event_i, event_j).sum()
print(total_mse)

tensor(115975.8906)


## Ok. So we have to specify `reduction="none"` in order to get the per-element 3-d tensor of the loss. Which is the correct choice for the training loss: `reduction="none"` or `reduction="mean"`?