In [1]:
import numpy as np

def entropy(X):
    '''
    Shannon entropy H(X), in bits, of the empirical distribution of `X`.

    Entries along the first axis are treated as observations: a 1-D input
    is a sequence of symbols, a 2-D input is a sequence of rows and each
    distinct row counts as one symbol.

    Args:
        X:    1-D `np.ndarray`. Any array-like (e.g. a list) is accepted
              and converted with `np.asarray`.

    Returns:
        `float`
    '''
    X = np.asarray(X)
    # Only the multiplicities are needed; the distinct values are discarded.
    _, count = np.unique(X, return_counts=True, axis=0)
    # Every counted symbol occurs at least once, so p > 0 and log2(p) is finite.
    p = count / X.shape[0]
    # Adding 0.0 turns the -0.0 produced by a constant input into +0.0 and
    # leaves every other value untouched.
    return -np.sum(p * np.log2(p)) + 0.0


def joint_entropy(X, Y):
    '''
    Joint entropy H(X, Y) of two paired sequences.

    `X` and `Y` are placed side by side as columns, so that each row is
    one (x, y) observation, and the entropy of those rows is returned.

    Args:
        X:    1-D `np.ndarray`.
        Y:    1-D `np.ndarray`.

    Returns:
        `float`
    '''
    paired_arr = np.c_[X, Y]
    return entropy(paired_arr)


def conditional_entropy(X, Y):
    """
    Conditional entropy H(Y|X), computed as H(Y, X) - H(X).

    Args:
        X:    1-D `np.ndarray`, the conditioning variable.
        Y:    1-D `np.ndarray`, the variable whose remaining uncertainty
              is measured.

    Returns:
        `float`
    """
    h_joint = joint_entropy(Y, X)
    h_cond_var = entropy(X)
    return h_joint - h_cond_var

In [10]:
class SoftmaxWithCrossEntropy(object):
    '''
    Softmax and Cross Entropy.

    `forward` applies a softmax over one axis of the predictions and
    returns the cross entropy against the targets. It caches the softmax
    output and the targets so that `backward` can return the gradient with
    respect to the predictions.
    '''

    def __init__(self, ax=-1):
        '''
        Init.

        Args:
            ax:    Axis of `pred_arr` over which the softmax is normalised.
        '''
        self.__loss_arr = None
        self.__pred_arr = None
        self.__real_arr = None
        self.__ax = ax

    def forward(self, pred_arr, real_arr):
        '''
        Forward.

        Args:
            pred_arr:    `np.ndarray` of predicted data.
            real_arr:    `np.ndarray` of real data, same shape as `pred_arr`.

        Returns:
            Cross entropy summed over all elements and divided by the
            batch size (the length of the first axis, or 1 when `real_arr`
            is 1-D).
        '''
        # Softmax. The maximum is subtracted before exponentiating so that
        # `np.exp` cannot overflow. `keepdims=True` keeps the reduced axis
        # in place, so the broadcast is correct for any `ax`, not just -1.
        c_arr = np.max(pred_arr, axis=self.__ax, keepdims=True)
        exp_arr = np.exp(pred_arr - c_arr)
        sum_exp_arr = np.sum(exp_arr, axis=self.__ax, keepdims=True)
        prob_arr = exp_arr / sum_exp_arr

        # Cached in their original shapes for `backward`.
        self.__pred_arr = prob_arr
        self.__real_arr = real_arr

        # Cross Entropy. A 1-D target is treated as a single sample.
        if real_arr.ndim == 1:
            real_arr = real_arr.reshape(1, real_arr.size)
            prob_arr = prob_arr.reshape(1, prob_arr.size)

        batch_size = prob_arr.shape[0]
        # The small constant keeps `np.log` finite when a probability is 0.
        cross_entropy_arr = -np.sum(real_arr * np.log(prob_arr + 1e-08)) / batch_size

        self.__loss_arr = cross_entropy_arr
        return cross_entropy_arr

    def backward(self, delta_arr=1):
        '''
        Backward.

        Must be called after `forward`, whose cached arrays it uses.

        Args:
            delta_arr:    `np.ndarray` of grads (or a scalar) flowing in
                          from the layer above. The default of 1 returns
                          the gradient of the loss itself.

        Returns:
            grads with respect to `pred_arr`, in the shape `forward`
            received.
        '''
        # Use the same batch size as `forward`: a 1-D target is one sample,
        # so its first axis is the class axis and must not be divided by.
        if self.__real_arr.ndim == 1:
            batch_size = 1
        else:
            batch_size = self.__pred_arr.shape[0]
        dx_arr = (self.__pred_arr - self.__real_arr) / batch_size
        return dx_arr * delta_arr

In [11]:
softmax_with_loss = SoftmaxWithCrossEntropy()
# One-hot targets for 20 samples over 10 classes; every sample is class 0.
label_arr = np.zeros((20, 10))
label_arr[:, 0] = 1

# Draw the random logits from a seeded generator so that re-running the
# notebook reproduces the same loss. Outputs recorded before the seed was
# introduced will not match.
logit_arr = np.random.RandomState(0).normal(size=(20, 10))
softmax_with_loss.forward(logit_arr, real_arr=label_arr)

2.6286840190004286

In [12]:
# Gradient for the batch cached by the forward() call above:
# (softmax probabilities - labels) / batch size.
softmax_with_loss.backward()

array([[-0.04489067,  0.02869312,  0.00108114,  0.00550907,  0.00088671,
         0.00047268,  0.00154673,  0.00364564,  0.00158038,  0.00147519],
       [-0.04526119,  0.00367662,  0.00652601,  0.00406665,  0.00284018,
         0.00315073,  0.00777012,  0.00030669,  0.01561174,  0.00131246],
       [-0.04520361,  0.00216553,  0.00154386,  0.00237114,  0.00133351,
         0.00526934,  0.00031137,  0.02223755,  0.00874187,  0.00122945],
       [-0.04780361,  0.01021638,  0.01340742,  0.00038215,  0.00038802,
         0.00263455,  0.00262649,  0.01511869,  0.00234815,  0.00068177],
       [-0.04696956,  0.00254911,  0.00096781,  0.0050582 ,  0.00234173,
         0.00047192,  0.00209057,  0.00699826,  0.02511975,  0.00137221],
       [-0.04690909,  0.00406779,  0.00193208,  0.0065546 ,  0.00162557,
         0.00538604,  0.00205688,  0.00832994,  0.00281655,  0.01413963],
       [-0.03656915,  0.00454929,  0.00014618,  0.00352623,  0.00933859,
         0.00758685,  0.00537968,  0.00365581