In [342]:
import numpy as np
import math
import sys
import tensorflow as tf
import matplotlib.pyplot as plt

In [280]:
EPSILON = 1e-12

## Class ACTIVATIONS

In [261]:
class sigmoid():
    @staticmethod
    def activation(x):
        y = 1 / (1 + np.exp(-x))
        return y
    @staticmethod
    def prime(y):
        f = sigmoid.activation(y)
        return (f * (1 - f))

In [262]:
class softmax:
    @staticmethod
    def activation(x):
        # e_x = np.exp(x).reshape(-1,1)
        e_x = np.exp(x - np.max(x,axis=-1).reshape(-1,1))
        return e_x / e_x.sum(axis=-1).reshape(-1,1)
    @staticmethod
    def primeP(y): #marche pas
        n = y.shape[1]
        print(n,y.shape)
        i = np.identity(n)
        un = np.ones((n,y.shape[1]))
        SM = y.reshape((-1,1))
        print(SM.shape)
        jac = np.diagflat(y) - np.dot(SM, SM.T)
        j = np.dot((i* jac),un)
        # x = np.dot(jac,y.T)
        return j.T
    @staticmethod
    def prime(y):
        return(y * (1-y))

In [263]:
def Softmax(x):
    '''
    Performs the softmax activation on a given set of inputs
    Input: x (N,k) ndarray (N: no. of samples, k: no. of nodes)
    Returns: 
    Note: Works for 2D arrays only(rows for samples, columns for nodes/outputs)
    '''
    max_x = np.amax(x, 1).reshape(x.shape[0],1) # Get the row-wise maximum
    e_x = np.exp(x - max_x ) # For stability
    return e_x / e_x.sum(axis=1, keepdims=True) 

In [264]:
def Softmax_grad(x): # Best implementation (VERY FAST)
    '''Returns the jacobian of the Softmax function for the given set of inputs.
    Inputs:
    x: should be a 2d array where the rows correspond to the samples
        and the columns correspond to the nodes.
    Returns: jacobian
    '''
    s = Softmax(x)
    a = np.eye(s.shape[-1])
    temp1 = np.zeros((s.shape[0], s.shape[1], s.shape[1]),dtype=np.float32)
    temp2 = np.zeros((s.shape[0], s.shape[1], s.shape[1]),dtype=np.float32)
    temp1 = np.einsum('ij,jk->ijk',s,a)
    temp2 = np.einsum('ij,ik->ijk',s,s)
    return temp1-temp2

## Class Loss Functions

In [326]:
class BinaryCrossEntropy:
    def __init__(self, p, y,_m,**kwargs):
        self.p = p
        self.y = y
        self.m = _m
        self.sw = kwargs.get('sample_weight',1)

    def normalized(self,a,y):
        self.p = a
        self.y = y
        self.c = self.y.shape[-1] #nb de class
    def metrics(self):
        r = (1 / self.m) * np.sum(self.forward()) /self.c
        return r
    def forward(self):
    # (-y.log(p) - (1-y).log(1-p))
        f = ((-self.y * np.log(self.p + EPSILON)) - ((1 - self.y) * (np.log(1 - self.p + EPSILON)))) * self.sw
        return f
    def backward(self):
    # -y/p + (1-y)/(1-p)
        # n = self.p.shape[0]
        return (-self.y/(self.p + EPSILON) + (1 - self.y)/(1 - self.p + EPSILON)) * self.sw

In [327]:
class CategoricalCrossEntropy():
    def __init__(self, a, y_true , _m, **kwargs):
        self.p = a / (np.sum(a,axis=-1,keepdims=True)+EPSILON)
        self.y = y_true
        self.m = _m
        self.sw = kwargs.get('sample_weight',1)
    def normalized(self,a,y_true):
        self.p = a / (np.sum(a,axis=-1,keepdims=True)+EPSILON)
        self.y = y_true
        # self.c = self.y.shape[-1] #nb de class
    def metrics(self): # -y.log(p)
        return (1 / self.m) * np.sum(self.forward())
    def forward(self): # -y.log(p)
        return (-self.y * np.log(self.p+EPSILON)) * self.sw
    def backward(self):
    # -y/p
        return (-self.y/(self.p+EPSILON)) * self.sw

In [328]:
class MSE():
    def __init__(self, p, y, _m, **kwargs):
        self.p = p
        self.y = y
        self.m = _m
        self.sw = kwargs.get('sample_weight',1)
    def normalized(self,a,y):
        self.p = a
        self.y = y
        self.c = self.y.shape[-1] #nb de class
    def forward(self):
        return (np.square(self.p - self.y)) / self.c * self.sw
    def backward(self):
        return (2*(self.p - self.y)) * self.sw
    def metrics(self):
        return (1 / self.m) * (np.sum(self.forward()))
        # return 1/self.m *np.sum(np.square(self.p - self.y))

## JEUX DE TEST 
* y : valeur TRUE
* z : simulation valeur sortie couche z = wX + b
* a : résultat couche activation SOFTMAX

In [329]:
np.random.seed(120)
y = np.array([[0,0,0,0,1,0,0,0,0,0]])
z = np.random.randn(1,10)/np.sqrt(2/10)
a = softmax.activation(z)
# print(a)
print(z,a,a.sum(axis=-1))

[[ 0.32970771  4.52768052 -0.95453353  5.16596603  1.40572632 -0.1239037
  -2.95130235  0.90576774 -0.41480254 -0.22734566]] [[4.99355964e-03 3.32327797e-01 1.38251979e-03 6.29173675e-01
  1.46460052e-02 3.17255608e-03 1.87709251e-04 8.88361913e-03
  2.37177533e-03 2.86078379e-03]] [1.]


In [330]:
a2 = Softmax(z)
print(a2,a2.sum(axis=-1))

[[4.99355964e-03 3.32327797e-01 1.38251979e-03 6.29173675e-01
  1.46460052e-02 3.17255608e-03 1.87709251e-04 8.88361913e-03
  2.37177533e-03 2.86078379e-03]] [1.]


In [331]:
L = BinaryCrossEntropy(a,y,1)
L.normalized(a,y)
l = L.metrics()
print(l)
dL_da = L.backward()
da_dz = softmax.prime(a)
diff = dL_da*da_dz
print(diff,y.shape)
print(a-y)

0.5643484715378928
[[ 4.99355964e-03  3.32327797e-01  1.38251979e-03  6.29173675e-01
  -9.85353995e-01  3.17255608e-03  1.87709251e-04  8.88361913e-03
   2.37177533e-03  2.86078379e-03]] (1, 10)
[[ 4.99355964e-03  3.32327797e-01  1.38251979e-03  6.29173675e-01
  -9.85353995e-01  3.17255608e-03  1.87709251e-04  8.88361913e-03
   2.37177533e-03  2.86078379e-03]]


In [332]:
da_dz_2 = Softmax_grad(a).reshape(10,10)
# print(da_dz_2,dL_da)
print(np.dot(da_dz_2,dL_da.T).T)

[[ 0.5222245   0.78520852  0.52002033  1.25547688 -5.68949143  0.52111153
   0.51929353  0.52461186  0.52062303  0.52092127]]


In [341]:
da_dz_3 = softmax.primeP(a)
# print(da_dz_3)
print(np.dot(dL_da,da_dz_3))

10 (1, 10)
(10, 1)
[[-2.83507696e-01 -1.26607282e+01 -7.87769644e-02 -1.33128126e+01
  -8.23455597e-01 -1.80450468e-01 -1.07086042e-02 -5.02392681e-01
  -1.35011572e-01 -1.62768197e-01]]


In [334]:
# Vérification CCE
f_cce = CategoricalCrossEntropy(a,y,1)
f_cce.normalized(a,y)
e1 = f_cce.metrics()
t_tf = tf.nn.softmax(z)
t_cce = tf.keras.losses.CategoricalCrossentropy(from_logits=False)
t1 = t_cce(y,t_tf).numpy()
print('ep cce:', e1)
print('tf cce:',t1)

ep cce: 4.223587661845474
tf cce: 4.2235876619127515


In [335]:
t_tf = tf.nn.softmax(z)
print(t_tf)

tf.Tensor(
[[4.99355964e-03 3.32327797e-01 1.38251979e-03 6.29173675e-01
  1.46460052e-02 3.17255608e-03 1.87709251e-04 8.88361913e-03
  2.37177533e-03 2.86078379e-03]], shape=(1, 10), dtype=float64)


In [336]:
t_bce = tf.keras.losses.BinaryCrossentropy(from_logits=False)
t = t_bce(y,t_tf).numpy()
print('ep bce:', l)
print('tf bce:', t)

ep bce: 0.5643484715378928
tf bce: 0.5643476765841015


## Vérification metrics Binary Cross Entropy
* EPSILON = 1e-12
* utilisation sample_weight

In [340]:
y_true = np.array([[0, 1], [0, 0]])
y_pred = np.array([[-18.6, 0.51], [2.94, -12.8]])
sw = np.array([[0.8],[0.2]])
ypS = sigmoid.activation(y_pred)
# yp = np.clip(y_pred,1e-8,1e8)
bce = BinaryCrossEntropy(ypS,y_true,2,sample_weight = sw)
bce.normalized(ypS,y_true)
bce_f = (bce.forward())
print(np.mean(bce_f))
l_bce = bce.metrics()
print('ep:',l_bce)
t_bce = tf.keras.losses.BinaryCrossentropy(from_logits=True)
print('tf:',t_bce(y_true,y_pred,sample_weight=sw).numpy())

0.24363858891514345
ep: 0.24363858891514345
tf: 0.24363858891670923


## Vérification metrics Categorical Cross Entropy

In [338]:
y_true = np.array([[0, 1], [0, 0]])
y_pred = np.array([[-1.6, 0.51], [-1.94, -1.8]])
sw = np.array([[0.8],[0.2]])
ypS = sigmoid.activation(y_pred)
t_ypS = tf.nn.sigmoid(y_pred)
# print(t_ypS,ypS)
t2_cce = tf.keras.losses.CategoricalCrossentropy(from_logits=False)
t2 = t2_cce(y_true,t_ypS,sample_weight=sw).numpy()
print('tf:',t2)
e2_cce = CategoricalCrossEntropy(t_ypS,y_true,2,sample_weight=sw)
e2_cce.normalized(t_ypS,y_true)
e2 = e2_cce.metrics()
print('ep:',e2)

tf: 0.09524559936875954
ep: 0.09524559936875655


## Vérification MSE

In [339]:
t_mse = tf.keras.losses.MeanSquaredError()
tf_mse = t_mse(y_true,t_ypS).numpy()
print('tf:',tf_mse)
f_mse = MSE(t_ypS,y_true,2)
f_mse.normalized(t_ypS,y_true)
ep_mse = f_mse.metrics()
print('ep:',ep_mse)

tf: 0.051224278248531804
ep: 0.051224278248531804


In [355]:
# x = (np.arange(0,10).reshape((10,1)) - 5 ) /10
x = np.random.randn(10,2)
# print(x)
s = softmax()
s_x = s.activation(x)
print(s_x)

[[0.60854981 0.39145019]
 [0.50219033 0.49780967]
 [0.181474   0.818526  ]
 [0.52390805 0.47609195]
 [0.66024491 0.33975509]
 [0.77328332 0.22671668]
 [0.90635004 0.09364996]
 [0.83287439 0.16712561]
 [0.61834897 0.38165103]
 [0.40778718 0.59221282]]
