In [15]:
from __future__ import absolute_import, division, print_function, unicode_literals
import tensorflow as tf

tf.keras.backend.clear_session()  # Reset the Keras backend session state before running the cells below

In [16]:
from keras import backend as K

In [17]:
# Copied from the paper: how the cost is computed with TF and Keras (kept even though it is the same as the function below, because this version also returns the derivative)
def compute_stable_bce_cost_derivative(Y, Z):
    """
    Compute the "stable" binary cross-entropy (stable_bce) cost and its
    derivative w.r.t. Z_last (the last linear node).

    The stable binary cross-entropy cost is defined as:
    => (1/m) * np.sum(max(Z,0) - ZY + log(1+exp(-|Z|)))

    Args:
        Y: labels of data (array-like; its first dimension gives m)
        Z: values from the last linear node (logits), same shape as Y
    Returns:
        cost: the "stable" binary cross-entropy cost
        dZ_last: gradient of the cost w.r.t. Z_last, i.e. (1/m) * (sigmoid(Z) - Y)
    """
    Y = np.asarray(Y)
    Z = np.asarray(Z)
    m = Y.shape[0]

    # log1p keeps the exp(-|Z|) contribution that log(1 + x) rounds away to 0 for large |Z|.
    cost = (1/m) * np.sum(np.maximum(Z, 0) - Z*Y + np.log1p(np.exp(-np.abs(Z))))

    # sigmoid(Z) = exp(-log(1 + exp(-Z))); logaddexp evaluates log(1 + exp(-Z))
    # without overflowing for very negative Z, unlike 1 / (1 + exp(-Z)).
    p_hat = np.exp(-np.logaddexp(0, -Z))
    dZ_last = (1/m) * (p_hat - Y)  # P_hat - Y, where P_hat = sigma(Z)

    return cost, dZ_last

In [4]:
#  TensorFlow format

In [18]:
def compute_stable_bce_cost(Y, Z):
    """
    Compute the "stable" binary cross-entropy (stable_bce) cost.

    The stable binary cross-entropy cost is defined as:
    => (1/m) * np.sum(max(Z,0) - ZY + log(1+exp(-|Z|)))

    Args:
        Y: labels of data (array-like; its first dimension gives m)
        Z: values from the last linear node (logits), same shape as Y
    Returns:
        cost: the "stable" binary cross-entropy cost (a single scalar; no gradient is returned)
    """
    Y = np.asarray(Y)
    Z = np.asarray(Z)
    m = Y.shape[0]
    # log1p keeps the exp(-|Z|) contribution that log(1 + x) rounds away to 0 for large |Z|.
    cost = (1/m) * np.sum(np.maximum(Z, 0) - Z*Y + np.log1p(np.exp(-np.abs(Z))))
    return cost

In [19]:
import numpy as np
# Labels and raw last-node outputs (logits), stored as float32
Y = np.array([0., 0., 1., 1.], dtype=np.float32)   # Ground truth
Z = np.array([1., 1., 1., 0.], dtype=np.float32)   # Logits (values of the last linear node)
# Stable binary cross-entropy evaluated with the NumPy helper
print(compute_stable_bce_cost(Y, Z))

0.9082330465316772


In [20]:
# TensorFlow format, using tf.keras.losses.BinaryCrossentropy.
# With from_logits=True the stable binary cross-entropy is applied to the logits:
#      Loss(Y, Z) = max(z, 0) - z*y + log(1 + e^(-|z|))
bce = tf.keras.losses.BinaryCrossentropy(from_logits=True)
# Y = [0., 0., 1., 1.] (labels) and Z = [1., 1., 1., 0.] (logits)
loss = bce(
    [0., 0., 1., 1.],
    [1., 1., 1., 0.],
)
print('Loss: ', loss.numpy())  # Loss: 0.9082331

Loss:  0.9082331


In [6]:
# Loss computed by hand, following the TensorFlow (from_logits=True) formula
import numpy as np
# Y = [0., 0., 1., 1.] and Z = [1., 1., 1., 0.]
# Y is the ground truth; Z are the logits (output of the last neuron, before the sigmoid)
# Per-sample loss: Loss(Y, Z) = max(z, 0) - z*y + log(1 + e^(-|z|))
loss_0_1 = ( 1+np.log(1+np.exp(-1)))    #  y= 0 z=1
loss_1_1 = ( 1-1+np.log(1+np.exp(-1)))  #  y= 1 z=1 
loss_1_0 = ( np.log(1+1))               #  y= 1 z=0 
# The (y=0, z=1) case appears twice in the data, so its loss is added twice
Total_loss =  loss_0_1 + loss_0_1 + loss_1_1 + loss_1_0 
print ( Total_loss / 4.0)

0.9082330607786535


In [9]:
# Keras format

In [26]:
def compute_bce_keras(Y, P, epsilon=1e-7):
    """
    Binary cross-entropy on probabilities, using the clip-then-log recipe that this
    notebook compares against tf.keras.losses.BinaryCrossentropy(from_logits=False).

    Args:
        Y: labels of data (array-like; its first dimension gives m)
        P: probabilities. They are clipped here to min = epsilon and max = 1 - epsilon;
           passing values that are already clipped gives the same result.
        epsilon: clipping bound, also added inside each log (default 10^-7)
    Returns:
        cost: binary cross-entropy, -(1/m) * sum(Y*log(P+eps) + (1-Y)*log(1-P+eps))
    """
    Y = np.asarray(Y)
    # Clipping is idempotent, so callers that pre-clip P are unaffected.
    P = np.clip(P, epsilon, 1. - epsilon)
    m = Y.shape[0]
    cost = (1/m) * np.sum(Y * np.log(P + epsilon) + (1 - Y) * np.log(1 - P + epsilon))
    return -cost

In [18]:
# tf.keras.losses.BinaryCrossentropy with from_logits=False:
# the second argument holds probabilities P, which are
#      - clipped to min 10^-7 and max 1 - 10^-7
#      - plugged into the cross-entropy formula
#          L(Y,P) = -Y*ln(P)-(1-Y)*ln(1-P)
Y = np.array([0., 0., 1., 1.], dtype=np.float32)   # Ground truth
P = np.array([1., 1., 1., 0.], dtype=np.float32)   # Probabilities
bce = tf.keras.losses.BinaryCrossentropy(from_logits=False)
loss = bce(Y, P)
print('Loss: ', loss.numpy())  # Loss: 11.522857


Loss:  11.522857


In [29]:
# Binary cross-entropy computed with the NumPy helper on clipped probabilities.
# NumPy defaults to 64-bit floats and TensorFlow to 32-bit floats,
# so the arrays are created as float32 explicitly.
Y = np.array([0., 0., 1., 1.], dtype=np.float32)   # Ground truth
P = np.array([1., 1., 1., 0.], dtype=np.float32)   # Probabilities
epsilon = 1e-7
# P_est: "stable" probabilities, kept inside [epsilon, 1 - epsilon]
P_est = np.clip(P, epsilon, 1. - epsilon)
print(compute_bce_keras(Y, P_est))

11.522856712341309


In [22]:
# This is the procedure the paper attributes to Keras, but it does NOT match the results!
# (recorded in this notebook: from_logits=False gives ~11.5229, this procedure gives ~12.0886)
#  With from_logits=False the second vector is interpreted as probabilities P, which are
#  converted into Z's (the so-called logits); the TensorFlow logits formula is then applied.
#  1. Probabilities -> Z's:
#      - the P's are clipped to min 10^-7 and max 1 - 10^-7
#      - the clipped P's are mapped to Z with:
#           Z = ln( P / ( 1- P))
#  2. TensorFlow formula on logits:
#        Loss ( Y,Z ) = max ( z,0 ) - z*y + log (1 + e ^ (-|z|))
#
# Example data: Y= [0., 0., 1., 1.] and P= [1., 1., 1., 0.]
import numpy as np
# NumPy uses 64 bit as default
# TensorFlow uses 32 bits as default
Y=  np.array( [0., 0., 1., 1.] )                              # Ground truth
P = np.array ( [1., 1., 1., 0.] )                             # P probabilities

epsilon = np.float_power(10, -7)
P_est = np.clip ( P,epsilon, 1.-epsilon)        # P_est - stable (clipped) probabilities

print ( 'P est:', P_est)
# Step 1: Z = ln( P / ( 1- P))
Z= np.log( P_est / ( 1- P_est))                              # Transform probabilities to logits
print ('Z:', Z)                                              # Print logits

print ( type ( Y))
print (  Y.shape[0] )
# Step 2, NumPy version. compute_stable_bce_cost returns only the scalar cost, so the
# (cost, gradient) pair unpacked here has to come from the *_derivative variant.
loss_Y, der_loss = compute_stable_bce_cost_derivative(Y, Z)
print ( loss_Y )                     # Stable binary cross-entropy (NumPy)

# Step 2, TensorFlow version on the same logits
bce = tf.keras.losses.BinaryCrossentropy(from_logits=True)
loss = bce(Y,Z)
print (loss)


P est: [9.999999e-01 9.999999e-01 9.999999e-01 1.000000e-07]
Z: [ 16.11809555  16.11809555  16.11809555 -16.11809555]
<class 'numpy.ndarray'>
4
12.08857176348192
tf.Tensor(12.088571548461914, shape=(), dtype=float64)
