# Special Topics in Computer Science
## INT3121 20 UET ----- 4-6 Wednesday 210 GD3
## Week 3

## Diep Ng., Ph.D.
#### Mail-to: ngocdiep at vnu.edu.vn

# 1. Cross entropy loss

In [79]:
def cross_entropy(y_true, y_pred, eps=1e-12):
    '''
    Log loss is undefined for p=0 or p=1,
        so probabilities are
        clipped to (eps, 1 - eps).
    '''
    y_true = np.clip(y_true, eps, 1. - eps)
    y_pred = np.clip(y_pred, eps, 1. - eps)
    log_likelihood = -np.log(y_pred)
    loss = -(y_true * np.log(y_pred)).sum()
    return loss

In [80]:
cross_entropy([1, 0, 0, 0], [0.9, 0, 0, 0.1])

0.10536051571528555

In [84]:
cross_entropy([1, 0, 0, 0], [0.9, 0.05, 0.05, 0]) #doesn't matter how prob.s of incorrect classes distribute

0.1053605156913434

In [85]:
cross_entropy([1, 0, 0, 0], [0.8, 0.1, 0.1, 0])

0.22314355134622274

In [87]:
cross_entropy([1, 0, 0, 0], [0.1, 0.8, 0.1, 0]) #totally incorrect

2.3025850930218996

In [88]:
cross_entropy([1, 0, 0, 0], [0.3, 0.3, 0.3, 0.1]) #

1.2039728043294426

In [73]:
def cross_entropy2(x, y):
    """ Computes cross entropy between two distributions.
    Input: x: iterabale of N non-negative values
           y: iterabale of N non-negative values
    Returns: scalar
    """
    x = np.array(x, dtype=np.float)
    y = np.array(y, dtype=np.float)
    if np.any(x < 0) or np.any(y < 0):
        raise ValueError('Negative values exist.')

    # Force to proper probability mass function.
   
    x /= np.sum(x)
    y /= np.sum(y)

    # Ignore zero 'y' elements.
    mask = y > 0
    x = x[mask]
    y = y[mask]    
    ce = -np.sum(x * np.log(y)) 
    return ce

In [89]:
cross_entropy2([1, 0, 0, 0], [0.9, 0, 0, 0.1])

0.10536051565782628

In [90]:
from keras import metrics

In [93]:
metrics.binary_accuracy([1, 0], [1, 1])

<tf.Tensor 'Mean:0' shape=() dtype=float32>

In [98]:
K.metrisc.binary_accuracy(np.array([1, 0]), np.array([1, 1]))

AttributeError: module 'keras.backend' has no attribute 'metrisc'

# 2. Backpropagation

Example computation graph & computed gradients:

$f(x, y, z) = (x+y)*z$

<img src='backpropagation.png' width=400/>


In [101]:
# set some inputs
x = -2; y = 5; z = -4

# perform the forward pass
q = x + y # q becomes 3
f = q * z # f becomes -12

# perform the backward pass (backpropagation) in reverse order:
# first backprop through f = q * z
dfdz = q # df/dz = q, so gradient on z becomes 3
dfdq = z # df/dq = z, so gradient on q becomes -4
# now backprop through q = x + y
dqdx = 1.0
dqdy = 1.0
dfdx = dfdq * dqdx # will be z = -4 #And the multiplication here is the chain rule!
dfdy = dfdq * dqdy# will be z = -4 #
print("[dfdx, dfdy, dfdz] = [{}, {}, {}]".format(dfdx, dfdy, dfdz))

[dfdx, dfdy, dfdz] = [-4.0, -4.0, 3]


# Hyperparameter search

Potential use cases of hyperas

- Varying dropout probabilities, sampling from a uniform distribution
- Different layer output sizes
- Different optimization algorithms to use
- Varying choices of activation functions
- Conditionally adding layers depending on a choice
- Swapping whole sets of layers

In [108]:
%%writefile hyperas_example.py
'''
Reference: https://github.com/maxpumperla/hyperas
'''
import numpy as np

from hyperopt import Trials, STATUS_OK, tpe
from keras.datasets import mnist
from keras.layers.core import Dense, Dropout, Activation
from keras.models import Sequential
from keras.utils import np_utils

from hyperas import optim
from hyperas.distributions import choice, uniform


def data():
    """
    Data providing function:

    This function is separated from create_model() so that hyperopt
    won't reload data for each evaluation run.
    """
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    x_train = x_train.reshape(60000, 784)
    x_test = x_test.reshape(10000, 784)
    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')
    x_train /= 255
    x_test /= 255
    nb_classes = 10
    y_train = np_utils.to_categorical(y_train, nb_classes)
    y_test = np_utils.to_categorical(y_test, nb_classes)
    return x_train, y_train, x_test, y_test


def create_model(x_train, y_train, x_test, y_test):
    """
    Model providing function:

    Create Keras model with double curly brackets dropped-in as needed.
    Return value has to be a valid python dictionary with two customary keys:
        - loss: Specify a numeric evaluation metric to be minimized
        - status: Just use STATUS_OK and see hyperopt documentation if not feasible
    The last one is optional, though recommended, namely:
        - model: specify the model just created so that we can later use it again.
    """
    model = Sequential()
    model.add(Dense(512, input_shape=(784,)))
    model.add(Activation('relu'))
    model.add(Dropout({{uniform(0, 1)}}))
    model.add(Dense({{choice([256, 512, 1024])}}))
    model.add(Activation({{choice(['relu', 'sigmoid'])}}))
    model.add(Dropout({{uniform(0, 1)}}))

    # If we choose 'four', add an additional fourth layer
    if {{choice(['three', 'four'])}} == 'four':
        model.add(Dense(100))

        # We can also choose between complete sets of layers

        model.add({{choice([Dropout(0.5), Activation('linear')])}})
        model.add(Activation('relu'))

    model.add(Dense(10))
    model.add(Activation('softmax'))

    model.compile(loss='categorical_crossentropy', metrics=['accuracy'],
                  optimizer={{choice(['rmsprop', 'adam', 'sgd'])}})

    result = model.fit(x_train, y_train,
              batch_size={{choice([64, 128])}},
              epochs=2,
              verbose=2,
              validation_split=0.1)
    #get the highest validation accuracy of the training epochs
    validation_acc = np.amax(result.history['val_acc']) 
    print('Best validation acc of epoch:', validation_acc)
    return {'loss': -validation_acc, 'status': STATUS_OK, 'model': model}


if __name__ == '__main__':
    best_run, best_model = optim.minimize(model=create_model,
                                          data=data,
                                          algo=tpe.suggest,
                                          max_evals=5,
                                          trials=Trials())
    X_train, Y_train, X_test, Y_test = data()
    print("Evalutation of best performing model:")
    print(best_model.evaluate(X_test, Y_test))
    print("Best performing model chosen hyper-parameters:")
    print(best_run)

Overwriting hyperas_example.py


In [109]:
!python hyperas_example.py

Using TensorFlow backend.
>>> Imports:
#coding=utf-8

try:
    import numpy as np
except:
    pass

try:
    from hyperopt import Trials, STATUS_OK, tpe
except:
    pass

try:
    from keras.datasets import mnist
except:
    pass

try:
    from keras.layers.core import Dense, Dropout, Activation
except:
    pass

try:
    from keras.models import Sequential
except:
    pass

try:
    from keras.utils import np_utils
except:
    pass

try:
    from hyperas import optim
except:
    pass

try:
    from hyperas.distributions import choice, uniform
except:
    pass

>>> Hyperas search space:

def get_space():
    return {
        'Dropout': hp.uniform('Dropout', 0, 1),
        'Dense': hp.choice('Dense', [256, 512, 1024]),
        'Activation': hp.choice('Activation', ['relu', 'sigmoid']),
        'Dropout_1': hp.uniform('Dropout_1', 0, 1),
        'Dropout_2': hp.choice('Dropout_2', ['three', 'four']),
        'add': hp.choice('add', [Dropout(0.5), Activation('linear')]),
        'optimize

[0.11373598002339713, 0.9666]
Best performing model chosen hyper-parameters:
{'Activation': 1, 'Dense': 2, 'Dropout': 0.03323327852409652, 'Dropout_1': 0.0886198698550964, 'Dropout_2': 1, 'add': 0, 'batch_size': 1, 'optimizer': 0}
