In [1]:
import tensorflow.keras as K

2022-09-13 18:38:47.906623: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-09-13 18:38:47.906641: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [26]:
def build_model(nx, layers, activations, lambtha, keep_prob):
    """
    Assemble a fully connected network with the Keras Sequential API.

    nx: number of input features to the network
    layers: list with the number of nodes of each layer
    activations: list with the activation function of each layer
        (indexed in parallel with layers)
    lambtha: L2 regularization parameter applied to every Dense kernel
    keep_prob: probability that a node will be kept for dropout

    The Input class is deliberately not used: the input shape is
    declared on the first Dense layer instead.

    Returns: the keras model
    """
    network = K.Sequential()
    last = len(layers) - 1
    for index, units in enumerate(layers):
        dense_kwargs = {
            'activation': activations[index],
            'kernel_regularizer': K.regularizers.L2(lambtha),
        }
        # Only the first layer has to declare the shape of its input.
        if index == 0:
            dense_kwargs['input_shape'] = (nx,)
        network.add(K.layers.Dense(units, **dense_kwargs))
        # Dropout follows every layer except the output layer; Keras
        # expects the fraction to drop, hence 1 - keep_prob.
        if index < last:
            network.add(K.layers.Dropout(1 - keep_prob))
    return network

In [3]:
# Force Seed - fix for Keras
# The same SEED is applied to PYTHONHASHSEED, random, NumPy and TensorFlow
# before the model is built.
SEED = 0

import os
os.environ['PYTHONHASHSEED'] = str(SEED)
import random
random.seed(SEED)
import numpy as np
np.random.seed(SEED)
import tensorflow as tf
tf.random.set_seed(SEED)
import tensorflow.keras as K

if __name__ == '__main__':
    # 784 input features, layers of 256/256/10 nodes with tanh/tanh/softmax,
    # lambtha=0.001 and keep_prob=0.95 (see build_model's signature).
    network = build_model(784, [256, 256, 10], ['tanh', 'tanh', 'softmax'], 0.001, 0.95)
    network.summary()
    # The captured output below shows one scalar tensor per entry.
    print(network.losses)

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 256)               200960    
_________________________________________________________________
dropout (Dropout)            (None, 256)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 256)               65792     
_________________________________________________________________
dropout_1 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 10)                2570      
Total params: 269,322
Trainable params: 269,322
Non-trainable params: 0
_________________________________________________________________
[<tf.Tensor: shape=(), dtype=float32, numpy=0.38640815>, <tf.Tensor: shape=(), dtype=float32, numpy=0.25695384>, <tf

2022-09-13 18:38:58.414279: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2022-09-13 18:38:58.414316: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)
2022-09-13 18:38:58.414347: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (archpc): /proc/driver/nvidia/version does not exist
2022-09-13 18:38:58.414648: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


# You are not allowed to use the Sequential class

In [1]:
def build_model(nx, layers, activations, lambtha, keep_prob):
    """
    Build a neural network with the Keras functional API
    (the Sequential class is not used).

    nx is the number of input features to the network

    layers is a list containing the number of nodes in
    each layer of the network

    activations is a list containing the activation functions
    used for each layer of the network (indexed in parallel with layers)

    lambtha is the L2 regularization parameter

    keep_prob is the probability that a node will be kept for dropout

    Returns: the keras model
    """
    inputs = K.Input(shape=(nx,))
    l2 = K.regularizers.L2(lambtha)
    last = len(layers) - 1

    outputs = inputs
    for i, layer in enumerate(layers):
        outputs = K.layers.Dense(layer,
                                 activation=activations[i],
                                 kernel_regularizer=l2)(outputs)
        # No dropout after the output layer.
        if i != last:
            # Keras' Dropout takes the fraction of units to DROP, so the
            # keep probability has to be converted to a drop rate.
            outputs = K.layers.Dropout(1 - keep_prob)(outputs)

    return K.Model(inputs=inputs, outputs=outputs)

In [2]:
#!/usr/bin/env python3

# Force Seed - fix for Keras
# The same SEED is applied to PYTHONHASHSEED, random, NumPy and TensorFlow
# before the model is built.
SEED = 0

import os
os.environ['PYTHONHASHSEED'] = str(SEED)
import random
random.seed(SEED)
import numpy as np
np.random.seed(SEED)
import tensorflow as tf
tf.random.set_seed(SEED)
import tensorflow.keras as K

if __name__ == '__main__':
    # Same arguments as the Sequential demo: 784 input features, layers of
    # 256/256/10 nodes, tanh/tanh/softmax, lambtha=0.001, keep_prob=0.95.
    network = build_model(784, [256, 256, 10], ['tanh', 'tanh', 'softmax'], 0.001, 0.95)
    network.summary()
    print(network.losses)

2022-09-13 19:42:56.579719: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-09-13 19:42:56.579739: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 784)]             0         
_________________________________________________________________
dense (Dense)                (None, 256)               200960    
_________________________________________________________________
dropout (Dropout)            (None, 256)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 256)               65792     
_________________________________________________________________
dropout_1 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 10)                2570      
Total params: 269,322
Trainable params: 269,322
Non-trainable params: 0
_______________________________________________________

2022-09-13 19:42:57.975802: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2022-09-13 19:42:57.975827: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)
2022-09-13 19:42:57.975852: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (archpc): /proc/driver/nvidia/version does not exist
2022-09-13 19:42:57.976057: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [27]:
def optimize_model(network, alpha, beta1, beta2):
    """
    Compile a keras model for training with the Adam optimizer,
    categorical crossentropy loss and the accuracy metric.

    network is the model to optimize
    alpha is the learning rate
    beta1 is the first Adam optimization parameter
    beta2 is the second Adam optimization parameter

    Returns: None
    """
    adam = K.optimizers.Adam(learning_rate=alpha,
                             beta_1=beta1,
                             beta_2=beta2)
    categorical_loss = K.losses.CategoricalCrossentropy()
    network.compile(optimizer=adam,
                    loss=categorical_loss,
                    metrics=['accuracy'])

In [14]:

import tensorflow as tf


if __name__ == '__main__':
    # Build a model, compile it with optimize_model, then print what the
    # compile step stored on the model: its loss and its optimizer.
    model = build_model(784, [256, 256, 10], ['tanh', 'tanh', 'softmax'], 0.001, 0.95)
    # alpha=0.01, beta1=0.99, beta2=0.9 (see optimize_model's signature)
    optimize_model(model, 0.01, 0.99, 0.9)
    print(model.loss)
    opt = model.optimizer
    print(opt.__class__)
    # Convert the optimizer's lr / beta_1 / beta_2 to plain numbers via .numpy().
    print(tuple(map(lambda x: x.numpy(),(opt.lr, opt.beta_1, opt.beta_2))))

<class 'keras.losses.CategoricalCrossentropy'>
<class 'keras.optimizer_v2.adam.Adam'>
(0.01, 0.99, 0.9)


In [19]:
def one_hot(labels, classes=None):
    """
    Convert a vector of integer labels into a one-hot matrix with Keras.

    labels: the integer class labels to encode
    classes: number of classes; when None it is taken as max(labels) + 1

    Returns: the one-hot encoding, as returned by .numpy() on the
    CategoryEncoding layer output
    """
    num_classes = max(labels) + 1 if classes is None else classes
    encoder = K.layers.CategoryEncoding(output_mode="one_hot",
                                        num_tokens=num_classes)
    encoded = encoder(labels)
    return encoded.numpy()

In [20]:
import numpy as np

if __name__ == '__main__':
    # Take the first 10 entries of 'Y_train' from the MNIST archive and show
    # them next to their one-hot encoding.
    labels = np.load('../data/MNIST.npz')['Y_train'][:10]
    print(labels)
    print(one_hot(labels))   

[5 0 4 1 9 2 1 3 1 4]
[[0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]]


In [21]:

def train_model(network, data, labels, batch_size, epochs,
                verbose=True, shuffle=False):
    """
    Fit a Keras model with mini-batch gradient descent.

    network: the model to train
    data: numpy.ndarray of shape (m, nx) containing the input data
    labels: one-hot numpy.ndarray of shape (m, classes) containing
        the labels of data
    batch_size: size of the batch used for mini-batch gradient descent
    epochs: number of passes through data
    verbose: boolean that determines if output should be printed
        during training
    shuffle: boolean that determines whether to shuffle the batches
        every epoch. Shuffling is normally a good idea, but the default
        is False for reproducibility.

    Returns: the History object generated after training the model
    """
    fit_options = {
        'batch_size': batch_size,
        'epochs': epochs,
        'verbose': verbose,
        'shuffle': shuffle,
    }
    return network.fit(x=data, y=labels, **fit_options)

In [28]:
# The same SEED is applied to PYTHONHASHSEED, random, NumPy and TensorFlow
# before the model is built and trained.
SEED = 0

import os
os.environ['PYTHONHASHSEED'] = str(SEED)
import random
random.seed(SEED)
import numpy as np
np.random.seed(SEED)
import tensorflow as tf
tf.random.set_seed(SEED)
import tensorflow.keras as K



if __name__ == '__main__':
    datasets = np.load('../data/MNIST.npz')
    X_train = datasets['X_train']
    # Flatten every sample to one row: (m, ...) -> (m, features).
    X_train = X_train.reshape(X_train.shape[0], -1)
    Y_train = datasets['Y_train']
    Y_train_oh = one_hot(Y_train)

    # Regularization hyperparameters passed to build_model.
    lambtha = 0.0001
    keep_prob = 0.95
    network = build_model(784, [256, 256, 10], ['relu', 'relu', 'softmax'], lambtha, keep_prob)
    # Adam hyperparameters passed to optimize_model.
    alpha = 0.001
    beta1 = 0.9
    beta2 = 0.999
    optimize_model(network, alpha, beta1, beta2)
    batch_size = 64
    epochs = 5
    train_model(network, X_train, Y_train_oh, batch_size, epochs)

2022-09-13 22:17:45.125496: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 156800000 exceeds 10% of free system memory.


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [29]:
def train_model(network, data, labels, batch_size, epochs,
                validation_data=None, verbose=True, shuffle=False):
    """
    Fit a Keras model with mini-batch gradient descent, optionally
    evaluating it on validation data.

    network: the model to train
    data: numpy.ndarray of shape (m, nx) containing the input data
    labels: one-hot numpy.ndarray of shape (m, classes) containing
        the labels of data
    batch_size: size of the batch used for mini-batch gradient descent
    epochs: number of passes through data
    validation_data: the data to validate the model with, if not None;
        it is forwarded unchanged to network.fit
    verbose: boolean that determines if output should be printed
        during training
    shuffle: boolean that determines whether to shuffle the batches
        every epoch. Shuffling is normally a good idea, but the default
        is False for reproducibility.

    Returns: the History object generated after training the model
    """
    fit_options = {
        'batch_size': batch_size,
        'epochs': epochs,
        'verbose': verbose,
        'shuffle': shuffle,
        'validation_data': validation_data,
    }
    return network.fit(x=data, y=labels, **fit_options)



In [30]:

# Force Seed - fix for Keras
# The same SEED is applied to PYTHONHASHSEED, random, NumPy and TensorFlow
# before the model is built and trained.
SEED = 0

import os
os.environ['PYTHONHASHSEED'] = str(SEED)
import random
random.seed(SEED)
import numpy as np
np.random.seed(SEED)
import tensorflow as tf
tf.random.set_seed(SEED)
import tensorflow.keras as K

if __name__ == '__main__':
    datasets = np.load('../data/MNIST.npz')
    X_train = datasets['X_train']
    # Flatten every sample to one row: (m, ...) -> (m, features).
    X_train = X_train.reshape(X_train.shape[0], -1)
    Y_train = datasets['Y_train']
    Y_train_oh = one_hot(Y_train)
    # The validation split gets the same flatten + one-hot treatment.
    X_valid = datasets['X_valid']
    X_valid = X_valid.reshape(X_valid.shape[0], -1)
    Y_valid = datasets['Y_valid']
    Y_valid_oh = one_hot(Y_valid)

    # Regularization hyperparameters passed to build_model.
    lambtha = 0.0001
    keep_prob = 0.95
    network = build_model(784, [256, 256, 10], ['relu', 'relu', 'softmax'], lambtha, keep_prob)
    # Adam hyperparameters passed to optimize_model.
    alpha = 0.001
    beta1 = 0.9
    beta2 = 0.999
    optimize_model(network, alpha, beta1, beta2)
    batch_size = 64
    epochs = 5
    train_model(network, X_train, Y_train_oh, batch_size, epochs, validation_data=(X_valid, Y_valid_oh))

2022-09-13 22:22:51.813670: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 156800000 exceeds 10% of free system memory.


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [31]:
def train_model(network, data, labels, batch_size, epochs,
                validation_data=None, early_stopping=False,
                patience=0, verbose=True, shuffle=False):
    """
    Train Keras Model, optionally with early stopping.

    network is the model to train

    data is a numpy.ndarray of shape (m, nx) containing the input data

    labels is a one-hot numpy.ndarray of shape (m, classes)
     containing the labels of data

    batch_size is the size of the batch used for mini-batch gradient descent

    epochs is the number of passes through data for mini-batch gradient descent

    validation_data is the data to validate the model with, if not None

    early_stopping is a boolean that indicates whether early stopping
    should be used. It is only applied when validation_data is given,
    because the callback monitors the validation loss.

    patience is the patience used for early stopping

    verbose is a boolean that determines if output should be
    printed during training

    shuffle is a boolean that determines whether to
    shuffle the batches every epoch.

    Normally, it is a good idea to shuffle, but for reproducibility,
    we have chosen to set the default to False.

    Returns: the History object generated after training the model
    """
    # Always hand fit() a list, so training without validation data (or
    # with early_stopping=False) no longer hits an unbound local name.
    callbacks = []
    if early_stopping and validation_data:
        callbacks.append(K.callbacks.EarlyStopping(
            monitor='val_loss', patience=patience))

    history = network.fit(data, labels, batch_size, epochs,
                          verbose=verbose, shuffle=shuffle,
                          validation_data=validation_data,
                          callbacks=callbacks)
    return history

In [32]:

# Force Seed - fix for Keras
# The same SEED is applied to PYTHONHASHSEED, random, NumPy and TensorFlow
# before the model is built and trained.
SEED = 0

import os
os.environ['PYTHONHASHSEED'] = str(SEED)
import random
random.seed(SEED)
import numpy as np
np.random.seed(SEED)
import tensorflow as tf
tf.random.set_seed(SEED)
import tensorflow.keras as K


if __name__ == '__main__':
    datasets = np.load('../data/MNIST.npz')
    X_train = datasets['X_train']
    # Flatten every sample to one row: (m, ...) -> (m, features).
    X_train = X_train.reshape(X_train.shape[0], -1)
    Y_train = datasets['Y_train']
    Y_train_oh = one_hot(Y_train)
    # The validation split gets the same flatten + one-hot treatment.
    X_valid = datasets['X_valid']
    X_valid = X_valid.reshape(X_valid.shape[0], -1)
    Y_valid = datasets['Y_valid']
    Y_valid_oh = one_hot(Y_valid)

    # Regularization hyperparameters passed to build_model.
    lambtha = 0.0001
    keep_prob = 0.95
    network = build_model(784, [256, 256, 10], ['relu', 'relu', 'softmax'], lambtha, keep_prob)
    # Adam hyperparameters passed to optimize_model.
    alpha = 0.001
    beta1 = 0.9
    beta2 = 0.999
    optimize_model(network, alpha, beta1, beta2)
    batch_size = 64
    # Upper bound on epochs; the captured output below stops at epoch 9.
    epochs = 30
    train_model(network, X_train, Y_train_oh, batch_size, epochs,
                validation_data=(X_valid, Y_valid_oh), early_stopping=True,
                patience=3)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30


In [33]:

def train_model(network, data, labels, batch_size, epochs,
                validation_data=None, early_stopping=False,
                patience=0, learning_rate_decay=False,
                alpha=0.1, decay_rate=1, verbose=True,
                shuffle=False):
    """
    Fit a Keras model, optionally with early stopping and
    inverse time learning rate decay.

    network: the model to train
    data: numpy.ndarray of shape (m, nx) containing the input data
    labels: one-hot numpy.ndarray of shape (m, classes) containing
        the labels of data
    batch_size: size of the batch used for mini-batch gradient descent
    epochs: number of passes through data
    validation_data: the data to validate the model with, if not None.
        Both optional callbacks are only installed when it is provided.
    early_stopping: boolean that indicates whether early stopping
        (monitoring 'val_loss') should be used
    patience: the patience used for early stopping
    learning_rate_decay: boolean that indicates whether learning rate
        decay should be used
    alpha: the initial learning rate
    decay_rate: the decay rate
    verbose: boolean that determines if output should be printed
        during training
    shuffle: boolean that determines whether to shuffle the batches
        every epoch. Shuffling is normally a good idea, but the default
        is False for reproducibility.

    Returns: the History object generated after training the model
    """

    def schedule(epoch):
        """Learning rate for `epoch`: alpha / (1 + decay_rate * epoch)."""
        return 1 * (alpha / (1. + decay_rate * epoch))

    has_validation = bool(validation_data)
    training_callbacks = []

    if has_validation and early_stopping:
        training_callbacks.append(
            K.callbacks.EarlyStopping('val_loss', patience=patience))

    if has_validation and learning_rate_decay:
        # verbose=True makes the scheduler print the rate it sets.
        training_callbacks.append(
            K.callbacks.LearningRateScheduler(schedule, verbose=True))

    return network.fit(data, labels, batch_size, epochs,
                       validation_data=validation_data,
                       callbacks=training_callbacks,
                       shuffle=shuffle,
                       verbose=verbose)


In [34]:

# Force Seed - fix for Keras
# The same SEED is applied to PYTHONHASHSEED, random, NumPy and TensorFlow
# before the model is built and trained.
SEED = 0

import os
os.environ['PYTHONHASHSEED'] = str(SEED)
import random
random.seed(SEED)
import numpy as np
np.random.seed(SEED)
import tensorflow as tf
tf.random.set_seed(SEED)
import tensorflow.keras as K


if __name__ == '__main__':
    datasets = np.load('../data/MNIST.npz')
    X_train = datasets['X_train']
    # Flatten every sample to one row: (m, ...) -> (m, features).
    X_train = X_train.reshape(X_train.shape[0], -1)
    Y_train = datasets['Y_train']
    Y_train_oh = one_hot(Y_train)
    # The validation split gets the same flatten + one-hot treatment.
    X_valid = datasets['X_valid']
    X_valid = X_valid.reshape(X_valid.shape[0], -1)
    Y_valid = datasets['Y_valid']
    Y_valid_oh = one_hot(Y_valid)

    # Regularization hyperparameters passed to build_model.
    lambtha = 0.0001
    keep_prob = 0.95
    network = build_model(784, [256, 256, 10], ['relu', 'relu', 'softmax'], lambtha, keep_prob)
    # Adam hyperparameters; alpha is also passed to train_model below as the
    # initial learning rate of the decay schedule.
    alpha = 0.001
    beta1 = 0.9
    beta2 = 0.999
    optimize_model(network, alpha, beta1, beta2)
    batch_size = 64
    # Large upper bound on epochs; early stopping (patience=3) is enabled.
    epochs = 1000
    # decay_rate is not passed, so train_model's default (1) is used.
    train_model(network, X_train, Y_train_oh, batch_size, epochs,
                validation_data=(X_valid, Y_valid_oh), early_stopping=True,
                patience=3, learning_rate_decay=True, alpha=alpha)

Epoch 1/1000

Epoch 00001: LearningRateScheduler setting learning rate to 0.001.
Epoch 2/1000

Epoch 00002: LearningRateScheduler setting learning rate to 0.0005.
Epoch 3/1000

Epoch 00003: LearningRateScheduler setting learning rate to 0.0003333333333333333.
Epoch 4/1000

Epoch 00004: LearningRateScheduler setting learning rate to 0.00025.
Epoch 5/1000

Epoch 00005: LearningRateScheduler setting learning rate to 0.0002.
Epoch 6/1000

Epoch 00006: LearningRateScheduler setting learning rate to 0.00016666666666666666.
Epoch 7/1000

Epoch 00007: LearningRateScheduler setting learning rate to 0.00014285714285714287.
Epoch 8/1000

Epoch 00008: LearningRateScheduler setting learning rate to 0.000125.
Epoch 9/1000

Epoch 00009: LearningRateScheduler setting learning rate to 0.00011111111111111112.
Epoch 10/1000

Epoch 00010: LearningRateScheduler setting learning rate to 0.0001.
Epoch 11/1000

Epoch 00011: LearningRateScheduler setting learning rate to 9.090909090909092e-05.
Epoch 12/1000

E