In [2]:
# Imports
import os
import random

import numpy as np
import tensorflow.compat.v1 as tf
import tensorflow.compat.v1.keras as K
# Run TensorFlow the 1.x way: eager execution off, v2 behaviours off.
# `tf` is already the compat.v1 namespace, so no extra prefix is needed.
tf.disable_eager_execution()
tf.disable_v2_behavior()

In [3]:
# 0. Sequential
"""
Write a function that builds a neural network with the Keras library
Use Sequential model since that's in the task name
"""
def build_model_0(nx, layers, activations, lambtha, keep_prob):
    """
    Build a fully connected network with the Keras Sequential API.

    nx: number of input features to the network
    layers: list containing the number of nodes in each layer of the network
    activations: list containing the activation function of each layer
    lambtha: L2 regularization parameter applied to every layer's kernel
    keep_prob: probability that a node will be kept for dropout

    The `Input` class is not allowed, so the input shape is declared with
    an `InputLayer` instead.
    Returns the keras model (it is not compiled here)
    """
    last_index = len(layers) - 1
    network = K.Sequential()
    # Declare the shape of one input sample
    network.add(K.layers.InputLayer(input_shape=(nx,)))
    for index, node_count in enumerate(layers):
        dense = K.layers.Dense(
            units=node_count,
            activation=activations[index],
            kernel_regularizer=K.regularizers.l2(lambtha),
        )
        network.add(dense)
        # Every layer but the last is followed by dropout. Dropout takes the
        # fraction of nodes to drop, hence 1 - keep_prob.
        if index != last_index:
            network.add(K.layers.Dropout(1 - keep_prob))
    return network

In [4]:
# 0-main
# Build the three-layer Sequential network, then show its layout and the
# regularization losses attached to it.
network = build_model_0(nx=784,
                        layers=[256, 256, 10],
                        activations=['tanh', 'tanh', 'softmax'],
                        lambtha=0.001,
                        keep_prob=0.95)
network.summary()
print(network.losses)

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 256)               200960    
                                                                 
 dropout (Dropout)           (None, 256)               0         
                                                                 
 dense_1 (Dense)             (None, 256)               65792     
                                                                 
 dropout_1 (Dropout)         (None, 256)               0         
                                                                 
 dense_2 (Dense)             (None, 10)                2570      
                                                                 
Total params: 269,322
Trainable params: 269,322
Non-trainable params: 0
_________________________________________________________________
[<tf.Tensor 'dense/kernel/Regularizer/mul:0' shape

In [5]:
# 0. Sequential | Attempt 2
"""
Write a function that builds a neural network with the Keras library
Use Sequential model since that's in the task name
"""
def build_model_1(nx, layers, activations, lambtha, keep_prob):
    """
    Build a fully connected network with the Keras Sequential API, letting
    the first Dense layer declare the input shape.

    nx: number of input features to the network
    layers: list containing the number of nodes in each layer of the network
    activations: list containing the activation functions for each layer
    lambtha: L2 regularization parameter
    keep_prob: probability that a node will be kept for dropout

    Not allowed to use `Input` class
    Returns the keras model
    """
    model = K.Sequential()
    # First layer: no separate input layer, the Dense layer carries
    # `input_shape` itself
    model.add(K.layers.Dense(units=layers[0],
                             activation=activations[0],
                             kernel_regularizer=K.regularizers.l2(lambtha),
                             input_shape=(nx,)))
    # Remaining layers: dropout on the previous layer's output, then the
    # Dense layer. Starting at index 1 already guarantees that nothing is
    # added after the final layer, so no extra guard is needed.
    for layer in range(1, len(layers)):
        # Dropout takes the fraction of nodes to drop, hence 1 - keep_prob
        model.add(K.layers.Dropout(1 - keep_prob))
        model.add(K.layers.Dense(units=layers[layer],
                                 activation=activations[layer],
                                 kernel_regularizer=K.regularizers.l2(lambtha)
                                 ))
    return model

In [6]:
# 0-main | Attempt 2
# Same network as the first attempt, built without an explicit input layer.
network = build_model_1(nx=784,
                        layers=[256, 256, 10],
                        activations=['tanh', 'tanh', 'softmax'],
                        lambtha=0.001,
                        keep_prob=0.95)
network.summary()
print(network.losses)

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_3 (Dense)             (None, 256)               200960    
                                                                 
 dropout_2 (Dropout)         (None, 256)               0         
                                                                 
 dense_4 (Dense)             (None, 256)               65792     
                                                                 
 dropout_3 (Dropout)         (None, 256)               0         
                                                                 
 dense_5 (Dense)             (None, 10)                2570      
                                                                 
Total params: 269,322
Trainable params: 269,322
Non-trainable params: 0
_________________________________________________________________
[<tf.Tensor 'dense_3/kernel/Regularizer/mul:0' s

In [7]:
# Task 1. Input
"""
Write a function that builds a neural network with the Keras library
"""
def build_model(nx, layers, activations, lambtha, keep_prob):
    """
    Build a fully connected network with the Keras functional API.

    nx: number of input features to the network
    layers: list containing the number of nodes in each layer of the network
    activations: list containing the activation function of each layer
    lambtha: L2 regularization parameter applied to every layer's kernel
    keep_prob: probability that a node will be kept for dropout

    The `Sequential` class is not allowed here.
    Returns the keras model (it is not compiled here)
    """
    inputs = K.Input(shape=(nx,))
    # The first Dense layer reads the inputs directly (no dropout before it)
    outputs = K.layers.Dense(
        units=layers[0],
        activation=activations[0],
        kernel_regularizer=K.regularizers.l2(lambtha),
    )(inputs)
    # Each later layer sees the previous layer's output after dropout.
    # Dropout takes the fraction of nodes to drop, hence 1 - keep_prob.
    for index, node_count in enumerate(layers[1:], start=1):
        dropped = K.layers.Dropout(1 - keep_prob)(outputs)
        outputs = K.layers.Dense(
            units=node_count,
            activation=activations[index],
            kernel_regularizer=K.regularizers.l2(lambtha),
        )(dropped)

    return K.Model(inputs=inputs, outputs=outputs)



In [8]:
# 1-main
# A single-layer network: the summary shows only the input and one Dense
# layer, with no dropout.
model = build_model(nx=200,
                    layers=[10],
                    activations=['tanh'],
                    lambtha=0.01,
                    keep_prob=0.6)
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 200)]             0         
                                                                 
 dense_6 (Dense)             (None, 10)                2010      
                                                                 
Total params: 2,010
Trainable params: 2,010
Non-trainable params: 0
_________________________________________________________________


In [9]:
# 2. Optimize
"""
Write a function that sets up Adam optimization for a keras model with
categorical crossentropy loss and accuracy metrics
"""
def optimize_model(network, alpha, beta1, beta2):
    """
    Compile a keras model with Adam optimization, categorical crossentropy
    loss and accuracy metrics.

    network: the model to optimize (compiled in place)
    alpha: learning rate
    beta1: the first Adam optimization parameter
    beta2: second Adam optimization parameter
    Returns None
    """
    # `learning_rate` is the supported keyword; `lr` is a deprecated alias
    adam = K.optimizers.Adam(learning_rate=alpha,
                             beta_1=beta1,
                             beta_2=beta2)
    network.compile(optimizer=adam,
                    loss="categorical_crossentropy",
                    metrics=['accuracy'])

In [9]:
# 2-main
# model = build_model(784, [256, 256, 10], ['tanh', 'tanh', 'softmax'], 0.001, 0.95)
# optimize_model(model, 0.01, 0.99, 0.9)
# print(model.loss)
# print(model.metrics)
# opt = model.optimizer
# print(opt.__class__)
# with tf.Session() as sess:
#     sess.run(tf.global_variables_initializer())
#     print(sess.run((opt.lr, opt.beta_1, opt.beta_2))) 

In [10]:
# 3. One Hot
"""
Function that converts a label vector into a one-hot matrix
"""
def one_hot(labels, classes=None):
    """
    Convert a vector of labels into a one-hot matrix.

    labels: labels for the data
    classes: number of classes, passed through to
        `K.utils.to_categorical`; when None, that function decides
    Returns the one-hot matrix produced by `K.utils.to_categorical`
    """
    encoded = K.utils.to_categorical(labels, num_classes=classes)
    return encoded


In [11]:
# 3-main
# Show the first ten training labels, then their one-hot encoding.
labels = np.load('../data/MNIST.npz')['Y_train'][:10]
print(labels, one_hot(labels), sep='\n')

[5 0 4 1 9 2 1 3 1 4]
[[0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]]


In [12]:
# Task 4. Train
"""
Write a function that trains a model using mini-batch gradient descent
"""
def train_model_4(network, data, labels, batch_size,
                  epochs, verbose=True, shuffle=False):
    """
    Train a model with mini-batch gradient descent.

    network: model to train
    data: numpy.ndarray of shape (m, nx) containing the input data
        m: number of data points
        nx: number of features
    labels: one-hot numpy.ndarray of shape (m, classes) with the data labels
        m: number of data points
        classes: number of classes
    batch_size: size of the batch used for mini-batch gradient descent
    epochs: number of passes through data for mini-batch gradient descent
    verbose: boolean that determines if the output should be printed during
        training
    shuffle: boolean that determines whether to shuffle the batches every
        epoch. Shuffling is normally a good idea; the default is False for
        reproducibility
    Returns: the History object generated after training the model
    """
    fit_options = {
        'batch_size': batch_size,
        'epochs': epochs,
        'verbose': verbose,
        'shuffle': shuffle,
    }
    history = network.fit(x=data, y=labels, **fit_options)
    return history

In [None]:
# 4-main
# Force Seed - fix for Keras
SEED = 0

# NOTE: Python reads PYTHONHASHSEED when an interpreter starts, so setting it
# here cannot change hashing in the already-running kernel.
os.environ['PYTHONHASHSEED'] = str(SEED)
random.seed(SEED)
np.random.seed(SEED)
tf.set_random_seed(SEED)
# Limit TensorFlow to one intra-op and one inter-op thread for this session
session_conf = tf.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
K.backend.set_session(sess)

# Load the training set; flatten every sample to one row, one-hot the labels
datasets = np.load('../data/MNIST.npz')
X_train = datasets['X_train']
X_train = X_train.reshape(X_train.shape[0], -1)
Y_train = datasets['Y_train']
Y_train_oh = one_hot(Y_train)

# Build, compile and train the network
lambtha = 0.0001
keep_prob = 0.95
network = build_model(784, [256, 256, 10], ['relu', 'relu', 'softmax'], lambtha, keep_prob)
alpha = 0.001
beta1 = 0.9
beta2 = 0.999
optimize_model(network, alpha, beta1, beta2)
batch_size = 64
epochs = 5
train_model_4(network, X_train, Y_train_oh, batch_size, epochs)

In [14]:
# Task 5. Validate
"""
Based on 4. Train, update the function def train_model to also analyze
validation data
"""
def train_model_5(network, data, labels, batch_size, epochs,
                  validation_data=None, verbose=True, shuffle=False):
    """
    Train a model with mini-batch gradient descent, optionally evaluating
    validation data as well.

    network: model to train
    data: numpy.ndarray of shape (m, nx) containing the input data
        m: number of data points
        nx: number of features
    labels: one-hot numpy.ndarray of shape (m, classes) with the data labels
        m: number of data points
        classes: number of classes
    batch_size: size of the batch used for mini-batch gradient descent
    epochs: number of passes through data for mini-batch gradient descent
    validation_data: the data to validate the model with, if not `None`
    verbose: boolean that determines if the output should be printed during
        training
    shuffle: boolean that determines whether to shuffle the batches every
        epoch. Shuffling is normally a good idea; the default is False for
        reproducibility
    Returns: the History object generated after training the model
    """
    fit_options = dict(
        x=data,
        y=labels,
        batch_size=batch_size,
        epochs=epochs,
        verbose=verbose,
        shuffle=shuffle,
        validation_data=validation_data,
    )
    return network.fit(**fit_options)

In [None]:
# 5-main
# Force Seed - fix for Keras
SEED = 0

# NOTE: Python reads PYTHONHASHSEED when an interpreter starts, so setting it
# here cannot change hashing in the already-running kernel.
os.environ['PYTHONHASHSEED'] = str(SEED)
random.seed(SEED)
np.random.seed(SEED)
tf.set_random_seed(SEED)
# Limit TensorFlow to one intra-op and one inter-op thread for this session
session_conf = tf.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
K.backend.set_session(sess)

# Load training and validation sets; flatten every sample to one row and
# one-hot the labels
datasets = np.load('../data/MNIST.npz')
X_train = datasets['X_train']
X_train = X_train.reshape(X_train.shape[0], -1)
Y_train = datasets['Y_train']
Y_train_oh = one_hot(Y_train)
X_valid = datasets['X_valid']
X_valid = X_valid.reshape(X_valid.shape[0], -1)
Y_valid = datasets['Y_valid']
Y_valid_oh = one_hot(Y_valid)

# Build, compile and train the network, validating after each epoch
lambtha = 0.0001
keep_prob = 0.95
network = build_model(784, [256, 256, 10], ['relu', 'relu', 'softmax'], lambtha, keep_prob)
alpha = 0.001
beta1 = 0.9
beta2 = 0.999
optimize_model(network, alpha, beta1, beta2)
batch_size = 64
epochs = 5
train_model_5(network, X_train, Y_train_oh, batch_size, epochs, validation_data=(X_valid, Y_valid_oh))

In [16]:
# Task 6. Early Stopping
"""
Based on 5. Validate, update the function def train_model to also train the
model using early stopping
"""
def train_model_6(network, data, labels, batch_size, epochs,
                  validation_data=None, early_stopping=False, patience=0,
                  verbose=True, shuffle=False):
    """
    Train a model with mini-batch gradient descent, optionally with
    validation data and early stopping.

    network: model to train
    data: numpy.ndarray of shape (m, nx) containing the input data
        m: number of data points
        nx: number of features
    labels: one-hot numpy.ndarray of shape (m, classes) with the data labels
        m: number of data points
        classes: number of classes
    batch_size: size of the batch used for mini-batch gradient descent
    epochs: number of passes through data for mini-batch gradient descent
    validation_data: the data to validate the model with, if not `None`
    early_stopping: boolean that indicates whether or not to stop early
        early stopping is only performed if `validation_data` exists
        early stopping is based on validation loss
    patience: patience used for early stopping
    verbose: boolean that determines if the output should be printed during
        training
    shuffle: boolean that determines whether to shuffle the batches every
        epoch. Normally, it is a good idea to shuffle, but for reproducibility
        we have chosen to set the default to False
    Returns: the History object generated after training the model
    """
    callbacks = []
    # Early stopping monitors "val_loss", so it is only set up when there is
    # validation data. Every other combination of arguments still trains;
    # none of them may fall through without calling fit().
    if early_stopping and validation_data is not None:
        callbacks.append(K.callbacks.EarlyStopping(monitor="val_loss",
                                                   patience=patience))

    return network.fit(x=data,
                       y=labels,
                       batch_size=batch_size,
                       epochs=epochs,
                       verbose=verbose,
                       shuffle=shuffle,
                       validation_data=validation_data,
                       callbacks=callbacks
                       )


In [None]:
# 6-main
# Force Seed - fix for Keras
SEED = 0

# NOTE: Python reads PYTHONHASHSEED when an interpreter starts, so setting it
# here cannot change hashing in the already-running kernel.
os.environ['PYTHONHASHSEED'] = str(SEED)
random.seed(SEED)
np.random.seed(SEED)
tf.set_random_seed(SEED)
# Limit TensorFlow to one intra-op and one inter-op thread for this session
session_conf = tf.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
K.backend.set_session(sess)

# Load training and validation sets; flatten every sample to one row and
# one-hot the labels
datasets = np.load('../data/MNIST.npz')
X_train = datasets['X_train']
X_train = X_train.reshape(X_train.shape[0], -1)
Y_train = datasets['Y_train']
Y_train_oh = one_hot(Y_train)
X_valid = datasets['X_valid']
X_valid = X_valid.reshape(X_valid.shape[0], -1)
Y_valid = datasets['Y_valid']
Y_valid_oh = one_hot(Y_valid)

# Build, compile and train the network with early stopping on validation loss
lambtha = 0.0001
keep_prob = 0.95
network = build_model(784, [256, 256, 10], ['relu', 'relu', 'softmax'], lambtha, keep_prob)
alpha = 0.001
beta1 = 0.9
beta2 = 0.999
optimize_model(network, alpha, beta1, beta2)
batch_size = 64
epochs = 30
train_model_6(network, X_train, Y_train_oh, batch_size, epochs,
              validation_data=(X_valid, Y_valid_oh), early_stopping=True,
              patience=3)

In [12]:
# Task 7. Learning Rate Decay
"""
Based on 6. Early Stopping, update the function def train_model to also train
the model with learning rate decay
"""
def train_model_7(network, data, labels, batch_size, epochs,
                  validation_data=None, early_stopping=False, patience=0,
                  learning_rate_decay=False, alpha=0.1, decay_rate=1,
                  verbose=True, shuffle=False):
    """
    Train a model with mini-batch gradient descent, optionally with
    validation data, early stopping and learning rate decay.

    network: model to train
    data: numpy.ndarray of shape (m, nx) containing the input data
        m: number of data points
        nx: number of features
    labels: one-hot numpy.ndarray of shape (m, classes) with the data labels
        m: number of data points
        classes: number of classes
    batch_size: size of the batch used for mini-batch gradient descent
    epochs: number of passes through data for mini-batch gradient descent
    validation_data: the data to validate the model with, if not `None`
    early_stopping: boolean that indicates whether or not to stop early
        only performed if `validation_data` exists
        based on validation loss
    patience: patience used for early stopping
    learning_rate_decay: boolean that indicates whether or not learning rate
        decay should be used
        only performed if `validation_data` exists
        uses inverse time decay, stepping once per epoch
        the scheduler is created with verbose=1 so that Keras prints a
        message each time the learning rate updates
    alpha: initial learning rate used by the decay schedule
    decay_rate: decay rate used by the decay schedule
    verbose: boolean that determines if the output should be printed during
        training
    shuffle: boolean that determines whether to shuffle the batches every
        epoch. Shuffling is normally a good idea; the default is False for
        reproducibility
    Returns: the History object generated after training the model
    """
    has_validation = validation_data is not None
    callbacks = []

    if early_stopping and has_validation:
        stopper = K.callbacks.EarlyStopping(monitor="val_loss",
                                            patience=patience)
        callbacks.append(stopper)

    if learning_rate_decay and has_validation:

        def inverse_time_decay(epoch):
            """Learning rate to use for the given epoch index"""
            return (alpha / (1 + decay_rate * epoch))

        decay = K.callbacks.LearningRateScheduler(inverse_time_decay,
                                                  verbose=1)
        callbacks.append(decay)

    history = network.fit(
        x=data,
        y=labels,
        batch_size=batch_size,
        epochs=epochs,
        verbose=verbose,
        shuffle=shuffle,
        validation_data=validation_data,
        callbacks=callbacks,
    )
    return history


In [14]:
# 7-main
# Force Seed - fix for Keras
SEED = 7

# NOTE: Python reads PYTHONHASHSEED when an interpreter starts, so setting it
# here cannot change hashing in the already-running kernel.
os.environ['PYTHONHASHSEED'] = str(SEED)
random.seed(SEED)
np.random.seed(SEED)
tf.set_random_seed(SEED)
# Limit TensorFlow to one intra-op and one inter-op thread for this session
session_conf = tf.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
K.backend.set_session(sess)

# Load training and validation sets; flatten every sample to one row and
# one-hot the labels into 10 classes
lib = np.load('../data/MNIST.npz')
X_train = lib['X_train']
X_train = X_train.reshape(X_train.shape[0], -1)
Y_train = one_hot(lib['Y_train'], 10)
X_valid = lib['X_valid']
X_valid = X_valid.reshape(X_valid.shape[0], -1)
Y_valid = one_hot(lib['Y_valid'], 10)

# Build, compile and train with learning rate decay
model = build_model(784, [128, 64, 10], ['tanh', 'sigmoid', 'softmax'], 0.01, 0.6)
optimize_model(model, 0.01, 0.99, 0.9)
train_model_7(model, X_train, Y_train, 64, 5,
              validation_data=(X_valid, Y_valid),
              learning_rate_decay=True, alpha=0.01, decay_rate=2,
              verbose=False)


2023-03-06 13:43:00.565626: W tensorflow/c/c_api.cc:291] Operation '{name:'training_2/Adam/dense_12/kernel/v/Assign' id:1190 op device:{requested: '', assigned: ''} def:{{{node training_2/Adam/dense_12/kernel/v/Assign}} = AssignVariableOp[_has_manual_control_dependencies=true, dtype=DT_FLOAT, validate_shape=false](training_2/Adam/dense_12/kernel/v, training_2/Adam/dense_12/kernel/v/Initializer/zeros)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.
2023-03-06 13:43:02.177001: W tensorflow/c/c_api.cc:291] Operation '{name:'loss_1/AddN_1' id:962 op device:{requested: '', assigned: ''} def:{{{node loss_1/AddN_1}} = AddN[N=2, T=DT_FLOAT, _has_manual_control_dependencies=true](loss_1/mul, loss_1/AddN)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in

<keras.callbacks.History at 0x7fa0eb78a6d0>

In [20]:
# Task 8. Save Only the Best