In [19]:
# Imports
import numpy as np
import tensorflow.compat.v1 as tf
import tensorflow.compat.v1.keras as K
tf.compat.v1.disable_eager_execution()
tf.disable_v2_behavior()

In [20]:
# 0. Sequential
"""
Write a function that builds a neural network with the Keras library
Use Sequential model since that's in the task name
"""
def build_model_0(nx, layers, activations, lambtha, keep_prob):
    """
    Builds a neural network with the Keras `Sequential` class.

    nx: number of input features to the network
    layers: list containing the number of nodes in each layer of the network
    activations: list containing the activation functions for each layer
    lambtha: L2 regularization parameter
    keep_prob: probability that a node will be kept for dropout

    Not allowed to use `Input` class
    Returns the keras model
    """
    last_index = len(layers) - 1
    network = K.Sequential()
    # Explicit input layer so the first Dense layer knows its input width
    network.add(K.layers.InputLayer(input_shape=(nx,)))
    for index, nodes in enumerate(layers):
        dense = K.layers.Dense(units=nodes,
                               activation=activations[index],
                               kernel_regularizer=K.regularizers.l2(lambtha))
        network.add(dense)
        # Dropout follows every layer except the final (output) one;
        # Keras expects the drop rate, hence 1 - keep_prob
        if index != last_index:
            network.add(K.layers.Dropout(1 - keep_prob))
    return network

In [21]:
# 0-main
# Build the three-layer Sequential network, then show its structure and the
# L2 regularization loss tensors attached to it.
network = build_model_0(nx=784,
                        layers=[256, 256, 10],
                        activations=['tanh', 'tanh', 'softmax'],
                        lambtha=0.001,
                        keep_prob=0.95)
network.summary()
print(network.losses)

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_19 (Dense)            (None, 256)               200960    
                                                                 
 dropout_12 (Dropout)        (None, 256)               0         
                                                                 
 dense_20 (Dense)            (None, 256)               65792     
                                                                 
 dropout_13 (Dropout)        (None, 256)               0         
                                                                 
 dense_21 (Dense)            (None, 10)                2570      
                                                                 
Total params: 269,322
Trainable params: 269,322
Non-trainable params: 0
_________________________________________________________________
[<tf.Tensor 'dense_19/kernel/Regularizer/mul:0' 

In [22]:
# 0. Sequential | Attempt 2
"""
Write a function that builds a neural network with the Keras library
Use Sequential model since that's in the task name
"""
def build_model_1(nx, layers, activations, lambtha, keep_prob):
    """
    Builds a neural network with the Keras `Sequential` class, declaring the
    input size on the first Dense layer instead of a separate InputLayer.

    nx: number of input features to the network
    layers: list containing the number of nodes in each layer of the network
    activations: list containing the activation functions for each layer
    lambtha: L2 regularization parameter
    keep_prob: probability that a node will be kept for dropout

    Not allowed to use `Input` class
    Returns the keras model
    """
    model = K.Sequential()
    # The first Dense layer doubles as the input layer through `input_shape`
    model.add(K.layers.Dense(units=layers[0],
                             activation=activations[0],
                             kernel_regularizer=K.regularizers.l2(lambtha),
                             input_shape=(nx,)))
    # Every remaining layer is preceded by dropout on the previous layer's
    # output, so dropout sits between layers and never after the last one.
    for layer in range(1, len(layers)):
        # Keras expects the drop rate, hence 1 - keep_prob
        model.add(K.layers.Dropout(1 - keep_prob))
        model.add(K.layers.Dense(units=layers[layer],
                                 activation=activations[layer],
                                 kernel_regularizer=K.regularizers.l2(lambtha)
                                 ))
    return model

In [23]:
# 0-main | Attempt 2
# Same check as 0-main, but against the variant that sets `input_shape` on
# the first Dense layer.
network = build_model_1(nx=784,
                        layers=[256, 256, 10],
                        activations=['tanh', 'tanh', 'softmax'],
                        lambtha=0.001,
                        keep_prob=0.95)
network.summary()
print(network.losses)

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_22 (Dense)            (None, 256)               200960    
                                                                 
 dropout_14 (Dropout)        (None, 256)               0         
                                                                 
 dense_23 (Dense)            (None, 256)               65792     
                                                                 
 dropout_15 (Dropout)        (None, 256)               0         
                                                                 
 dense_24 (Dense)            (None, 10)                2570      
                                                                 
Total params: 269,322
Trainable params: 269,322
Non-trainable params: 0
_________________________________________________________________
[<tf.Tensor 'dense_22/kernel/Regularizer/mul:0' 

In [24]:
# Task 1. Input
"""
Write a function that builds a neural network with the Keras library
"""
def build_model(nx, layers, activations, lambtha, keep_prob):
    """
    Builds a neural network with the Keras functional API.

    nx: number of input features to the network
    layers: list containing the number of nodes in each layer of the network
    activations: list containing the activation functions for each layer
    lambtha: L2 regularization parameter
    keep_prob: probability that a node will be kept for dropout

    Not allowed to use `Sequential` class
    Returns the keras model
    """
    def dense(index):
        """Creates the L2-regularized Dense layer for position `index`."""
        return K.layers.Dense(units=layers[index],
                              activation=activations[index],
                              kernel_regularizer=K.regularizers.l2(lambtha))

    inputs = K.Input(shape=(nx,))
    outputs = dense(0)(inputs)
    # Each later layer consumes the dropped-out output of the previous one;
    # Keras expects the drop rate, hence 1 - keep_prob
    for index in range(1, len(layers)):
        outputs = K.layers.Dropout(1 - keep_prob)(outputs)
        outputs = dense(index)(outputs)

    return K.Model(inputs=inputs, outputs=outputs)



In [25]:
# 1-main
# Single-layer model: no dropout layer is expected in the summary.
model = build_model(nx=200,
                    layers=[10],
                    activations=['tanh'],
                    lambtha=0.01,
                    keep_prob=0.6)
model.summary()

Model: "model_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_8 (InputLayer)        [(None, 200)]             0         
                                                                 
 dense_25 (Dense)            (None, 10)                2010      
                                                                 
Total params: 2,010
Trainable params: 2,010
Non-trainable params: 0
_________________________________________________________________


In [26]:
# 2. Optimize
"""
Write a function that sets up Adam optimization for a keras model with
categorical crossentropy loss and accuracy metrics
"""
def optimize_model(network, alpha, beta1, beta2):
    """
    Compiles a keras model with Adam optimization, categorical crossentropy
    loss and accuracy metrics.

    network: the model to optimize (compiled in place)
    alpha: learning rate
    beta1: the first Adam optimization parameter
    beta2: second Adam optimization parameter
    Returns None
    """
    # `learning_rate` is the supported keyword; the older `lr` alias is
    # deprecated and emits a warning when the optimizer is constructed.
    adam = K.optimizers.Adam(learning_rate=alpha,
                             beta_1=beta1,
                             beta_2=beta2)
    network.compile(optimizer=adam,
                    metrics=['accuracy'],
                    loss="categorical_crossentropy")
    return None

In [27]:
# 2-main
# model = build_model(784, [256, 256, 10], ['tanh', 'tanh', 'softmax'], 0.001, 0.95)
# optimize_model(model, 0.01, 0.99, 0.9)
# print(model.loss)
# print(model.metrics)
# opt = model.optimizer
# print(opt.__class__)
# with tf.Session() as sess:
#     sess.run(tf.global_variables_initializer())
#     print(sess.run((opt.lr, opt.beta_1, opt.beta_2))) 

In [28]:
# 3. One Hot
"""
Function that converts a label vector into a one-hot matrix
"""
def one_hot(labels, classes=None):
    """
    Converts a label vector into a one-hot matrix.

    labels: class labels for the data
    classes: number of classes, forwarded to Keras as `num_classes`
    Returns the one-hot matrix produced by `K.utils.to_categorical`
    """
    encoded = K.utils.to_categorical(labels, num_classes=classes)
    return encoded


In [29]:
# 3-main
# Show the first ten training labels followed by their one-hot encoding.
labels = np.load('../data/MNIST.npz')['Y_train'][:10]
print(labels, one_hot(labels), sep='\n')

[5 0 4 1 9 2 1 3 1 4]
[[0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]]


In [30]:
# Task 4. Train
"""
Write a function that trains a model using mini-batch gradient descent
"""
def train_model_4(network, data, labels, batch_size,
                epochs, verbose=True, shuffle=False):
    """
    Trains a model using mini-batch gradient descent.

    network: model to train
    data: numpy.ndarray of shape (m, nx) containing the input data
        m: number of data points
        nx: number of features
    labels: one-hot numpy.ndarray of shape (m, classes) with the data labels
        classes: number of classes
    batch_size: size of the batch used for mini-batch gradient descent
    epochs: number of passes through data for mini-batch gradient descent
    verbose: boolean that determines if the output should be printed during
        training
    shuffle: boolean that determines whether to shuffle the batches every
        epoch (off by default for reproducibility)
    Returns: the History object generated after training the model
    """
    fit_options = dict(x=data,
                       y=labels,
                       batch_size=batch_size,
                       epochs=epochs,
                       verbose=verbose,
                       shuffle=shuffle)
    history = network.fit(**fit_options)
    return history

In [31]:
# 4-main
# Force Seed - fix for Keras
import os
import random

import numpy as np

SEED = 0

# Seed every random source used here. Note: PYTHONHASHSEED is read when the
# interpreter starts, so setting it now only reaches child processes.
os.environ['PYTHONHASHSEED'] = str(SEED)
random.seed(SEED)
np.random.seed(SEED)
tf.set_random_seed(SEED)
# Single-threaded session, installed as the Keras backend session
session_conf = tf.ConfigProto(intra_op_parallelism_threads=1,
                              inter_op_parallelism_threads=1)
sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
K.backend.set_session(sess)

# Load the training split and flatten each sample to one row
datasets = np.load('../data/MNIST.npz')
X_train = datasets['X_train']
X_train = X_train.reshape((X_train.shape[0], -1))
Y_train = datasets['Y_train']
Y_train_oh = one_hot(Y_train)

# Build, compile and train
lambtha = 0.0001
keep_prob = 0.95
network = build_model(nx=784,
                      layers=[256, 256, 10],
                      activations=['relu', 'relu', 'softmax'],
                      lambtha=lambtha,
                      keep_prob=keep_prob)
alpha = 0.001
beta1 = 0.9
beta2 = 0.999
optimize_model(network, alpha, beta1, beta2)
batch_size = 64
epochs = 5
train_model_4(network=network, data=X_train, labels=Y_train_oh,
              batch_size=batch_size, epochs=epochs)

Train on 50000 samples


  super().__init__(name, **kwargs)


Epoch 1/5
   64/50000 [..............................] - ETA: 1:07 - loss: 2.4673 - acc: 0.0625

2023-03-06 13:54:50.919426: W tensorflow/c/c_api.cc:291] Operation '{name:'training_6/Adam/dense_28/kernel/v/Assign' id:2556 op device:{requested: '', assigned: ''} def:{{{node training_6/Adam/dense_28/kernel/v/Assign}} = AssignVariableOp[_has_manual_control_dependencies=true, dtype=DT_FLOAT, validate_shape=false](training_6/Adam/dense_28/kernel/v, training_6/Adam/dense_28/kernel/v/Initializer/zeros)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7fa0ea8b8370>

In [32]:
# Task 5. Validate
"""
Based on 4. Train, update the function def train_model to also analyze
validation data
"""
def train_model_5(network, data, labels, batch_size, epochs,
                  validation_data=None, verbose=True, shuffle=False):
    """
    Trains a model using mini-batch gradient descent, optionally evaluating
    validation data.

    network: model to train
    data: numpy.ndarray of shape (m, nx) containing the input data
        m: number of data points
        nx: number of features
    labels: one-hot numpy.ndarray of shape (m, classes) with the data labels
        classes: number of classes
    batch_size: size of the batch used for mini-batch gradient descent
    epochs: number of passes through data for mini-batch gradient descent
    validation_data: the data to validate the model with, if not `None`
    verbose: boolean that determines if the output should be printed during
        training
    shuffle: boolean that determines whether to shuffle the batches every
        epoch (off by default for reproducibility)
    Returns: the History object generated after training the model
    """
    fit_options = dict(x=data,
                       y=labels,
                       batch_size=batch_size,
                       epochs=epochs,
                       verbose=verbose,
                       shuffle=shuffle,
                       validation_data=validation_data)
    history = network.fit(**fit_options)
    return history

In [33]:
# 5-main
# Force Seed - fix for Keras
import os
import random

import numpy as np

SEED = 0

# Seed every random source used here. Note: PYTHONHASHSEED is read when the
# interpreter starts, so setting it now only reaches child processes.
os.environ['PYTHONHASHSEED'] = str(SEED)
random.seed(SEED)
np.random.seed(SEED)
tf.set_random_seed(SEED)
# Single-threaded session, installed as the Keras backend session
session_conf = tf.ConfigProto(intra_op_parallelism_threads=1,
                              inter_op_parallelism_threads=1)
sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
K.backend.set_session(sess)

# Load training and validation splits; flatten each sample to one row
datasets = np.load('../data/MNIST.npz')
X_train = datasets['X_train']
X_train = X_train.reshape((X_train.shape[0], -1))
Y_train = datasets['Y_train']
Y_train_oh = one_hot(Y_train)
X_valid = datasets['X_valid']
X_valid = X_valid.reshape((X_valid.shape[0], -1))
Y_valid = datasets['Y_valid']
Y_valid_oh = one_hot(Y_valid)

# Build, compile and train with validation after every epoch
lambtha = 0.0001
keep_prob = 0.95
network = build_model(nx=784,
                      layers=[256, 256, 10],
                      activations=['relu', 'relu', 'softmax'],
                      lambtha=lambtha,
                      keep_prob=keep_prob)
alpha = 0.001
beta1 = 0.9
beta2 = 0.999
optimize_model(network, alpha, beta1, beta2)
batch_size = 64
epochs = 5
train_model_5(network=network, data=X_train, labels=Y_train_oh,
              batch_size=batch_size, epochs=epochs,
              validation_data=(X_valid, Y_valid_oh))

Train on 50000 samples, validate on 10000 samples
Epoch 1/5


2023-03-06 13:55:08.188059: W tensorflow/c/c_api.cc:291] Operation '{name:'training_8/Adam/dense_31/kernel/m/Assign' id:3049 op device:{requested: '', assigned: ''} def:{{{node training_8/Adam/dense_31/kernel/m/Assign}} = AssignVariableOp[_has_manual_control_dependencies=true, dtype=DT_FLOAT, validate_shape=false](training_8/Adam/dense_31/kernel/m, training_8/Adam/dense_31/kernel/m/Initializer/zeros)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.




  updates = self.state_updates
2023-03-06 13:55:11.599643: W tensorflow/c/c_api.cc:291] Operation '{name:'loss_4/AddN_1' id:2853 op device:{requested: '', assigned: ''} def:{{{node loss_4/AddN_1}} = AddN[N=2, T=DT_FLOAT, _has_manual_control_dependencies=true](loss_4/mul, loss_4/AddN)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7fa0ea93b580>

In [34]:
# Task 6. Early Stopping
"""
Based on 5. Validate, update the function def train_model to also train the
model using early stopping
"""
def train_model_6(network, data, labels, batch_size, epochs,
                  validation_data=None, early_stopping=False, patience=0,
                  verbose=True, shuffle=False):
    """
    Trains a model using mini-batch gradient descent, with optional
    validation and early stopping.

    network: model to train
    data: numpy.ndarray of shape (m, nx) containing the input data
        m: number of data points
        nx: number of features
    labels: one-hot numpy.ndarray of shape (m, classes) with the data labels
        classes: number of classes
    batch_size: size of the batch used for mini-batch gradient descent
    epochs: number of passes through data for mini-batch gradient descent
    validation_data: the data to validate the model with, if not `None`
    early_stopping: boolean that indicates whether or not to stop early
        early stopping is only performed if `validation_data` exists
        early stopping is based on validation loss
    patience: patience used for early stopping
    verbose: boolean that determines if the output should be printed during
        training
    shuffle: boolean that determines whether to shuffle the batches every
        epoch (off by default for reproducibility)
    Returns: the History object generated after training the model
    """
    callbacks = []
    # Early stopping monitors val_loss, so it needs validation data
    if early_stopping and validation_data is not None:
        callbacks.append(K.callbacks.EarlyStopping(monitor="val_loss",
                                                   patience=patience))

    # Training always runs. The callback list is simply empty when early
    # stopping is off, so validation without early stopping still trains
    # and returns a History object.
    return network.fit(x=data,
                       y=labels,
                       batch_size=batch_size,
                       epochs=epochs,
                       verbose=verbose,
                       shuffle=shuffle,
                       validation_data=validation_data,
                       callbacks=callbacks
                       )


In [35]:
# 6-main
# Force Seed - fix for Keras
import os
import random

import numpy as np

SEED = 0

# Seed every random source used here. Note: PYTHONHASHSEED is read when the
# interpreter starts, so setting it now only reaches child processes.
os.environ['PYTHONHASHSEED'] = str(SEED)
random.seed(SEED)
np.random.seed(SEED)
tf.set_random_seed(SEED)
# Single-threaded session, installed as the Keras backend session
session_conf = tf.ConfigProto(intra_op_parallelism_threads=1,
                              inter_op_parallelism_threads=1)
sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
K.backend.set_session(sess)

# Load training and validation splits; flatten each sample to one row
datasets = np.load('../data/MNIST.npz')
X_train = datasets['X_train']
X_train = X_train.reshape((X_train.shape[0], -1))
Y_train = datasets['Y_train']
Y_train_oh = one_hot(Y_train)
X_valid = datasets['X_valid']
X_valid = X_valid.reshape((X_valid.shape[0], -1))
Y_valid = datasets['Y_valid']
Y_valid_oh = one_hot(Y_valid)

# Build, compile and train with early stopping on validation loss
lambtha = 0.0001
keep_prob = 0.95
network = build_model(nx=784,
                      layers=[256, 256, 10],
                      activations=['relu', 'relu', 'softmax'],
                      lambtha=lambtha,
                      keep_prob=keep_prob)
alpha = 0.001
beta1 = 0.9
beta2 = 0.999
optimize_model(network, alpha, beta1, beta2)
batch_size = 64
epochs = 30
train_model_6(network=network, data=X_train, labels=Y_train_oh,
              batch_size=batch_size, epochs=epochs,
              validation_data=(X_valid, Y_valid_oh),
              early_stopping=True, patience=3)

Train on 50000 samples, validate on 10000 samples
Epoch 1/30
   64/50000 [..............................] - ETA: 1:11 - loss: 2.4500 - acc: 0.1406

2023-03-06 13:55:25.733795: W tensorflow/c/c_api.cc:291] Operation '{name:'training_10/Adam/learning_rate/Assign' id:3528 op device:{requested: '', assigned: ''} def:{{{node training_10/Adam/learning_rate/Assign}} = AssignVariableOp[_has_manual_control_dependencies=true, dtype=DT_FLOAT, validate_shape=false](training_10/Adam/learning_rate, training_10/Adam/learning_rate/Initializer/initial_value)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.




2023-03-06 13:55:29.449547: W tensorflow/c/c_api.cc:291] Operation '{name:'loss_5/AddN_1' id:3363 op device:{requested: '', assigned: ''} def:{{{node loss_5/AddN_1}} = AddN[N=2, T=DT_FLOAT, _has_manual_control_dependencies=true](loss_5/mul, loss_5/AddN)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.


Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30


<keras.callbacks.History at 0x7fa0ea57c130>

In [36]:
# Task 7. Learning Rate Decay
"""
Based on 6. Early Stopping, update the function def train_model to also train
model with learning rate decay
"""
def train_model_7(network, data, labels, batch_size, epochs,
                  validation_data=None, early_stopping=False, patience=0,
                  learning_rate_decay=False, alpha=0.1, decay_rate=1,
                  verbose=True, shuffle=False):
    """
    Trains a model using mini-batch gradient descent, with optional
    validation, early stopping and learning rate decay.

    network: model to train
    data: numpy.ndarray of shape (m, nx) containing the input data
        m: number of data points
        nx: number of features
    labels: one-hot numpy.ndarray of shape (m, classes) with the data labels
        classes: number of classes
    batch_size: size of the batch used for mini-batch gradient descent
    epochs: number of passes through data for mini-batch gradient descent
    validation_data: the data to validate the model with, if not `None`
    early_stopping: boolean that indicates whether or not to stop early
        early stopping is only performed if `validation_data` exists
        early stopping is based on validation loss
    patience: patience used for early stopping
    learning_rate_decay: boolean that indicates whether or not learning rate
        decay should be used
        learning rate decay is only performed if `validation_data` exists
        the decay uses inverse time decay, stepped once per epoch
        Keras prints a message each time the learning rate updates
    alpha: initial learning rate used by the decay schedule
    decay_rate: decay rate used by the decay schedule
    verbose: boolean that determines if the output should be printed during
        training
    shuffle: boolean that determines whether to shuffle the batches every
        epoch (off by default for reproducibility)
    Returns: the History object generated after training the model
    """
    callbacks = []

    if early_stopping and validation_data is not None:
        stopper = K.callbacks.EarlyStopping(monitor="val_loss",
                                            patience=patience)
        callbacks.append(stopper)

    if learning_rate_decay and validation_data is not None:

        def inverse_time_decay(epoch):
            """Learning rate for the given zero-based epoch index."""
            return alpha / (1 + decay_rate * epoch)

        # verbose=1 makes Keras announce each new learning rate
        lr_schedule = K.callbacks.LearningRateScheduler(inverse_time_decay,
                                                        verbose=1)
        callbacks.append(lr_schedule)

    fit_options = dict(x=data,
                       y=labels,
                       batch_size=batch_size,
                       epochs=epochs,
                       verbose=verbose,
                       shuffle=shuffle,
                       validation_data=validation_data,
                       callbacks=callbacks)
    return network.fit(**fit_options)


In [37]:
# 7-main
# Force Seed - fix for Keras
import os
import random

SEED = 7

# Seed every random source used here. Note: PYTHONHASHSEED is read when the
# interpreter starts, so setting it now only reaches child processes.
os.environ['PYTHONHASHSEED'] = str(SEED)
random.seed(SEED)
np.random.seed(SEED)
tf.set_random_seed(SEED)
# Single-threaded session, installed as the Keras backend session
session_conf = tf.ConfigProto(intra_op_parallelism_threads=1,
                              inter_op_parallelism_threads=1)
sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
K.backend.set_session(sess)

# Load training and validation splits; flatten each sample to one row
lib = np.load('../data/MNIST.npz')
X_train = lib['X_train']
X_train = X_train.reshape((X_train.shape[0], -1))
Y_train = one_hot(lib['Y_train'], 10)
X_valid = lib['X_valid']
X_valid = X_valid.reshape((X_valid.shape[0], -1))
Y_valid = one_hot(lib['Y_valid'], 10)

# Build, compile and train with inverse time learning rate decay
model = build_model(nx=784,
                    layers=[128, 64, 10],
                    activations=['tanh', 'sigmoid', 'softmax'],
                    lambtha=0.01,
                    keep_prob=0.6)
optimize_model(model, 0.01, 0.99, 0.9)
train_model_7(model, X_train, Y_train, 64, 5,
              validation_data=(X_valid, Y_valid),
              learning_rate_decay=True,
              alpha=0.01,
              decay_rate=2,
              verbose=False)



Epoch 1: LearningRateScheduler setting learning rate to 0.01.


2023-03-06 13:56:05.631122: W tensorflow/c/c_api.cc:291] Operation '{name:'training_12/Adam/dense_35/kernel/v/Assign' id:4079 op device:{requested: '', assigned: ''} def:{{{node training_12/Adam/dense_35/kernel/v/Assign}} = AssignVariableOp[_has_manual_control_dependencies=true, dtype=DT_FLOAT, validate_shape=false](training_12/Adam/dense_35/kernel/v, training_12/Adam/dense_35/kernel/v/Initializer/zeros)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.
2023-03-06 13:56:07.821622: W tensorflow/c/c_api.cc:291] Operation '{name:'loss_6/AddN_1' id:3873 op device:{requested: '', assigned: ''} def:{{{node loss_6/AddN_1}} = AddN[N=2, T=DT_FLOAT, _has_manual_control_dependencies=true](loss_6/mul, loss_6/AddN)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an err


Epoch 2: LearningRateScheduler setting learning rate to 0.0033333333333333335.

Epoch 3: LearningRateScheduler setting learning rate to 0.002.

Epoch 4: LearningRateScheduler setting learning rate to 0.0014285714285714286.

Epoch 5: LearningRateScheduler setting learning rate to 0.0011111111111111111.


<keras.callbacks.History at 0x7fa0ea46d310>

In [38]:
# Task 8. Save Only the Best
"""
Based on 7. Learning Rate Decay, update the function def train_model to also
save the best iteration of the model
"""
def train_model_8(network, data, labels, batch_size, epochs,
                  validation_data=None, early_stopping=False, patience=0,
                  learning_rate_decay=False, alpha=0.1, decay_rate=1,
                  save_best=False, filepath=None, verbose=True, shuffle=False):
    """
    Trains a model using mini-batch gradient descent, with optional
    validation, early stopping, learning rate decay and best-model saving.

    network: model to train
    data: numpy.ndarray of shape (m, nx) containing the input data
        m: number of data points
        nx: number of features
    labels: one-hot numpy.ndarray of shape (m, classes) with the data labels
        classes: number of classes
    batch_size: size of the batch used for mini-batch gradient descent
    epochs: number of passes through data for mini-batch gradient descent
    validation_data: the data to validate the model with, if not `None`
    early_stopping: boolean that indicates whether or not to stop early
        early stopping is only performed if `validation_data` exists
        early stopping is based on validation loss
    patience: patience used for early stopping
    learning_rate_decay: boolean that indicates whether or not learning rate
        decay should be used
        learning rate decay is only performed if `validation_data` exists
        the decay uses inverse time decay, stepped once per epoch
        Keras prints a message each time the learning rate updates
    alpha: initial learning rate used by the decay schedule
    decay_rate: decay rate used by the decay schedule
    save_best: boolean indicating whether to save the model after each epoch
        if it is the best
        A model is considered the best if its validation loss is the lowest
        that the model has obtained
    filepath: the file path to where the model should be saved; required
        when `save_best` is True
    verbose: boolean that determines if the output should be printed during
        training
    shuffle: boolean that determines whether to shuffle the batches every
        epoch (off by default for reproducibility)
    Returns: the History object generated after training the model
    Raises: ValueError if `save_best` is True but `filepath` is None
    """
    # Fail before any training instead of erroring inside Keras later on
    if save_best and filepath is None:
        raise ValueError("filepath must be provided when save_best is True")

    callback_list = []

    if early_stopping and validation_data is not None:
        callback_list.append(K.callbacks.EarlyStopping(monitor="val_loss",
                                                       patience=patience))

    if learning_rate_decay and validation_data is not None:

        def scheduler(epoch):
            """Inverse time decay for the given zero-based epoch index."""
            return (alpha / (1 + decay_rate * epoch))

        # verbose=1 makes Keras announce each new learning rate
        callback_list.append(K.callbacks.LearningRateScheduler(scheduler,
                                                               verbose=1))

    if save_best:
        # "Best" means lowest validation loss; the monitor is spelled out
        # here rather than left to the ModelCheckpoint default.
        callback_list.append(K.callbacks.ModelCheckpoint(filepath,
                                                         monitor="val_loss",
                                                         save_best_only=True))

    return network.fit(x=data,
                       y=labels,
                       batch_size=batch_size,
                       epochs=epochs,
                       verbose=verbose,
                       shuffle=shuffle,
                       validation_data=validation_data,
                       callbacks=callback_list
                       )


In [39]:
# 8-main
# Force Seed - fix for Keras
import os
import random

import numpy as np

SEED = 0

# Seed every random source used here. Note: PYTHONHASHSEED is read when the
# interpreter starts, so setting it now only reaches child processes.
os.environ['PYTHONHASHSEED'] = str(SEED)
random.seed(SEED)
np.random.seed(SEED)
tf.set_random_seed(SEED)
# Single-threaded session, installed as the Keras backend session
session_conf = tf.ConfigProto(intra_op_parallelism_threads=1,
                              inter_op_parallelism_threads=1)
sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
K.backend.set_session(sess)

# Load training and validation splits; flatten each sample to one row
datasets = np.load('../data/MNIST.npz')
X_train = datasets['X_train']
X_train = X_train.reshape((X_train.shape[0], -1))
Y_train = datasets['Y_train']
Y_train_oh = one_hot(Y_train)
X_valid = datasets['X_valid']
X_valid = X_valid.reshape((X_valid.shape[0], -1))
Y_valid = datasets['Y_valid']
Y_valid_oh = one_hot(Y_valid)

# Build, compile and train; the best model so far is written to network1.h5
lambtha = 0.0001
keep_prob = 0.95
network = build_model(nx=784,
                      layers=[256, 256, 10],
                      activations=['relu', 'relu', 'softmax'],
                      lambtha=lambtha,
                      keep_prob=keep_prob)
alpha = 0.001
beta1 = 0.9
beta2 = 0.999
optimize_model(network, alpha, beta1, beta2)
batch_size = 64
epochs = 1000
train_model_8(network=network, data=X_train, labels=Y_train_oh,
              batch_size=batch_size, epochs=epochs,
              validation_data=(X_valid, Y_valid_oh),
              early_stopping=True, patience=3,
              learning_rate_decay=True, alpha=alpha,
              save_best=True, filepath='network1.h5')

Train on 50000 samples, validate on 10000 samples


2023-03-06 13:56:19.939197: W tensorflow/c/c_api.cc:291] Operation '{name:'dense_40/kernel/Assign' id:4269 op device:{requested: '', assigned: ''} def:{{{node dense_40/kernel/Assign}} = AssignVariableOp[_has_manual_control_dependencies=true, dtype=DT_FLOAT, validate_shape=false](dense_40/kernel, dense_40/kernel/Initializer/random_uniform)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.



Epoch 1: LearningRateScheduler setting learning rate to 0.001.
Epoch 1/1000

2023-03-06 13:56:23.990552: W tensorflow/c/c_api.cc:291] Operation '{name:'loss_7/AddN_1' id:4382 op device:{requested: '', assigned: ''} def:{{{node loss_7/AddN_1}} = AddN[N=2, T=DT_FLOAT, _has_manual_control_dependencies=true](loss_7/mul, loss_7/AddN)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.



Epoch 2: LearningRateScheduler setting learning rate to 0.0005.
Epoch 2/1000

Epoch 3: LearningRateScheduler setting learning rate to 0.0003333333333333333.
Epoch 3/1000

Epoch 4: LearningRateScheduler setting learning rate to 0.00025.
Epoch 4/1000

Epoch 5: LearningRateScheduler setting learning rate to 0.0002.
Epoch 5/1000

Epoch 6: LearningRateScheduler setting learning rate to 0.00016666666666666666.
Epoch 6/1000

Epoch 7: LearningRateScheduler setting learning rate to 0.00014285714285714287.
Epoch 7/1000

Epoch 8: LearningRateScheduler setting learning rate to 0.000125.
Epoch 8/1000

Epoch 9: LearningRateScheduler setting learning rate to 0.00011111111111111112.
Epoch 9/1000

Epoch 10: LearningRateScheduler setting learning rate to 0.0001.
Epoch 10/1000

Epoch 11: LearningRateScheduler setting learning rate to 9.090909090909092e-05.
Epoch 11/1000

Epoch 12: LearningRateScheduler setting learning rate to 8.333333333333333e-05.
Epoch 12/1000

Epoch 13: LearningRateScheduler setting

<keras.callbacks.History at 0x7fa0ea34b970>

In [45]:
# Task 9. Save and Load Model
"""
Write functions to save and load an entire model
"""
def save_model(network, filename):
    """
    Writes a model to disk by calling its own `save` method

    network: model to save
    filename: path of the file that the model should be saved to
    Returns None
    """
    # Whatever `save` returns is discarded; falling off the end returns None
    network.save(filename)

def load_model(filename):
    """
    Reads a model back from disk through K.models.load_model

    filename: path of the file that the model should be loaded from
    Returns the loaded model
    """
    restored = K.models.load_model(filename)
    return restored


In [50]:
# 9-main
# Force Seed - fix for Keras
SEED = 0

# Seed each source of randomness with the same SEED value.
# np, tf and K are the aliases imported in the notebook's first cell.
import os
os.environ['PYTHONHASHSEED'] = str(SEED)
import random
random.seed(SEED)
np.random.seed(SEED)
tf.set_random_seed(SEED)
# Build a session limited to one intra-op and one inter-op thread and
# register it as the session the Keras backend uses.
session_conf = tf.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
K.backend.set_session(sess)

# np.load on an .npz archive returns an object that keeps the file open;
# pull the arrays out inside a `with` block so the handle is closed afterwards.
with np.load('../data/MNIST.npz') as datasets:
    X_train = datasets['X_train']
    Y_train = datasets['Y_train']
    X_valid = datasets['X_valid']
    Y_valid = datasets['Y_valid']

# Flatten every sample to one row; one_hot is defined elsewhere in the notebook.
X_train = X_train.reshape(X_train.shape[0], -1)
Y_train_oh = one_hot(Y_train)
X_valid = X_valid.reshape(X_valid.shape[0], -1)
Y_valid_oh = one_hot(Y_valid)

# Continue training the model stored in network1.h5, then save it as network2.h5.
# train_model_8 is defined elsewhere in the notebook.
network = load_model('network1.h5')
batch_size = 32
epochs = 1000
train_model_8(network, X_train, Y_train_oh, batch_size, epochs,
            validation_data=(X_valid, Y_valid_oh), early_stopping=True,
            patience=2, learning_rate_decay=True, alpha=0.001)
save_model(network, 'network2.h5')
network.summary()
print(network.get_weights())
del network

# Reload the file just written and print the same information for the restored model.
network2 = load_model('network2.h5')
network2.summary()
print(network2.get_weights())

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
2023-03-06 14:17:57.676607: W tensorflow/c/c_api.cc:291] Operation '{name:'dense_40_1/bias/Assign' id:4787 op device:{requested: '', assigned: ''} def:{{{node dense_40_1/bias/Assign}} = AssignVariableOp[_has_manual_control_dependencies=true, dtype=DT_FLOAT, validate_shape=false](dense_40_1/bias, dense_40_1/bias/Initializer/zeros)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.
2023-03-06 14:17:58.127816: W tensorflow/c/c_api.cc:291] Operation '{name:'dense_39_1/bias/m/Assign' id:4968 op device:{requested: '', assigned: ''} def:{{{node dense_39_1/bias/m/Assign}} = AssignVariableOp[_has_manual_control_dependencies=true, dtype=DT_FLOAT, validate_shape=false](dense_39_1/bias/m, dense_39_1/bias/m/Initializer/zero

Train on 50000 samples, validate on 10000 samples


2023-03-06 14:17:58.669217: W tensorflow/c/c_api.cc:291] Operation '{name:'loss_8/AddN_1' id:4920 op device:{requested: '', assigned: ''} def:{{{node loss_8/AddN_1}} = AddN[N=2, T=DT_FLOAT, _has_manual_control_dependencies=true](loss_8/mul, loss_8/AddN)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.



Epoch 1: LearningRateScheduler setting learning rate to 0.001.
Epoch 1/1000

  updates = self.state_updates
2023-03-06 14:18:06.068684: W tensorflow/c/c_api.cc:291] Operation '{name:'loss_8/AddN_1' id:4920 op device:{requested: '', assigned: ''} def:{{{node loss_8/AddN_1}} = AddN[N=2, T=DT_FLOAT, _has_manual_control_dependencies=true](loss_8/mul, loss_8/AddN)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.



Epoch 2: LearningRateScheduler setting learning rate to 0.0005.
Epoch 2/1000

Epoch 3: LearningRateScheduler setting learning rate to 0.0003333333333333333.
Epoch 3/1000

Epoch 4: LearningRateScheduler setting learning rate to 0.00025.
Epoch 4/1000

Epoch 5: LearningRateScheduler setting learning rate to 0.0002.
Epoch 5/1000

Epoch 6: LearningRateScheduler setting learning rate to 0.00016666666666666666.
Epoch 6/1000

Epoch 7: LearningRateScheduler setting learning rate to 0.00014285714285714287.
Epoch 7/1000

Epoch 8: LearningRateScheduler setting learning rate to 0.000125.
Epoch 8/1000

Epoch 9: LearningRateScheduler setting learning rate to 0.00011111111111111112.
Epoch 9/1000

Epoch 10: LearningRateScheduler setting learning rate to 0.0001.
Epoch 10/1000

Epoch 11: LearningRateScheduler setting learning rate to 9.090909090909092e-05.
Epoch 11/1000

Epoch 12: LearningRateScheduler setting learning rate to 8.333333333333333e-05.
Epoch 12/1000

Epoch 13: LearningRateScheduler setting

2023-03-06 14:20:20.196038: W tensorflow/c/c_api.cc:291] Operation '{name:'dense_38_2/bias/Assign' id:5284 op device:{requested: '', assigned: ''} def:{{{node dense_38_2/bias/Assign}} = AssignVariableOp[_has_manual_control_dependencies=true, dtype=DT_FLOAT, validate_shape=false](dense_38_2/bias, dense_38_2/bias/Initializer/zeros)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.
2023-03-06 14:20:21.356445: W tensorflow/c/c_api.cc:291] Operation '{name:'count_9/Assign' id:5399 op device:{requested: '', assigned: ''} def:{{{node count_9/Assign}} = AssignVariableOp[_has_manual_control_dependencies=true, dtype=DT_FLOAT, validate_shape=false](count_9, count_9/Initializer/zeros)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't

Model: "model_10"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_13 (InputLayer)       [(None, 784)]             0         
                                                                 
 dense_38 (Dense)            (None, 256)               200960    
                                                                 
 dropout_24 (Dropout)        (None, 256)               0         
                                                                 
 dense_39 (Dense)            (None, 256)               65792     
                                                                 
 dropout_25 (Dropout)        (None, 256)               0         
                                                                 
 dense_40 (Dense)            (None, 10)                2570      
                                                                 
Total params: 269,322
Trainable params: 269,322
Non-traina