In [1]:
import tensorflow as tf
import numpy as np
import math
import tensorflow_model_optimization as tfmot
from tensorflow_model_optimization.sparsity import keras as sparsity
# from tensorflow.keras import datasets, layers, models
import matplotlib.pyplot as plt
from tensorflow.keras.layers import AveragePooling2D, Conv2D
from tensorflow.keras import models, layers, datasets
from tensorflow.keras.layers import Dense, Flatten, Reshape, Input, InputLayer
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.initializers import RandomNormal
# import math
from sklearn.metrics import accuracy_score, precision_score, recall_score

In [2]:
tf.__version__

'2.0.0'

In [3]:
batch_size = 32
num_classes = 10
num_epochs = 50

In [4]:
# Data preprocessing and cleadning:
# input image dimensions
img_rows, img_cols = 28, 28

# Load MNIST dataset-
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()

In [5]:
if tf.keras.backend.image_data_format() == 'channels_first':
    X_train = X_train.reshape(X_train.shape[0], 1, img_rows, img_cols)
    X_test = X_test.reshape(X_test.shape[0], 1, img_rows, img_cols)
    input_shape = (1, img_rows, img_cols)
else:
    X_train = X_train.reshape(X_train.shape[0], img_rows, img_cols, 1)
    X_test = X_test.reshape(X_test.shape[0], img_rows, img_cols, 1)
    input_shape = (img_rows, img_cols, 1)

print("\n'input_shape' which will be used = {0}\n".format(input_shape))


'input_shape' which will be used = (28, 28, 1)



In [6]:
# Convert datasets to floating point types-
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')

# Normalize the training and testing datasets-
X_train /= 255.0
X_test /= 255.0

In [7]:
# convert class vectors/target to binary class matrices or one-hot encoded values-
y_train = tf.keras.utils.to_categorical(y_train, num_classes)
y_test = tf.keras.utils.to_categorical(y_test, num_classes)

In [8]:
# Reshape training and testing sets-
X_train = X_train.reshape(X_train.shape[0], 784)
X_test = X_test.reshape(X_test.shape[0], 784)

In [9]:
print("\nDimensions of training and testing sets are:")
print("X_train.shape = {0}, y_train = {1}".format(X_train.shape, y_train.shape))
print("X_test.shape = {0}, y_test = {1}".format(X_test.shape, y_test.shape))


Dimensions of training and testing sets are:
X_train.shape = (60000, 784), y_train = (60000, 10)
X_test.shape = (10000, 784), y_test = (10000, 10)


In [10]:
l = tf.keras.layers

In [11]:
def nn_model():
    """
    Function to create LeNet 300-100-10
    model for MNIST classification
    """

    model = Sequential()

    model.add(l.InputLayer(input_shape=(784, )))

    model.add(Flatten())

    model.add(Dense(units = 300, activation='relu', kernel_initializer=tf.initializers.GlorotUniform()))

    # model.add(l.Dropout(0.2))

    model.add(Dense(units = 100, activation='relu', kernel_initializer=tf.initializers.GlorotUniform()))

    # model.add(l.Dropout(0.1))

    model.add(Dense(units = num_classes, activation='softmax'))
    
    return model


In [12]:
# Specify the parameters to be used for layer-wise pruning, NO PRUNING is done here:
pruning_params_unpruned = {
    'pruning_schedule': sparsity.ConstantSparsity(
        target_sparsity=0.0, begin_step=0,
        end_step = 0, frequency=100
    )
}

In [13]:
def pruned_nn(pruning_params):
    """
    Function to define the architecture of a neural network model
    following 300 100 architecture for MNIST dataset and using
    provided parameter which are used to prune the model.
    
    Input: 'pruning_params' Python 3 dictionary containing parameters which are used for pruning
    Output: Returns designed and compiled neural network model
    """
    
    pruned_model = Sequential()
    pruned_model.add(l.InputLayer(input_shape=(784, )))
    pruned_model.add(Flatten())
    pruned_model.add(sparsity.prune_low_magnitude(
        Dense(units = 300, activation='relu', kernel_initializer=tf.initializers.GlorotUniform()),
        **pruning_params))
    # pruned_model.add(l.Dropout(0.2))
    pruned_model.add(sparsity.prune_low_magnitude(
        Dense(units = 100, activation='relu', kernel_initializer=tf.initializers.GlorotUniform()),
        **pruning_params))
    # pruned_model.add(l.Dropout(0.1))
    pruned_model.add(sparsity.prune_low_magnitude(
        Dense(units = num_classes, activation='softmax'),
        **pruning_params))
    
    # Compile pruned CNN-
    pruned_model.compile(
        loss=tf.keras.losses.categorical_crossentropy,
        # optimizer='adam',
        optimizer=tf.keras.optimizers.Adam(lr = 0.001),
        metrics=['accuracy'])
    
    return pruned_model


In [14]:
# Initialize model-
model = pruned_nn(pruning_params_unpruned)

Instructions for updating:
Please use `layer.add_weight` method instead.


In [15]:
# Load winning ticket weights-
model.load_weights("Winning_Ticket_Weights_Experimental.h5")

In [16]:
# Strip the pruning wrappers from pruned model-
model_stripped = sparsity.strip_pruning(model)

In [17]:
print("\nIn Winning Ticket, number of nonzero parameters in each layer are: \n")

model_sum_params = 0

for layer in model_stripped.trainable_weights:
    print(tf.math.count_nonzero(layer, axis = None).numpy())
    model_sum_params += tf.math.count_nonzero(layer, axis = None).numpy()

print("\nTotal number of trainable parameters = {0}\n".format(model_sum_params))


In Winning Ticket, number of nonzero parameters in each layer are: 

49627
0
6330
0
211
0

Total number of trainable parameters = 56168



In [20]:
# Instantiate a new neural network model for which, the mask is to be created,
# according to the paper-
mask_model = pruned_nn(pruning_params_unpruned)

In [21]:
# Load weights of GradientTape trained and PRUNED model-
# mask_model.load_weights("Pruned_Weights.h5")
mask_model.load_weights("Winning_Ticket_Weights_Experimental.h5")

In [22]:
# Strip the model of its pruning parameters-
mask_model_stripped = sparsity.strip_pruning(mask_model)

In [23]:
# For each layer, for each weight which is 0, leave it, as is.
# And for weights which survive the pruning,reinitialize it to ONE (1)-

for wts in mask_model_stripped.trainable_weights:
    wts.assign(tf.where(tf.equal(wts, 0.), 0., 1.))

### Prepare dataset for _GradientTape_:

In [24]:
# Create training and testing datasets-
train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))
test_dataset = tf.data.Dataset.from_tensor_slices((X_test, y_test))

In [25]:
train_dataset = train_dataset.shuffle(buffer_size = 20000, reshuffle_each_iteration = True).batch(batch_size = batch_size, drop_remainder = False)

In [26]:
test_dataset = test_dataset.batch(batch_size=batch_size, drop_remainder=False)

In [27]:
# Choose an optimizer and loss function for training-
loss_fn = tf.keras.losses.CategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam(lr = 0.001)

In [28]:
# Select metrics to measure the error & accuracy of model.
# These metrics accumulate the values over epochs and then
# print the overall result-
train_loss = tf.keras.metrics.Mean(name = 'train_loss')
train_accuracy = tf.keras.metrics.BinaryAccuracy(name = 'train_accuracy')

test_loss = tf.keras.metrics.Mean(name = 'test_loss')
test_accuracy = tf.keras.metrics.BinaryAccuracy(name = 'train_accuracy')

In [29]:
@tf.function
def train_one_step(model, mask_model, optimizer, x, y):
    '''
    def train_step(data, labels):
    Function to compute one step of gradient descent optimization
    '''
    with tf.GradientTape() as tape:
        # Make predictions using defined model-
        y_pred = model(x)

        # Compute loss-
        loss = loss_fn(y, y_pred)
        
    # Compute gradients wrt defined loss and weights and biases-
    grads = tape.gradient(loss, model.trainable_variables)
    
    # type(grads)
    # list
    
    # List to hold element-wise multiplication between-
    # computed gradient and masks-
    grad_mask_mul = []
    
    # Perform element-wise multiplication between computed gradients and masks-
    for grad_layer, mask in zip(grads, mask_model.trainable_weights):
        grad_mask_mul.append(tf.math.multiply(grad_layer, mask))
    
    # Apply computed gradients to model's weights and biases-
    optimizer.apply_gradients(zip(grad_mask_mul, model.trainable_variables))

    # Compute accuracy-
    train_loss(loss)
    train_accuracy(y, y_pred)

    return None

In [30]:
@tf.function
def test_step(model, optimizer, data, labels):
    """
    Function to test model performance
    on testing dataset
    """
    
    predictions = model(data)
    t_loss = loss_fn(labels, predictions)

    test_loss(t_loss)
    test_accuracy(labels, predictions)

    return None


In [31]:
# Dictionary to hold scalar metrics-
history = {}

history['accuracy'] = np.zeros(num_epochs)
history['val_accuracy'] = np.zeros(num_epochs)
history['loss'] = np.zeros(num_epochs)
history['val_loss'] = np.zeros(num_epochs)

In [32]:
# User input-
minimum_delta = 0.001
patience = 3

In [33]:
best_val_loss = 1
loc_patience = 0

In [34]:
for epoch in range(num_epochs):
    
    if loc_patience >= patience:
        print("\n'EarlyStopping' called!\n")
        break
        
    # Reset the metrics at the start of the next epoch
    train_loss.reset_states()
    train_accuracy.reset_states()
    test_loss.reset_states()
    test_accuracy.reset_states()
            
    for x, y in train_dataset:
        # train_step(x, y)
        train_one_step(model_stripped, mask_model_stripped, optimizer, x, y)

    for x_t, y_t in test_dataset:
        # test_step(x_t, y_t)
        test_step(model_stripped, optimizer, x_t, y_t)

    template = 'Epoch {0}, Loss: {1:.4f}, Accuracy: {2:.4f}, Test Loss: {3:.4f}, Test Accuracy: {4:4f}'
    
    history['accuracy'][epoch] = train_accuracy.result()
    history['loss'][epoch] = train_loss.result()
    history['val_loss'][epoch] = test_loss.result()
    history['val_accuracy'][epoch] = test_accuracy.result()

    print(template.format(epoch + 1, 
                              train_loss.result(), train_accuracy.result()*100,
                              test_loss.result(), test_accuracy.result()*100))
    
    # Count number of non-zero parameters in each layer and in total-
    # print("layer-wise manner model, number of nonzero parameters in each layer are: \n")

    model_sum_params = 0
    
    for layer in model_stripped.trainable_weights:
        # print(tf.math.count_nonzero(layer, axis = None).numpy())
        model_sum_params += tf.math.count_nonzero(layer, axis = None).numpy()
    
    print("Total number of trainable parameters = {0}\n".format(model_sum_params))

    
    # Code for manual Early Stopping:
    if np.abs(test_loss.result() < best_val_loss) >= minimum_delta:
        # update 'best_val_loss' variable to lowest loss encountered so far-
        best_val_loss = test_loss.result()
        
        # reset 'loc_patience' variable-
        loc_patience = 0
        
    else:  # there is no improvement in monitored metric 'val_loss'
        loc_patience += 1  # number of epochs without any improvement
    

Epoch 1, Loss: 0.1759, Accuracy: 98.9948, Test Loss: 0.0746, Test Accuracy: 99.534950
Total number of trainable parameters = 56168

Epoch 2, Loss: 0.0457, Accuracy: 99.7279, Test Loss: 0.0566, Test Accuracy: 99.618980
Total number of trainable parameters = 56168

Epoch 3, Loss: 0.0246, Accuracy: 99.8562, Test Loss: 0.0575, Test Accuracy: 99.628975
Total number of trainable parameters = 56168

Epoch 4, Loss: 0.0148, Accuracy: 99.9216, Test Loss: 0.0589, Test Accuracy: 99.656952
Total number of trainable parameters = 56168

Epoch 5, Loss: 0.0088, Accuracy: 99.9549, Test Loss: 0.0689, Test Accuracy: 99.633965
Total number of trainable parameters = 56168


'EarlyStopping' called!



In [35]:
# Resize numpy arrays according to the epoch when 'EarlyStopping' was called-
for metrics in history.keys():
    history[metrics] = np.resize(history[metrics], new_shape=epoch)

In [36]:
model.evaluate(X_test, y_test, verbose=0)

[0.06900685519112594, 0.9815]

In [37]:
y_pred = model.predict_classes(X_test)

In [41]:
accuracy = accuracy_score(np.argmax(y_test, axis = 1), y_pred)

print("\nAccuracy of Winning Ticket (5-round) = {0:.4f}\n".format(accuracy))


Accuracy of Winning Ticket (5-round) = 0.9815

