In [1]:
# Uncomment to install the extra packages in a fresh environment
# (tensorflow-model-optimization is imported in this notebook as `tfmot`).
#! pip install -q tensorflow-model-optimization
#! pip install tensorflow-addons

In [2]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell runs and edits to local helpers are picked up.
%load_ext autoreload
%autoreload 2

In [3]:
from LTH_helper import LTH

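Paper: "The Lottery Ticket Hypothesis: Finding Sparse, Trainable Neural Networks" (Frankle & Carbin) — https://arxiv.org/abs/1803.03635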

Video reference (link starts at 46:00): https://www.youtube.com/watch?v=0VH1Lim8gL8&feature=youtu.be&t=2760

In [4]:
import numpy as np
import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, BatchNormalization
import os
import random as rn
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras import optimizers
from tensorflow.keras.callbacks import ReduceLROnPlateau, ModelCheckpoint
import tensorflow_model_optimization as tfmot

In [5]:
def set_random_seeds(seed=42):
    """Seed the random number generators used by this notebook.

    Seeds NumPy, Python's `random` module and TensorFlow with `seed`.
    The PYTHONHASHSEED environment variable is always set to '0',
    independently of `seed`.

    Args:
        seed: Integer seed applied to the three generators.
    """
    os.environ['PYTHONHASHSEED'] = '0'
    # Same seeding order as before: NumPy, then `random`, then TensorFlow.
    for seeder in (np.random.seed, rn.seed, tf.random.set_seed):
        seeder(seed)

# Load data

In [6]:
# Base directory that the dataset file names are appended to when loading.
folder = './'

In [7]:
# Paths are built with os.path.join so `folder` works with or without a
# trailing path separator.
# Images: each sample is flattened to 784 values and divided by 255.
X = np.load(os.path.join(folder, 'train_images.npy')).reshape(-1, 784) / 255
# Training labels: comma-separated file, first row skipped, reshaped to one column.
y = np.loadtxt(os.path.join(folder, 'train_labels.csv'), delimiter=',', skiprows=1).reshape(-1, 1)
X_test = np.load(os.path.join(folder, 'test_images.npy')).reshape(-1, 784) / 255
# Test labels: taken from the 'Category' column of the CSV.
y_test = pd.read_csv(os.path.join(folder, 'test_labels.csv'))['Category'].values

In [8]:
# Hold out 15% of the loaded training data as a validation split
# (random_state is fixed so the split is repeatable).
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.15, random_state=42)

# Basic neural network

In [9]:
def compile_model(model, lr=0.001):
    """Compile `model` in place with Adam and sparse categorical cross-entropy.

    Args:
        model: Keras model to compile.
        lr: Learning rate given to the Adam optimizer.

    Returns:
        None. The model is compiled in place and tracks the 'accuracy' metric.
    """
    # `learning_rate` is the supported keyword; the `lr` alias is deprecated
    # and rejected by newer Keras optimizer implementations.
    optim = optimizers.Adam(learning_rate=lr)
    model.compile(loss='sparse_categorical_crossentropy', optimizer=optim, metrics=['accuracy'])

In [10]:
def get_model(compile_model_flag=True, lr=0.001):
    """Build the fully connected classifier used throughout the notebook.

    Architecture: Dense(1568) -> ReLU -> Dense(784) -> ReLU -> Dense(10) -> softmax,
    with the Dense layers named 'hidden_1', 'hidden_2' and 'Salida'.

    Args:
        compile_model_flag: When True, the model is compiled via `compile_model`.
        lr: Learning rate forwarded to `compile_model`.

    Returns:
        The Sequential model (compiled only if `compile_model_flag` is True).
    """
    n_inputs = 784
    n_classes = 10
    # Layers are created in the same order as they are stacked.
    layer_stack = [
        Dense(784*2, activation='linear', name='hidden_1', input_dim=n_inputs),
        Activation('relu'),
        Dense(784, activation='linear', name='hidden_2'),
        Activation('relu'),
        Dense(n_classes, name='Salida'),
        Activation('softmax'),
    ]
    network = Sequential()
    for layer in layer_stack:
        network.add(layer)
    if not compile_model_flag:
        return network
    compile_model(network, lr=lr)
    return network
# Build a model with the default settings and print its layer summary.
model = get_model()
# model.save_weights('random-init.hdf5')
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden_1 (Dense)             (None, 1568)              1230880   
_________________________________________________________________
activation (Activation)      (None, 1568)              0         
_________________________________________________________________
hidden_2 (Dense)             (None, 784)               1230096   
_________________________________________________________________
activation_1 (Activation)    (None, 784)               0         
_________________________________________________________________
Salida (Dense)               (None, 10)                7850      
_________________________________________________________________
activation_2 (Activation)    (None, 10)                0         
Total params: 2,468,826
Trainable params: 2,468,826
Non-trainable params: 0
______________________________________________

In [11]:
# LTH helper (imported from LTH_helper), constructed with the model factory
# and the compile function defined in this notebook.
lth = LTH(get_model, compile_model)

# Train network

In [12]:
def get_callbacks(filename):
    """Create the callbacks shared by the training runs.

    Args:
        filename: Path the checkpoint callback writes the model to.

    Returns:
        list: A ReduceLROnPlateau followed by a ModelCheckpoint, both
        monitoring 'val_accuracy' in 'max' mode.
    """
    lr_scheduler = ReduceLROnPlateau(
        monitor='val_accuracy',
        mode='max',
        factor=np.sqrt(0.1),  # learning rate is multiplied by sqrt(0.1) on each reduction
        patience=10,
        verbose=1,
        min_lr=1e-4,
    )
    # Only the best model (by the monitored metric) is written to `filename`.
    checkpointer = ModelCheckpoint(
        filepath=filename,
        verbose=1,
        save_best_only=True,
        monitor='val_accuracy',
        mode='max',
    )
    return [lr_scheduler, checkpointer]

In [13]:
# Training hyper-parameters passed to the fit() calls in this notebook.
batch_size = 256
epochs = 96

In [14]:
# Re-seed and rebuild the network right before training.
set_random_seeds(42)
model = get_model()

# Persist the untrained weights; later cells pass this file to the LTH helper.
model.save_weights('mlp.mnist.initial_weights.hdf5')

# Dense training run; the best model by val_accuracy is checkpointed.
first_fit_kwargs = dict(
    epochs=epochs,
    batch_size=batch_size,
    validation_data=(X_val, y_val),
    verbose=2,
    callbacks=get_callbacks('mlp.mnist.first_train.hdf5'),
)
history = model.fit(X_train, y_train, **first_fit_kwargs)

Epoch 1/96

Epoch 00001: val_accuracy improved from -inf to 0.84467, saving model to mlp.mnist.first_train.hdf5
200/200 - 1s - loss: 0.5001 - accuracy: 0.8203 - val_loss: 0.4198 - val_accuracy: 0.8447 - lr: 0.0010
Epoch 2/96

Epoch 00002: val_accuracy improved from 0.84467 to 0.86889, saving model to mlp.mnist.first_train.hdf5
200/200 - 0s - loss: 0.3498 - accuracy: 0.8717 - val_loss: 0.3537 - val_accuracy: 0.8689 - lr: 0.0010
Epoch 3/96

Epoch 00003: val_accuracy did not improve from 0.86889
200/200 - 0s - loss: 0.3130 - accuracy: 0.8828 - val_loss: 0.3678 - val_accuracy: 0.8591 - lr: 0.0010
Epoch 4/96

Epoch 00004: val_accuracy improved from 0.86889 to 0.87989, saving model to mlp.mnist.first_train.hdf5
200/200 - 1s - loss: 0.2922 - accuracy: 0.8915 - val_loss: 0.3204 - val_accuracy: 0.8799 - lr: 0.0010
Epoch 5/96

Epoch 00005: val_accuracy did not improve from 0.87989
200/200 - 0s - loss: 0.2709 - accuracy: 0.8994 - val_loss: 0.3334 - val_accuracy: 0.8770 - lr: 0.0010
Epoch 6/96

Ep

In [15]:
# Validation loss and accuracy with the weights left in the model after fit().
model.evaluate(X_val, y_val, verbose=0)

[0.8279619216918945, 0.9031111001968384]

In [16]:
# Restore the checkpoint written during the first training run, then print
# [loss, accuracy] for the validation split and for the test set.
model.load_weights('mlp.mnist.first_train.hdf5')
print(model.evaluate(X_val, y_val, verbose=0), model.evaluate(X_test, y_test, verbose=0))

[0.4022064208984375, 0.9081110954284668] [0.45946744084358215, 0.9025999903678894]


# Get MASK

In [17]:
# Value handed to the LTH helper calls; presumably the pruning fraction
# (the helper output reports a sparsity of 0.5) — TODO confirm against LTH_helper.
pm = 0.50
# pm**(1/3)

In [18]:
# Print the layer names so the layers to prune can be referenced by name.
print([layer.name for layer in model.layers])

['hidden_1', 'activation_3', 'hidden_2', 'activation_4', 'Salida', 'activation_5']


In [19]:
# Names of the three Dense layers, passed to the LTH helper together with the
# first-train checkpoint, the training data and pm.
# NOTE(review): `layers_to_pune` looks like a typo for `layers_to_prune`; the
# name is left unchanged because it is a notebook-level variable.
layers_to_pune = ['hidden_1', 'hidden_2', 'Salida']
model_pruned_layers_trained = lth.get_prunned_model('mlp.mnist.first_train.hdf5', layers_to_pune, X_train, y_train, pm)

Instructions for updating:
Please use `layer.add_weight` method instead.


In [20]:
# Test-set evaluation of the model returned by lth.get_prunned_model.
model_pruned_layers_trained.evaluate(X_test, y_test)



[0.4111524820327759, 0.8985999822616577]

In [21]:
# Call initialize_sparse_model with the trained checkpoint and evaluate the
# returned model on the test set.
model_initialized = lth.initialize_sparse_model('mlp.mnist.first_train.hdf5', model_pruned_layers_trained, pm)
model_initialized.evaluate(X_test, y_test)



[0.4111524820327759, 0.8985999822616577]

In [22]:
# Check the pruned model's masks against the first-train checkpoint weights
# (see LTH_helper for the exact criterion being verified).
lth.verify_mask_with_model_min_weights('mlp.mnist.first_train.hdf5', model_pruned_layers_trained)

prune_low_magnitude_hidden_1: True, shape: (784, 1568), sparcity: 0.5
prune_low_magnitude_hidden_2: True, shape: (1568, 784), sparcity: 0.5
prune_low_magnitude_Salida: True, shape: (784, 10), sparcity: 0.5


In [23]:
# This check is expected to report False because these weights come from the untrained model.
lth.verify_mask_with_model_min_weights('mlp.mnist.initial_weights.hdf5', model_pruned_layers_trained)

prune_low_magnitude_hidden_1: False, shape: (784, 1568), sparcity: 0.5
prune_low_magnitude_hidden_2: False, shape: (1568, 784), sparcity: 0.5
prune_low_magnitude_Salida: False, shape: (784, 10), sparcity: 0.5


# Initialize pruned model

In [24]:
# Call initialize_sparse_model with the saved initial (untrained) weights and
# the pruned trained model, then compile the result through the helper.
pruned_model = lth.initialize_sparse_model('mlp.mnist.initial_weights.hdf5', model_pruned_layers_trained, pm)
lth.compile_model(pruned_model)

In [25]:
# Same mask check, this time passing the initialized pruned model object
# instead of a weights file.
lth.verify_mask_with_model_min_weights(pruned_model, model_pruned_layers_trained)

prune_low_magnitude_hidden_1: True, shape: (784, 1568), sparcity: 0.5
prune_low_magnitude_hidden_2: True, shape: (1568, 784), sparcity: 0.5
prune_low_magnitude_Salida: True, shape: (784, 10), sparcity: 0.5


# Train pruned model

In [26]:
def get_prunned_callbacks(filename):
    """Return the standard training callbacks plus the pruning-step updater.

    Args:
        filename: Checkpoint path forwarded to `get_callbacks`.

    Returns:
        list: The callbacks from `get_callbacks(filename)` followed by a
        `tfmot.sparsity.keras.UpdatePruningStep` instance.
    """
    callbacks = list(get_callbacks(filename))
    callbacks.append(tfmot.sparsity.keras.UpdatePruningStep())
    return callbacks

In [27]:
# Train the sparse model; the best model by val_accuracy is checkpointed.
sparse_fit_kwargs = dict(
    epochs=epochs,
    batch_size=batch_size,
    validation_data=(X_val, y_val),
    verbose=2,
    shuffle=True,
    callbacks=get_prunned_callbacks('mlp.mnist.sparse_train.hdf5'),
)
history = pruned_model.fit(X_train, y_train, **sparse_fit_kwargs)

Epoch 1/96

Epoch 00001: val_accuracy improved from -inf to 0.85678, saving model to mlp.mnist.sparse_train.hdf5
200/200 - 1s - loss: 0.4518 - accuracy: 0.8460 - val_loss: 0.3879 - val_accuracy: 0.8568 - lr: 0.0010
Epoch 2/96

Epoch 00002: val_accuracy improved from 0.85678 to 0.87644, saving model to mlp.mnist.sparse_train.hdf5
200/200 - 1s - loss: 0.2995 - accuracy: 0.8907 - val_loss: 0.3374 - val_accuracy: 0.8764 - lr: 0.0010
Epoch 3/96

Epoch 00003: val_accuracy did not improve from 0.87644
200/200 - 1s - loss: 0.2629 - accuracy: 0.9018 - val_loss: 0.3381 - val_accuracy: 0.8738 - lr: 0.0010
Epoch 4/96

Epoch 00004: val_accuracy improved from 0.87644 to 0.88678, saving model to mlp.mnist.sparse_train.hdf5
200/200 - 1s - loss: 0.2400 - accuracy: 0.9103 - val_loss: 0.3117 - val_accuracy: 0.8868 - lr: 0.0010
Epoch 5/96

Epoch 00005: val_accuracy did not improve from 0.88678
200/200 - 1s - loss: 0.2186 - accuracy: 0.9187 - val_loss: 0.3127 - val_accuracy: 0.8858 - lr: 0.0010
Epoch 6/96


In [28]:
# Validation loss and accuracy with the weights left in the sparse model after fit().
pruned_model.evaluate(X_val, y_val, verbose=0)

[0.8886192440986633, 0.9035555720329285]

In [29]:
# Restore the checkpoint written during sparse training, print the validation
# metrics, and display the test metrics as the cell output.
pruned_model.load_weights('mlp.mnist.sparse_train.hdf5')
print(pruned_model.evaluate(X_val, y_val, verbose=0))
pruned_model.evaluate(X_test, y_test, verbose=0)

[0.6493906378746033, 0.9070000052452087]


[0.7239408493041992, 0.9039000272750854]

# Check that the original mask equals the pruned model's mask after training

In [30]:
# Run the mask check again on the sparse model after it has been trained.
lth.verify_mask_with_model_min_weights(pruned_model, model_pruned_layers_trained)

prune_low_magnitude_hidden_1: True, shape: (784, 1568), sparcity: 0.5
prune_low_magnitude_hidden_2: True, shape: (1568, 784), sparcity: 0.5
prune_low_magnitude_Salida: True, shape: (784, 10), sparcity: 0.5


In [31]:
# Test-set metrics of the model pruned from the trained dense weights, for comparison.
model_pruned_layers_trained.evaluate(X_test, y_test)



[0.4111524820327759, 0.8985999822616577]

In [32]:
# Test-set metrics of the sparse model trained from the initial weights.
pruned_model.evaluate(X_test, y_test)



[0.7239408493041992, 0.9039000272750854]

# Re-train reducing pm

In [33]:
# Second round: call initialize_sparse_model with the saved initial weights,
# the already-trained sparse model and pm**(1/2), then compile the result.
# NOTE(review): with pm = 0.5, pm**(1/2) is about 0.707, which is larger than
# pm — confirm this matches the "reducing pm" intent of this section.
pruned_model_2 = lth.initialize_sparse_model('mlp.mnist.initial_weights.hdf5', pruned_model, pm**(1/2))
lth.compile_model(pruned_model_2)

In [34]:
# Train the second sparse model.
# NOTE: this run checkpoints to the same path as the first sparse training run,
# so that file is overwritten with this run's best model.
second_fit_kwargs = dict(
    epochs=epochs,
    batch_size=batch_size,
    validation_data=(X_val, y_val),
    verbose=2,
    shuffle=True,
    callbacks=get_prunned_callbacks('mlp.mnist.sparse_train.hdf5'),
)
history = pruned_model_2.fit(X_train, y_train, **second_fit_kwargs)

Epoch 1/96

Epoch 00001: val_accuracy improved from -inf to 0.84722, saving model to mlp.mnist.sparse_train.hdf5
200/200 - 1s - loss: 0.4508 - accuracy: 0.8435 - val_loss: 0.4168 - val_accuracy: 0.8472 - lr: 0.0010
Epoch 2/96

Epoch 00002: val_accuracy improved from 0.84722 to 0.87578, saving model to mlp.mnist.sparse_train.hdf5
200/200 - 1s - loss: 0.3223 - accuracy: 0.8815 - val_loss: 0.3429 - val_accuracy: 0.8758 - lr: 0.0010
Epoch 3/96

Epoch 00003: val_accuracy did not improve from 0.87578
200/200 - 1s - loss: 0.2888 - accuracy: 0.8916 - val_loss: 0.3647 - val_accuracy: 0.8614 - lr: 0.0010
Epoch 4/96

Epoch 00004: val_accuracy improved from 0.87578 to 0.88133, saving model to mlp.mnist.sparse_train.hdf5
200/200 - 1s - loss: 0.2707 - accuracy: 0.8994 - val_loss: 0.3225 - val_accuracy: 0.8813 - lr: 0.0010
Epoch 5/96

Epoch 00005: val_accuracy did not improve from 0.88133
200/200 - 1s - loss: 0.2510 - accuracy: 0.9058 - val_loss: 0.3323 - val_accuracy: 0.8758 - lr: 0.0010
Epoch 6/96


In [35]:
# Load the best checkpoint of the second sparse run into the model that was
# trained in that run (pruned_model_2), rather than into pruned_model.
# Print the validation metrics and display the test metrics as the cell output.
pruned_model_2.load_weights('mlp.mnist.sparse_train.hdf5')
print(pruned_model_2.evaluate(X_val, y_val, verbose=0))
pruned_model_2.evaluate(X_test, y_test, verbose=0)

[0.41279926896095276, 0.9078888893127441]


[0.4661621153354645, 0.9016000032424927]