In [1]:
#! pip install -q tensorflow-model-optimization
#! pip install tensorflow-addons

Reference: "The Lottery Ticket Hypothesis: Finding Sparse, Trainable Neural Networks" — https://arxiv.org/abs/1803.03635

In [2]:
import tensorflow as tf
import tensorflow_addons as tfa
import numpy as np
from tensorflow.keras import backend as K
import tensorflow as tf
import random as rn
import os
import tensorflow.keras
import matplotlib.pyplot as plt
from IPython.display import clear_output
import numpy as np
from sklearn.model_selection import StratifiedKFold, train_test_split
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import ModelCheckpoint 
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, LeakyReLU, PReLU, BatchNormalization
from tensorflow.keras import optimizers
from tensorflow.keras import initializers
from tensorflow.keras import regularizers
import pandas as pd
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.callbacks import LearningRateScheduler
import tensorflow_model_optimization as tfmot
from tensorflow.keras.models import Model
import tensorflow.keras as keras
from tensorflow.keras.layers import Input

In [3]:
def set_random_seeds(seed=42):
    """Seed the random number generators used in this notebook.

    Sets the ``PYTHONHASHSEED`` environment variable to ``'0'`` and then seeds
    NumPy's global generator, Python's ``random`` module and TensorFlow's
    global generator with the same value.

    Args:
        seed: Seed value handed to each of the three generators.
    """
    # NOTE(review): Python reads PYTHONHASHSEED at interpreter start-up, so this
    # assignment presumably only reaches child processes — confirm it is needed.
    os.environ['PYTHONHASHSEED'] = '0'
    for seed_generator in (np.random.seed, rn.seed, tf.random.set_seed):
        seed_generator(seed)

# Load data

In [4]:
# Directory prefix of the dataset files (images as .npy, labels as .csv).
folder = './'

In [5]:
# Build the paths with os.path.join so `folder` works with or without a
# trailing separator (plain string concatenation silently produced a wrong
# path for e.g. folder = 'data').
# Images: every sample is flattened to a 784-value row and divided by 255.
X = np.load(os.path.join(folder, 'train_images.npy')).reshape(-1, 784)/255
# Training labels: the first CSV row is skipped and the values are reshaped
# into a single column.
y = np.loadtxt(os.path.join(folder, 'train_labels.csv'), delimiter=',', skiprows=1).reshape(-1, 1)
X_test = np.load(os.path.join(folder, 'test_images.npy')).reshape(-1, 784)/255
# Test labels: read from the 'Category' column as a flat array.
y_test = pd.read_csv(os.path.join(folder, 'test_labels.csv'))['Category'].values

# Basic neural network

In [6]:
def get_model(l1=1e-5, input_dim=784, output_size=10):
    """Build the (uncompiled) fully connected classifier.

    Architecture: two hidden blocks of ``Dense -> BatchNormalization -> PReLU``
    with 1568 and 784 units, followed by a ``Dense`` output layer and a softmax
    activation. The dense layers are named ``hidden_1``, ``hidden_2`` and
    ``Salida``; other cells look them up by these names.

    Args:
        l1: L1 regularization factor applied to the kernels of the two hidden
            dense layers (the output layer is not regularized).
        input_dim: Number of input features.
        output_size: Number of output classes (units of the ``Salida`` layer).

    Returns:
        A ``Sequential`` model that still has to be compiled.
    """
    initializer = 'normal'
    hidden_widths = (784*2, 784)

    model = Sequential()
    for position, units in enumerate(hidden_widths, start=1):
        # Only the first layer declares the input size.
        extra_kwargs = {'input_dim': input_dim} if position == 1 else {}
        model.add(Dense(units, activation='linear', kernel_initializer=initializer,
                        name=f'hidden_{position}',
                        # Passed positionally: the keyword was renamed from `l`
                        # to `l1` across Keras versions.
                        kernel_regularizer=regularizers.l1(l1),
                        **extra_kwargs))
        model.add(BatchNormalization())
        model.add(PReLU())
    model.add(Dense(output_size, kernel_initializer=initializer, name='Salida'))
    model.add(Activation('softmax'))
    return model
# Build a model with the default arguments and print its layer/parameter table.
model = get_model()
# model.save_weights('random-init.hdf5')
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden_1 (Dense)             (None, 1568)              1230880   
_________________________________________________________________
batch_normalization (BatchNo (None, 1568)              6272      
_________________________________________________________________
p_re_lu (PReLU)              (None, 1568)              1568      
_________________________________________________________________
hidden_2 (Dense)             (None, 784)               1230096   
_________________________________________________________________
batch_normalization_1 (Batch (None, 784)               3136      
_________________________________________________________________
p_re_lu_1 (PReLU)            (None, 784)               784       
_________________________________________________________________
Salida (Dense)               (None, 10)                7

# Train network

In [8]:
set_random_seeds(42)
batch_size = 256
lr = 0.001
epochs = 96

# Hold out 15% of the training data for validation.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.15, random_state=42)

# Dense baseline without L1 regularization.
model = get_model(l1=0)
# `learning_rate` is the supported keyword; `lr` is only a deprecated alias.
optim = optimizers.Adam(learning_rate=lr)
model.compile(loss='sparse_categorical_crossentropy', optimizer=optim, metrics=['accuracy'])

# Multiply the learning rate by sqrt(0.1) after 10 epochs without a
# val_accuracy improvement.
rop = ReduceLROnPlateau(monitor='val_accuracy', mode='max', factor=np.sqrt(0.1), patience=10, verbose=1)
# Keep only the checkpoint with the best val_accuracy.
checkpointer = ModelCheckpoint(filepath='mlp.mnist_no_kfold.hdf5',
                               verbose=1, save_best_only=True, monitor='val_accuracy', mode='max')

# Store the untrained weights; a later cell reloads them to rewind the network.
model.save_weights('initial_weights.hdf5')
history = model.fit(
    X_train,
    y_train,
    epochs=epochs,
    batch_size=batch_size,
    validation_data=(X_val, y_val),
    verbose=2,
    callbacks=[checkpointer, rop],
    shuffle=True,
)

Epoch 1/96

Epoch 00001: val_accuracy improved from -inf to 0.85078, saving model to mlp.mnist_no_kfold.hdf5
200/200 - 1s - loss: 0.4261 - accuracy: 0.8468 - val_loss: 0.4076 - val_accuracy: 0.8508 - lr: 0.0010
Epoch 2/96

Epoch 00002: val_accuracy did not improve from 0.85078
200/200 - 1s - loss: 0.3085 - accuracy: 0.8860 - val_loss: 0.4139 - val_accuracy: 0.8487 - lr: 0.0010
Epoch 3/96

Epoch 00003: val_accuracy improved from 0.85078 to 0.85111, saving model to mlp.mnist_no_kfold.hdf5
200/200 - 1s - loss: 0.2588 - accuracy: 0.9039 - val_loss: 0.4222 - val_accuracy: 0.8511 - lr: 0.0010
Epoch 4/96

Epoch 00004: val_accuracy improved from 0.85111 to 0.85444, saving model to mlp.mnist_no_kfold.hdf5
200/200 - 1s - loss: 0.2289 - accuracy: 0.9140 - val_loss: 0.3806 - val_accuracy: 0.8544 - lr: 0.0010
Epoch 5/96

Epoch 00005: val_accuracy did not improve from 0.85444
200/200 - 1s - loss: 0.1999 - accuracy: 0.9254 - val_loss: 0.7009 - val_accuracy: 0.7972 - lr: 0.0010
Epoch 6/96

Epoch 00006

In [9]:
# [loss, accuracy] on the validation split with the weights currently held by `model`.
model.evaluate(X_val, y_val, verbose=0)

[0.6134416460990906, 0.907444417476654]

In [10]:
# Load the weights stored in the ModelCheckpoint file and score them on the validation split.
model.load_weights('mlp.mnist_no_kfold.hdf5')
print(model.evaluate(X_val, y_val, verbose=0))
# The model is compiled again with the same loss, optimizer object and metric
# before the test set is scored.
# NOTE(review): the reason for recompiling is not visible in this cell — confirm it is still required.
model.compile(loss = 'sparse_categorical_crossentropy', optimizer=optim, metrics=['accuracy'])
# [loss, accuracy] on the test set.
model.evaluate(X_test, y_test, verbose=0)

[0.5902414917945862, 0.9081110954284668]


[0.6433372497558594, 0.9057999849319458]

# Get MASK

In [36]:
# Target fraction of weights to prune in every wrapped layer.
sparcity = 0.50
prune_low_magnitude = tfmot.sparsity.keras.prune_low_magnitude
# Constant-sparsity schedule that starts at step 0, never ends (end_step=-1)
# and uses a very large update frequency.
# NOTE(review): the huge frequency presumably makes the mask get computed only
# once, at the first step — confirm against the tfmot schedule semantics.
sprasity_sched = tfmot.sparsity.keras.ConstantSparsity(
    target_sparsity=sparcity,
    begin_step=0,
    end_step=-1,
    frequency=10_000_000,
)

In [37]:
# Show the layer names of `model`; the pruning cell selects layers by name.
print([layer.name for layer in model.layers])

['hidden_1', 'batch_normalization_4', 'p_re_lu_4', 'hidden_2', 'batch_normalization_5', 'p_re_lu_5', 'Salida', 'activation_2']


In [66]:
# Start from the best checkpoint of the dense network.
model.load_weights('mlp.mnist_no_kfold.hdf5')
layer_to_pune = ['hidden_1', 'hidden_2', 'Salida']
# Wrap the selected dense layers for magnitude pruning; every other layer is
# reused unchanged.
# NOTE(review): the layer objects of `model` are passed in directly, so the new
# model presumably shares its weights with `model` — confirm.
prunned_model_layers = [
    prune_low_magnitude(layer, sprasity_sched) if layer.name in layer_to_pune else layer
    for layer in model.layers
]
pruned_model_trained = keras.Sequential(prunned_model_layers)
# `learning_rate` is the supported keyword; `lr` is only a deprecated alias.
# The learning rate is tiny (1e-10), so fitting this model barely moves the weights.
pruned_model_trained.compile(loss='sparse_categorical_crossentropy',
                             optimizer=optimizers.Adam(learning_rate=1e-10),
                             metrics=['accuracy'])
pruned_model_trained.evaluate(X_test, y_test, verbose=0)

[0.6433372497558594, 0.9057999849319458]

In [67]:
# Callbacks for the mask-extraction run:
# - UpdatePruningStep is the tfmot callback that advances the pruning step.
# - ModelCheckpoint keeps the model with the best val_accuracy.
callbacks = [
    tfmot.sparsity.keras.UpdatePruningStep(),
    ModelCheckpoint(filepath='mlp.mnist_no_kfold_sparse.hdf5', verbose=1, save_best_only=True,
                    monitor='val_accuracy', mode='max'),
]

In [68]:
# Fit the wrapped model for a single epoch with the callbacks defined above.
# NOTE(review): with the 1e-10 learning rate this run presumably exists only so
# the pruning callback computes the masks — confirm.
pruned_model_trained.fit(
    X_train,
    y_train,
    batch_size=batch_size,
    epochs=1,
    validation_data=(X_val, y_val),
    shuffle=True,
    callbacks=callbacks,
    verbose=2,
)


Epoch 00001: val_accuracy improved from -inf to 0.90378, saving model to mlp.mnist_no_kfold_sparse.hdf5
200/200 - 1s - loss: 0.0013 - accuracy: 0.9999 - val_loss: 0.5363 - val_accuracy: 0.9038


<tensorflow.python.keras.callbacks.History at 0x7f24f05ed0f0>

In [69]:
# Show the layer names of the wrapped model; later cells use them with get_layer().
print([layer.name for layer in pruned_model_trained.layers])

['prune_low_magnitude_hidden_1', 'batch_normalization_4', 'p_re_lu_4', 'prune_low_magnitude_hidden_2', 'batch_normalization_5', 'p_re_lu_5', 'prune_low_magnitude_Salida', 'activation_2']


In [70]:
# Second element of the first `pruning_vars` entry of the wrapped 'hidden_1' layer.
# NOTE(review): presumably each entry is (weight, mask, threshold), which makes
# this the kernel's binary mask — confirm against the tfmot wrapper.
mask = pruned_model_trained.get_layer('prune_low_magnitude_hidden_1').pruning_vars[0][1]

In [43]:
# Reload the trained dense weights, then compare kernel magnitudes of
# 'hidden_1' against the mask.
model.load_weights('mlp.mnist_no_kfold.hdf5')
hidden_1_kernel = model.get_layer('hidden_1').get_weights()[0]
weights_abs = np.abs(hidden_1_kernel)
kept_magnitudes = weights_abs[mask==1]
pruned_magnitudes = weights_abs[mask==0]
# True when the smallest kept magnitude is strictly larger than the largest pruned one.
print(kept_magnitudes.min() > pruned_magnitudes.max())

True


In [44]:
from tensorflow_model_optimization.python.core.sparsity.keras import pruning_wrapper

# Initialize pruned model

In [45]:
# Rewind `model` to the weights saved before training, then multiply the first
# weight array of every pruned layer by the mask held in the matching wrapper.
# Both models list their layers in the same order, so they are walked in lockstep.
model.load_weights('initial_weights.hdf5')
for i, (base_layer, wrapped_layer) in enumerate(zip(model.layers, pruned_model_trained.layers)):
    if not isinstance(wrapped_layer, pruning_wrapper.PruneLowMagnitude):
        continue
    print(i)
    kernel_mask = wrapped_layer.pruning_vars[0][1].numpy()
    kernel, *remaining = base_layer.get_weights()
    base_layer.set_weights([kernel*kernel_mask, *remaining])

0
3
6


In [46]:
# Fraction of exactly-zero entries in the first weight array of layer `l_index`.
# The mean of the boolean comparison replaces sum()/np.product(shape):
# np.product is a deprecated alias that NumPy 2.0 removed, and the weights are
# now fetched only once.
l_index = 6
kernel = model.layers[l_index].get_weights()[0]
(kernel == 0).mean()

0.5

In [50]:
# Wrap the selected dense layers of the rewound, masked `model` for magnitude
# pruning; every other layer is reused unchanged.
prunned_model_layers = [
    prune_low_magnitude(layer, sprasity_sched) if layer.name in layer_to_pune else layer
    for layer in model.layers
]
pruned_model = keras.Sequential(prunned_model_layers)
# `learning_rate` is the supported keyword; `lr` is only a deprecated alias.
pruned_model.compile(loss='sparse_categorical_crossentropy',
                     optimizer=optimizers.Adam(learning_rate=lr),
                     metrics=['accuracy'])
# Test-set [loss, accuracy] of the masked network before it is retrained.
pruned_model.evaluate(X_test, y_test, verbose=0)

[1.324739933013916, 0.5996000170707703]

In [51]:
# Fraction of mask entries on which the new wrapper and the wrapper of the
# trained model agree for layer `l_index`.
# The mean of the boolean comparison replaces sum()/np.product(shape);
# np.product is a deprecated alias that NumPy 2.0 removed.
new_mask = pruned_model.layers[l_index].pruning_vars[0][1].numpy()
trained_mask = pruned_model_trained.layers[l_index].pruning_vars[0][1].numpy()
(new_mask == trained_mask).mean()

0.5

In [52]:
# Callbacks for retraining the masked network:
# - UpdatePruningStep is the tfmot callback that advances the pruning step.
# - ReduceLROnPlateau multiplies the learning rate by sqrt(0.1) after 10 epochs
#   without a val_accuracy improvement.
# - ModelCheckpoint keeps the model with the best val_accuracy; mode is spelled
#   out as 'max' to match every other callback monitoring val_accuracy.
callbacks = [
    tfmot.sparsity.keras.UpdatePruningStep(),
    ReduceLROnPlateau(monitor='val_accuracy', mode='max', factor=np.sqrt(0.1), patience=10, verbose=1),
    ModelCheckpoint(filepath='mlp.mnist_no_kfold_sparse.hdf5', verbose=1, save_best_only=True,
                    monitor='val_accuracy', mode='max'),
]

# Train pruned model

In [53]:
# Retrain the masked network with the same batch size and epoch budget as the
# dense baseline. `epochs` is reused instead of repeating the literal 96 so the
# two runs cannot drift apart.
history = pruned_model.fit(
    X_train,
    y_train,
    epochs=epochs,
    batch_size=batch_size,
    validation_data=(X_val, y_val),
    verbose=0,
    shuffle=True,
    callbacks=callbacks,
)


Epoch 00001: val_accuracy improved from -inf to 0.85567, saving model to mlp.mnist_no_kfold_sparse.hdf5

Epoch 00002: val_accuracy improved from 0.85567 to 0.87344, saving model to mlp.mnist_no_kfold_sparse.hdf5

Epoch 00003: val_accuracy did not improve from 0.87344

Epoch 00004: val_accuracy did not improve from 0.87344

Epoch 00005: val_accuracy did not improve from 0.87344

Epoch 00006: val_accuracy improved from 0.87344 to 0.87700, saving model to mlp.mnist_no_kfold_sparse.hdf5

Epoch 00007: val_accuracy did not improve from 0.87700

Epoch 00008: val_accuracy improved from 0.87700 to 0.87867, saving model to mlp.mnist_no_kfold_sparse.hdf5

Epoch 00009: val_accuracy did not improve from 0.87867

Epoch 00010: val_accuracy did not improve from 0.87867

Epoch 00011: val_accuracy did not improve from 0.87867

Epoch 00012: val_accuracy improved from 0.87867 to 0.88311, saving model to mlp.mnist_no_kfold_sparse.hdf5

Epoch 00013: val_accuracy improved from 0.88311 to 0.88656, saving mod

In [54]:
# [loss, accuracy] on the validation split with the weights currently held by `pruned_model`.
pruned_model.evaluate(X_val, y_val, verbose=0)

[0.6450605392456055, 0.910444438457489]

In [55]:
# Load the weights stored in the ModelCheckpoint file of the sparse run and score them
# on the validation split.
pruned_model.load_weights('mlp.mnist_no_kfold_sparse.hdf5')
print(pruned_model.evaluate(X_val, y_val, verbose=0))
# [loss, accuracy] on the test set.
pruned_model.evaluate(X_test, y_test, verbose=0)

[0.6450605392456055, 0.910444438457489]


[0.7109397053718567, 0.9041000008583069]

# Check that the original mask equals the pruned model's mask

In [82]:
# Fraction of mask entries on which the mask-extraction model and the retrained
# model agree for the chosen layer (1.0 means the masks are identical).
# The mean of the boolean comparison replaces sum()/np.product(shape);
# np.product is a deprecated alias that NumPy 2.0 removed.
layer_name = 'prune_low_magnitude_hidden_1'
#layer_name = 'prune_low_magnitude_Salida'
old_mask = pruned_model_trained.get_layer(layer_name).pruning_vars[0][1].numpy()
new_mask = pruned_model.get_layer(layer_name).pruning_vars[0][1].numpy()
(old_mask == new_mask).mean()

1.0