In [1]:
#! pip install -q tensorflow-model-optimization
#! pip install tensorflow-addons

Paper: [The Lottery Ticket Hypothesis: Finding Sparse, Trainable Neural Networks](https://arxiv.org/abs/1803.03635)

https://www.youtube.com/watch?v=0VH1Lim8gL8&feature=youtu.be&t=2760

In [2]:
import numpy as np
import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, BatchNormalization
import os
import random as rn
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras import optimizers
from tensorflow.keras.callbacks import ReduceLROnPlateau, ModelCheckpoint
import tensorflow_model_optimization as tfmot

# import tensorflow_addons as tfa
# from tensorflow.keras import backend as K


# import tensorflow.keras
# import matplotlib.pyplot as plt
# from IPython.display import clear_output




# from tensorflow.keras.callbacks import LearningRateScheduler

# from tensorflow.keras.models import Model
# import tensorflow.keras as keras
# from tensorflow.keras.layers import Input

In [3]:
def set_random_seeds(seed=42):
    """Seed the Python, NumPy and TensorFlow random number generators.

    Args:
        seed: Value passed to ``np.random.seed``, ``random.seed`` and
            ``tf.random.set_seed``.

    ``PYTHONHASHSEED`` is always set to ``'0'``, independently of ``seed``.
    """
    # NOTE(review): PYTHONHASHSEED is normally read at interpreter start-up, so
    # assigning it here probably only affects child processes -- confirm.
    os.environ['PYTHONHASHSEED'] = '0'
    for seed_fn in (np.random.seed, rn.seed, tf.random.set_seed):
        seed_fn(seed)

# Load data

In [4]:
# Directory holding the .npy / .csv data files. It is concatenated directly
# with the file names when loading, so it must end with a path separator.
folder = './'

In [5]:
# Training images: reshaped to 784 features per row and divided by 255.
X = np.load(folder+'train_images.npy').reshape(-1, 784)/255
# Training labels: the first CSV row is skipped and the result is a column vector.
y = np.loadtxt(folder+'train_labels.csv', delimiter=',', skiprows=1).reshape(-1, 1)
# Test images get the same reshape and scaling as the training images.
X_test = np.load(folder+'test_images.npy').reshape(-1, 784)/255
# Test labels are read from the 'Category' column.
y_test = pd.read_csv(folder+'test_labels.csv')['Category'].values

In [6]:
# Hold out 15% of the training data for validation; random_state fixes the split.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.15, random_state=42)

# Basic neural network

In [7]:
def compile_model(model, lr=0.001):
    """Compile ``model`` in place for classification with integer labels.

    The model is compiled with an Adam optimizer, sparse categorical
    cross-entropy loss and the accuracy metric.

    Args:
        model: Keras model to compile.
        lr: Learning rate given to the Adam optimizer.
    """
    # `learning_rate` is the supported keyword; `lr` is a deprecated alias that
    # newer Keras releases no longer accept.
    optim = optimizers.Adam(learning_rate=lr)
    model.compile(loss='sparse_categorical_crossentropy', optimizer=optim, metrics=['accuracy'])

In [8]:
def get_model(compile_model_flag=True, lr=0.001):
    """Build the fully connected 784 -> 1568 -> 784 -> 10 classifier.

    Args:
        compile_model_flag: When true, the model is compiled with
            ``compile_model`` before being returned.
        lr: Learning rate forwarded to ``compile_model``.

    Returns:
        The ``Sequential`` model (compiled only if ``compile_model_flag``).
    """
    n_inputs = 784
    n_classes = 10
    # Every Dense layer is linear and followed by a separate Activation layer.
    # BatchNormalization between each Dense layer and its ReLU is currently disabled.
    network = Sequential([
        Dense(n_inputs * 2, activation='linear', name='hidden_1', input_dim=n_inputs),
        Activation('relu'),
        Dense(n_inputs, activation='linear', name='hidden_2'),
        Activation('relu'),
        Dense(n_classes, name='Salida'),
        Activation('softmax'),
    ])
    if compile_model_flag:
        compile_model(network, lr=lr)
    return network
# Build one instance to display the architecture. The network that is actually
# trained is re-created after seeding in the training cell.
model = get_model()
# model.save_weights('random-init.hdf5')
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden_1 (Dense)             (None, 1568)              1230880   
_________________________________________________________________
activation (Activation)      (None, 1568)              0         
_________________________________________________________________
hidden_2 (Dense)             (None, 784)               1230096   
_________________________________________________________________
activation_1 (Activation)    (None, 784)               0         
_________________________________________________________________
Salida (Dense)               (None, 10)                7850      
_________________________________________________________________
activation_2 (Activation)    (None, 10)                0         
Total params: 2,468,826
Trainable params: 2,468,826
Non-trainable params: 0
______________________________________________

# Train network

In [9]:
def get_callbacks(filename):
    """Create the training callbacks, both driven by ``val_accuracy``.

    Args:
        filename: Path where the best model (highest ``val_accuracy``) is saved.

    Returns:
        ``[ReduceLROnPlateau, ModelCheckpoint]``.
    """
    # Multiply the learning rate by sqrt(0.1) after 10 epochs without
    # improvement, never going below 1e-4.
    lr_on_plateau = ReduceLROnPlateau(
        monitor='val_accuracy',
        mode='max',
        factor=np.sqrt(0.1),
        patience=10,
        verbose=1,
        min_lr=1e-4,
    )
    best_checkpoint = ModelCheckpoint(
        filepath=filename,
        verbose=1,
        save_best_only=True,
        monitor='val_accuracy',
        mode='max',
    )
    return [lr_on_plateau, best_checkpoint]

In [10]:
# Training hyper-parameters used by the fit calls in this notebook.
batch_size = 256
epochs = 96

In [11]:
# Seed first, then build, so the saved starting weights come from a seeded run.
set_random_seeds(42)
model = get_model()

# Keep the untrained weights: later cells reload them to initialise the sparse network.
model.save_weights('mlp.mnist.initial_weights.hdf5')

history = model.fit(
    X_train,
    y_train,
    epochs=epochs,
    batch_size=batch_size,
    validation_data=(X_val, y_val),
    verbose=2,
    callbacks=get_callbacks('mlp.mnist.first_train.hdf5'),
)

Epoch 1/96

Epoch 00001: val_accuracy improved from -inf to 0.84922, saving model to mlp.mnist.first_train.hdf5
200/200 - 1s - loss: 0.5000 - accuracy: 0.8201 - val_loss: 0.4116 - val_accuracy: 0.8492 - lr: 0.0010
Epoch 2/96

Epoch 00002: val_accuracy improved from 0.84922 to 0.86811, saving model to mlp.mnist.first_train.hdf5
200/200 - 1s - loss: 0.3492 - accuracy: 0.8725 - val_loss: 0.3592 - val_accuracy: 0.8681 - lr: 0.0010
Epoch 3/96

Epoch 00003: val_accuracy did not improve from 0.86811
200/200 - 0s - loss: 0.3132 - accuracy: 0.8825 - val_loss: 0.3740 - val_accuracy: 0.8577 - lr: 0.0010
Epoch 4/96

Epoch 00004: val_accuracy improved from 0.86811 to 0.88033, saving model to mlp.mnist.first_train.hdf5
200/200 - 1s - loss: 0.2932 - accuracy: 0.8898 - val_loss: 0.3202 - val_accuracy: 0.8803 - lr: 0.0010
Epoch 5/96

Epoch 00005: val_accuracy did not improve from 0.88033
200/200 - 1s - loss: 0.2721 - accuracy: 0.8989 - val_loss: 0.3401 - val_accuracy: 0.8731 - lr: 0.0010
Epoch 6/96

Ep

In [12]:
# Validation [loss, accuracy] for the weights the model holds right after `fit`
# (the best checkpoint is only loaded in the next cell).
model.evaluate(X_val, y_val, verbose=0)

[0.8217623829841614, 0.9031111001968384]

In [13]:
# Restore the checkpoint saved for the best val_accuracy, then report
# [loss, accuracy] on the validation set and on the test set.
model.load_weights('mlp.mnist.first_train.hdf5')
print(model.evaluate(X_val, y_val, verbose=0), model.evaluate(X_test, y_test, verbose=0))

[0.374520480632782, 0.9061111211776733] [0.40363335609436035, 0.8996000289916992]


# Get MASK

In [14]:
# pm**(1/3)

In [15]:
# Layer names; the Dense layer names are what `layers_to_pune` selects below.
print([layer.name for layer in model.layers])

['hidden_1', 'activation_3', 'hidden_2', 'activation_4', 'Salida', 'activation_5']


In [16]:
def initialize_sparse_model(filename, pruned_model_with_mask, pm = 0.20):
    """Load weights from ``filename`` and zero them wherever the reference mask is 0.

    A fresh ``get_model()`` network is loaded with the weights in ``filename``.
    For every layer of ``pruned_model_with_mask`` that is a ``PruneLowMagnitude``
    wrapper, the kernel of the layer at the same index is multiplied by that
    wrapper's mask and the layer is wrapped with ``prune_low_magnitude``.
    All other layers are reused unchanged.

    Args:
        filename: Weights file compatible with ``get_model()``.
        pruned_model_with_mask: Model whose ``PruneLowMagnitude`` layers provide
            the masks; it must have the same number of layers as ``get_model()``.
        pm: The target sparsity given to the pruning schedule is ``1 - pm``.

    Returns:
        A ``Sequential`` model compiled with a zero-learning-rate SGD optimizer.

    Raises:
        ValueError: If the two models do not have the same number of layers.
    """
    sparsity = 1 - pm
    # NOTE(review): begin_step == end_step == 0 with a huge frequency is presumably
    # meant to make the new wrappers compute their mask exactly once, on the first
    # training step -- confirm against the tfmot schedule semantics.
    sparsity_schedule = tfmot.sparsity.keras.ConstantSparsity(
        sparsity,
        0, # Do sparsity calculation in the first step
        end_step=0,
        frequency=10000000
    )
    model = get_model()
    model.load_weights(filename)

    reference_layers = pruned_model_with_mask.layers
    if len(reference_layers) != len(model.layers):
        raise ValueError(
            f'pruned_model_with_mask has {len(reference_layers)} layers, '
            f'expected {len(model.layers)}'
        )

    wrapper_cls = tfmot.sparsity.keras.pruning_wrapper.PruneLowMagnitude
    sparse_layers = []
    for target_layer, reference_layer in zip(model.layers, reference_layers):
        if isinstance(reference_layer, wrapper_cls):
            layer_weights = target_layer.get_weights()
            # pruning_vars[0][1] is read as the mask of the wrapped layer's first weight.
            layer_weights[0] = layer_weights[0] * reference_layer.pruning_vars[0][1].numpy()
            target_layer.set_weights(layer_weights)
            sparse_layers.append(
                tfmot.sparsity.keras.prune_low_magnitude(target_layer, sparsity_schedule)
            )
        else:
            sparse_layers.append(target_layer)

    sparse_model = Sequential(sparse_layers)
    # `learning_rate` replaces the deprecated `lr` alias; metrics is passed as a list.
    sparse_model.compile(
        optimizer=optimizers.SGD(learning_rate=0),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return sparse_model

def get_prunned_model(filename, layers_to_pune, X_train, y_train, pm = 0.20):
    """Wrap the selected layers for magnitude pruning and trigger mask computation.

    A fresh ``get_model()`` network is loaded with the weights in ``filename``.
    Each layer whose name is in ``layers_to_pune`` is wrapped with
    ``prune_low_magnitude``. A single zero-learning-rate fit on the first
    training sample is then run with ``UpdatePruningStep`` so that the masks
    get calculated.

    Args:
        filename: Weights file compatible with ``get_model()``.
        layers_to_pune: Names of the layers to wrap for pruning.
        X_train: Training inputs; only the first sample is used.
        y_train: Training labels; only the first label is used.
        pm: The target sparsity given to the pruning schedule is ``1 - pm``.

    Returns:
        The compiled ``Sequential`` model containing the pruning wrappers.
    """
    sparsity = 1 - pm
    sparsity_schedule = tfmot.sparsity.keras.ConstantSparsity(
        sparsity,
        0, # Do sparsity calculation in the first step
        end_step=0,
        frequency=10000000
    )
    model = get_model()
    model.load_weights(filename)

    wrapped_layers = [
        tfmot.sparsity.keras.prune_low_magnitude(layer, sparsity_schedule)
        if layer.name in layers_to_pune
        else layer
        for layer in model.layers
    ]
    pruned_model = Sequential(wrapped_layers)
    callbacks = [
        tfmot.sparsity.keras.UpdatePruningStep(),
        # tfmot.sparsity.keras.PruningSummaries(log_dir='logs'),
    ]

    # This is necessary to make keras calculate the mask, learning rate is 0.
    # `learning_rate` replaces the deprecated `lr` alias; metrics is passed as a list.
    pruned_model.compile(
        optimizer=optimizers.SGD(learning_rate=0),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    # Only one sample is fed, so the batch size is fixed to 1 here instead of
    # reading the notebook-level `batch_size` global.
    pruned_model.fit(X_train[0:1], y_train[0:1], epochs=1, batch_size=1, verbose=0, callbacks=callbacks)
    return pruned_model

In [17]:
# Wrap every Dense layer for pruning and compute the masks from the weights of
# the best checkpoint of the first training run.
layers_to_pune = ['hidden_1', 'hidden_2', 'Salida']
model_pruned_layers_trained = get_prunned_model('mlp.mnist.first_train.hdf5', layers_to_pune, X_train, y_train)

Instructions for updating:
Please use `layer.add_weight` method instead.


In [18]:
# Test [loss, accuracy] of the trained network after pruning, without retraining.
model_pruned_layers_trained.evaluate(X_test, y_test)



[0.42743778228759766, 0.8539999723434448]

In [19]:
# Sanity check: applying the masks to the same checkpoint through
# initialize_sparse_model should evaluate identically to the pruned model above.
model_initialized = initialize_sparse_model('mlp.mnist.first_train.hdf5', model_pruned_layers_trained, )
model_initialized.evaluate(X_test, y_test)



[0.42743778228759766, 0.8539999723434448]

In [20]:
# Mask variable of the first pruned layer (pruning_vars[0][1] is what the helper
# functions read as the mask). Not referenced again in the cells shown here.
initial_mask = model_pruned_layers_trained.get_layer('prune_low_magnitude_hidden_1').pruning_vars[0][1]

In [21]:
def verify_mask_with_model_min_weights(model_, pruned_model):
    """Print, per pruned layer, whether the mask separates weights by magnitude.

    For every ``PruneLowMagnitude`` layer in ``pruned_model`` the mask is compared
    with the absolute weights of the layer at the same index in ``model_``. The
    check passes when the smallest kept weight (mask == 1) is strictly larger
    than the largest pruned weight (mask == 0). The mask shape and its sparsity
    are printed alongside the result.

    Args:
        model_: Either a model, or the path of a weights file that is loaded
            into a fresh ``get_model()`` network.
        pruned_model: Model whose ``PruneLowMagnitude`` layers provide the masks.
    """
    if isinstance(model_, str):
        model = get_model()
        model.load_weights(model_)
    else:
        model = model_

    wrapper_cls = tfmot.sparsity.keras.pruning_wrapper.PruneLowMagnitude
    for i, layer in enumerate(pruned_model.layers):
        if not isinstance(layer, wrapper_cls):
            continue
        weights_abs = np.abs(model.layers[i].get_weights()[0])
        mask = layer.pruning_vars[0][1].numpy()

        kept = weights_abs[mask == 1]
        pruned = weights_abs[mask == 0]
        if kept.size == 0 or pruned.size == 0:
            # np.min / np.max raise on empty arrays; with nothing kept or nothing
            # pruned there is no pair of weights that could violate the ordering.
            separated = True
        else:
            # Verify that min of weights with mask 1 is higher than max of weights with mask 0
            separated = np.min(kept) > np.max(pruned)

        # np.prod replaces np.product, which was removed in NumPy 2.0.
        sparsity = 1 - mask.sum() / np.prod(mask.shape)
        print(f'{layer.name}: {separated}, shape: {mask.shape}, sparcity: {sparsity}')
            
# Check the masks against the checkpoint they were computed from.
verify_mask_with_model_min_weights('mlp.mnist.first_train.hdf5', model_pruned_layers_trained)

prune_low_magnitude_hidden_1: True, shape: (784, 1568), sparcity: 0.8000003253852561
prune_low_magnitude_hidden_2: True, shape: (1568, 784), sparcity: 0.8000003253852561
prune_low_magnitude_Salida: True, shape: (784, 10), sparcity: 0.8


In [22]:
# Same check against the untrained initial weights. The masks come from the
# trained weights, so the magnitude ordering does not hold here (prints False).
verify_mask_with_model_min_weights('mlp.mnist.initial_weights.hdf5', model_pruned_layers_trained)

prune_low_magnitude_hidden_1: False, shape: (784, 1568), sparcity: 0.8000003253852561
prune_low_magnitude_hidden_2: False, shape: (1568, 784), sparcity: 0.8000003253852561
prune_low_magnitude_Salida: False, shape: (784, 10), sparcity: 0.8


In [23]:
# from tensorflow_model_optimization.python.core.sparsity.keras import pruning_wrapper

# Initialize pruned model

In [24]:
# Reset to the saved initial weights, zeroed by the masks found on the trained
# network. initialize_sparse_model compiles with a zero-learning-rate SGD, so the
# model is recompiled with compile_model (Adam) before training.
pruned_model = initialize_sparse_model('mlp.mnist.initial_weights.hdf5', model_pruned_layers_trained)
compile_model(pruned_model)

In [25]:
# The masked initial weights should satisfy the check: pruned positions are zero.
verify_mask_with_model_min_weights(pruned_model, model_pruned_layers_trained)

prune_low_magnitude_hidden_1: True, shape: (784, 1568), sparcity: 0.8000003253852561
prune_low_magnitude_hidden_2: True, shape: (1568, 784), sparcity: 0.8000003253852561
prune_low_magnitude_Salida: True, shape: (784, 10), sparcity: 0.8


In [26]:
# Fraction of exactly-zero entries in the first weight array of the chosen layer.
# The fraction is computed from pruned_model's own array (the original divided by
# the shape of the unrelated dense `model`) and avoids np.product, which was
# removed in NumPy 2.0.
l_index = 0
(pruned_model.layers[l_index].get_weights()[0] == 0).mean()

0.8000003253852561

In [27]:
# (pruned_model.layers[l_index].pruning_vars[0][1].numpy() == model_pruned_layers_trained.layers[l_index].pruning_vars[0][1].numpy()).sum()/np.product(model.layers[l_index].get_weights()[0].shape)

In [28]:
# NOTE(review): this list is not passed to the `pruned_model.fit` call below,
# which builds its own callbacks with get_callbacks(...) -- confirm whether it
# is still needed.
callbacks = [
    tfmot.sparsity.keras.UpdatePruningStep(),
    ReduceLROnPlateau(
        monitor='val_accuracy',
        mode='max',
        factor=np.sqrt(0.1),
        patience=10,
        verbose=1,
    ),
    ModelCheckpoint(
        filepath='mlp.mnist_no_kfold_sparse.hdf5',
        verbose=1,
        save_best_only=True,
        monitor='val_accuracy',
        mode='auto',
    ),
    # tfmot.sparsity.keras.PruningSummaries(log_dir='logs'),
]

# Train pruned model

In [29]:
# Train the sparse network with the same settings as the dense run. `epochs`
# replaces the hard-coded 96 so both runs follow the shared configuration cell.
# UpdatePruningStep is appended because the model contains pruning wrappers.
history = pruned_model.fit(
    X_train,
    y_train,
    epochs=epochs,
    batch_size=batch_size,
    validation_data=(X_val, y_val),
    verbose=2,
    shuffle=True,
    callbacks=get_callbacks('mlp.mnist.sparse_train.hdf5') + [tfmot.sparsity.keras.UpdatePruningStep()],
)

Epoch 1/96

Epoch 00001: val_accuracy improved from -inf to 0.85711, saving model to mlp.mnist.sparse_train.hdf5
200/200 - 1s - loss: 0.5636 - accuracy: 0.8264 - val_loss: 0.3956 - val_accuracy: 0.8571 - lr: 0.0010
Epoch 2/96

Epoch 00002: val_accuracy improved from 0.85711 to 0.88311, saving model to mlp.mnist.sparse_train.hdf5
200/200 - 1s - loss: 0.3312 - accuracy: 0.8794 - val_loss: 0.3293 - val_accuracy: 0.8831 - lr: 0.0010
Epoch 3/96

Epoch 00003: val_accuracy improved from 0.88311 to 0.89167, saving model to mlp.mnist.sparse_train.hdf5
200/200 - 1s - loss: 0.2799 - accuracy: 0.8974 - val_loss: 0.3099 - val_accuracy: 0.8917 - lr: 0.0010
Epoch 4/96

Epoch 00004: val_accuracy did not improve from 0.89167
200/200 - 1s - loss: 0.2506 - accuracy: 0.9074 - val_loss: 0.3120 - val_accuracy: 0.8871 - lr: 0.0010
Epoch 5/96

Epoch 00005: val_accuracy improved from 0.89167 to 0.89322, saving model to mlp.mnist.sparse_train.hdf5
200/200 - 1s - loss: 0.2263 - accuracy: 0.9162 - val_loss: 0.295

In [30]:
# Validation [loss, accuracy] for the weights the sparse model holds right after `fit`.
pruned_model.evaluate(X_val, y_val, verbose=0)

[0.9045692086219788, 0.9048888683319092]

In [31]:
# Restore the best sparse checkpoint, then report [loss, accuracy] on the
# validation set (printed) and on the test set (cell output).
pruned_model.load_weights('mlp.mnist.sparse_train.hdf5')
print(pruned_model.evaluate(X_val, y_val, verbose=0))
pruned_model.evaluate(X_test, y_test, verbose=0)

[0.8550835251808167, 0.9056666493415833]


[0.9800007939338684, 0.9009000062942505]

# Check that the original mask equals the pruned model's mask

In [32]:
# Compares pruned_model's trained weights with the masks stored in
# model_pruned_layers_trained (pruned_model's own mask variables are not read).
verify_mask_with_model_min_weights(pruned_model, model_pruned_layers_trained)

prune_low_magnitude_hidden_1: True, shape: (784, 1568), sparcity: 0.8000003253852561
prune_low_magnitude_hidden_2: True, shape: (1568, 784), sparcity: 0.8000003253852561
prune_low_magnitude_Salida: True, shape: (784, 10), sparcity: 0.8


In [33]:
# Reference: test [loss, accuracy] of the trained network pruned without retraining.
model_pruned_layers_trained.evaluate(X_test, y_test)



[0.42743778228759766, 0.8539999723434448]

In [34]:
# Test [loss, accuracy] of the sparse network retrained from the masked initial weights.
pruned_model.evaluate(X_test, y_test)



[0.9800007939338684, 0.9009000062942505]