In [2]:
#! pip install -q tensorflow-model-optimization
#! pip install tensorflow-addons

In [3]:
%load_ext autoreload
%autoreload 2

In [4]:
from LTH_helper import LTH

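Paper: *The Lottery Ticket Hypothesis: Finding Sparse, Trainable Neural Networks* (Frankle & Carbin) — https://arxiv.org/abs/1803.03635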

https://www.youtube.com/watch?v=0VH1Lim8gL8&feature=youtu.be&t=2760

In [29]:
import numpy as np
import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, BatchNormalization
import os
import random as rn
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras import optimizers
from tensorflow.keras.callbacks import ReduceLROnPlateau, ModelCheckpoint
import tensorflow_model_optimization as tfmot

In [6]:
def set_random_seeds(seed=42):
    """Seed the Python, NumPy and TensorFlow random number generators.

    Args:
        seed: Integer passed to `np.random.seed`, `random.seed` and
            `tf.random.set_seed`, in that order.

    Returns:
        None.
    """
    # The hash seed is pinned to '0' regardless of `seed`.
    # NOTE(review): assigning PYTHONHASHSEED here may not affect the already
    # running interpreter — confirm whether it should be exported before launch.
    os.environ['PYTHONHASHSEED'] = '0'
    for seeder in (np.random.seed, rn.seed, tf.random.set_seed):
        seeder(seed)

# Load data

In [7]:
# Directory prefix prepended to every data file name loaded below
folder = './'

In [8]:
# Training images: flattened to 784 features per sample and divided by 255
# (presumably 28x28 8-bit images scaled to [0, 1] — TODO confirm)
X = np.load(folder+'train_images.npy').reshape(-1, 784)/255
# Training labels: first CSV row is skipped; result is reshaped to a single column
y = np.loadtxt(folder+'train_labels.csv', delimiter=',', skiprows=1).reshape(-1, 1)
# Test images get the same flatten + scale treatment as the training images
X_test = np.load(folder+'test_images.npy').reshape(-1, 784)/255
# Test labels are read from the 'Category' column as a NumPy array
y_test = pd.read_csv(folder+'test_labels.csv')['Category'].values

In [9]:
# Hold out 15% of the training data for validation; random_state fixes the split
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.15, random_state=42)

# Basic neural network

In [10]:
def compile_model(model, lr=0.001):
    """Compile `model` in place with Adam and sparse categorical cross-entropy.

    Args:
        model: Keras model to compile; it is modified in place.
        lr: Learning rate handed to the Adam optimizer.

    Returns:
        None.
    """
    # `learning_rate` is the supported keyword; `lr` is a deprecated alias that
    # newer Keras releases no longer accept.
    optim = optimizers.Adam(learning_rate=lr)
    model.compile(loss='sparse_categorical_crossentropy', optimizer=optim, metrics=['accuracy'])

In [11]:
def get_model(compile_model_flag=True, lr=0.001):
    """Build the fully connected classifier used throughout the notebook.

    Architecture: 784 inputs -> Dense(1568) -> ReLU -> Dense(784) -> ReLU
    -> Dense(10) -> softmax. The Dense layers use a linear activation and the
    non-linearity is a separate layer.

    Args:
        compile_model_flag: When true, the model is compiled via
            `compile_model` before being returned.
        lr: Learning rate forwarded to `compile_model`.

    Returns:
        The Sequential model (compiled if requested).
    """
    n_inputs = 784
    n_classes = 10
    # Separate Activation layers leave room for a BatchNormalization layer
    # between each Dense and its ReLU (currently not used).
    network = Sequential([
        Dense(n_inputs * 2, activation='linear', name='hidden_1', input_dim=n_inputs),
        Activation('relu'),
        Dense(n_inputs, activation='linear', name='hidden_2'),
        Activation('relu'),
        Dense(n_classes, name='Salida'),
        Activation('softmax'),
    ])
    if not compile_model_flag:
        return network
    compile_model(network, lr=lr)
    return network
# Build one model instance and print its layer/parameter summary as a sanity check
model = get_model()
# model.save_weights('random-init.hdf5')
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden_1 (Dense)             (None, 1568)              1230880   
_________________________________________________________________
activation (Activation)      (None, 1568)              0         
_________________________________________________________________
hidden_2 (Dense)             (None, 784)               1230096   
_________________________________________________________________
activation_1 (Activation)    (None, 784)               0         
_________________________________________________________________
Salida (Dense)               (None, 10)                7850      
_________________________________________________________________
activation_2 (Activation)    (None, 10)                0         
Total params: 2,468,826
Trainable params: 2,468,826
Non-trainable params: 0
______________________________________________

In [12]:
# The LTH helper receives the model factory and the compile function defined in this notebook
lth = LTH(get_model, compile_model)

# Train network

In [13]:
def get_callbacks(filename):
    """Return the training callbacks shared by the dense and sparse runs.

    Args:
        filename: Path where the best weights (by `val_accuracy`) are saved.

    Returns:
        A two-element list: `[ReduceLROnPlateau, ModelCheckpoint]`.
    """
    # Multiply the LR by sqrt(0.1) after 10 epochs without val_accuracy gains,
    # never going below 1e-4.
    lr_schedule = ReduceLROnPlateau(
        monitor='val_accuracy',
        mode='max',
        factor=np.sqrt(0.1),
        patience=10,
        verbose=1,
        min_lr=1e-4,
    )
    # Keep only the checkpoint with the highest validation accuracy.
    best_checkpoint = ModelCheckpoint(
        filepath=filename,
        verbose=1,
        save_best_only=True,
        monitor='val_accuracy',
        mode='max',
    )
    return [lr_schedule, best_checkpoint]

In [15]:
# Training hyperparameters reused by both the dense and the sparse fit calls
batch_size = 256
epochs = 10

In [16]:
# Reseed, then build a fresh network so its initial weights are reproducible
set_random_seeds(42)
model = get_model()

# Snapshot the untrained weights; they are reloaded later to initialise the sparse model
model.save_weights('mlp.mnist.initial_weights.hdf5')

# Dense baseline run; the best epoch (by val_accuracy) is checkpointed to disk
history = model.fit(
    X_train,
    y_train,
    epochs=epochs,
    batch_size=batch_size,
    validation_data=(X_val, y_val),
    verbose=2,
    callbacks=get_callbacks('mlp.mnist.first_train.hdf5'),
)

Epoch 1/10

Epoch 00001: val_accuracy improved from -inf to 0.84911, saving model to mlp.mnist.first_train.hdf5
200/200 - 1s - loss: 0.5009 - accuracy: 0.8201 - val_loss: 0.4079 - val_accuracy: 0.8491 - lr: 0.0010
Epoch 2/10

Epoch 00002: val_accuracy improved from 0.84911 to 0.86822, saving model to mlp.mnist.first_train.hdf5
200/200 - 1s - loss: 0.3495 - accuracy: 0.8727 - val_loss: 0.3543 - val_accuracy: 0.8682 - lr: 0.0010
Epoch 3/10

Epoch 00003: val_accuracy did not improve from 0.86822
200/200 - 1s - loss: 0.3133 - accuracy: 0.8828 - val_loss: 0.3611 - val_accuracy: 0.8644 - lr: 0.0010
Epoch 4/10

Epoch 00004: val_accuracy improved from 0.86822 to 0.88044, saving model to mlp.mnist.first_train.hdf5
200/200 - 1s - loss: 0.2899 - accuracy: 0.8921 - val_loss: 0.3233 - val_accuracy: 0.8804 - lr: 0.0010
Epoch 5/10

Epoch 00005: val_accuracy improved from 0.88044 to 0.88200, saving model to mlp.mnist.first_train.hdf5
200/200 - 1s - loss: 0.2698 - accuracy: 0.9005 - val_loss: 0.3233 - 

In [17]:
# Validation [loss, accuracy] using the weights left in `model` after the last epoch
model.evaluate(X_val, y_val, verbose=0)

[0.3346378207206726, 0.8874444365501404]

In [18]:
# Reload the checkpoint written by ModelCheckpoint (best val_accuracy) and report
# [loss, accuracy] on the validation set followed by the test set
model.load_weights('mlp.mnist.first_train.hdf5')
print(model.evaluate(X_val, y_val, verbose=0), model.evaluate(X_test, y_test, verbose=0))

[0.3136780261993408, 0.8892222046852112] [0.32768240571022034, 0.8866999745368958]


# Get MASK

In [19]:
# pm**(1/3)

In [20]:
# List the layer names so the layers to prune can be referenced by name
print([layer.name for layer in model.layers])

['hidden_1', 'activation_3', 'hidden_2', 'activation_4', 'Salida', 'activation_5']


In [21]:
# Names of the Dense layers handed to the LTH helper for pruning
layers_to_pune = ['hidden_1', 'hidden_2', 'Salida']
# NOTE(review): with pm = 0.20 the verification cells report sparsity ~0.8, so pm looks
# like the fraction of weights kept — confirm against LTH_helper
model_pruned_layers_trained = lth.get_prunned_model('mlp.mnist.first_train.hdf5', layers_to_pune, X_train, y_train, pm = 0.20)

Instructions for updating:
Please use `layer.add_weight` method instead.


In [22]:
# Test-set [loss, accuracy] of the pruned model returned by the helper
model_pruned_layers_trained.evaluate(X_test, y_test)



[0.5985085368156433, 0.8303999900817871]

In [23]:
# Initialise the sparse model from the trained checkpoint via the helper, then evaluate on the test set
# NOTE(review): the exact semantics of initialize_sparse_model live in LTH_helper — confirm there
model_initialized = lth.initialize_sparse_model('mlp.mnist.first_train.hdf5', model_pruned_layers_trained)
model_initialized.evaluate(X_test, y_test)



[0.5985085368156433, 0.8303999900817871]

In [24]:
# Check the pruned model's masks against the trained checkpoint; the helper prints one line per pruned layer
lth.verify_mask_with_model_min_weights('mlp.mnist.first_train.hdf5', model_pruned_layers_trained)

prune_low_magnitude_hidden_1: True, shape: (784, 1568), sparcity: 0.8000003253852561
prune_low_magnitude_hidden_2: True, shape: (1568, 784), sparcity: 0.8000003253852561
prune_low_magnitude_Salida: True, shape: (784, 10), sparcity: 0.8


In [25]:
# Expected to print False for every layer because this file holds the untrained initial weights
lth.verify_mask_with_model_min_weights('mlp.mnist.initial_weights.hdf5', model_pruned_layers_trained)

prune_low_magnitude_hidden_1: False, shape: (784, 1568), sparcity: 0.8000003253852561
prune_low_magnitude_hidden_2: False, shape: (1568, 784), sparcity: 0.8000003253852561
prune_low_magnitude_Salida: False, shape: (784, 10), sparcity: 0.8


# Initialize pruned model

In [26]:
# Start the sparse model from the saved initial weights, then compile it through the helper
pruned_model = lth.initialize_sparse_model('mlp.mnist.initial_weights.hdf5', model_pruned_layers_trained)
lth.compile_model(pruned_model)

In [27]:
# Same mask check, this time passing the freshly initialised model object instead of a weights file
lth.verify_mask_with_model_min_weights(pruned_model, model_pruned_layers_trained)

prune_low_magnitude_hidden_1: True, shape: (784, 1568), sparcity: 0.8000003253852561
prune_low_magnitude_hidden_2: True, shape: (1568, 784), sparcity: 0.8000003253852561
prune_low_magnitude_Salida: True, shape: (784, 10), sparcity: 0.8


# Train pruned model

In [30]:
# Callback list for sparse training: pruning-step update, LR decay on a
# val_accuracy plateau, and best-epoch checkpointing.
# NOTE(review): the fit call visible in this notebook builds its own list from
# get_callbacks(...) and does not read this variable — confirm it is still needed.
callbacks = [
    tfmot.sparsity.keras.UpdatePruningStep(),
    ReduceLROnPlateau(monitor='val_accuracy', mode='max', factor=np.sqrt(0.1), patience=10, verbose=1),
    # mode='max' is stated explicitly, matching get_callbacks; the path is a plain string
    ModelCheckpoint(filepath='mlp.mnist_no_kfold_sparse.hdf5', verbose=1, save_best_only=True, monitor='val_accuracy', mode='max'),
    # Optional: tfmot.sparsity.keras.PruningSummaries(log_dir='logs'),
]

In [31]:
# Retrain the sparse network with the same schedule as the dense baseline;
# UpdatePruningStep is appended after the shared callbacks.
history = pruned_model.fit(
    X_train,
    y_train,
    epochs=epochs,
    batch_size=batch_size,
    validation_data=(X_val, y_val),
    verbose=2,
    shuffle=True,
    callbacks=get_callbacks('mlp.mnist.sparse_train.hdf5') + [tfmot.sparsity.keras.UpdatePruningStep()],
)

Epoch 1/10

Epoch 00001: val_accuracy improved from -inf to 0.87011, saving model to mlp.mnist.sparse_train.hdf5
200/200 - 1s - loss: 0.4811 - accuracy: 0.8490 - val_loss: 0.3529 - val_accuracy: 0.8701 - lr: 0.0010
Epoch 2/10

Epoch 00002: val_accuracy improved from 0.87011 to 0.88711, saving model to mlp.mnist.sparse_train.hdf5
200/200 - 1s - loss: 0.2973 - accuracy: 0.8907 - val_loss: 0.3112 - val_accuracy: 0.8871 - lr: 0.0010
Epoch 3/10

Epoch 00003: val_accuracy improved from 0.88711 to 0.89300, saving model to mlp.mnist.sparse_train.hdf5
200/200 - 1s - loss: 0.2545 - accuracy: 0.9061 - val_loss: 0.2982 - val_accuracy: 0.8930 - lr: 0.0010
Epoch 4/10

Epoch 00004: val_accuracy did not improve from 0.89300
200/200 - 1s - loss: 0.2334 - accuracy: 0.9134 - val_loss: 0.2996 - val_accuracy: 0.8900 - lr: 0.0010
Epoch 5/10

Epoch 00005: val_accuracy improved from 0.89300 to 0.89789, saving model to mlp.mnist.sparse_train.hdf5
200/200 - 1s - loss: 0.2145 - accuracy: 0.9203 - val_loss: 0.286

In [32]:
# Validation [loss, accuracy] with the weights left after the last training epoch
pruned_model.evaluate(X_val, y_val, verbose=0)

[0.3075350821018219, 0.8968889117240906]

In [33]:
# Reload the best sparse checkpoint (highest val_accuracy), print the validation
# metrics, and display the test metrics as the cell output
pruned_model.load_weights('mlp.mnist.sparse_train.hdf5')
print(pruned_model.evaluate(X_val, y_val, verbose=0))
pruned_model.evaluate(X_test, y_test, verbose=0)

[0.28652212023735046, 0.8978888988494873]


[0.307934045791626, 0.8906999826431274]

# Check that the original mask equals the pruned model's mask after training

In [35]:
# Re-run the mask check now that the sparse model has been trained
lth.verify_mask_with_model_min_weights(pruned_model, model_pruned_layers_trained)

prune_low_magnitude_hidden_1: True, shape: (784, 1568), sparcity: 0.8000003253852561
prune_low_magnitude_hidden_2: True, shape: (1568, 784), sparcity: 0.8000003253852561
prune_low_magnitude_Salida: True, shape: (784, 10), sparcity: 0.8


In [36]:
# Test-set [loss, accuracy] of model_pruned_layers_trained, kept as the comparison point
model_pruned_layers_trained.evaluate(X_test, y_test)



[0.5985085368156433, 0.8303999900817871]

In [37]:
# Test-set [loss, accuracy] of the retrained sparse model
pruned_model.evaluate(X_test, y_test)



[0.307934045791626, 0.8906999826431274]