In [1]:
# Dependencias
import os
import math
import tempfile
import tensorflow as tf 
import numpy as np 
import pandas as pd
import sklearn.metrics
import matplotlib.pyplot as plt 
import tensorflow.keras.backend as K
from tensorflow.keras import datasets, utils, preprocessing
from tensorflow.keras import models, losses, optimizers
from tensorflow.keras import layers
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, Activation
from numpy.random import seed
from tensorflow import random

In [2]:
# Check the TensorFlow version and how many GPUs are visible
print(tf.__version__)
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

# Workaround for an error (the original note does not say which one):
# open a TF1-compat InteractiveSession whose config has
# `gpu_options.allow_growth` switched on.
from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession

 

config = ConfigProto()
config.gpu_options.allow_growth = True
session = InteractiveSession(config=config)
# End of workaround

# Fix the random seeds: `seed` is numpy.random.seed and `random` is
# tensorflow.random (both imported in the first cell)
seed(22)
random.set_seed(22)

2.3.0
Num GPUs Available:  0


In [3]:
# Pruning method and fraction of filters to prune.
# prune_model() accepts 'l1', 'apoz' or 'random' and requires 0 <= fraction < 1.
METHOD = 'l1'
PERCENT = 0.2

In [4]:
# Load the CIFAR-10 dataset
(x_train, y_train), (x_test, y_test) = datasets.cifar10.load_data()
# Sanity-check the shapes: 50000 train / 10000 test images of 32x32x3,
# with one label column per sample
assert x_train.shape == (50000, 32, 32, 3)
assert x_test.shape == (10000, 32, 32, 3)
assert y_train.shape == (50000, 1)
assert y_test.shape == (10000, 1)

In [5]:
# Scale the test and train images to [0, 1] (raw pixel values go up to 255)
x_test = x_test.astype('float32') / 255
x_train = x_train.astype('float32') / 255

# Image geometry and number of target classes
img_rows, img_cols = 32, 32
num_dim = 3
num_classes = 10

# Reshaping is disabled: the arrays already have the
# (samples, rows, cols, channels) layout
#x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, num_dim)
#x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, num_dim)

for scaled in (x_test, x_train):
    print(scaled.shape)

(10000, 32, 32, 3)
(50000, 32, 32, 3)


In [6]:
# One-hot encode the class labels (num_classes columns per sample)
y_test_oh = to_categorical(y_test, num_classes)
y_train_oh = to_categorical(y_train, num_classes)

print(y_test_oh.shape)
print(y_train_oh.shape)

(10000, 10)
(50000, 10)


In [7]:
# Hyperparameters used to fine-tune the pruned model
learning_rate = 0.1  # learning rate
batch_size = 128   # Batch size
epochs = 1  # Number of epochs
# NOTE: despite its name, `adam` holds an SGD optimizer, not Adam.
adam = optimizers.SGD(learning_rate=learning_rate)

In [8]:
# Section marker ("the pruning part"). It is the cell's only expression,
# so the notebook echoes the string as the cell output.
"""
LA PARTE DE PRUNING
"""

'\nLA PARTE DE PRUNING\n'

In [9]:
# Load the saved Keras model from disk
vgg16 = models.load_model('../../models/VGG16_model/vgg16Keras.h5')

# Print the architecture to verify that the right model was loaded
vgg16.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 32, 32, 64)        1792      
_________________________________________________________________
activation (Activation)      (None, 32, 32, 64)        0         
_________________________________________________________________
batch_normalization (BatchNo (None, 32, 32, 64)        256       
_________________________________________________________________
dropout (Dropout)            (None, 32, 32, 64)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 32, 32, 64)        36928     
_________________________________________________________________
activation_1 (Activation)    (None, 32, 32, 64)        0         
_________________________________________________________________
batch_normalization_1 (Batch (None, 32, 32, 64)        2

In [10]:
#!pip install folium==0.2.1
#!pip install imgaug==0.2.6
#!pip install kerassurgeon

import tensorflow.keras.backend as K
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Model
from kerassurgeon import Surgeon, identify
from kerassurgeon.operations import delete_channels, delete_layer
import os
import numpy as np
import math
  
def get_filter_weights(model, layer=None):
    """Fetch the kernel weights of one layer, or of every conv layer.

    Args:
        model: object exposing `layers`; each layer needs `name` and
            `get_weights()`.
        layer: index into `model.layers`. When given, the first entry of
            that layer's `get_weights()` is returned as-is. When omitted,
            every layer whose name contains 'conv' is collected.

    Returns:
        A single weight array (layer given) or a list of arrays, one per
        conv layer, in model order.
    """
    if layer or layer == 0:
        return model.layers[layer].get_weights()[0]

    return [np.array(candidate.get_weights()[0])
            for candidate in model.layers
            if 'conv' in candidate.name]

def get_filters_l1(model, layer=None):
    """Return the L1 norms of the convolution filters of a Keras model.

    Args:
        model: Keras model, forwarded to `get_filter_weights`.
        layer: index of a single layer in `model.layers`; when omitted,
            every conv layer is processed.

    Returns:
        * layer given: a list with the sum of absolute weights of each
          filter of that layer (NOT divided by the filter size).
        * layer omitted: a float array of shape
          (number of conv layers, largest filter count). Each entry is the
          filter's L1 norm divided by the number of weights in one filter,
          so layers with different kernel sizes are comparable. Layers with
          fewer filters are right-padded with NaN.
    """
    if layer or layer == 0:
        weights = get_filter_weights(model, layer)
        num_filter = weights.shape[3]
        return [np.sum(np.abs(weights[:, :, :, i])) for i in range(num_filter)]

    weights = get_filter_weights(model)
    max_kernels = max(kernels.shape[3] for kernels in weights)
    # np.nan rather than the np.NaN alias, which NumPy 2.0 removed.
    norms = np.full((len(weights), max_kernels), np.nan)
    for layer_ix, kernels in enumerate(weights):
        # number of weights in a single filter of this layer
        kernel_size = kernels[:, :, :, 0].size
        nb_filters = kernels.shape[3]
        l1 = [np.sum(np.abs(kernels[:, :, :, i])) for i in range(nb_filters)]
        # normalise by the filter size
        norms[layer_ix, :nb_filters] = np.array(l1) / kernel_size
    return norms

In [11]:
def get_filters_apoz(model, layer=None):
    """Compute the Average Percentage of Zeros (APoZ) of conv filters.

    The activations are measured on the 'validation' subset that
    ImageDataGenerator carves out of the notebook-level `x_test` array
    (validation_split=0.1, batch size 1, no shuffling).

    Args:
        model: Keras model to inspect.
        layer: index of one conv layer in `model.layers`; the activation
            layer is taken to be the layer right after it. When omitted,
            every 'activation' layer directly preceded by a 'conv' layer
            is scored.

    Returns:
        * layer given: the per-filter APoZ returned by `compute_apoz`.
        * layer omitted: a float array of shape
          (number of conv layers, largest filter count). Cells that do not
          correspond to a real filter hold -inf, so a "largest APoZ first"
          ranking can never select them.

    NOTE(review): rows are matched to conv layers by position, which
    assumes every conv layer is followed by an activation layer.
    """
    # x_test was already divided by 255 when the data was prepared, so no
    # `rescale` here: rescaling again would feed the model inputs that are
    # 255 times too small and distort the zero counts.
    sample_generator = ImageDataGenerator(validation_split=0.1)

    apoz_generator = sample_generator.flow(
                x_test,
                batch_size = 1,
                subset='validation',
                shuffle = False)

    if layer or layer == 0:
        assert 'conv' in model.layers[layer].name, "The layer provided is not a convolution layer"
        weights_array = get_filter_weights(model, layer)
        act_ix = layer + 1
        nb_filters = weights_array.shape[3]
        return compute_apoz(model, act_ix, nb_filters, apoz_generator)

    weights_array = get_filter_weights(model)
    max_kernels = max(kernels.shape[3] for kernels in weights_array)

    # activation layers that directly follow a conv layer; i > 0 keeps
    # model.layers[i-1] from wrapping around to the last layer
    activations_indexes = [i for i, v in enumerate(model.layers)
                           if i > 0 and 'activation' in v.name
                           and 'conv' in model.layers[i-1].name]

    # -inf padding (instead of 0) so padded cells never tie with real
    # filters whose APoZ is exactly 0
    apoz = np.full((len(weights_array), max_kernels), -np.inf)

    for row, act_ix in enumerate(activations_indexes):
        # score the sample set with the model trimmed at this activation
        nb_filters = weights_array[row].shape[3]
        apoz[row, :nb_filters] = compute_apoz(model, act_ix, nb_filters, apoz_generator)

    return apoz


def compute_apoz(model, layer_ix, nb_filters, generator, steps=944):
    """Compute the Average Percentage of Zeros of a layer's activation maps.

    A temporary model is built from the inputs of `model` up to the output
    of the layer at `layer_ix`, run over `generator`, and the fraction of
    exactly-zero activations is computed per channel.

    Args:
        model: Keras model.
        layer_ix: index of the (activation) layer whose output is scored.
        nb_filters: expected number of channels of that output.
        generator: batch source handed to `predict`.
        steps: number of batches to draw from `generator`
            (defaults to 944, the value previously hard-coded).

    Returns:
        1-D array of length `nb_filters` with the fraction of zeros per
        channel.

    Raises:
        ValueError: if the layer output does not have `nb_filters` channels.
    """
    act_layer = model.get_layer(index=layer_ix)
    node_index = 0
    temp_model = Model(model.inputs,
                       act_layer.get_output_at(node_index))

    # Model.predict accepts generators; predict_generator is deprecated.
    a = temp_model.predict(generator, steps=steps, workers=3, verbose=1)
    if a.shape[-1] != nb_filters:
        raise ValueError(
            "Layer output has %d channels, expected %d" % (a.shape[-1], nb_filters))

    # one row per channel, one column per (sample, row, col) position
    activations = a.reshape(-1, nb_filters).T
    apoz_layer = np.sum(activations == 0, axis=1) / activations.shape[1]

    return apoz_layer

In [12]:
#function to return pruned filters with apoz method
def prune_apoz(model, n_pruned, layer=None):
    """Select the `n_pruned` filters with the highest APoZ.

    With `layer` given, returns filter indexes inside that layer; without
    it, returns whatever `biggest_indices` yields for the whole-model APoZ
    matrix (rows of [layer row, filter index]).
    """
    whole_model = not (layer or layer == 0)
    if whole_model:
        return biggest_indices(get_filters_apoz(model), n_pruned)

    # highest APoZ first
    descending = np.argsort(get_filters_apoz(model, layer))[::-1]
    return descending[:n_pruned]

#function to return pruned filters with l1 method
def prune_l1(model, n_pruned, layer=None):
    """Select the `n_pruned` filters with the smallest L1 norm.

    With `layer` given, returns filter indexes inside that layer; without
    it, returns whatever `smallest_indices` yields for the whole-model norm
    matrix (rows of [layer row, filter index]).
    """
    whole_model = not (layer or layer == 0)
    if whole_model:
        return smallest_indices(get_filters_l1(model), n_pruned)

    # smallest norm first
    ascending = np.argsort(get_filters_l1(model, layer))
    return ascending[:n_pruned]

def prune_random(model, n_pruned, layer=None):
    """Pick `n_pruned` filters at random.

    Args:
        model: Keras model, forwarded to `get_filter_weights`.
        n_pruned: number of filters to select.
        layer: index of a single layer in `model.layers`; when omitted,
            filters are drawn from all conv layers.

    Returns:
        * layer given: array of `n_pruned` distinct filter indexes.
        * layer omitted: integer array of shape (n_pruned, 2) whose rows are
          [position in the conv-layer list, filter index]. A layer is drawn
          uniformly first and then a filter inside it, so filters of small
          layers are more likely to be picked. An empty (0, 2) array is
          returned when `n_pruned` is 0 or negative.

    Raises:
        ValueError: when more filters are requested than the conv layers
            contain (the rejection loop could otherwise never finish).
    """
    weights = get_filter_weights(model, layer)
    if layer or layer == 0:
        n_filters = weights.shape[3]
        return np.random.choice(range(n_filters), n_pruned, replace=False)

    total_filters = sum(kernels.shape[3] for kernels in weights)
    if n_pruned > total_filters:
        raise ValueError(
            "Cannot prune %d filters: the model only has %d" % (n_pruned, total_filters))

    chosen = []
    already_picked = set()
    # rejection sampling: redraw until an unseen (layer, filter) pair shows up
    while len(chosen) < n_pruned:
        layer_ix = int(np.random.choice(len(weights)))
        filter_ix = int(np.random.choice(range(weights[layer_ix].shape[3])))
        if (layer_ix, filter_ix) in already_picked:
            continue
        already_picked.add((layer_ix, filter_ix))
        chosen.append([layer_ix, filter_ix])

    # reshape keeps the two-column layout even when nothing was chosen
    return np.array(chosen, dtype=int).reshape(-1, 2)

In [13]:
def compute_pruned_count(model, perc=0.1, layer=None):
    """Translate a pruning fraction into a number of filters.

    The filter count is read from index 3 of `output_shape`, either for
    the layer at index `layer` or summed over every layer whose name
    contains 'conv'. The result is floor(perc * count) as a Python int.
    """
    if layer or layer == 0:
        # filters of the requested layer only
        nb_filters = model.layers[layer].output_shape[3]
    else:
        # filters of all conv layers together
        per_layer = [candidate.output_shape[3]
                     for candidate in model.layers
                     if 'conv' in candidate.name]
        nb_filters = np.sum(per_layer)

    return int(np.floor(perc * nb_filters))


def smallest_indices(array, N):
    """Return the coordinates of the N smallest entries of `array`.

    The result has one row per selected entry and one column per array
    dimension, ordered from smallest value upwards.
    """
    flat_positions = np.argsort(array.ravel())[:N]
    coordinates = np.unravel_index(flat_positions, array.shape)
    return np.stack(coordinates).T

def biggest_indices(array, N):
    """Return the coordinates of the N largest entries of `array`.

    The result has one row per selected entry and one column per array
    dimension, ordered from largest value downwards.
    """
    ascending = np.argsort(array.ravel())
    flat_positions = ascending[::-1][:N]
    coordinates = np.unravel_index(flat_positions, array.shape)
    return np.stack(coordinates).T

In [14]:
from kerassurgeon.operations import delete_channels, delete_layer
from kerassurgeon import Surgeon

def prune_one_layer(model, pruned_indexes, layer_ix, opt):
    """Remove the given filters from one layer and recompile the result.

    `layer_ix` indexes `model.layers`; `pruned_indexes` lists the channels
    handed to kerassurgeon's `delete_channels`. The returned model is
    compiled with categorical cross-entropy, optimizer `opt` and the
    accuracy metric.
    """
    target_layer = model.layers[layer_ix]
    slimmed = delete_channels(model, target_layer, pruned_indexes)
    slimmed.compile(optimizer=opt,
                    loss='categorical_crossentropy',
                    metrics=['accuracy'])
    return slimmed

def prune_multiple_layers(model, pruned_matrix, opt):
    """Prune filters from several conv layers in one surgery pass.

    Args:
        model: Keras model to prune (the Surgeon works on a copy).
        pruned_matrix: 2-D array whose rows are
            [position in the list of conv layers, filter index].
            It is NOT modified.
        opt: optimizer used to compile the pruned model.

    Returns:
        The pruned model, compiled with categorical cross-entropy and the
        accuracy metric.
    """
    conv_indexes = [i for i, v in enumerate(model.layers) if 'conv' in v.name]

    # Work on a copy: the first column is rewritten below and the caller's
    # matrix must keep its conv-list positions.
    to_prune = np.array(pruned_matrix, copy=True)
    # translate "n-th conv layer" into an index of model.layers
    to_prune[:, 0] = [conv_indexes[i] for i in to_prune[:, 0]]

    surgeon = Surgeon(model, copy=True)
    for layer_ix in np.unique(to_prune[:, 0]):
        pruned_filters = [int(row[1]) for row in to_prune if row[0] == layer_ix]
        print('filtros a podar:')
        print(pruned_filters)
        pruned_layer = model.layers[layer_ix]
        print('capa a podar:')
        print(pruned_layer)
        surgeon.add_job('delete_channels', pruned_layer, channels=pruned_filters)

    model_pruned = surgeon.operate()
    model_pruned.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])

    return model_pruned

In [15]:
def prune_model(model, perc, opt, method='l1', layer=None):
    """Prune a Keras model with the chosen filter-selection strategy.

    Arguments:
        model: Keras Model object
        perc: fraction of filters to remove, 0 <= perc < 1
        opt: optimizer used to compile the pruned model
        method: one of 'l1', 'apoz' or 'random'
        layer: index of a single layer to prune; all conv layers are
            considered when omitted
    Returns:
        A pruned (and compiled) Keras Model object
    """
    assert method in ['l1','apoz','random'], "Invalid pruning method"
    assert perc >=0 and perc <1, "Invalid pruning percentage"

    n_pruned = compute_pruned_count(model, perc, layer)

    # strategy name -> function that selects the filters to drop
    selectors = {
        'l1': prune_l1,
        'apoz': prune_apoz,
        'random': prune_random,
    }
    to_prune = selectors[method](model, n_pruned, layer)

    if layer or layer == 0:
        return prune_one_layer(model, to_prune, layer, opt)
    return prune_multiple_layers(model, to_prune, opt)

In [16]:
# Prune the model with the configured method and fraction, then print the
# resulting architecture. `adam` is the optimizer object created with the
# hyperparameters.
model_pruned = prune_model(vgg16, PERCENT, adam, method=METHOD)
model_pruned.summary()

filtros a podar:
[335, 446, 240, 115, 363, 67]
capa a podar:
<tensorflow.python.keras.layers.convolutional.Conv2D object at 0x7f175003fc50>
filtros a podar:
[288, 42, 350, 465, 444, 139, 11, 124, 190, 9, 449, 486, 16, 29, 405, 419, 478, 204, 111, 308, 138, 263, 154, 365, 380, 217, 477, 436, 228, 310, 422, 363, 38, 472, 208, 440, 446, 352, 511, 63, 21, 58, 389, 178, 87, 277, 488, 378, 8, 132, 167, 307, 181, 445, 104, 195, 149, 48, 49, 433, 222, 45, 71, 507, 7, 14, 413, 466, 498, 216, 57, 403, 359, 161, 311, 62, 470, 145, 69, 432, 133, 282, 384, 506, 65, 482, 286, 174, 159, 211, 471, 157, 264, 398, 203, 56, 81, 191, 172, 198, 437, 424, 260, 27, 256, 265, 283, 387, 427, 334, 224, 298, 401, 177, 92, 386, 248, 293, 339, 24, 499, 153, 140, 128, 213, 82, 435, 326, 199, 156, 284, 152, 500, 98, 207, 366, 102, 451, 243, 385, 90, 127, 508, 116, 404, 344, 346, 219, 189, 279, 338, 135, 330, 44, 129, 97, 123, 173, 253, 372, 509, 89, 428, 59, 364, 452, 336, 229, 317, 15, 249, 121, 163, 37, 381, 476, 

In [17]:
# Fine-tune the pruned model on the training set.
# NOTE: the test set is passed as validation data here.
history = model_pruned.fit(x_train, y_train_oh, epochs=epochs, batch_size=batch_size,
                        validation_data=(x_test, y_test_oh))



In [18]:
# Save the pruned Keras model to disk in HDF5 (.h5) format
model_pruned.save('vgg16KerasPruned.h5')

In [19]:
# Convert to TensorFlow Lite without quantization
converter = tf.lite.TFLiteConverter.from_keras_model(model_pruned)
tflite_model = converter.convert()
# Save to disk; the context manager closes the file handle
with open("vgg16TFLitePruned.tflite", "wb") as tflite_file:
    tflite_file.write(tflite_model)

# Convert to TensorFlow Lite with dynamic-range quantization
def representative_dataset():
    """Yield 100 single-sample float32 batches taken from x_test.

    Each item is a one-element list holding an array of shape
    (1,) + x_test.shape[1:], i.e. one batched input sample.
    NOTE: this generator is not attached to `converter_q` below, so it does
    not influence the conversion.
    """
    for i in range(100):
        # slicing keeps the leading batch axis
        yield [x_test[i:i + 1].astype(np.float32)]

converter_q = tf.lite.TFLiteConverter.from_keras_model(model_pruned)
converter_q.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_quant_model = converter_q.convert()
# Save to disk; the context manager closes the file handle
with open("vgg16TFLitePrunedQuant.tflite", "wb") as tflite_quant_file:
    tflite_quant_file.write(tflite_quant_model)

Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.


Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.


Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.


Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.


INFO:tensorflow:Assets written to: /tmp/tmpq1jx6hu9/assets


INFO:tensorflow:Assets written to: /tmp/tmpq1jx6hu9/assets


INFO:tensorflow:Assets written to: /tmp/tmp_rpm0kql/assets


INFO:tensorflow:Assets written to: /tmp/tmp_rpm0kql/assets


8896432

In [20]:
# Section marker ("size comparison part"). It is the cell's only
# expression, so the notebook echoes the string as the cell output.
"""
PARTE DE COMPARACIÓN DE TAMAÑOS
"""

'\nPARTE DE COMPARACIÓN DE TAMAÑOS\n'

In [21]:
def get_gzipped_model_size(file):
  """Return the size in bytes of `file` after DEFLATE compression.

  The file is written into a temporary ZIP archive (ZIP_DEFLATED), the
  archive size is measured, and the archive is deleted again. Despite the
  function name, the container is a ZIP archive rather than a gzip stream.

  Args:
    file: path of the file to compress.

  Returns:
    Size of the temporary archive in bytes.
  """
  import os
  import tempfile
  import zipfile

  descriptor, zipped_file = tempfile.mkstemp('.zip')
  # mkstemp hands back an already-open descriptor; ZipFile reopens the path
  # itself, so close it right away instead of leaking it.
  os.close(descriptor)
  try:
    with zipfile.ZipFile(zipped_file, 'w', compression=zipfile.ZIP_DEFLATED) as archive:
      archive.write(file)
    return os.path.getsize(zipped_file)
  finally:
    # do not leave one temporary archive behind per call
    os.remove(zipped_file)


In [22]:
# Size comparison: one (message template, model file) pair per variant
size_reports = [
    ("El tamaño del modelo comprimido en Keras es %.2f bytes",
     '../../models/VGG16_model/vgg16Keras.h5'),
    ("El tamaño del modelo comprimido en Keras PODADO es %.2f bytes",
     'vgg16KerasPruned.h5'),
    ("El tamaño del modelo comprimido PODADO y en TFlite sin cuantizar es %.2f bytes",
     'vgg16TFLitePruned.tflite'),
    ("El tamaño del modelo comprimido PODADO y en TFlite CUANTIZADO es %.2f bytes",
     'vgg16TFLitePrunedQuant.tflite'),
]

for position, (template, model_path) in enumerate(size_reports):
    # separator line between consecutive reports
    if position:
        print("------------------------------------------------------")
    print(template % get_gzipped_model_size(model_path))

El tamaño del modelo comprimido en Keras es 55639960.00 bytes
------------------------------------------------------
El tamaño del modelo comprimido en Keras PODADO es 32834439.00 bytes
------------------------------------------------------
El tamaño del modelo comprimido PODADO y en TFlite sin cuantizar es 32795822.00 bytes
------------------------------------------------------
El tamaño del modelo comprimido PODADO y en TFlite CUANTIZADO es 7345252.00 bytes


In [23]:
# Section marker ("accuracy comparison and timing part"). It is the cell's
# only expression, so the notebook echoes the string as the cell output.
"""
PARTE DE COMPARACIÓN DE PRECISIÓN Y TOMA DE TIEMPOS
"""

'\nPARTE DE COMPARACIÓN DE PRECISIÓN Y TOMA DE TIEMPOS\n'

In [24]:
import time
import statistics

# Measure the accuracy of the unpruned Keras model on the test set
scores = vgg16.evaluate(x_test, y_test_oh)
# scores[1] / metrics_names[1] is the second reported value; the first is the loss
print("\n%s: %.2f%%" % (vgg16.metrics_names[1], scores[1]*100))


accuracy: 80.75%


In [25]:
def predict_tflite(tflite_model, x_test, y_true, quantized):
    """Run a TensorFlow Lite model over a test set, one sample at a time.

    Prints the mean duration of the interpreter's `invoke()` call and the
    accuracy of the predictions, and returns that accuracy.

    Args:
        tflite_model: the model to run. Bytes (as returned by the TFLite
            converter) are loaded from memory and a string is treated as a
            model file path. Any other value falls back to the file written
            earlier in the notebook: 'vgg16TFLitePrunedQuant.tflite' when
            `quantized` is true, 'vgg16TFLitePruned.tflite' otherwise.
        x_test: array of input samples; it is cast to float32.
        y_true: one-hot encoded labels with 10 classes.
        quantized: selects the wording of the timing message and the
            fallback model file.

    Returns:
        Accuracy computed by sklearn.metrics.accuracy_score on the one-hot
        predictions.
    """
    if quantized:
        fallback_path = 'vgg16TFLitePrunedQuant.tflite'
        timing_label = "Media de los tiempos de ejecución con cuantización: "
    else:
        fallback_path = 'vgg16TFLitePruned.tflite'
        timing_label = "Media de los tiempos de ejecución sin cuantización: "

    # Honour the model that was passed in; only fall back to the files on
    # disk when no usable model is given.
    if isinstance(tflite_model, (bytes, bytearray)):
        interpreter = tf.lite.Interpreter(model_content=bytes(tflite_model))
    elif isinstance(tflite_model, str):
        interpreter = tf.lite.Interpreter(model_path=tflite_model)
    else:
        interpreter = tf.lite.Interpreter(fallback_path)

    # Prepare the test set (astype returns a new float32 array)
    x_test_ = x_test.astype(np.float32)

    # Allocate memory for the model
    interpreter.allocate_tensors()

    # Input and output tensors
    input_details = interpreter.get_input_details()[0]
    output_details = interpreter.get_output_details()[0]

    # One one-hot row per test sample
    y_pred = np.empty([x_test_.shape[0], 10], dtype=output_details["dtype"])

    # Per-sample inference times
    times = []

    for i in range(len(x_test_)):
        # Write the sample (slice keeps the batch axis) into the input tensor
        interpreter.set_tensor(input_details["index"], x_test_[i:i + 1])
        # Time only the interpreter call; perf_counter is a monotonic,
        # high-resolution clock meant for measuring short intervals
        init = time.perf_counter()
        interpreter.invoke()
        times.append(time.perf_counter() - init)
        # Store the predicted class as a one-hot row
        output = interpreter.get_tensor(output_details["index"])[0]
        y_pred[i, :] = to_categorical(output.argmax(), 10)

    # Mean prediction time in seconds
    print(timing_label + str(statistics.mean(times)) + "segs")
    # Accuracy
    accuracy_score = sklearn.metrics.accuracy_score(y_true, y_pred)
    print("Accuracy score:", accuracy_score)

    return accuracy_score

In [26]:
# Accuracy comparison across the four variants: original Keras model,
# pruned Keras model, TFLite without quantization and TFLite quantized.
# evaluate() returns (loss, accuracy); only the accuracy is kept.
_ , accuracy_tf = vgg16.evaluate(x_test, y_test_oh, batch_size=1, verbose=1)
_ ,accuracy_tfpruned = model_pruned.evaluate(x_test, y_test_oh, batch_size=1, verbose=1)
accuracy_no_quant_tflite = predict_tflite(tflite_model, x_test, y_test_oh, False)
accuracy_quant_tflite = predict_tflite(tflite_quant_model, x_test, y_test_oh, True)

Media de los tiempos de ejecución sin cuantización: 0.009961427998542785segs
Accuracy score: 0.8011
Media de los tiempos de ejecución con cuantización: 0.015525880050659179segs
Accuracy score: 0.8003


In [27]:
# Summary table: one row per model variant, indexed by the model name,
# with its accuracy on the test set
df = pd.DataFrame.from_records(
    [["Keras", accuracy_tf],
     ["Keras podado", accuracy_tfpruned],
     ["TensorFlow Lite no Cuant.", accuracy_no_quant_tflite],
     ["TensorFlow Lite Cuant.", accuracy_quant_tflite]],
     columns = ["Model", "Accuracy"], index="Model")
df

Unnamed: 0_level_0,Accuracy
Model,Unnamed: 1_level_1
Keras,0.8075
Keras podado,0.8011
TensorFlow Lite no Cuant.,0.8011
TensorFlow Lite Cuant.,0.8003
