# TF2 기반, Gradual Pruning을 적용해서 모델 최적화

* https://www.tensorflow.org/model_optimization/guide/pruning/pruning_with_keras?hl=ko

In [1]:
import tensorflow as tf
# Ask TensorFlow which physical GPUs it can see.
gpus = tf.config.experimental.list_physical_devices('GPU')
if not gpus:
    print('Please check GPU available')
else:
    try:
        # Restrict TensorFlow to the first GPU only.
        # Visible devices and memory growth must both be configured at
        # program start-up; otherwise a RuntimeError is raised.
        tf.config.experimental.set_visible_devices(gpus[0], 'GPU')
        tf.config.experimental.set_memory_growth(gpus[0], True)
        print('GPU[0] is ready')
    except RuntimeError as err:
        # Report the configuration failure and carry on.
        print(err)
    
import os
import sys
import tensorflow as tf
from tensorflow.keras import datasets, layers, models, optimizers
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from IPython.display import clear_output
from six.moves import urllib
from tensorflow import feature_column as fc
import tensorflow_datasets as tfds
# Render matplotlib text with the NanumBarunGothic font family
# (presumably so Korean labels display correctly - confirm the font is installed).
plt.rcParams["font.family"] = 'NanumBarunGothic'

# Path of the tensorboard executable. A TENSORBOARD_BINARY value already present
# in the environment takes precedence, so the notebook is not tied to one
# machine's conda layout; the original absolute path is kept as the fallback.
TENSORBOARD_BINARY = os.environ.get(
    'TENSORBOARD_BINARY', '/home/hoondori/anaconda3/envs/ai/bin/tensorboard')
os.environ['TENSORBOARD_BINARY'] = TENSORBOARD_BINARY

GPU[0] is ready


In [2]:
import tempfile
import os

import tensorflow as tf
import numpy as np

from tensorflow import keras

# Pruning 이 없는 MNIST 훈련

In [3]:
# Fetch MNIST as (images, labels) pairs for the train and test splits.
mnist = keras.datasets.mnist
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

# Rescale pixel values by 1/255 so every pixel lies in [0, 1].
train_images, test_images = train_images / 255.0, test_images / 255.0

# Small CNN: reshape to a single channel -> conv -> max-pool -> flatten -> 10 logits.
model = keras.Sequential(layers=[
    keras.layers.InputLayer(input_shape=(28, 28)),
    keras.layers.Reshape((28, 28, 1)),
    keras.layers.Conv2D(12, (3, 3), activation='relu'),
    keras.layers.MaxPooling2D((2, 2)),
    keras.layers.Flatten(),
    keras.layers.Dense(units=10),
])

# The final Dense layer has no activation, so the loss is told to expect logits.
model.compile(
    optimizer='adam',
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)
model.summary()


Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
reshape (Reshape)            (None, 28, 28, 1)         0         
_________________________________________________________________
conv2d (Conv2D)              (None, 26, 26, 12)        120       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 13, 13, 12)        0         
_________________________________________________________________
flatten (Flatten)            (None, 2028)              0         
_________________________________________________________________
dense (Dense)                (None, 10)                20290     
Total params: 20,410
Trainable params: 20,410
Non-trainable params: 0
_________________________________________________________________


In [4]:
# Train the baseline (un-pruned) model for 4 epochs; validation_split=0.1 asks
# Keras to set aside part of the training data for validation.
model.fit(train_images, train_labels, epochs=4, validation_split=0.1)

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


<tensorflow.python.keras.callbacks.History at 0x7f3f2c4825c0>

In [5]:
# Score the baseline model on the test split; only the accuracy metric is kept.
_, baseline_model_accuracy = model.evaluate(test_images, test_labels, verbose=0)
print('Baseline test accuracy:', baseline_model_accuracy)

# mkstemp() returns an already-open OS file descriptor together with the path.
# save_model() reopens the file by path, so close the descriptor right away
# instead of leaking one handle every time this cell runs.
keras_fd, keras_file = tempfile.mkstemp('.h5')
os.close(keras_fd)
tf.keras.models.save_model(model, keras_file, include_optimizer=False)
print('Saved baseline model to:', keras_file)

Baseline test accuracy: 0.9767000079154968
Saved baseline model to: /tmp/tmpowb9abm7.h5


# Pruning이 적용되는 모델 정의

* 시작은 이전 단계에서 4 epochs 로 학습된 pre-trained model

In [6]:
import tensorflow_model_optimization as tfmot

# Prune the pre-trained model over 2 additional epochs.
batch_size, epochs, validation_split = 128, 2, 0.1

# Total number of training steps: only the non-validation share of the images
# is trained on, and every epoch takes ceil(num_images / batch_size) batches.
num_images = (1 - validation_split) * train_images.shape[0]
end_step = epochs * np.ceil(num_images / batch_size).astype(np.int32)

# Pruning configuration: sparsity goes from 50% at step 0 to 80% at end_step.
pruning_params = dict(
    pruning_schedule=tfmot.sparsity.keras.PolynomialDecay(
        initial_sparsity=0.50,
        final_sparsity=0.80,
        begin_step=0,
        end_step=end_step,
    ),
)

model_for_pruning = tfmot.sparsity.keras.prune_low_magnitude(model, **pruning_params)

# The wrapped model has to be compiled again before training.
model_for_pruning.compile(
    optimizer='adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)

# Masks were added, so the non-trainable weight count grows by roughly the
# number of trainable weights.
model_for_pruning.summary()

Instructions for updating:
Please use `layer.add_weight` method instead.
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
prune_low_magnitude_reshape  (None, 28, 28, 1)         1         
_________________________________________________________________
prune_low_magnitude_conv2d ( (None, 26, 26, 12)        230       
_________________________________________________________________
prune_low_magnitude_max_pool (None, 13, 13, 12)        1         
_________________________________________________________________
prune_low_magnitude_flatten  (None, 2028)              1         
_________________________________________________________________
prune_low_magnitude_dense (P (None, 10)                40572     
Total params: 40,805
Trainable params: 20,410
Non-trainable params: 20,395
_________________________________________________________________


In [7]:
# Fresh temporary directory that receives the pruning summaries; it is the
# directory later handed to TensorBoard via --logdir.
logdir = tempfile.mkdtemp()
print(logdir)

# Callbacks used while fine-tuning with pruning: one updates the pruning step,
# the other writes pruning summaries into logdir.
callbacks = [
    tfmot.sparsity.keras.UpdatePruningStep(),
    tfmot.sparsity.keras.PruningSummaries(log_dir=logdir),
]

model_for_pruning.fit(
    x=train_images,
    y=train_labels,
    batch_size=batch_size,
    epochs=epochs,
    validation_split=validation_split,
    callbacks=callbacks,
)

/tmp/tmpu4he84lc
Epoch 1/2
Instructions for updating:
use `tf.profiler.experimental.stop` instead.
Epoch 2/2


<tensorflow.python.keras.callbacks.History at 0x7f3fadf01588>

In [8]:
# Evaluate the pruned model on the same test split as the baseline; the first
# returned value is discarded and the accuracy metric is kept.
_, model_for_pruning_accuracy = model_for_pruning.evaluate(
   test_images, test_labels, verbose=0)

# Print both accuracies next to each other to show the cost of pruning.
print('Baseline test accuracy:', baseline_model_accuracy) 
print('Pruned test accuracy:', model_for_pruning_accuracy)

Baseline test accuracy: 0.9767000079154968
Pruned test accuracy: 0.9714999794960022


In [9]:
%tensorboard --logdir={logdir}

UsageError: Line magic function `%tensorboard` not found.


# 가지치기로 3배 더 작은 모델 생성

* pruning 학습 그 자체로는 모델이 작아지지 않는다 (오히려 학습시 사용하는 mask 때문에 더 커짐)
* pruned된 weight만 저장하여 3배 더 작은 모델을 생성해야 한다.

In [13]:
# Build a compressible model for TensorFlow (before zipping, its size is
# almost the same as the earlier one).
model_for_export = tfmot.sparsity.keras.strip_pruning(model_for_pruning)

# mkstemp() returns an open descriptor plus the path; save_model() reopens the
# file by path, so the descriptor is closed immediately instead of being leaked.
pruned_keras_fd, pruned_keras_file = tempfile.mkstemp('.h5')
os.close(pruned_keras_fd)
tf.keras.models.save_model(model_for_export, pruned_keras_file, include_optimizer=False)
print('Saved pruned Keras model to:', pruned_keras_file)

Saved pruned Keras model to: /tmp/tmph5m00qiy.h5


In [14]:
# Build a compressible model for TF Lite.

converter = tf.lite.TFLiteConverter.from_keras_model(model_for_export)
pruned_tflite_model = converter.convert()

# mkstemp() returns an already-open descriptor. Write through it with
# os.fdopen so it is closed when the `with` block exits, instead of opening the
# path a second time and leaking the first handle.
pruned_tflite_fd, pruned_tflite_file = tempfile.mkstemp('.tflite')

with os.fdopen(pruned_tflite_fd, 'wb') as f:
    f.write(pruned_tflite_model)

print('Saved pruned TFLite model to:', pruned_tflite_file)

INFO:tensorflow:Assets written to: /tmp/tmp43e9c37p/assets


INFO:tensorflow:Assets written to: /tmp/tmp43e9c37p/assets


Saved pruned TFLite model to: /tmp/tmp_isgptzr.tflite


In [15]:
# Helper that actually compresses a model file and measures the compressed size.

def get_gzipped_model_size(file):
    """Return the size in bytes of `file` after DEFLATE compression.

    Despite the name, the file is stored in a single-member zip archive
    (zipfile.ZIP_DEFLATED) written to a temporary path, and the size of that
    archive on disk is reported.

    Args:
        file: Path of the file to compress.

    Returns:
        Size of the temporary zip archive in bytes.
    """
    import os
    import tempfile
    import zipfile

    # mkstemp() returns an open descriptor plus the path. ZipFile reopens the
    # file by path, so close the descriptor immediately instead of leaking it.
    fd, zipped_file = tempfile.mkstemp('.zip')
    os.close(fd)
    try:
        with zipfile.ZipFile(zipped_file, 'w', compression=zipfile.ZIP_DEFLATED) as f:
            f.write(file)
        return os.path.getsize(zipped_file)
    finally:
        # The archive exists only to be measured; do not leave it behind.
        os.remove(zipped_file)

In [16]:
# Report the compressed size of the baseline Keras file, the pruned Keras file
# and the pruned TFLite file, each measured with get_gzipped_model_size.
print("Size of gzipped baseline Keras model: %.2f bytes" % (get_gzipped_model_size(keras_file)))
print("Size of gzipped pruned Keras model: %.2f bytes" % (get_gzipped_model_size(pruned_keras_file)))
print("Size of gzipped pruned TFlite model: %.2f bytes" % (get_gzipped_model_size(pruned_tflite_file)))

Size of gzipped baseline Keras model: 78052.00 bytes
Size of gzipped pruned Keras model: 25803.00 bytes
Size of gzipped pruned TFlite model: 24819.00 bytes


# Pruning + 훈련 후 양자화를 통해서 10배 작게 하기

In [17]:
# Convert the stripped model again, this time with the converter's default
# optimizations switched on (the post-training quantization step of this section).
converter = tf.lite.TFLiteConverter.from_keras_model(model_for_export)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
quantized_and_pruned_tflite_model = converter.convert()

# mkstemp() returns an already-open descriptor. Write through it with
# os.fdopen so it is closed when the `with` block exits, instead of opening the
# path a second time and leaking the first handle.
quantized_and_pruned_tflite_fd, quantized_and_pruned_tflite_file = tempfile.mkstemp('.tflite')

with os.fdopen(quantized_and_pruned_tflite_fd, 'wb') as f:
    f.write(quantized_and_pruned_tflite_model)

print('Saved quantized and pruned TFLite model to:', quantized_and_pruned_tflite_file)
    

INFO:tensorflow:Assets written to: /tmp/tmpaybdd_dq/assets


INFO:tensorflow:Assets written to: /tmp/tmpaybdd_dq/assets


Saved quantized and pruned TFLite model to: /tmp/tmpz8va8xqd.tflite


In [18]:
# Compare the compressed baseline Keras file with the compressed pruned and
# quantized TFLite file, each measured with get_gzipped_model_size.
print("Size of gzipped baseline Keras model: %.2f bytes" % (get_gzipped_model_size(keras_file)))
print("Size of gzipped pruned and quantized TFlite model: %.2f bytes" % (get_gzipped_model_size(quantized_and_pruned_tflite_file)))

Size of gzipped baseline Keras model: 78052.00 bytes
Size of gzipped pruned and quantized TFlite model: 8003.00 bytes


# TF에서 TFLite까지 정확도 지속성 확인

In [20]:
import numpy as np

def evaluate_model(interpreter, images=None, labels=None):
    """Run a TFLite interpreter over a set of images and return its accuracy.

    Each image is fed to the interpreter on its own with a leading batch
    dimension and cast to float32; the predicted class is the argmax of the
    first output row.

    Args:
        interpreter: Object exposing get_input_details, get_output_details,
            set_tensor, invoke and tensor (for example a tf.lite.Interpreter
            whose tensors are already allocated).
        images: Sized iterable of input images. Defaults to the notebook-level
            `test_images`.
        labels: Array-like of integer class labels aligned with `images`.
            Defaults to the notebook-level `test_labels`.

    Returns:
        Fraction of images whose predicted class equals the label.

    Raises:
        ValueError: If `images` and `labels` differ in length.
    """
    # Fall back to the notebook's test split so existing one-argument calls
    # keep working unchanged.
    if images is None:
        images = test_images
    if labels is None:
        labels = test_labels
    labels = np.asarray(labels)

    # A length mismatch would make the element-wise comparison below meaningless.
    if len(images) != labels.shape[0]:
        raise ValueError(
            'images and labels must have the same length, got %d and %d'
            % (len(images), labels.shape[0]))

    input_index = interpreter.get_input_details()[0]['index']
    output_index = interpreter.get_output_details()[0]['index']

    prediction_digits = []
    for i, test_image in enumerate(images):
        if i % 2000 == 0:
            print("Evaluation so far {n}".format(n=i))

        # Add the batch dimension and cast to float32 before feeding the model.
        batch = np.expand_dims(test_image, axis=0).astype(np.float32)
        interpreter.set_tensor(input_index, batch)

        interpreter.invoke()

        # tensor() returns a callable; row 0 of its result is this image's scores.
        output = interpreter.tensor(output_index)
        prediction_digits.append(np.argmax(output()[0]))

    accuracy = (np.array(prediction_digits) == labels).mean()
    return accuracy

In [21]:
# Load the pruned + quantized TFLite model from its in-memory bytes and
# allocate its tensors before running inference.
interpreter = tf.lite.Interpreter(model_content=quantized_and_pruned_tflite_model)
interpreter.allocate_tensors()

# Accuracy of the TFLite model as computed by evaluate_model.
test_accuracy = evaluate_model(interpreter)

# Print it next to the accuracy of the pruned Keras model for comparison.
print('Pruned and quantized TFLite test_accuracy:', test_accuracy)
print('Pruned TF test accuracy:', model_for_pruning_accuracy)

Evaluation so far 0
Evaluation so far 2000
Evaluation so far 4000
Evaluation so far 6000
Evaluation so far 8000
Pruned and quantized TFLite test_accuracy: 0.9713
Pruned TF test accuracy: 0.9714999794960022
