In [1]:
import math
import os
import sys

import numpy as np
from matplotlib import pyplot
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.layers import SeparableConv2D
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical

# Set to True when running on Google Colab so that Google Drive gets mounted
# at /content/gdrive; leave False for a local run (the Colab-only import is
# then never executed).
google_colab = False
if google_colab:
  from google.colab import drive
  drive.mount('/content/gdrive')

2021-11-02 11:34:17.967183: W tensorflow/stream_executor/platform/default/dso_loader.cc:59] Could not load dynamic library 'libcudart.so.10.1'; dlerror: libcudart.so.10.1: cannot open shared object file: No such file or directory
2021-11-02 11:34:17.967224: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [2]:
import struct

def bin2float(b):
    ''' Decode a binary-digit string into the float it encodes.

    The digits are read as an unsigned integer, laid out as 8 big-endian
    bytes and reinterpreted as a double-precision float.

    Attributes:
        :b: String of '0'/'1' characters (at most 64 significant bits).
    '''
    as_integer = int(b, 2)
    raw_bytes = as_integer.to_bytes(8, byteorder="big")
    (value,) = struct.unpack('>d', raw_bytes)
    return value


def float2bin(f):
    ''' Render a float as its 64-character binary representation.

    The float is packed as a big-endian double and the resulting 8 bytes are
    printed as a zero-padded string of binary digits.

    Attributes:
        :f: Float number to transform.
    '''
    packed = struct.pack(">d", f)
    as_integer = int.from_bytes(packed, byteorder="big")
    return format(as_integer, '064b')

In [3]:
def quantize_float (float_number, mantissa_bits):
    ''' Round a float to a reduced number of explicit mantissa bits.

    The value is viewed as ``sign * 1.f * 2**e`` and the fraction ``f`` is
    rounded to ``mantissa_bits`` binary digits (ties move away from zero); a
    carry out of the fraction bumps the exponent.

    Attributes:
        :float_number: Number to quantize (anything ``float()`` accepts).
        :mantissa_bits: Fraction bits to keep; negative values count as 0 and
            52 or more leave the value unchanged.

    Returns the quantized value as a Python float. Zeros, infinities and NaN
    are returned unchanged, and a result too large for a double becomes a
    signed infinity.
    '''
    value = float(float_number)
    # Zero has no leading 1 to normalise and inf/NaN carry no mantissa to
    # round, so pass them straight through.
    if value == 0.0 or math.isinf(value) or math.isnan(value):
        return value
    mantissa_bits = max(int(mantissa_bits), 0)
    # A double stores 52 fraction bits; keeping that many (or more) is a no-op.
    if 52 <= mantissa_bits:
        return value
    # frexp yields |value| == fraction * 2**exponent with 0.5 <= fraction < 1,
    # which also normalises subnormal inputs.
    fraction, exponent = math.frexp(abs(value))
    # Scale the significand 1.f (== 2 * fraction) so that the kept bits form
    # the integer part, then round half up. Power-of-two scaling is exact.
    steps = math.floor(math.ldexp(fraction, mantissa_bits + 1) + 0.5)
    try:
        magnitude = math.ldexp(float(steps), exponent - 1 - mantissa_bits)
    except OverflowError:
        # Rounding up carried past the largest finite double.
        magnitude = math.inf
    return math.copysign(magnitude, value)

def _quantize_array(values, mantissa_bits):
  ''' Return a same-shape, same-dtype copy of ``values`` with every element
  passed through quantize_float exactly once.
  '''
  quantized = np.fromiter(
      (quantize_float(value, mantissa_bits) for value in values.flat),
      dtype=values.dtype)
  # ``.flat`` walks the array in C order, which is the order reshape restores.
  return quantized.reshape(values.shape)


def quantize_model(model, mantissa_bits):
  ''' Quantize, in place, the weights of every Conv2D layer of a Keras model.

  Each weight array the layer reports (kernel and, when present, bias) is
  rounded element-wise with quantize_float and written back through
  ``set_weights``. Layers of other types are left untouched.

  Attributes:
      :model: Keras model whose ``layers`` are scanned.
      :mantissa_bits: Mantissa bits to keep, forwarded to quantize_float.
  '''
  for layer in model.layers:
    if not isinstance(layer, tf.keras.layers.Conv2D):
      continue
    layer_weights = layer.get_weights()
    if not layer_weights:
      # Nothing to quantize (e.g. a layer that has not been built yet).
      continue
    # Every array is handled on its own, so the bias is quantized once rather
    # than once per kernel element, and bias-free layers are covered too.
    layer.set_weights(
        [_quantize_array(weights, mantissa_bits) for weights in layer_weights])

In [4]:
# Base name of the experiment: it is used both as the output directory and as
# the stem of every artifact written into it (plot, .h5 model, .tflite files).
filename = "tiny_vgg"

In [5]:
# Early-stopping callback watching the validation loss: configured with a
# minimum improvement of 1e-3, a patience of 5 epochs, and restoring the best
# weights seen when training is halted.
monitor = EarlyStopping(monitor='val_loss', min_delta=1e-3, patience=5, verbose=1, mode='auto', restore_best_weights=True)

In [6]:
# Notebook-level state shared with MyCallback.
epoch_count = 0      # epochs completed in the current fit() run
quantize = False     # True once per-batch quantization is active
mantissa_bits = 2    # mantissa bits kept when quantizing Conv2D weights
class MyCallback(tf.keras.callbacks.Callback):
  ''' Keras callback that trains with quantized Conv2D weights.

  Quantization is switched on once two epochs have completed: from then on
  the model is re-quantized after every batch and every epoch. The model is
  always quantized once more when training ends.
  '''
  def on_train_begin(self, logs=None):
    # Start every fit() run from the same state; otherwise a second run in the
    # same kernel would inherit quantize=True and skip the warm-up epochs.
    global epoch_count
    global quantize
    epoch_count = 0
    quantize = False
  def on_epoch_end(self, epoch, logs=None):
    global epoch_count
    global quantize
    epoch_count += 1
    # Becomes True at the end of the second epoch of the run.
    quantize = 1 < epoch_count
    if quantize:
      quantize_model(self.model, mantissa_bits)
  def on_train_end(self, logs=None):
    # Guarantee that the final weights are quantized.
    quantize_model(self.model, mantissa_bits)
  def on_batch_end(self, batch, logs=None):
    if quantize:
      quantize_model(self.model, mantissa_bits)


In [28]:
# Make sure the output directory exists. exist_ok=True makes the call
# idempotent and removes the gap between checking for and creating the folder.
os.makedirs(filename, exist_ok=True)

# load train and test dataset
def load_dataset():
	"""Fetch CIFAR-10 and one-hot encode its labels.

	Returns the tuple (train images, train labels, test images, test labels),
	where both label arrays have been passed through ``to_categorical``.
	"""
	train_split, test_split = cifar10.load_data()
	train_images, train_labels = train_split
	test_images, test_labels = test_split
	# Labels become one-hot rows; images are returned exactly as loaded.
	train_labels = to_categorical(train_labels)
	test_labels = to_categorical(test_labels)
	return train_images, train_labels, test_images, test_labels

# scale pixels
def prep_pixels(train, test):
	"""Convert two pixel arrays to float32 and divide them by 255.

	Both inputs are left unmodified; the scaled copies are returned as the
	tuple (train, test).
	"""
	def to_unit_range(pixels):
		# Cast first so that the division is carried out in float32.
		as_float = pixels.astype('float32')
		return as_float / 255.0

	return to_unit_range(train), to_unit_range(test)

# define cnn model
def define_model_CNN():
	"""Build and compile the small VGG-style CNN for 32x32x3 inputs.

	Four 3x3 convolution + 2x2 max-pooling stages (40, 50, 60, 60 filters) are
	followed by a 120-filter convolution, a 120-unit dense layer and a
	10-way softmax. The model is compiled with Adam and categorical
	cross-entropy, tracking accuracy.
	"""
	conv_options = dict(activation='relu', kernel_initializer='he_uniform', padding='same')
	network = Sequential()
	# Only the first layer declares the input shape.
	network.add(Conv2D(40, (3, 3), input_shape=(32, 32, 3), **conv_options))
	network.add(MaxPooling2D((2, 2)))
	for filters in (50, 60, 60):
		network.add(Conv2D(filters, (3, 3), **conv_options))
		network.add(MaxPooling2D((2, 2)))
	# The last convolution is not followed by pooling.
	network.add(Conv2D(120, (3, 3), **conv_options))
	network.add(Flatten())
	network.add(Dense(120, activation='relu', kernel_initializer='he_uniform'))
	network.add(Dense(10, activation='softmax'))
	# Adam replaced the earlier SGD(learning_rate=0.001, momentum=0.9) setup.
	network.compile(optimizer="adam", loss='categorical_crossentropy', metrics=['accuracy'])
	return network

# plot diagnostic learning curves
def summarize_diagnostics(history):
	"""Save the learning curves of a training run as a PNG.

	Two stacked panels are drawn, loss on top and accuracy below, each with the
	training curve in blue and the validation curve in orange. The figure is
	written to ``<filename>/<filename>_plot.png`` and then closed.

	history: object returned by ``model.fit``; its ``history`` dict must hold
	the 'loss', 'val_loss', 'accuracy' and 'val_accuracy' series.
	"""
	# A dedicated figure keeps the plot independent of any figure left open.
	figure, (loss_axis, accuracy_axis) = pyplot.subplots(2, 1)
	# plot loss
	loss_axis.set_title('Cross Entropy Loss')
	loss_axis.plot(history.history['loss'], color='blue', label='train')
	loss_axis.plot(history.history['val_loss'], color='orange', label='test')
	loss_axis.legend()
	# plot accuracy
	accuracy_axis.set_title('Classification Accuracy')
	accuracy_axis.plot(history.history['accuracy'], color='blue', label='train')
	accuracy_axis.plot(history.history['val_accuracy'], color='orange', label='test')
	accuracy_axis.set_xlabel('epoch')
	accuracy_axis.legend()
	# Keep the lower title from overlapping the upper panel's tick labels.
	figure.tight_layout()
	# save plot to file
	figure.savefig(filename + '/' + filename + '_plot.png')
	pyplot.close(figure)

# run the test harness for evaluating a model
def run_test_harness():
	"""Train the CNN with the quantization callback, then report and save it.

	Steps: load and scale CIFAR-10, build the model, fit for 20 epochs with
	MyCallback attached, print the test accuracy as a percentage, write the
	learning-curve plot and store the model as ``<filename>/<filename>.h5``.
	"""
	# Data: load, then scale pixels
	train_images, train_labels, test_images, test_labels = load_dataset()
	train_images, test_images = prep_pixels(train_images, test_images)
	# Model and training
	network = define_model_CNN()
	training_log = network.fit(
		train_images, train_labels,
		epochs=20, batch_size=64,
		validation_data=(test_images, test_labels),
		verbose=1, callbacks=[MyCallback()])
	# Evaluation on the test split
	loss_value, accuracy = network.evaluate(test_images, test_labels, verbose=0)
	print('> %.3f' % (accuracy * 100.0))
	# Artifacts: learning curves and the full model in HDF5 format
	summarize_diagnostics(training_log)
	network.save(filename + "/" + filename + '.h5')

In [29]:
# Entry point of the experiment. This cell repeats the steps of
# run_test_harness() at notebook level so that trainX/testX/testY, model and
# history stay available to later cells. As run here, only the early-stopping
# callback `monitor` is attached (no MyCallback quantization).
# load dataset
trainX, trainY, testX, testY = load_dataset()

# prepare pixel data
trainX, testX = prep_pixels(trainX, testX)

# define model
model = define_model_CNN()
# fit model
# NOTE(review): the test split doubles as validation data, so early stopping is
# driven by the same images used for the reported accuracy; confirm intended.
# The commented-out calls are alternative runs (with MyCallback / no callbacks).
#history = model.fit(trainX, trainY, epochs=20, batch_size=8, validation_data=(testX, testY), verbose=1, callbacks=[MyCallback(), monitor])
history = model.fit(trainX, trainY, epochs=20, batch_size=100, validation_data=(testX, testY), verbose=1, callbacks=[monitor])
#history = model.fit(trainX, trainY, epochs=15, batch_size=100, validation_data=(testX, testY), verbose=1, callbacks=[MyCallback()])
#history = model.fit(trainX, trainY, epochs=15, batch_size=100, validation_data=(testX, testY), verbose=1)

# evaluate model on the test split and print the accuracy as a percentage
_, acc = model.evaluate(testX, testY, verbose=0)
print('> %.3f' % (acc * 100.0))
# learning curves (written to <filename>/<filename>_plot.png)
summarize_diagnostics(history)
# Save the entire model to a HDF5 file.
model.save(filename + "/" + filename + '.h5')

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 00013: early stopping
> 72.390


In [30]:
from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import load_model
import numpy as np

# Label names, presumably in CIFAR-10 class-index order (verify against the
# dataset). The list is not used by the code below, which prints raw scores.
class_names = ['airplane', 'automobile', 'bird', 'cat', 'deer',
               'dog', 'frog', 'horse', 'ship', 'truck']

# Load one image, resize it to the 32x32 network input, add a batch axis and
# scale it to [0, 1] as float32, the same scaling prep_pixels() applies.
# `x` is reused later as the TensorFlow Lite interpreter input.
img_path = "dog.jpg"
img = image.load_img(img_path, target_size=(32, 32))
x = image.img_to_array(img)
x = np.expand_dims(x, axis=0)
x = x.astype('float32')
x = x / 255.0

# Reload the model saved by the training cell; this rebinds the notebook-level
# name `model`.
model = load_model(filename + "/" + filename + '.h5')

# One row of 10 class scores (softmax outputs).
output_data = model.predict(x)

print('TensorFlow Predicted:', output_data)

print("Done!")

TensorFlow Predicted: [[1.2714170e-03 6.4237749e-05 2.5651827e-01 1.3353398e-01 2.4132918e-01
  1.5140045e-01 4.0277380e-02 1.7277840e-01 1.6187595e-03 1.2079814e-03]]
Done!


In [31]:
def print_model(model):
  """Print the kernel and bias arrays of every Conv2D layer in ``model``.

  Layers of other types, and Conv2D layers that do not expose exactly two
  weight arrays, are skipped.
  """
  for layer in model.layers:
    if not isinstance(layer, tf.keras.layers.Conv2D):
      continue
    weights = layer.get_weights()
    if len(weights) != 2:
      continue
    kernel, bias = weights
    print (kernel)
    print (bias)

In [32]:
# Dump the Conv2D kernels and biases of the current `model`, then re-evaluate
# it on the test split and print the accuracy as a percentage.
print_model (model)
_, acc = model.evaluate(testX, testY, verbose=0)
print('> %.3f' % (acc * 100.0))

[[[[-0.17313585 -0.4730001   0.09522478 ...  0.221571   -0.36475548
    -0.15954335]
   [ 0.18617469  0.05973793 -0.2755211  ... -0.19283012  0.39018962
     0.25339538]
   [ 0.2566992  -0.35420078  0.2826037  ... -0.15302898  0.23398952
    -0.2548966 ]]

  [[ 0.32719958  0.05793207  0.28410858 ...  0.0903483  -0.42995209
     0.30832762]
   [ 0.34696755  0.05606928  0.33908302 ... -0.18209797  0.19997977
    -0.21564764]
   [ 0.2903308   0.16964306 -0.35185635 ... -0.12426452 -0.43748933
     0.33033544]]

  [[ 0.12616694 -0.29489005  0.03006172 ...  0.27736777  0.09093908
    -0.53201765]
   [ 0.00667204  0.43232036 -0.20501167 ...  0.2236231   0.41280004
     0.32302523]
   [ 0.02313158  0.40888655 -0.02568693 ... -0.28755662  0.3731589
     0.2263091 ]]]


 [[[-0.4835316   0.2505895  -0.38957202 ...  0.1448418   0.33771473
     0.04717212]
   [ 0.09083016  0.42694005 -0.18707928 ...  0.33675405 -0.20298956
     0.18005446]
   [-0.5130808  -0.3371796   0.37311298 ... -0.37485963 -0

In [33]:
import tensorflow as tf
from tensorflow.keras import datasets, layers, models, utils

# The artifacts of the training cell are required. Raise instead of calling
# exit(): in a notebook kernel exit() requests a kernel shutdown rather than
# cleanly aborting the cell.
if not os.path.exists(filename):
  raise FileNotFoundError("Folder " + filename + " does not exist.")

# Recreate the exact same model, including its weights and the optimizer
model = tf.keras.models.load_model(filename + "/" + filename + '.h5')

model.summary()

# Render the architecture diagram next to the saved model.
tf.keras.utils.plot_model(model, filename + "/" + filename + ".png", show_shapes=True, show_layer_names=True, expand_nested=True)

(train_images, train_labels), (test_images, test_labels) = datasets.cifar10.load_data()

# Normalize pixel values to be between 0 and 1, in float32 exactly as
# prep_pixels() does for training; this also avoids building float64 copies
# of the whole dataset.
train_images, test_images = train_images.astype('float32') / 255.0, test_images.astype('float32') / 255.0

train_images = tf.cast(train_images, tf.float32)
# Single-image batches matching the model's (1, 32, 32, 3) input.
cifar_ds = tf.data.Dataset.from_tensor_slices((train_images)).batch(1)
def representative_dataset():
  """Yield 100 training images, one per step, for converter calibration."""
  for input_value in cifar_ds.take(100):
    # Model has only one input so each data point has one element.
    yield [input_value]

# Convert the model to a float32 TensorFlow Lite flatbuffer. The quantization
# options stay disabled here; the int8 variant is produced in a later cell.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
#converter.optimizations = [tf.lite.Optimize.DEFAULT]
#converter.representative_dataset = representative_dataset
#converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.inference_input_type = tf.float32  # or tf.uint8
converter.inference_output_type = tf.float32  # or tf.uint8
tflite_model = converter.convert()

# Save the model.
with open(filename + "/" + filename + "_f32" + '.tflite', 'wb') as f:
  f.write(tflite_model)

print("Done!")

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_14 (Conv2D)           (None, 32, 32, 40)        1120      
_________________________________________________________________
max_pooling2d_9 (MaxPooling2 (None, 16, 16, 40)        0         
_________________________________________________________________
conv2d_15 (Conv2D)           (None, 16, 16, 50)        18050     
_________________________________________________________________
max_pooling2d_10 (MaxPooling (None, 8, 8, 50)          0         
_________________________________________________________________
conv2d_16 (Conv2D)           (None, 8, 8, 60)          27060     
_________________________________________________________________
max_pooling2d_11 (MaxPooling (None, 4, 4, 60)          0         
_________________________________________________________________
conv2d_17 (Conv2D)           (None, 4, 4, 60)         

INFO:tensorflow:Assets written to: /tmp/tmpj_vgwqi3/assets
2021-11-02 14:53:30.867020: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-11-02 14:53:30.889287: I tensorflow/core/grappler/devices.cc:69] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0
2021-11-02 14:53:30.940920: I tensorflow/core/grappler/clusters/single_machine.cc:356] Starting new session
2021-11-02 14:53:31.009126: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-11-02 14:53:31.009930: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1716] Found device 0 with properties: 
pciBusID: 0000:01:00.0 name: GeForce GTX 860M computeCapability: 5.0
coreClock: 1.0195GHz coreCount: 5 deviceMemorySize: 1.96GiB devi

Done!


In [13]:
# Convert the model again, this time with quantization enabled: default
# optimizations, calibration through representative_dataset(), and the op set
# restricted to TFLITE_BUILTINS_INT8. Input and output tensors stay float32.
# Relies on `model` and `representative_dataset` from the float32 conversion cell.
# NOTE(review): this cell's execution count (13) is lower than the training
# cell's (29), so its saved output may come from an older model; re-run it
# after retraining.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_dataset
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.inference_input_type = tf.float32  # or tf.uint8
converter.inference_output_type = tf.float32  # or tf.uint8
tflite_model = converter.convert()

# Save the quantized model next to the float32 one, with the "_i8" suffix.
with open(filename + "/" + filename + "_i8" + '.tflite', 'wb') as f:
  f.write(tflite_model)

print("Done!")

INFO:tensorflow:Assets written to: /tmp/tmpju81qx8i/assets


INFO:tensorflow:Assets written to: /tmp/tmpju81qx8i/assets
2021-11-02 11:55:45.721125: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-11-02 11:55:45.721855: I tensorflow/core/grappler/devices.cc:69] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0
2021-11-02 11:55:45.721957: I tensorflow/core/grappler/clusters/single_machine.cc:356] Starting new session
2021-11-02 11:55:45.722666: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-11-02 11:55:45.723316: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1716] Found device 0 with properties: 
pciBusID: 0000:01:00.0 name: GeForce GTX 860M computeCapability: 5.0
coreClock: 1.0195GHz coreCount: 5 deviceMemorySize: 1.96GiB devi

Done!


In [14]:

# Load the int8 TFLite model written by the previous cell and allocate its tensors.
interpreter = tf.lite.Interpreter(model_path=filename + "/" + filename + "_i8" + '.tflite')
interpreter.allocate_tensors()

# Get input and output tensor descriptions; their 'index' entries are used to
# feed and read the interpreter in the next cell.
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

In [15]:

# Run the TFLite model on `x`, the preprocessed image built in the Keras
# prediction cell, and print the scores read from the first output tensor.
interpreter.set_tensor(input_details[0]['index'], x)
interpreter.invoke()
output_data = interpreter.get_tensor(output_details[0]['index'])
print('TensorFlow Lite Predicted:', output_data)

TensorFlow Lite Predicted: [[0.         0.         0.00390625 0.0234375  0.         0.05859375
  0.9140625  0.         0.         0.        ]]
