In [2]:
import time
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.datasets import cifar100
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input as premobilenet
from tensorflow.keras.applications.xception import Xception
from tensorflow.keras.applications.xception import preprocess_input as prexception

from tensorflow.keras.applications.nasnet import NASNetLarge
from tensorflow.keras.applications.nasnet import preprocess_input as prenasnet
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model

In [3]:
model_no = 1
model_type = [MobileNetV2, Xception, NASNetLarge][model_no]
preprocess_input = [premobilenet, prexception, prenasnet][model_no]
model_name = ['MobileNetV2', 'Xception', 'NASNetLarge'][model_no]


In [4]:
# load the MobileNetV2 model with pre-trained weights from ImageNet and remove the last layer
input_shape = (96, 96)
base_model = model_type(weights='imagenet', include_top=False, input_shape=input_shape+(3,))
base_model.trainable = False


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/xception/xception_weights_tf_dim_ordering_tf_kernels_notop.h5


In [2]:
device_name = tf.test.gpu_device_name()
if "GPU" not in device_name:
    print("GPU device not found")
print('Found GPU at: {}'.format(device_name))

print(tf.config.list_physical_devices('GPU'))


Found GPU at: /device:GPU:0
[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [5]:
# load and preprocess the CIFAR100 dataset
(x_train, y_train), (x_test, y_test) = cifar100.load_data()

print(x_train.dtype)
data_augmentation = keras.Sequential(
    
    [layers.RandomFlip("horizontal"),
    layers.RandomRotation(0.1),
    layers.Resizing(*input_shape, interpolation="bilinear", crop_to_aspect_ratio=False),
    layers.RandomCrop(*input_shape, seed=None)

    ]
)

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz
uint8


In [7]:
# add new classification layers for CIFAR100
inputs = keras.Input(shape=(32,32,3))
x = preprocess_input(inputs)
x = data_augmentation(x)
x = base_model(x, training=False)
x = GlobalAveragePooling2D()(x)
x = Dense(256, activation='relu')(x)
x = keras.layers.Dropout(0.2)(x)
outputs = Dense(100, activation='softmax')(x)
model = Model(inputs, outputs)

# compile the model
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy'])


In [8]:
# train the model and evaluate its accuracy and inference time
with tf.device("/device:GPU:0"):
    model.fit(x_train, y_train, epochs=10, batch_size=32, validation_data=(x_test, y_test))


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [9]:
base_model.trainable = True

model.compile(optimizer=tf.keras.optimizers.Adam(1e-5),  # Very low learning rate
              loss='sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy'])    
with tf.device("/device:GPU:0"):
    model.fit(x_train, y_train, epochs=15, batch_size=32, validation_data=(x_test, y_test), ) # 20 epochs


"""
start_time = time.time()
loss, accuracy = model.evaluate(x_test, y_test)
end_time = time.time()
inference_time = end_time - start_time
print('Accuracy:', accuracy)
print('Inference time:', inference_time)

"""

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


"\nstart_time = time.time()\nloss, accuracy = model.evaluate(x_test, y_test)\nend_time = time.time()\ninference_time = end_time - start_time\nprint('Accuracy:', accuracy)\nprint('Inference time:', inference_time)\n\n"

In [10]:
start_time = time.time()
loss, accuracy = model.evaluate(x_test, y_test, batch_size=1)
end_time = time.time()
inference_time = end_time - start_time
print('Accuracy:', accuracy)
print('Inference time:', inference_time/10000)
print("FPS:", 1/(inference_time/10000))

Accuracy: 0.7294999957084656
Inference time: 0.014209563064575196
FPS: 70.37514070316671


In [11]:
model.save(model_name)



In [12]:
def representative_dataset():
  for data in tf.data.Dataset.from_tensor_slices(x_train.astype(np.float32)).batch(1).take(100):
    yield [data]

converter = tf.lite.TFLiteConverter.from_saved_model(model_name)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_dataset
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.inference_input_type = tf.uint8
converter.inference_output_type = tf.uint8
tflite_quant_model = converter.convert()

In [13]:
with open(model_name+'.tflite', 'wb') as f:
  f.write(tflite_quant_model)

In [3]:
(x_train, y_train), (x_test, y_test) = cifar100.load_data()
#x_train, x_test = x_train.astype(np.int8)/255, x_test.astype(np.int8)/255
#x_train = tf.keras.applications.mobilenet_v2.preprocess_input(x_train)

# x_train, x_test = (x_train/255).astype(np.int8), (x_test/255).astype(np.int8)



Downloading data from https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz


# Test with TFLite

In [None]:
interpreter = tf.lite.Interpreter(model_path=model_name+'.tflite')

# Get input and output tensors.
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
import time

def r(im, interpreter = interpreter, print_input_type = 0):

  # Allocate tensors
  interpreter.allocate_tensors()
  # If the expected input type is int8 (quantized model), rescale data
  input_type = input_details[0]['dtype']
  if print_input_type: print(input_type)
  input_shape = input_details[0]["shape"]
  im1 = im.astype(input_type)
  #print(input_type)
  start = time.perf_counter()
  interpreter.set_tensor(input_details[0]['index'], im1.reshape(input_shape))

  # Run inference
  interpreter.invoke()

  # output_details[0]['index'] = the index which provides the input
  output = interpreter.get_tensor(output_details[0]['index'])
  end = time.perf_counter()
  return output.argmax(), (end-start)
r(x_test[1,:,:,:], print_input_type=1)

<class 'numpy.uint8'>


(33, 0.005614476000118884)

In [None]:
preds = np.zeros(10000)

start = time.perf_counter()

lat1 = 0
for i in range(10000):
  pred, lat = r(x_test[i,:,:,:])
  lat1+=lat
  preds[i] = pred

lat2 = time.perf_counter() - start

In [None]:
print(lat1, lat2)
print("fps:", 1/(lat1/10000), 1/(lat2/10000))

4.029510787999243 4.054668900000024
fps: 248.16908369577192 246.6292623794742


In [None]:
preds

array([68.,  8., 55., ..., 38., 42., 54.])

In [None]:
(y_test[:10000].T == preds).sum()
print(model_name)

6052

In [None]:
np.save('x_test.npy', x_test)

In [None]:
np.save("y_test.npy", y_test)