# Model quantization

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import numpy as np
import matplotlib.pylab as plt
import plotly.express as px
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_datasets as tfds
from sklearn import metrics
from tensorflow import keras
import os

# Load the data

In [3]:
!unzip -qu '/content/drive/MyDrive/Colab Notebooks/EEAI/Project/ASL Alphabet/ASL.zip' -d 'data'
!unzip -qu '/content/drive/MyDrive/Colab Notebooks/EEAI/Project/ASL Alphabet/ASL Real.zip' -d 'data'

In [4]:
train_path = 'data/asl_alphabet_train/asl_alphabet_train'
test_path = 'data/asl_alphabet_test/asl_alphabet_test'
real_path = 'data/asl_alphabet_real/asl_alphabet_real'

In [5]:
class_labels = ['A', 'B', 'C', 'D', 'del', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'nothing', 'O', 'P', 'Q', 'R', 'S', 'space', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z']
class_mapping = {i:label for i, label in enumerate(class_labels)}

In [6]:
split_threshold = 0.2
batch_size = 32

In [7]:
train_ds, validation_ds = keras.utils.image_dataset_from_directory(train_path, batch_size=batch_size, validation_split=split_threshold, subset='both', seed=42, class_names=class_labels)

Found 87000 files belonging to 29 classes.
Using 69600 files for training.
Using 17400 files for validation.


In [8]:
test_ds = keras.utils.image_dataset_from_directory(real_path, batch_size=batch_size, class_names=class_labels)

Found 870 files belonging to 29 classes.


# Pre-processing

In [9]:
train_ds = train_ds.prefetch(buffer_size=tf.data.AUTOTUNE)
validation_ds = validation_ds.prefetch(buffer_size=tf.data.AUTOTUNE)
test_ds = test_ds.prefetch(buffer_size=tf.data.AUTOTUNE)

In [10]:
img_shape = (256, 256, 3)
border_width = 8
num_classes = 29

In [11]:
def crop(image, label):
    ratio = (image.shape[1] - border_width) / image.shape[1]
    cropped_img = tf.image.central_crop(image, ratio)
    return cropped_img, label

In [12]:
train_ds = train_ds.map(crop, num_parallel_calls=tf.data.AUTOTUNE)
validation_ds = validation_ds.map(crop, num_parallel_calls=tf.data.AUTOTUNE)

In [13]:
model = keras.models.load_model("asl_mobilenet_tuned")

## Model evaluation

In [14]:
loss, accuracy = model.evaluate(validation_ds)
print('Validation accuracy:', accuracy)

Validation accuracy: 0.9870689511299133


In [15]:
true_labels = np.array([])
pred_labels = np.array([])
for batch in validation_ds:
  # Tuple unpacking
  images, t_labels = batch

  # Compute new labels
  p_labels = model.predict_on_batch(images)
  p_labels = np.argmax(p_labels, axis=1)

  # Concatenate in a single vector
  true_labels = np.concatenate([true_labels, t_labels])
  pred_labels = np.concatenate([pred_labels, p_labels])

In [16]:
size = 1000
cm = metrics.confusion_matrix(true_labels, pred_labels)
fig = px.imshow(cm, x=class_labels, y=class_labels, text_auto=True, width=size, height=size, color_continuous_scale='blues')
fig.update_layout(
    title_text='Multiclass confusion matrix',
    xaxis_title_text='Actual class',
    yaxis_title_text='Predicted class',
)

In [17]:
print(metrics.classification_report(true_labels, pred_labels, target_names=class_labels))

              precision    recall  f1-score   support

           A       0.99      0.99      0.99       623
           B       1.00      0.99      0.99       634
           C       1.00      1.00      1.00       601
           D       1.00      1.00      1.00       643
         del       1.00      0.99      1.00       586
           E       0.99      0.99      0.99       548
           F       1.00      1.00      1.00       646
           G       0.99      0.99      0.99       612
           H       0.99      0.99      0.99       626
           I       1.00      0.99      1.00       607
           J       1.00      1.00      1.00       626
           K       0.97      1.00      0.98       553
           L       1.00      1.00      1.00       593
           M       0.98      0.97      0.98       594
           N       0.97      0.98      0.98       555
     nothing       1.00      1.00      1.00       605
           O       1.00      1.00      1.00       595
           P       1.00    

In [18]:
correct_percentage =  cm.diagonal() / cm.sum(axis=1)
relative_errors = 1 - correct_percentage
fig = px.bar(x=class_labels, y=relative_errors, orientation='v')
fig.update_layout(
    title_text='Percentage of miss-classifications per class',
    xaxis_title_text='Class name',
    yaxis_title_text='Count',
    bargap=0.3,
)

# Quantization

### Your task starts here:
+ Run this code, and you'll have model1.tflite, with no optimization or quantization

+ Then, remove the comment on the ```converter.optimizations = []``` line. Change the model name to model2.tflite, and rerun. Model2.tflite will now have optimizations added -- you should see a much smaller file size.

+ Finally, remove the comments on the code to add a representative dataset and set the supported ops as shown. Change the model name to model3.tflite, and rerun. Model3.tflite will now have optimizations added, along with quantization from the representative dataset. Note: it might be slightly larger than model2.tflite!

**Note:** tf.lite.Optimize has changed and the OPTIMIZE_FOR_SIZE and OPTIMIZE_FOR_LATENCY options are now deprecated and are the same and DEFAULT: https://www.tensorflow.org/api_docs/python/tf/lite/Optimize

In [19]:
import pathlib
converter = tf.lite.TFLiteConverter.from_saved_model("asl_mobilenet_tuned")

# These options are for converter optimizaitons
# Consider trying the converter without them and
# explore model size and accuracy
# Then...use them and reconvert the model and explore model
# size an accuracy at that point. What differences do you see?

# converter.optimizations = [tf.lite.Optimize.DEFAULT]    # Uncomment this line for Model 2 and Model 3

# def representative_data_gen():                          # Uncomment the following 5 lines for Model 3
#     for input_value, _ in test_batches.take(100):
#         yield [input_value]
# converter.representative_dataset = representative_data_gen
# converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]

tflite_model = converter.convert()

# Without any optimizations I got
# 8857848  (model1.tflite)
# With the .optimizations property set I got
# 2629648 (model2.tflite)
# With the .optimizations property and representative data set I got
# 2835952 -- Slightly larger!  (model3.tflite)

OSError: ignored

### Your task continues
Now we will test the accuracy of the three models! After you run each model you will get the number of correct predictions and then you can plot which images were correct/incorrect!
+ Run this code
+ Change the model file to model2.tflite and run it again
+ Change the model file to model3.tflite and run it again

In [None]:
#@title Run this cell each time to test your model's accuracy (make sure to change the filename)
from tqdm import tqdm
# Load TFLite model and allocate tensors.
tflite_model_file = '/tmp/model1.tflite'                 # Change the filename here for Model 2 and 3
interpreter = tf.lite.Interpreter(model_path=tflite_model_file)
interpreter.allocate_tensors()

input_index = interpreter.get_input_details()[0]["index"]
output_index = interpreter.get_output_details()[0]["index"]

predictions = []

test_labels, test_imgs = [], []
for img, label in tqdm(test_batches.take(100)):
    interpreter.set_tensor(input_index, img)
    interpreter.invoke()
    predictions.append(interpreter.get_tensor(output_index))

    test_labels.append(label.numpy()[0])
    test_imgs.append(img)

# For model 1, I got 32.25 it/s
# For model 2, I got 16.16 it/s
# For model 3, I got 1.19s it/s
# Note: since the it/s will depend on the computer on which your Colab VM
#       instance is running -- we would expect it to vary a bit.
# Note2: Changes have been made to the TFLite Interpreter since Laurence filmed the
#        previous video that further optimize it for mobile use at the expense of
#        speed in Colab. As such, you'll find that while Laurence was able to achieve
#        16 it/s for model 2, you may only see speeds of 1-2 it/s

score = 0
for item in range(0,100):
  prediction=np.argmax(predictions[item])
  label = test_labels[item]
  if prediction==label:
    score=score+1

print("Out of 100 predictions I got " + str(score) + " correct")

# Model 1 - 100 Correct
# Model 2 - 99 Correct
# Model 3 - 99 Correct
# Note: since training starts from a random intialization it would not be
#       surprising if your result is off by 1 or 2 correct.