In [20]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, GRU, Dropout, BatchNormalization, Conv1D, MaxPooling1D, Flatten, Input, Reshape, Conv2D, ReLU, MaxPool2D, Masking
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from tensorflow.keras.preprocessing.sequence import pad_sequences
import ast

# Load and preprocess the data
# Replace 'train.csv' with the actual path to your dataset
data = pd.read_csv('dataset/combined_shuffled.csv', header = None, converters = {
    1: ast.literal_eval,
    2: ast.literal_eval,
    3: ast.literal_eval
})

df = pd.DataFrame()

df['acc_x'] = data[1]
df['acc_y'] = data[2]
df['acc_z'] = data[3]
df['gesture'] = data[0]

#remove invalid rows
df.drop(df.loc[df['acc_x']==0].index, inplace=True)
df.drop(df.loc[df['acc_y']==0].index, inplace=True)
df.drop(df.loc[df['acc_z']==0].index, inplace=True)

df = df.dropna()

# Convert the lists into arrays
acc_x = df['acc_x'].values
acc_y = df['acc_y'].values
acc_z = df['acc_z'].values

# Combine all axes into a sequence of shape (timesteps, features)
sequences = [np.array([x, y, z]).T for x, y, z in zip(acc_x, acc_y, acc_z)]

# Pad sequences to the length of the longest sequence
padded_sequences = pad_sequences(sequences, maxlen = 32, padding='post', dtype='float32')

# Encode labels
labels = df['gesture'].values
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(labels)
categorical_labels = to_categorical(encoded_labels)


In [21]:
initial_labels = categorical_labels
initial_sequences = padded_sequences

#DATA AUGMENTATION preprocessing

#adding noise
def add_noise(data, noise_level=0.05):
    return (data + np.random.normal(0, noise_level, data.shape)).astype(np.float32)

# Original data: `x_train` (accelerometer sequences), `y_train` (labels)

noised_data = add_noise(initial_sequences)

def scale_data(data, scaling_factor=0.1):
    
    factor = 1 + np.random.uniform(-scaling_factor, scaling_factor)
    return (data * factor).astype(np.float32)

def time_shift(data, max_shift_percentage=0.1):
  
  shift_amount = int(len(data) * max_shift_percentage * (2 * np.random.rand() - 1))
  shifted_data = np.roll(data, shift_amount, axis=0)
  return (shifted_data).astype(np.float32)

#categorical_labels = np.concatenate((categorical_labels, initial_labels))

"""def time_warp(data, warping_factor=0.1):
    
  timesteps = np.arange(len(data))
  warped_timesteps = timesteps + warping_factor * (np.random.rand(len(data)) - 0.5) * len(data)
  warped_timesteps = np.interp(np.arange(len(data)), np.sort(warped_timesteps), timesteps)
  warped_data = np.zeros_like(data)
  for i in range(len(data)):
    warped_data[i] = np.interp(i, timesteps, data)
  return warped_data
  """

#warped_seq = time_warp(padded_sequences)

def rotate_3d(data, max_angle_degrees=10):

  angle_x = np.random.uniform(-max_angle_degrees, max_angle_degrees) * np.pi / 180
  angle_y = np.random.uniform(-max_angle_degrees, max_angle_degrees) * np.pi / 180
  angle_z = np.random.uniform(-max_angle_degrees, max_angle_degrees) * np.pi / 180

  rotation_x = np.array([[1, 0, 0],
                        [0, np.cos(angle_x), -np.sin(angle_x)],
                        [0, np.sin(angle_x), np.cos(angle_x)]])
  rotation_y = np.array([[np.cos(angle_y), 0, np.sin(angle_y)],
                        [0, 1, 0],
                        [-np.sin(angle_y), 0, np.cos(angle_y)]])
  rotation_z = np.array([[np.cos(angle_z), -np.sin(angle_z), 0],
                        [np.sin(angle_z), np.cos(angle_z), 0],
                        [0, 0, 1]])

  rotation_matrix = np.dot(np.dot(rotation_x, rotation_y), rotation_z)
  rotated_data = np.dot(data, rotation_matrix.T)

  return (rotated_data).astype(np.float32)

rotated_seq = rotate_3d(initial_sequences)

padded_sequences = np.concatenate((padded_sequences, add_noise(padded_sequences)))
categorical_labels = np.concatenate((categorical_labels, categorical_labels))


#padded_sequences = np.concatenate((padded_sequences, scale_data(padded_sequences)))
#categorical_labels = np.concatenate((categorical_labels, categorical_labels))


#padded_sequences = np.concatenate((padded_sequences, time_shift(padded_sequences)))
#categorical_labels = np.concatenate((categorical_labels, categorical_labels))


padded_sequences = np.concatenate((padded_sequences, rotate_3d(padded_sequences)))
categorical_labels = np.concatenate((categorical_labels, categorical_labels))


In [22]:
# Split the data
X_train, X_validation, y_train, y_validation = train_test_split(
    padded_sequences, categorical_labels, test_size=0.2, random_state=42
)

X_train, X_test, y_train, y_test = train_test_split(
    X_train, y_train, test_size=0.25, random_state=42)



In [8]:

# Neural network model
model = Sequential([
    Input(shape=(32, 3)),
    # Mask padding values
    #Masking(mask_value=0.0, input_shape=(None, 3)),
    Reshape((8,4,3)),
    Conv2D(filters=16, kernel_size = (3,3), padding = "same"),
    Dropout(0.3),
    BatchNormalization(),
    ReLU(),
    Conv2D(filters=8, kernel_size = (3,3), padding= "same"),
    Dropout(0.4),
    BatchNormalization(),
    ReLU(),
    MaxPool2D(pool_size = 2),
    
    
    Flatten(),
    Dense(8),
    Dropout(0.5),
    BatchNormalization(),
    ReLU(),

    Dense(len(label_encoder.classes_), activation = 'softmax'),
])

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit(
    X_train, y_train,
    validation_data=(X_validation, y_validation),
    epochs=20,
    batch_size=32,
    verbose=1
)

# Evaluate the model
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f"Test Loss: {test_loss}, Test Accuracy: {test_accuracy}")

# Save the model
model.save('cnn_gesture_classification_model_demo.h5')

# Decode predicted labels for interpretability
predicted_classes = label_encoder.inverse_transform(np.argmax(model.predict(X_test), axis=1))


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Test Loss: 0.2177741527557373, Test Accuracy: 0.9761295914649963


  saving_api.save_model(


In [9]:
def representative_data_gen():
  for input_value in tf.data.Dataset.from_tensor_slices(padded_sequences).batch(1).take(32):
    yield [input_value]
        
# Ensure input shape is fixed
model.build(input_shape=(None, 32, 3))  # Example: Fixed length 32 timesteps, 3 features

converter = tf.lite.TFLiteConverter.from_keras_model(model)
# Apply integer quantization
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_data_gen
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]  # Ensure compatibility
converter.inference_input_type = tf.uint8  # Optional: Set input type
converter.inference_output_type = tf.uint8  # Optional: Set output type

tflite_model = converter.convert()

# Save the model
with open('cnn_gesture_classification_model_integer_demo.tflite', 'wb') as f:
    f.write(tflite_model)

print("Model with full integer quantization saved as gesture_classification_model_integer_demo.tflite")

INFO:tensorflow:Assets written to: /tmp/tmpeq63t1qj/assets


INFO:tensorflow:Assets written to: /tmp/tmpeq63t1qj/assets


Model with full integer quantization saved as gesture_classification_model_integer_demo.tflite


2025-01-08 10:51:06.421172: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:378] Ignored output_format.
2025-01-08 10:51:06.421188: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:381] Ignored drop_control_dependency.
2025-01-08 10:51:06.421401: I tensorflow/cc/saved_model/reader.cc:83] Reading SavedModel from: /tmp/tmpeq63t1qj
2025-01-08 10:51:06.422994: I tensorflow/cc/saved_model/reader.cc:51] Reading meta graph with tags { serve }
2025-01-08 10:51:06.423005: I tensorflow/cc/saved_model/reader.cc:146] Reading SavedModel debug info (if present) from: /tmp/tmpeq63t1qj
2025-01-08 10:51:06.425757: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:388] MLIR V1 optimization pass is not enabled
2025-01-08 10:51:06.427004: I tensorflow/cc/saved_model/loader.cc:233] Restoring SavedModel bundle.
2025-01-08 10:51:06.475685: I tensorflow/cc/saved_model/loader.cc:217] Running initialization op on SavedModel bundle at path: /tmp/tmpeq63t1qj
2025-01

In [10]:
import os

# Convert the model to TFLite without quantization
converter = tf.lite.TFLiteConverter.from_keras_model(model)
fp_tflite_model = converter.convert()

# Save the model to disk
open("cnn_model_demo_f32.tflite", "wb").write(fp_tflite_model)

# Show the model size for the non-quantized HDF5 model
fp_h5_in_kb = os.path.getsize('models/cnn_gesture_classification_model_demo.h5') / 1024
print("HDF5 Model size without quantization: %d KB" % fp_h5_in_kb)

# Show the model size for the non-quantized TFLite model
fp_tflite_in_kb = os.path.getsize('cnn_model_demo_f32.tflite') / 1024
print("TFLite Model size without quantization: %d KB" % fp_tflite_in_kb)

# Determine the reduction in model size
print("\nReduction in file size by a factor of %f" % (fp_h5_in_kb / fp_tflite_in_kb))

INFO:tensorflow:Assets written to: /tmp/tmptxzjnljn/assets


INFO:tensorflow:Assets written to: /tmp/tmptxzjnljn/assets


HDF5 Model size without quantization: 98 KB
TFLite Model size without quantization: 13 KB

Reduction in file size by a factor of 7.382456


2025-01-08 10:51:10.000312: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:378] Ignored output_format.
2025-01-08 10:51:10.000328: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:381] Ignored drop_control_dependency.
2025-01-08 10:51:10.000430: I tensorflow/cc/saved_model/reader.cc:83] Reading SavedModel from: /tmp/tmptxzjnljn
2025-01-08 10:51:10.002001: I tensorflow/cc/saved_model/reader.cc:51] Reading meta graph with tags { serve }
2025-01-08 10:51:10.002016: I tensorflow/cc/saved_model/reader.cc:146] Reading SavedModel debug info (if present) from: /tmp/tmptxzjnljn
2025-01-08 10:51:10.006015: I tensorflow/cc/saved_model/loader.cc:233] Restoring SavedModel bundle.
2025-01-08 10:51:10.050892: I tensorflow/cc/saved_model/loader.cc:217] Running initialization op on SavedModel bundle at path: /tmp/tmptxzjnljn
2025-01-08 10:51:10.062596: I tensorflow/cc/saved_model/loader.cc:316] SavedModel load for tags { serve }; Status: success: OK. Took 62166 m

In [11]:
interpreter = tf.lite.Interpreter(model_content=tflite_model)
interpreter.allocate_tensors()

input_type = interpreter.get_input_details()[0]['dtype']
print('input: ', input_type)
output_type = interpreter.get_output_details()[0]['dtype']
print('output: ', output_type)

# Show the model size for the 8-bit quantized TFLite model
tflite_quant_in_kb = os.path.getsize('models/cnn_gesture_classification_model_integer_demo.tflite') / 1024
print("TFLite Model size with 8-bit quantization: %d KB" % tflite_quant_in_kb)


input:  <class 'numpy.uint8'>
output:  <class 'numpy.uint8'>
TFLite Model size with 8-bit quantization: 8 KB


INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


In [12]:
# Helper function to run inference on a TFLite model

test_sequence_indices = range(X_test.shape[0])

def run_tflite_model(tflite_file, test_image_indices):
  global X_test

  # Initialize the interpreter
  interpreter = tf.lite.Interpreter(model_path=str(tflite_file))
  interpreter.allocate_tensors()

  input_details = interpreter.get_input_details()[0]
  output_details = interpreter.get_output_details()[0]

  predictions = []
  for i, test_sequence_index in enumerate(test_sequence_indices):
    test_sequence = X_test[test_sequence_index]
    test_label = y_test[test_sequence_index]

    if (test_sequence_index % 100 == 0):
      print("Evaluated on %d sequences." % test_sequence_index)

    # Check if the input type is quantized, then rescale input data to uint8
    if input_details['dtype'] == np.uint8:
      input_scale, input_zero_point = input_details["quantization"]
      test_sequence = test_sequence / input_scale + input_zero_point

    test_sequence = np.expand_dims(test_sequence, axis=0).astype(input_details["dtype"])
    interpreter.set_tensor(input_details["index"], test_sequence)
    interpreter.invoke()
    output = interpreter.get_tensor(output_details["index"])[0]

    predictions.append(np.argmax(output, axis=-1))

  return predictions


# Helper function to evaluate a TFLite model on all images
def evaluate_model(tflite_file, model_type):
  global X_test
  global y_test

  test_sequence_indices = range(X_train.shape[0])
  predictions = run_tflite_model(tflite_file, test_sequence_indices)
    
  labels = np.argmax(y_test, axis=1)

  accuracy = (np.sum(labels == predictions) * 100) / len(X_test)

  print('%s model accuracy is %.4f%% (Number of test samples=%d)' % (
      model_type, accuracy, len(X_test)))

input_details = interpreter.get_input_details()[0]
scale, zero_point = input_details["quantization"]    

In [13]:
import pathlib
# this might take a few minutes (~ 1- 2 minutes)
# if it takes longer than that, I suggest to 
# restart the runtime and try again
# if the issue still persists, restart your computer
tflite_model_quant_int8_file = pathlib.Path('models/cnn_gesture_classification_model_integer_demo.tflite')
tflite_model_quant_int8_model_type = "Full Post-Quantized INT8"

evaluate_model(tflite_model_quant_int8_file, tflite_model_quant_int8_model_type)

input_details = interpreter.get_input_details()[0]
input_scale, input_zero_point = input_details["quantization"]




Evaluated on 0 sequences.
Evaluated on 100 sequences.
Evaluated on 200 sequences.
Evaluated on 300 sequences.
Evaluated on 400 sequences.
Evaluated on 500 sequences.
Evaluated on 600 sequences.
Evaluated on 700 sequences.
Evaluated on 800 sequences.
Evaluated on 900 sequences.
Evaluated on 1000 sequences.
Evaluated on 1100 sequences.
Full Post-Quantized INT8 model accuracy is 97.1014% (Number of test samples=1173)


In [14]:
# we plot also the confusion matrix of the quantized model
tflite_model_quant_int8_pred = run_tflite_model(tflite_model_quant_int8_file, range(y_test.shape[0]))

# compute the accuracy of the quantized model
from sklearn.metrics import accuracy_score

labels = np.argmax(y_test, axis=1)

full_int8_accuracy = accuracy_score(labels, tflite_model_quant_int8_pred)
print("Full-precision model accuracy is %.4f%% (Number of test samples=%d)" % (test_accuracy * 100, len(y_test)))
print("Quantized model accuracy is %.4f%% (Number of test samples=%d)" % (full_int8_accuracy * 100, len(y_test)))

Evaluated on 0 sequences.
Evaluated on 100 sequences.
Evaluated on 200 sequences.
Evaluated on 300 sequences.
Evaluated on 400 sequences.
Evaluated on 500 sequences.
Evaluated on 600 sequences.
Evaluated on 700 sequences.
Evaluated on 800 sequences.
Evaluated on 900 sequences.
Evaluated on 1000 sequences.
Evaluated on 1100 sequences.
Full-precision model accuracy is 97.6130% (Number of test samples=1173)
Quantized model accuracy is 97.1014% (Number of test samples=1173)


In [15]:
# Function: Convert some hex value into an array for C programming
def hex_to_c_array(hex_data, var_name):

    c_str = ''

    # Create header guard
    c_str += '#ifndef ' + var_name.upper() + '_H\n'
    c_str += '#define ' + var_name.upper() + '_H\n\n'

    # Add array length at top of file
    c_str += '\nstatic const unsigned int ' + var_name + '_len = ' + str(len(hex_data)) + ';\n'

    # Declare C variable
    c_str += 'static const unsigned char ' + var_name + '[] = {'
    hex_array = []
    for i, val in enumerate(hex_data) :

        # Construct string from hex
        hex_str = format(val, '#04x')

        # Add formatting so each line stays within 80 characters
        if (i + 1) < len(hex_data):
            hex_str += ','
        if (i + 1) % 12 == 0:
            hex_str += '\n '
        hex_array.append(hex_str)

    # Add closing brace
    c_str += '\n ' + format(' '.join(hex_array)) + '\n};\n\n'

    # Close out header guard
    c_str += '#endif //' + var_name.upper() + '_H'

    return c_str

In [16]:
c_model_name = 'q8_demo_cnn'
# check if dir 'cfiles' exists, if not create it
if not os.path.exists('cfiles'):
    os.makedirs('cfiles')
# Write TFLite model to a C source (or header) file
with open('cfiles/' + c_model_name + '.h', 'w') as file:
    file.write(hex_to_c_array(tflite_model, c_model_name))

In [23]:
input_details = interpreter.get_input_details()[0]
input_scale, input_zero_point = input_details["quantization"]

# save the test data as numpy arrays
np.save('x_test_gestures_demo.npy', (X_test / input_scale + input_zero_point).astype(np.uint8))
np.save('y_test_gestures_demo.npy', (y_test.astype(np.uint8)))

# print the location of the files
print('Test image data location: ', os.path.abspath('x_test_gestures_demo.npy'))
print('Test labels location: ', os.path.abspath('y_test_gestures_demo.npy'))

Test image data location:  /home/amroset/Machine Learning on Microcontrollers/Project/x_test_gestures_demo.npy
Test labels location:  /home/amroset/Machine Learning on Microcontrollers/Project/y_test_gestures_demo.npy


In [14]:
from qkeras import *
import gc

def get_cnn_quantized_model(num_classes):

        gc.collect()
        keras.backend.clear_session()
    
    
        #qkeras model

        quantized_model = keras.Sequential([
                        keras.layers.Input((32,3)),
                        keras.layers.Reshape((8,4,3)),
                        #QActivation("quantized_bits(16)"),
                        QConv2D(filters=8, kernel_size = (5,5), padding = "same", kernel_quantizer="quantized_bits(8)"),
                        keras.layers.BatchNormalization(),
                        QActivation("quantized_relu(8)"),
                        QConv2D(filters=8, kernel_size = (5,5), padding = "same", kernel_quantizer="quantized_bits(8)"),
                        keras.layers.BatchNormalization(),
                        QActivation("quantized_relu(8)"),
                        keras.layers.MaxPool2D(pool_size = 2),
                        keras.layers.Flatten(),
                        QDense(32, kernel_quantizer="quantized_bits(8)"),
                        keras.layers.BatchNormalization(),
                        QActivation("quantized_relu(8)"),
                        QDense(num_classes, kernel_quantizer="quantized_bits(8)"),
                        keras.layers.Activation('softmax'),
                        ])

        return quantized_model

In [15]:
qmodel = get_cnn_quantized_model(len(label_encoder.classes_))

qmodel.compile(loss="categorical_crossentropy", optimizer='adam', metrics = ['accuracy'])

qmodel.summary()


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 reshape (Reshape)           (None, 8, 4, 3)           0         
                                                                 
 q_conv2d (QConv2D)          (None, 8, 4, 8)           608       
                                                                 
 batch_normalization (Batch  (None, 8, 4, 8)           32        
 Normalization)                                                  
                                                                 
 q_activation (QActivation)  (None, 8, 4, 8)           0         
                                                                 
 q_conv2d_1 (QConv2D)        (None, 8, 4, 8)           1608      
                                                                 
 batch_normalization_1 (Bat  (None, 8, 4, 8)           32        
 chNormalization)                                       

In [16]:
# Train and evaluate the quantization aware model
es = [
        tf.keras.callbacks.EarlyStopping(patience=5, restore_best_weights=True),
        keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, verbose=1, min_delta=0.0001, mode='auto', cooldown=0, min_lr=0)
                
    ]
qmodel.fit(
                  X_train,
                  y_train,
                  epochs=20,
                  validation_data=(X_validation, y_validation),
                  callbacks=[es]
              )

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.src.callbacks.History at 0x747534103460>

In [17]:
# evaluate the model on the test set
quant_loss, quant_acc = qmodel.evaluate(X_test, y_test, verbose=0)
print('Quantization aware training loss: ', quant_loss)
print('Quantization aware training accuracy: ', quant_acc)
qmodel.save('qat_cnn_gesture_classification_model.h5')

Quantization aware training loss:  0.021411869674921036
Quantization aware training accuracy:  0.9957374334335327


  saving_api.save_model(


In [18]:
# convert the QAT model to a fully quantized model using TFLite

def representative_data_gen():
  for input_value in tf.data.Dataset.from_tensor_slices(X_train).batch(1).take(100):
    yield [input_value]

converter = tf.lite.TFLiteConverter.from_keras_model(qmodel)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_data_gen
# Ensure that if any ops can't be quantized, the converter throws an error
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
# Set the input and output tensors to uint8 (APIs added in r2.3)
converter.inference_input_type = tf.uint8
converter.inference_output_type = tf.uint8

tflite_model_quant_int8_qat = converter.convert()

INFO:tensorflow:Assets written to: /tmp/tmpm5c9q6s5/assets


INFO:tensorflow:Assets written to: /tmp/tmpm5c9q6s5/assets
2025-01-02 17:26:04.521530: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:378] Ignored output_format.
2025-01-02 17:26:04.521547: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:381] Ignored drop_control_dependency.
2025-01-02 17:26:04.521644: I tensorflow/cc/saved_model/reader.cc:83] Reading SavedModel from: /tmp/tmpm5c9q6s5
2025-01-02 17:26:04.524809: I tensorflow/cc/saved_model/reader.cc:51] Reading meta graph with tags { serve }
2025-01-02 17:26:04.524819: I tensorflow/cc/saved_model/reader.cc:146] Reading SavedModel debug info (if present) from: /tmp/tmpm5c9q6s5
2025-01-02 17:26:04.533766: I tensorflow/cc/saved_model/loader.cc:233] Restoring SavedModel bundle.
2025-01-02 17:26:04.595482: I tensorflow/cc/saved_model/loader.cc:217] Running initialization op on SavedModel bundle at path: /tmp/tmpm5c9q6s5
2025-01-02 17:26:04.622605: I tensorflow/cc/saved_model/loader.cc:316] SavedModel

In [19]:
interpreter = tf.lite.Interpreter(model_content=tflite_model_quant_int8_qat)
input_type = interpreter.get_input_details()[0]['dtype']
print('input: ', input_type)
output_type = interpreter.get_output_details()[0]['dtype']
print('output: ', output_type)
# Save the quantized model to disk
open("models/demo_gesture_qat_int8.tflite", "wb").write(tflite_model_quant_int8_qat)

# Show the model size for the 8-bit quantized TFLite model
tflite_quant_in_kb = os.path.getsize('models/demo_gesture_qat_int8.tflite') / 1024
print("TFLite Model size with 8-bit quantization: %d KB" % tflite_quant_in_kb)

input:  <class 'numpy.uint8'>
output:  <class 'numpy.uint8'>
TFLite Model size with 8-bit quantization: 21 KB


In [20]:
c_model_name = 'demo_qat8_gesture'
# Write TFLite model to a C source (or header) file
with open(c_model_name + '.h', 'w') as file:
    file.write(hex_to_c_array(tflite_model_quant_int8_qat, c_model_name))

In [21]:
# this might take a few minutes (~ 1- 2 minutes)
# if it takes longer than that, I suggest to 
# restart the runtime and try again
# if the issue still persists, restart your computer
tflite_model_quant_int8_qat_file = pathlib.Path('models/demo_gesture_qat_int8.tflite')
tflite_model_quant_int8_qat_type = "Full QAT INT8"

evaluate_model(tflite_model_quant_int8_qat_file, tflite_model_quant_int8_qat_type)
print("Full-precision model accuracy is %.4f%% (Number of test samples=%d)" % (test_accuracy * 100, len(X_train)))

Evaluated on 0 sequences.
Evaluated on 100 sequences.
Evaluated on 200 sequences.
Evaluated on 300 sequences.
Evaluated on 400 sequences.
Evaluated on 500 sequences.
Evaluated on 600 sequences.
Evaluated on 700 sequences.
Evaluated on 800 sequences.
Evaluated on 900 sequences.
Evaluated on 1000 sequences.
Evaluated on 1100 sequences.
Full QAT INT8 model accuracy is 99.4032% (Number of test samples=1173)
Full-precision model accuracy is 97.3572% (Number of test samples=3518)


In [22]:
# evaluate the model on the test set
tflite_model_quant_int8_qat_file = "models/demo_gesture_qat_int8.tflite"
tflite_model_quant_int8_qat_model_type = "Quantized aware training model"
tflite_model_quant_int8_qat_pred = run_tflite_model(tflite_model_quant_int8_qat_file, range(X_train.shape[0]))


Evaluated on 0 sequences.
Evaluated on 100 sequences.
Evaluated on 200 sequences.
Evaluated on 300 sequences.
Evaluated on 400 sequences.
Evaluated on 500 sequences.
Evaluated on 600 sequences.
Evaluated on 700 sequences.
Evaluated on 800 sequences.
Evaluated on 900 sequences.
Evaluated on 1000 sequences.
Evaluated on 1100 sequences.


In [23]:
# compute the accuracy of the model
full_qat_int8_accuracy = accuracy_score(labels, tflite_model_quant_int8_qat_pred)
print('Full QAT INT8 accuracy is %.4f%% (Number of test samples=%d)' % (full_qat_int8_accuracy * 100, len(y_test)))
print('Full-precision model accuracy is %.4f%% (Number of test samples=%d)' % (test_accuracy * 100, len(y_test)))

Full QAT INT8 accuracy is 99.4032% (Number of test samples=1173)
Full-precision model accuracy is 97.3572% (Number of test samples=1173)
