# Dataset: AFHQ
- 1. Training the model
- 2. Quantizing the model to full int8
- 3. Evaluating the quantized model on the test set
- 4. Exporting an image to a text file

Details:
- load the TensorFlow dataset (train, validation, test)
    - convert the TF dataset into a numpy one
    - convert the testing dataset into 2 numpy arrays (images and labels) for the on-mcu evaluation
- normalize the datasets from uint8 [0, 255] to float32 [0, 1]
- convert the TF training and testing datasets into a numpy one
- recover from the numpy training dataset the normalized images as a 4D numpy array for quantization (i.e., for the representative dataset)
- recover from the numpy testing dataset the normalized images as a 4D numpy array and the labels as a separate numpy array (2 arrays in total) for the on-mcu evaluation
    - convert the TF datasets into numpy ones
- Create, build, compile and train the full precision model on the tf training dataset and validation dataset
- Evaluate the fp-model on the tensorflow test set
- Quantize the model to full int8 and recover the scale and zero-point parameters that convert float32 values to int8
- Convert the numpy test images to int8 using the scaling and zero point parameters
- Save the int8 test images and labels to .npy format for on-mcu evaluation
- Evaluate the quantized model on the int8 test images
- Export an int8 train image to a text file for STM32 CubeIDE Build using a data.h file. (USING DECIMAL FORMAT, no need to convert to hex)

# Training the model

In [9]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import os
import tensorflow as tf
import tensorflow_datasets as tfds
# Root folder of the AFHQ images; the loaders below read its `train` and `val`
# sub-folders.
PATH_TO_IMGS = 'dataset_afhq/'

# Options shared by the training and validation loaders: both read the `train`
# folder with the same seed and the same 10% validation split.
_train_val_options = dict(
    seed=123,
    image_size=(64, 64),
    batch_size=32,
    validation_split=0.1,
)

# 90% of the `train` folder, resized to 64x64, in batches of 32.
train_ds = tf.keras.utils.image_dataset_from_directory(
    f'{PATH_TO_IMGS}train',
    subset='training',
    **_train_val_options,
)

# The remaining 10% of the `train` folder, used as validation data.
val_ds = tf.keras.utils.image_dataset_from_directory(
    f'{PATH_TO_IMGS}train',
    subset='validation',
    **_train_val_options,
)

# The test set is read from the `val` folder, one image per batch and without
# shuffling, so images and labels can be iterated in a fixed order.
test_ds = tf.keras.utils.image_dataset_from_directory(
    f'{PATH_TO_IMGS}val',
    seed=123,
    image_size=(64, 64),
    batch_size=1,
    shuffle=False,
)

Found 14630 files belonging to 3 classes.
Using 13167 files for training.
Found 14630 files belonging to 3 classes.
Using 1463 files for validation.
Found 1500 files belonging to 3 classes.


In [4]:
# convert the tensorflow dataset to numpy dataset
train_ds_np = tfds.as_numpy(train_ds) # needed for quantization
test_ds_np = tfds.as_numpy(test_ds)  # needed for testing the quantized model

# FOR THE ON-MCU EVALUATION
# convert the images to a unique 4D (num_samples, height, width, channels) numpy array
# for testing, we need both the images and the labels.
# The test set is walked ONCE and both arrays are built from the same list of
# batches, so image i and label i are guaranteed to belong together.
test_batches = list(test_ds_np)
test_images_np = np.concatenate([x for x, _ in test_batches], axis=0)
print(f"test_images_np.shape = {test_images_np.shape}")
print(f"test_images_np.dtype = {test_images_np.dtype}")
test_labels_np = np.concatenate([y for _, y in test_batches], axis=0)
print(f'test_labels_np.shape = {test_labels_np.shape}')
print(f'test_labels_np.dtype = {test_labels_np.dtype}')

# save the test data as numpy arrays using UINT8 FORMAT
# The loaded images are float32 and, after resizing, may hold fractional pixel
# values; round to the nearest integer (and saturate to [0, 255]) instead of
# letting astype() truncate them.
np.save('dataset_afhq/x_test_afhq.npy',
        np.clip(np.rint(test_images_np), 0, 255).astype(np.uint8))
np.save('dataset_afhq/y_test_afhq.npy', test_labels_np.astype(np.uint8))

# FOR THE OFF-MCU TRAINING
# Normalize the data from [0, 255] to [0, 1].
# `tf.keras.layers.Rescaling` is the non-experimental name of the layer that
# was previously reached through `layers.experimental.preprocessing`.
normalization_layer = tf.keras.layers.Rescaling(1./255)
# The rescaled datasets get their OWN names: train_ds / val_ds / test_ds stay in
# the raw [0, 255] range, so a later cell that rescales those three datasets
# does not end up dividing by 255 a second time.
train_ds_scaled = train_ds.map(lambda x, y: (normalization_layer(x), y))
val_ds_scaled = val_ds.map(lambda x, y: (normalization_layer(x), y))
test_ds_scaled = test_ds.map(lambda x, y: (normalization_layer(x), y))

# convert the images to a unique 4D (num_samples, height, width, channels) numpy array
train_ds_np_scaled = tfds.as_numpy(train_ds_scaled)
test_ds_np_scaled = tfds.as_numpy(test_ds_scaled)
# for training, we only need the images
train_images_np_scaled = np.concatenate([x for x, _ in train_ds_np_scaled], axis=0)
print(f'\ntrain_images_np_scaled.shape = {train_images_np_scaled.shape}')
print(f'train_images_np_scaled.dtype = {train_images_np_scaled.dtype}')
print(f'np.max(train_images_np_scaled) = {np.max(train_images_np_scaled)}')
print(f'np.min(train_images_np_scaled) = {np.min(train_images_np_scaled)}')
print(f'train_images_np_scaled[0,:,:,0] = \n{train_images_np_scaled[0,:,:,0]}') # image 0 channel 0
# for testing, we need both the images and the labels
test_images_np_scaled = np.concatenate([x for x, _ in test_ds_np_scaled], axis=0)
print(f'\ntest_images_np_scaled.shape = {test_images_np_scaled.shape}')
print(f'test_images_np_scaled.dtype = {test_images_np_scaled.dtype}')
# Rescaling only touches the images, so the labels collected above are reused
# instead of iterating the test set again.
print(f'test_labels_np.shape = {test_labels_np.shape}')
print(f'test_labels_np.dtype = {test_labels_np.dtype}')

test_images_np.shape = (1500, 64, 64, 3)
test_images_np.dtype = float32
test_labels_np.shape = (1500,)
test_labels_np.dtype = int32

train_images_np_scaled.shape = (13167, 64, 64, 3)
train_images_np_scaled.dtype = float32
np.max(train_images_np_scaled) = 1.0
np.min(train_images_np_scaled) = 0.0
train_images_np_scaled[0,:,:,0] = 
[[0.3627451  0.35882354 0.377451   ... 0.39901963 0.3794118  0.3745098 ]
 [0.39411768 0.3745098  0.38529414 ... 0.12352942 0.48333335 0.37058824]
 [0.40784317 0.45882356 0.6362746  ... 0.11470589 0.4411765  0.3137255 ]
 ...
 [0.1892157  0.3401961  0.33137256 ... 0.37352943 0.34607846 0.26666668]
 [0.22254904 0.327451   0.3372549  ... 0.28333336 0.29901963 0.3137255 ]
 [0.20490198 0.227451   0.26372552 ... 0.22450982 0.21274512 0.27450982]]

test_images_np_scaled.shape = (1500, 64, 64, 3)
test_images_np_scaled.dtype = float32
test_labels_np.shape = (1500,)
test_labels_np.dtype = int32


In [10]:
# FOR THE OFF-MCU TRAINING
# Normalize the data from [0, 255] to [0, 1].
# `tf.keras.layers.Rescaling` is the non-experimental name of the layer that
# was previously reached through `layers.experimental.preprocessing`.
normalization_layer = tf.keras.layers.Rescaling(1./255)
# NOTE: the three dataset names are rebound to their rescaled versions (the
# training and evaluation cells use these names). Running this cell a second
# time without reloading the datasets would therefore divide by 255 again.
train_ds = train_ds.map(lambda x, y: (normalization_layer(x), y))
val_ds = val_ds.map(lambda x, y: (normalization_layer(x), y))
test_ds = test_ds.map(lambda x, y: (normalization_layer(x), y))

# convert the images to a unique 4D (num_samples, height, width, channels) numpy array
train_ds_np_scaled_float32 = tfds.as_numpy(train_ds)
test_ds_np_scaled_float32 = tfds.as_numpy(test_ds)
# for training, we only need the images
train_images_np_scaled_float32 = np.concatenate([x for x, _ in train_ds_np_scaled_float32], axis=0)
print(f'\ntrain_images_np_scaled_float32.shape = {train_images_np_scaled_float32.shape}')
print(f'train_images_np_scaled_float32.dtype = {train_images_np_scaled_float32.dtype}')
print(f'np.max(train_images_np_scaled_float32) = {np.max(train_images_np_scaled_float32)}')
print(f'np.min(train_images_np_scaled_float32) = {np.min(train_images_np_scaled_float32)}')
print(f'train_images_np_scaled_float32[0,:,:,0] = \n{train_images_np_scaled_float32[0,:,:,0]}') # image 0 channel 0
# for testing, we need both the images and the labels.
# Both arrays are built from a single pass over the test set, so image i and
# label i always come from the same batch.
test_batches_scaled_float32 = list(test_ds_np_scaled_float32)
test_images_np_scaled_float32 = np.concatenate([x for x, _ in test_batches_scaled_float32], axis=0)
print(f'\ntest_images_np_scaled_float32.shape = {test_images_np_scaled_float32.shape}')
print(f'test_images_np_scaled_float32.dtype = {test_images_np_scaled_float32.dtype}')
test_labels_np = np.concatenate([y for _, y in test_batches_scaled_float32], axis=0)
print(f'test_labels_np.shape = {test_labels_np.shape}')
print(f'test_labels_np.dtype = {test_labels_np.dtype}')


train_images_np_scaled_float32.shape = (13167, 64, 64, 3)
train_images_np_scaled_float32.dtype = float32
np.max(train_images_np_scaled_float32) = 1.0
np.min(train_images_np_scaled_float32) = 0.0
train_images_np_scaled_float32[0,:,:,0] = 
[[0.02941177 0.02941177 0.02156863 ... 0.01176471 0.01176471 0.01176471]
 [0.0372549  0.0372549  0.02745098 ... 0.01176471 0.01176471 0.01176471]
 [0.03921569 0.03921569 0.03137255 ... 0.01176471 0.01176471 0.01176471]
 ...
 [0.4647059  0.49313727 0.5764706  ... 0.35980394 0.26666668 0.23529413]
 [0.4647059  0.49803925 0.57549024 ... 0.3637255  0.34313726 0.34411767]
 [0.4382353  0.47254905 0.5382353  ... 0.48529413 0.3892157  0.3784314 ]]

test_images_np_scaled_float32.shape = (1500, 64, 64, 3)
test_images_np_scaled_float32.dtype = float32
test_labels_np.shape = (1500,)
test_labels_np.dtype = int32


In [11]:
# Options common to every convolution: 3x3 kernels, stride 1, 'same' padding
# and a ReLU activation.
_conv_options = dict(
    kernel_size=(3, 3),
    strides=(1, 1),
    padding='same',
    activation='relu',
)

# Small CNN classifier: four Conv2D + 2x2 max-pool stages (3, 8, 16 and 32
# filters), followed by a dense head (32 -> 16 -> 3 units) with dropout of 0.2
# after the convolutional stack and after each hidden dense layer.
# A fifth stage (64 filters, name 'conv2d_5') was tried and is currently disabled.
model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(filters=3, name='conv2d_1', data_format='channels_last', **_conv_options),
    tf.keras.layers.MaxPool2D(pool_size=(2, 2)),
    tf.keras.layers.Conv2D(filters=8, name='conv2d_2', **_conv_options),
    tf.keras.layers.MaxPool2D(pool_size=(2, 2)),
    tf.keras.layers.Conv2D(filters=16, name='conv2d_3', **_conv_options),
    tf.keras.layers.MaxPool2D(pool_size=(2, 2)),
    tf.keras.layers.Conv2D(filters=32, name='conv2d_4', **_conv_options),
    tf.keras.layers.MaxPool2D(pool_size=(2, 2)),
    tf.keras.layers.Dropout(rate=0.2),

    tf.keras.layers.Flatten(),

    tf.keras.layers.Dense(units=32, activation='relu'),
    tf.keras.layers.Dropout(rate=0.2),
    tf.keras.layers.Dense(units=16, activation='relu'),
    tf.keras.layers.Dropout(rate=0.2),
    # One softmax output per class (3 classes).
    tf.keras.layers.Dense(units=3, activation='softmax'),
])

In [12]:
# Fix TensorFlow's global random seed before the weights are created.
tf.random.set_seed(1)

# Build the model for batches of 64x64 images with 3 channels; the batch
# dimension is left unspecified.
model.build(input_shape=(None, 64, 64, 3))

# Adam with its default settings, sparse categorical cross-entropy as the loss
# (the labels printed earlier are integer class ids) and accuracy as the metric.
_optimizer = tf.keras.optimizers.Adam()
_loss = tf.keras.losses.SparseCategoricalCrossentropy()
model.compile(
    optimizer=_optimizer,
    loss=_loss,
    metrics=['accuracy'],
)
# model.summary()

In [13]:
# Train the float model for 5 epochs on train_ds, using val_ds as validation data.
# NOTE(review): `shuffle=True` is presumably ignored because train_ds is a
# tf.data.Dataset — confirm against the Keras `Model.fit` documentation.
history = model.fit(train_ds, validation_data=val_ds, epochs=5, shuffle=True)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [14]:
# Evaluate the trained float model on the test dataset. `score[1]` is read
# later in the notebook as the float32 accuracy to compare the int8 model against.
score = model.evaluate(test_ds, verbose=2)

1500/1500 - 5s - loss: 0.2379 - accuracy: 0.9167 - 5s/epoch - 3ms/step


In [29]:
# save the model as .h5
# MODEL_NAME is also reused further down for the .tflite file and the C header.
MODEL_NAME = 'afhq_23k'
model.save(f'trained_models_afhq/{MODEL_NAME}.h5')

# Convert the model to TensorFlow Lite with 8-bit full quantization
!!! WARNING !!!: Inputs are signed 8-bit (int8), not unsigned 8-bit (uint8)

In [15]:
IMG_SIZE = 64
# Upper bound on the number of training images used to calibrate the quantizer.
NUM_CALIBRATION_SAMPLES = 500

# Convert Keras model to a tflite model
converter = tf.lite.TFLiteConverter.from_keras_model(model)
# Convert the model to the TensorFlow Lite format with quantization
quantize = True
if quantize:
    def representative_dataset():
        """Yield single-image batches of normalized training images for calibration."""
        num_samples = min(NUM_CALIBRATION_SAMPLES, len(train_images_np_scaled_float32))
        for i in range(num_samples):
            # reshape to (1, IMG_SIZE, IMG_SIZE, 3) because the model expects a
            # batch of data (similar to torch.unsqueeze(0))
            yield [train_images_np_scaled_float32[i].reshape(1, IMG_SIZE, IMG_SIZE, 3)]

    # Set the optimization flag.
    converter.optimizations = [tf.lite.Optimize.DEFAULT]
    # Enforce full-int8 quantization
    converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
    converter.inference_input_type = tf.int8  # or tf.uint8
    converter.inference_output_type = tf.int8  # or tf.uint8
    # Provide a representative dataset to ensure we quantize correctly.
    # This assignment lives inside the `if`: `representative_dataset` is only
    # defined when quantization is requested.
    converter.representative_dataset = representative_dataset
tflite_model = converter.convert()

# Write the serialized model to disk; the context manager closes the file
# before the interpreter below opens it.
with open(f'{MODEL_NAME}.tflite', 'wb') as tflite_file:
    tflite_file.write(tflite_model)

# Load the converted model back and allocate its tensors once.
tflite_interpreter = tf.lite.Interpreter(model_path=f'{MODEL_NAME}.tflite')
tflite_interpreter.allocate_tensors()
input_details = tflite_interpreter.get_input_details()
output_details = tflite_interpreter.get_output_details()

print("== Input details ==")
print("name:", input_details[0]['name'])
print("shape:", input_details[0]['shape'])
print("type:", input_details[0]['dtype'])

print("\n== Output details ==")
print("name:", output_details[0]['name'])
print("shape:", output_details[0]['shape'])
print("type:", output_details[0]['dtype'])

print(f'test_images_np.shape = {test_images_np_scaled_float32.shape}')
# One predicted class id per test image, filled in by the evaluation loop.
predictions = np.zeros((test_images_np_scaled_float32.shape[0],), dtype=int)
# Scale and zero point of the model input, used to convert float images to int8.
input_scale, input_zero_point = input_details[0]["quantization"]
print(input_details)



INFO:tensorflow:Assets written to: /var/folders/t3/v98l9wvd5v31cyynx1lwx12r0000gn/T/tmp53bzrli8/assets


INFO:tensorflow:Assets written to: /var/folders/t3/v98l9wvd5v31cyynx1lwx12r0000gn/T/tmp53bzrli8/assets
2023-06-10 13:23:00.422457: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:362] Ignored output_format.
2023-06-10 13:23:00.423868: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:365] Ignored drop_control_dependency.
2023-06-10 13:23:00.430100: I tensorflow/cc/saved_model/reader.cc:45] Reading SavedModel from: /var/folders/t3/v98l9wvd5v31cyynx1lwx12r0000gn/T/tmp53bzrli8
2023-06-10 13:23:00.437180: I tensorflow/cc/saved_model/reader.cc:89] Reading meta graph with tags { serve }
2023-06-10 13:23:00.437208: I tensorflow/cc/saved_model/reader.cc:130] Reading SavedModel debug info (if present) from: /var/folders/t3/v98l9wvd5v31cyynx1lwx12r0000gn/T/tmp53bzrli8
2023-06-10 13:23:00.455767: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:357] MLIR V1 optimization pass is not enabled
2023-06-10 13:23:00.459124: I tensorflow/cc/saved_model/load

== Input details ==
name: serving_default_conv2d_1_input:0
shape: [ 1 64 64  3]
type: <class 'numpy.int8'>

== Output details ==
name: StatefulPartitionedCall:0
shape: [1 3]
type: <class 'numpy.int8'>
test_images_np.shape = (1500, 64, 64, 3)
[{'name': 'serving_default_conv2d_1_input:0', 'index': 0, 'shape': array([ 1, 64, 64,  3], dtype=int32), 'shape_signature': array([-1, 64, 64,  3], dtype=int32), 'dtype': <class 'numpy.int8'>, 'quantization': (0.003921568859368563, -128), 'quantization_parameters': {'scales': array([0.00392157], dtype=float32), 'zero_points': array([-128], dtype=int32), 'quantized_dimension': 0}, 'sparsity_parameters': {}}]


fully_quantize: 0, inference_type: 6, input_inference_type: INT8, output_inference_type: INT8


In [20]:
print(f'test_images_np_scaled_float32[0][0][0][0] = {test_images_np_scaled_float32[0][0][0][0]} ({test_images_np_scaled_float32.dtype})')
print(f'test_images_np_scaled_float32.shape = {test_images_np_scaled_float32.shape}')
# Quantize the float images with the model's input parameters:
#     q = round(x / scale + zero_point), saturated to the input dtype's range.
# A bare astype() truncates toward zero instead of rounding: with the scale and
# zero point printed by the conversion cell, a pixel of 1.0 gives 126.99998,
# which truncation stores as 126 rather than 127.
input_dtype = input_details[0]["dtype"]
input_limits = np.iinfo(input_dtype)
test_images_np_int8 = np.clip(
    np.rint(test_images_np_scaled_float32 / input_scale + input_zero_point),
    input_limits.min,
    input_limits.max,
).astype(input_dtype)
print(f'test_images_np_int8[0][0][0][0] = {test_images_np_int8[0][0][0][0]} ({test_images_np_int8.dtype})')
print(f'test_images_np_int8.shape = {test_images_np_int8.shape}')

# saving the int8 images for on-mcu evaluation
# (same file names as the uint8 export earlier in the notebook, which this replaces)
np.save('dataset_afhq/x_test_afhq.npy', test_images_np_int8)
np.save('dataset_afhq/y_test_afhq.npy', test_labels_np)

test_images_np_scaled_float32[0][0][0][0] = 0.47745099663734436 (float32)
test_images_np_scaled_float32.shape = (1500, 64, 64, 3)
test_images_np_int8[0][0][0][0] = -6 (int8)
test_images_np_int8.shape = (1500, 64, 64, 3)


In [24]:
# Run the int8 model on every quantized test image and store the predicted class.
input_index = input_details[0]['index']
output_index = output_details[0]['index']
for id_img, img in enumerate(test_images_np_int8):
    # add the batch dimension; the images already have the int8 input dtype
    img = np.expand_dims(img, axis=0)
    if id_img == 0:
        print(img[0][0][0][0])
        print(img.shape)
    # The interpreter's tensors were allocated once right after it was created,
    # so the input is simply written and the model invoked. Re-allocating here,
    # after set_tensor, is unnecessary and risks discarding the input just set.
    tflite_interpreter.set_tensor(input_index, img)
    tflite_interpreter.invoke()

    # The predicted class is the index of the largest output score.
    output = tflite_interpreter.get_tensor(output_index)
    predictions[id_img] = output.argmax()

import time  # NOTE(review): unused in this cell; kept in case a later cell relies on it

# Count the matches in one vectorized comparison. The counter is no longer
# called `sum`, which shadowed the builtin for the rest of the notebook.
num_correct = int(np.sum(predictions == test_labels_np))
accuracy_score = num_correct / len(predictions)
print(f"Sum of correct predictions is {num_correct}")
print(f"Accuracy of quantized to int8 model is {accuracy_score*100}%")
print(f"Compared to float32 accuracy of {score[1]*100}%")
print(f"We have a change of {(accuracy_score-score[1])*100}%")

-6
(1, 64, 64, 3)
Sum of correct predictions is 1370
Accuracy of quantized to int8 model is 91.33333333333333%
Compared to float32 accuracy of 91.66666865348816%
We have a change of -0.33333532015482614%


In [26]:
# Function: Convert some hex value into an array for C programming
def hex_to_c_array(hex_data, var_name):
    """Render a sequence of byte values as the text of a C header file.

    The header contains an include guard named ``<VAR_NAME>_H``, an
    ``unsigned int <var_name>_len`` holding the number of values, and an
    ``unsigned char <var_name>[]`` initialised with the values written as
    two-digit hexadecimal literals, twelve per line.

    Args:
        hex_data: sized iterable of integers (for example a ``bytes`` object).
        var_name: name of the C array; its upper-cased form names the guard.

    Returns:
        The complete header as a single string.
    """
    guard = f'{var_name.upper()}_H'
    total = len(hex_data)

    tokens = []
    for position, value in enumerate(hex_data, start=1):
        token = f'{value:#04x}'
        # Every value except the last is followed by a comma.
        if position < total:
            token += ','
        # Break the line after every 12th value so lines stay within 80 characters.
        if position % 12 == 0:
            token += '\n '
        tokens.append(token)

    pieces = [
        # Header guard.
        f'#ifndef {guard}\n',
        f'#define {guard}\n\n',
        # Array length, then the array itself.
        f'\nunsigned int {var_name}_len = {total};\n',
        f'unsigned char {var_name}[] = {{',
        '\n ' + ' '.join(tokens) + '\n};\n\n',
        # Close out the header guard.
        f'#endif //{guard}',
    ]
    return ''.join(pieces)


# Write the TFLite model to a C header file named after MODEL_NAME
with open(f'{MODEL_NAME}.h', 'w') as file:
    file.write(hex_to_c_array(tflite_model, 'afhq'))  # 'afhq' names the generated C array (`afhq[]`) and its length (`afhq_len`)


# Convert the image to a txt file

In [28]:
# We need to convert our images scaled to [0, 1] to an int8 representation.
# We use the scale and the zero point from the tflite quantization details:
#     q = round(x / scale + zero_point), saturated to [-128, 127].
# Rounding matters: a bare astype(np.int8) truncates toward zero, which stores
# values such as 126.99998 as 126 instead of 127.
image = np.clip(
    np.rint(train_images_np_scaled_float32[0] / input_scale + input_zero_point),
    -128,
    127,
).astype(np.int8)

# Preview of the first exported value, in hex and in decimal.
print(f'{hex(image[0][0][0])},') # not necessary
print(f'{image[0][0][0]}')

# Number of comma-terminated decimal values written on each line of the file.
VALUES_PER_LINE = 11

# Export order is channel first, then row, then column: all of channel 0,
# followed by channel 1 and channel 2.
flat_values = image[:IMG_SIZE, :IMG_SIZE, :3].transpose(2, 0, 1).ravel()
with open('dataset_afhq/afhq.txt', 'w') as f:
    for start in range(0, flat_values.size, VALUES_PER_LINE):
        row = flat_values[start:start + VALUES_PER_LINE]
        line = ''.join(f'{value},' for value in row)
        # Only complete rows end with a newline; a shorter final row is left open.
        if len(row) == VALUES_PER_LINE:
            line += '\n'
        f.write(line)

-0x78,
-120
