In [1]:
import numpy as np
import tensorflow as tf
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.preprocessing import StandardScaler
import subprocess
import os

# Create directories if they don't exist
os.makedirs('GeneratedTfLiteFiles', exist_ok=True)
os.makedirs('GeneratedCHeaderFiles', exist_ok=True)

# Generate a synthetic multi-class dataset
X, y = make_classification(n_samples=2000,
                           n_features=10,
                           n_classes=5,
                           n_clusters_per_class=1,
                           n_informative=8,
                           random_state=42)

# Scale the input features
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert labels to one-hot encoding
y_train_one_hot = tf.keras.utils.to_categorical(y_train)
y_test_one_hot = tf.keras.utils.to_categorical(y_test)
print(X_train.shape, y_train.shape)
print(X_test[0])
print(y_test[0])

2024-10-29 17:52:38.864968: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-10-29 17:52:38.876204: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-10-29 17:52:38.879752: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-10-29 17:52:38.888935: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


(1600, 10) (1600,)
[ 0.78458113 -2.008503    0.49006759 -0.6243383  -0.23459859  0.72880479
 -0.09425547  0.40643254 -0.0243661   0.45048382]
1


In [2]:
# Create a simple MLP model
model = tf.keras.Sequential([
    tf.keras.layers.Dense(128, activation='relu', input_shape=(10,)),
    tf.keras.layers.BatchNormalization(), # training only
    tf.keras.layers.Dropout(0.3), # training only
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(16, activation='relu'),
    tf.keras.layers.Dense(5, activation='softmax')
])

# Total two ops -> Dense (FULLY_CONNECTED: Composite ops)
# Finally SOFTMAX

# Number of Input -> 10, output -> 4 (total 5)

# Compile the model
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Train the model with early stopping
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True
)

history = model.fit(X_train, y_train_one_hot,
                    epochs=100,
                    batch_size=32,
                    validation_split=0.2,
                    callbacks=[early_stopping],
                    verbose=1)

# Evaluate the original model
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)

print("\nOriginal Model Classification Report:")
print(classification_report(y_test, y_pred_classes, digits=5))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
I0000 00:00:1730202760.457479  654643 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1730202760.499324  654643 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1730202760.502924  654643 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1730202760.50694

Epoch 1/100


I0000 00:00:1730202761.761910  654796 service.cc:146] XLA service 0x2a9607c0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1730202761.761940  654796 service.cc:154]   StreamExecutor device (0): NVIDIA GeForce RTX 3070, Compute Capability 8.6
2024-10-29 17:52:41.808690: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2024-10-29 17:52:41.961803: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:531] Loaded cuDNN version 8907


[1m36/40[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m0s[0m 1ms/step - accuracy: 0.2699 - loss: 1.7483 

I0000 00:00:1730202763.185990  654796 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 11ms/step - accuracy: 0.2772 - loss: 1.7319 - val_accuracy: 0.4781 - val_loss: 1.5271
Epoch 2/100
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5739 - loss: 1.2153 - val_accuracy: 0.5781 - val_loss: 1.4167
Epoch 3/100
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.6601 - loss: 0.9978 - val_accuracy: 0.6750 - val_loss: 1.2364
Epoch 4/100
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7087 - loss: 0.8841 - val_accuracy: 0.7375 - val_loss: 1.0348
Epoch 5/100
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7478 - loss: 0.7324 - val_accuracy: 0.7937 - val_loss: 0.8347
Epoch 6/100
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7583 - loss: 0.6928 - val_accuracy: 0.8250 - val_loss: 0.6885
Epoch 7/100
[1m40/40[0m [32m━━━━━━━━━━━━━━

In [3]:
# Calculate input statistics for quantization
def get_input_statistics():
    """Calculate input statistics for quantization calibration"""
    # Use training data to compute input statistics
    input_data = X_train.astype(np.float32)

    # Calculate min, max, and mean for each feature
    input_min = np.min(input_data, axis=0)
    input_max = np.max(input_data, axis=0)
    input_mean = np.mean(input_data, axis=0)
    input_std = np.std(input_data, axis=0)

    return input_min, input_max, input_mean, input_std

input_min, input_max, input_mean, input_std = get_input_statistics()

# Representative dataset generator with input statistics
def representative_dataset():
    """Generate representative dataset for quantization with input statistics"""
    for i in range(500):  # Using 500 samples for calibration
        data = X_train[i:i+1].astype(np.float32)
        # Ensure the data is within the calculated ranges
        data = np.clip(data, input_min, input_max)
        yield [data]

# Convert to TensorFlow Lite with proper quantization
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.inference_input_type = tf.int8
converter.inference_output_type = tf.int8
converter.representative_dataset = representative_dataset

# Set input/output statistics for quantization
converter.inference_input_type = tf.int8
converter.inference_output_type = tf.int8

# Important: Disable per-channel quantization for dense layers else ggwp
converter._experimental_disable_per_channel_quantization_for_dense_layers = True

# Convert the model
tflite_model = converter.convert()

# Save the TensorFlow Lite model
tflite_model_path = os.path.join('GeneratedTfLiteFiles', 'model.tflite')
with open(tflite_model_path, 'wb') as f:
    f.write(tflite_model)

# Convert TFLite model to C header using xxd
header_file = os.path.join('GeneratedCHeaderFiles', 'model.h')
subprocess.run(['xxd', '-i', tflite_model_path, header_file])

# Read and modify the header file
with open(header_file, 'r') as f:
    header_content = f.read()

# Modify header content
header_content = header_content.replace(
    'unsigned char GeneratedTfLiteFiles_model_tflite[] = {',
    'const unsigned char g_model[] = {'
)
header_content = header_content.replace(
    'unsigned int GeneratedTfLiteFiles_model_tflite_len',
    'const unsigned int g_model_len'
)

# Write the modified header file with added input statistics
with open(header_file, 'w') as f:
    f.write("// Generated TensorFlow Lite model header for multi-class classification\n")
    f.write("#ifndef MODEL_H_\n#define MODEL_H_\n\n")

    # Add input statistics as constants
    f.write("// Input statistics for quantization\n")
    for i in range(len(input_mean)):
        f.write(f"#define INPUT_MEAN_{i} {input_mean[i]}f\n")
        f.write(f"#define INPUT_STD_{i} {input_std[i]}f\n")

    f.write("\n")
    f.write(header_content)
    f.write("\n#endif  // MODEL_H_")


INFO:tensorflow:Assets written to: /tmp/tmp7bc363z2/assets


INFO:tensorflow:Assets written to: /tmp/tmp7bc363z2/assets


Saved artifact at '/tmp/tmp7bc363z2'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 10), dtype=tf.float32, name='keras_tensor')
Output Type:
  TensorSpec(shape=(None, 5), dtype=tf.float32, name=None)
Captures:
  130308660454224: TensorSpec(shape=(), dtype=tf.resource, name=None)
  130308660457920: TensorSpec(shape=(), dtype=tf.resource, name=None)
  130308660626704: TensorSpec(shape=(), dtype=tf.resource, name=None)
  130308660628992: TensorSpec(shape=(), dtype=tf.resource, name=None)
  130308660625648: TensorSpec(shape=(), dtype=tf.resource, name=None)
  130308660627760: TensorSpec(shape=(), dtype=tf.resource, name=None)
  130308660630224: TensorSpec(shape=(), dtype=tf.resource, name=None)
  130308660632512: TensorSpec(shape=(), dtype=tf.resource, name=None)
  130308660630752: TensorSpec(shape=(), dtype=tf.resource, name=None)
  130308660633216: TensorSpec(shape=(), dtype=tf.resource, name=None)
  130308660626352: Tensor

W0000 00:00:1730202768.144797  654643 tf_tfl_flatbuffer_helpers.cc:392] Ignored output_format.
W0000 00:00:1730202768.144809  654643 tf_tfl_flatbuffer_helpers.cc:395] Ignored drop_control_dependency.
2024-10-29 17:52:48.145014: I tensorflow/cc/saved_model/reader.cc:83] Reading SavedModel from: /tmp/tmp7bc363z2
2024-10-29 17:52:48.145904: I tensorflow/cc/saved_model/reader.cc:52] Reading meta graph with tags { serve }
2024-10-29 17:52:48.145914: I tensorflow/cc/saved_model/reader.cc:147] Reading SavedModel debug info (if present) from: /tmp/tmp7bc363z2
2024-10-29 17:52:48.152544: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:388] MLIR V1 optimization pass is not enabled
2024-10-29 17:52:48.153678: I tensorflow/cc/saved_model/loader.cc:236] Restoring SavedModel bundle.
2024-10-29 17:52:48.189673: I tensorflow/cc/saved_model/loader.cc:220] Running initialization op on SavedModel bundle at path: /tmp/tmp7bc363z2
2024-10-29 17:52:48.199639: I tensorflow/cc/saved_model/loader.cc

In [4]:
# Test the TFLite model
interpreter = tf.lite.Interpreter(model_path=tflite_model_path)
interpreter.allocate_tensors()

input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# Get quantization parameters
input_scale, input_zero_point = input_details[0]['quantization']
output_scale, output_zero_point = output_details[0]['quantization']

# Test the TFLite model with proper quantization
y_tflite_pred = []
for i in range(len(X_test)):
    # Quantize the input using the calculated statistics
    input_data = X_test[i].reshape(1, 10)
    # Clip input data to the range used during calibration
    input_data = np.clip(input_data, input_min, input_max)
    input_data_quantized = np.round(input_data / input_scale + input_zero_point)
    input_data_quantized = input_data_quantized.astype(np.int8)

    interpreter.set_tensor(input_details[0]['index'], input_data_quantized)
    interpreter.invoke()

    # Get the output and dequantize it
    # Only need to do it here. TFLM not needed, we can use the max value to find the class.
    output_data = interpreter.get_tensor(output_details[0]['index'])
    output_data_dequantized = (output_data.astype(np.float32) - output_zero_point) * output_scale

    y_tflite_pred.append(np.argmax(output_data_dequantized))

y_tflite_pred = np.array(y_tflite_pred)

print("\nTFLite Model Classification Report:")
print(classification_report(y_test, y_tflite_pred, digits=5))

# Save detailed results including input statistics
results_file = os.path.join('GeneratedTfLiteFiles', 'model_evaluation.txt')
with open(results_file, 'w') as f:
    f.write("Multi-class Classification Model Evaluation\n")
    f.write("=========================================\n\n")

    f.write("Input Statistics:\n")
    f.write(f"Mean: {input_mean}\n")
    f.write(f"Std: {input_std}\n")
    f.write(f"Min: {input_min}\n")
    f.write(f"Max: {input_max}\n\n")

    f.write("Quantization Parameters:\n")
    f.write(f"Input Scale: {input_scale}\n")
    f.write(f"Input Zero Point: {input_zero_point}\n")
    f.write(f"Output Scale: {output_scale}\n")
    f.write(f"Output Zero Point: {output_zero_point}\n\n")

    f.write("Original Model Classification Report:\n")
    f.write(classification_report(y_test, y_pred_classes, digits=5))

    f.write("\nTFLite Model Classification Report:\n")
    f.write(classification_report(y_test, y_tflite_pred, digits=5))

print(f"\nDetailed evaluation results saved to: {results_file}")

# Print summary of quantization parameters
print("\nQuantization Parameters Summary:")
print(f"Input Scale: {input_scale}")
print(f"Input Zero Point: {input_zero_point}")
print(f"Output Scale: {output_scale}")
print(f"Output Zero Point: {output_zero_point}")

# Print input statistics summary
print("\nInput Statistics Summary:")
print(f"Mean Range: [{np.min(input_mean):.4f}, {np.max(input_mean):.4f}]")
print(f"Std Range: [{np.min(input_std):.4f}, {np.max(input_std):.4f}]")
print(f"Input Range: [{np.min(input_min):.4f}, {np.max(input_max):.4f}]")


TFLite Model Classification Report:
              precision    recall  f1-score   support

           0    0.86765   0.85507   0.86131        69
           1    0.94048   0.91860   0.92941        86
           2    0.83516   0.93827   0.88372        81
           3    0.88000   0.84615   0.86275        78
           4    0.91463   0.87209   0.89286        86

    accuracy                        0.88750       400
   macro avg    0.88758   0.88604   0.88601       400
weighted avg    0.88924   0.88750   0.88755       400


Detailed evaluation results saved to: GeneratedTfLiteFiles/model_evaluation.txt

Quantization Parameters Summary:
Input Scale: 0.028471175581216812
Input Zero Point: 2
Output Scale: 0.00390625
Output Zero Point: -128

Input Statistics Summary:
Mean Range: [-0.0118, 0.0335]
Std Range: [0.9939, 1.0103]
Input Range: [-4.0525, 3.9369]


INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
