# Importing libraries

In [None]:
import numpy as np
import os
import pickle
import tensorflow as tf
from tensorflow_model_optimization.quantization.keras import vitis_inspect, vitis_quantize
from tensorflow_model_optimization.python.core.quantization.keras.vitis.utils import model_utils
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.optimizers import Adam, SGD

2025-01-06 06:13:59.481050: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /opt/xilinx/xrt/lib:/usr/lib:/usr/lib/x86_64-linux-gnu
2025-01-06 06:13:59.481090: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


# Function to load the CIFAR10 dataset (after downloading it locally)

In [2]:
def load_cifar10(data_dir):
    """Load the CIFAR-10 "python version" batch files stored in `data_dir`.

    Reads `data_batch_1` .. `data_batch_5` and `test_batch`; each file is a
    pickle whose byte-string keys b'data' and b'labels' hold the flattened
    images and their labels.

    Args:
        data_dir: Directory containing the six batch files.

    Returns:
        ((x_train, y_train), (x_test, y_test)), where the image arrays are
        reshaped to (N, 32, 32, 3) and the labels are 1-D arrays.
    """
    def _read_batch(name):
        # NOTE: pickle can execute arbitrary code; only load trusted files.
        with open(os.path.join(data_dir, name), 'rb') as handle:
            return pickle.load(handle, encoding='bytes')

    def _to_nhwc(flat_images):
        # Rows are stored channel-first (3, 32, 32); move channels last.
        return flat_images.reshape(-1, 3, 32, 32).transpose(0, 2, 3, 1)

    # The training set is split across five batch files.
    train_batches = [_read_batch(f"data_batch_{idx}") for idx in range(1, 6)]
    x_train = _to_nhwc(np.vstack([part[b'data'] for part in train_batches]))
    y_train = np.array([label
                        for part in train_batches
                        for label in part[b'labels']])

    # The test set lives in a single file.
    held_out = _read_batch("test_batch")
    x_test = _to_nhwc(held_out[b'data'])
    y_test = np.array(held_out[b'labels'])

    return (x_train, y_train), (x_test, y_test)

# Actually loading CIFAR10

In [3]:
# CIFAR-10 dataset
(train_images, train_labels), (test_images, test_labels) = load_cifar10("cifar-10-batches-py")
print(train_images.shape)
# Ensure shape 32 W x 32 H x 3 channels for each image
train_images = train_images.reshape(train_images.shape[0], 32, 32, 3)
test_images = test_images.reshape(test_images.shape[0], 32, 32, 3)

# Scale pixel values to the range 0-1
train_images = train_images.astype('float32') / 255.0
test_images = test_images.astype('float32') / 255.0

# One-hot encode the labels (10 classes).
# The training labels must be encoded from train_labels; encoding test_images
# here (as before) replaced the labels with one-hot encoded pixel data.
train_labels = to_categorical(train_labels, 10)
test_labels = to_categorical(test_labels, 10)

(50000, 32, 32, 3)


# Loading a trained model

In [9]:
# Input shape used by model.build() below and reused by the inspection and
# quantization cells: 32x32 images with 3 channels, batch dimension left as None.
input_shape = (None, 32, 32, 3)
# Load the trained Keras model from its HDF5 file.
model = load_model('no_avg_pool_model.h5')
model.build(input_shape=input_shape)
# Print the layer-by-layer summary of the loaded model.
model.summary()

# NOTE(review): `opt` is not referenced by any other cell visible in this
# notebook - confirm whether it is still needed.
opt = SGD(learning_rate=0.1, momentum=0.9, decay=1e-4)

Model: "model_2"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_3 (InputLayer)           [(None, 32, 32, 3)]  0           []                               
                                                                                                  
 conv2d_40 (Conv2D)             (None, 32, 32, 64)   1792        ['input_3[0][0]']                
                                                                                                  
 batch_normalization_40 (BatchN  (None, 32, 32, 64)  256         ['conv2d_40[0][0]']              
 ormalization)                                                                                    
                                                                                                  
 re_lu_34 (ReLU)                (None, 32, 32, 64)   0           ['batch_normalization_40[0]

# Inspecting the model for DPU compatibility (following the guide, 17:57 in the video; inspection only)

In [5]:
# DPU target the model is inspected against; the commented-out alternative
# points at an arch.json file instead of a target name.
target_dpu = 'DPUCZDX8G_ISA1_B4096'
#target_dpu = "/opt/vitis_ai/compiler/arch/DPUCZDX8G/KV260/arch.json"
inspector = vitis_inspect.VitisInspector(target = target_dpu)

# Create the output directory for the inspection artefacts; exist_ok makes the
# cell safe to re-run without a separate existence check.
os.makedirs("inspect", exist_ok=True)

# Inspect only: writes the inspected model, an SVG plot of the model and a
# text report into the "inspect" directory.
inspector.inspect_model(model,
                        input_shape = input_shape,
                        dump_model = True,
                        dump_model_file = "inspect/inspected_model.h5",
                        plot = True,
                        plot_file = "inspect/model_plot.svg",
                        dump_results = True,
                        dump_results_file = "inspect/inspect_results.txt",
                        verbose = 0)

[VAI INFO] Update include_bias_corr: False
[VAI INFO] Update include_fast_ft: False
[VAI INFO] Update include_cle: False
[VAI INFO] Layer dense_2(Dense): Dense<softmax> not supported, supported act types are ['relu', 'leaky_relu', 'relu_six']
[VAI INFO] Standalone activation `softmax` layer dense_2_softmax is not supported.
[VAI INFO] Inspect Results:
[MODEL INFO]:
________________________________________________________________________________________________________________________
Model Name: model_2
________________________________________________________________________________________________________________________
ID          Name                    Type                    Device      Notes                                           
0/67        input_1                 InputLayer              INPUT                                                       
------------------------------------------------------------------------------------------------------------------------
1/67   

# Quantize model

In [6]:
target_dpu = 'DPUCZDX8G_ISA1_B4096'
# Use the first 100 training images as the calibration set.
calib_data = train_images[:100]
# Create a tf.data.Dataset: turns the (100, 32, 32, 3) array into 100 single-image elements.
dataset = tf.data.Dataset.from_tensor_slices(calib_data)
# Debug helper (disabled):
# for item in dataset:
#     print(item)
# No extra preprocessing is needed: train_images was already scaled to 0-1 when it was loaded.
# Batch size 1: every element fed to the calibration has shape (1, 32, 32, 3).
dataset = dataset.batch(1)

# Inspect calibration data (disabled):
# for sample in dataset.take(100):
#     print("Calibration sample shape:", sample.shape)
#     print("Sample data range:", sample.numpy().min(), sample.numpy().max())

# Quantization
try:
    quantizer = vitis_quantize.VitisQuantizer(model, target=target_dpu)  # int8 according to the docs
    q_model = quantizer.quantize_model(calib_dataset=dataset,
                                       input_shape=input_shape,
                                       calib_steps=None)  # use all of the dataset
    q_model.save("quantized_model.h5")
    print("Quantized model saved successfully.")
except Exception as e:
    print("Error during quantization:", e)
    # Re-raise so the notebook stops here: swallowing the error would let the
    # following cells run against a missing (or stale) q_model.
    raise

[VAI INFO] Start CrossLayerEqualization...
[VAI INFO] CrossLayerEqualization Done.
[VAI INFO] Layer dense_2(Dense): Dense<softmax> not supported, supported act types are ['relu', 'leaky_relu', 'relu_six']
[VAI INFO] Standalone activation `softmax` layer dense_2_softmax is not supported.
[VAI INFO] Start Quantize Calibration...
[VAI INFO] Quantize Calibration Done.
[VAI INFO] Start Post-Quant Model Refinement...
[VAI INFO] Start Quantize Position Ajustment...
[VAI INFO] Quantize Position Ajustment Done.
[VAI INFO] Post-Quant Model Refninement Done.
[VAI INFO] Start Model Finalization...
[VAI INFO] Model Finalization Done.
[VAI INFO] Quantization Finished.
Quantized model saved successfully.


# Recompile & evaluate quantized model

In [7]:
# Batch size passed to q_model.evaluate() at the end of this cell.
batch_size = 1
# I don't think we need this - Zé
# NOTE(review): unclear whether "this" means batch_size or the disabled
# RMSprop settings below - confirm with the author.

#learning_rate = 0.0001
#momentum = 0
#epsilon = 1e-08

# The string literal below is a disabled compile call kept for reference; it
# is never executed.
""" q_model.compile(
    optimizer = tf.keras.optimizers.RMSprop(
        learning_rate = learning_rate,
        momentum = momentum,
        epsilon = epsilon),
    loss = tf.keras.losses.CategoricalCrossentropy(reduction = tf.keras.losses.Reduction.SUM_OVER_BATCH_SIZE),
    metrics = ['acc'])
 """

# Instead:
# compile with a dummy optimizer, since we are not training - the model is
# only evaluated in this cell.
q_model.compile(optimizer='sgd',
                loss=tf.keras.losses.CategoricalCrossentropy(),
                metrics=['accuracy'])

# Evaluate on the test set; as the last expression of the cell, the returned
# loss and accuracy values are displayed as the cell output.
q_model.evaluate(test_images, test_labels, batch_size=batch_size)



[0.6162760853767395, 0.8844000101089478]

In [13]:
#dump dataset should have batch dim = 1 according to docs:
test_dataset = tf.data.Dataset.from_tensor_slices(test_images)
test_dataset.batch(1)
quantizer.dump_model(q_model, dataset = test_images, dump_float=False)

[VAI INFO] Start Dumping...
[VAI INFO] Dumping weights/biases...
[VAI INFO] Dumping (1/42): quant_conv2d_20/kernel
[VAI INFO] Dumping (2/42): quant_conv2d_20/bias
[VAI INFO] Dumping (3/42): quant_conv2d_21/kernel
[VAI INFO] Dumping (4/42): quant_conv2d_21/bias
[VAI INFO] Dumping (5/42): quant_conv2d_22/kernel
[VAI INFO] Dumping (6/42): quant_conv2d_22/bias
[VAI INFO] Dumping (7/42): quant_conv2d_23/kernel
[VAI INFO] Dumping (8/42): quant_conv2d_23/bias
[VAI INFO] Dumping (9/42): quant_conv2d_24/kernel
[VAI INFO] Dumping (10/42): quant_conv2d_24/bias
[VAI INFO] Dumping (11/42): quant_conv2d_26/kernel
[VAI INFO] Dumping (12/42): quant_conv2d_26/bias
[VAI INFO] Dumping (13/42): quant_conv2d_27/kernel
[VAI INFO] Dumping (14/42): quant_conv2d_27/bias
[VAI INFO] Dumping (15/42): quant_conv2d_25/kernel
[VAI INFO] Dumping (16/42): quant_conv2d_25/bias
[VAI INFO] Dumping (17/42): quant_conv2d_28/kernel
[VAI INFO] Dumping (18/42): quant_conv2d_28/bias
[VAI INFO] Dumping (19/42): quant_conv2d_29/

KeyboardInterrupt: 

# Compile the DPU

In [8]:
# Compile the quantized Keras model saved earlier (quantized_model.h5) with the
# Vitis AI compiler, using the KV260 DPUCZDX8G arch.json.
# NOTE(review): presumably -o is the output directory and -n the network name
# of the generated xmodel - confirm against the vai_c_tensorflow2 help.
!vai_c_tensorflow2 -m "./quantized_model.h5" -a "/opt/vitis_ai/compiler/arch/DPUCZDX8G/KV260/arch.json" -o "xmodel" -n kria_model 

**************************************************
* VITIS_AI Compilation - Xilinx Inc.
**************************************************
[INFO] Namespace(batchsize=1, inputs_shape=None, layout='NHWC', model_files=['./quantized_model.h5'], model_type='tensorflow2', named_inputs_shape=None, out_filename='/tmp/kria_model_DPUCZDX8G_ISA1_B4096_org.xmodel', proto=None)
[INFO] tensorflow2 model: /workspace/quantized_model.h5
[INFO] keras version: 2.8.0
[INFO] Tensorflow Keras model type: functional
[INFO] parse raw model     :100%|█| 50/50 [00:00<00:00, 3970.98it/s]            
[INFO] infer shape (NHWC)  :100%|█| 81/81 [00:00<00:00, 11057.04it/s]           
[INFO] perform level-0 opt :100%|█| 2/2 [00:00<00:00, 109.51it/s]               
[INFO] perform level-1 opt :100%|█| 2/2 [00:00<00:00, 249.74it/s]               
[INFO] generate xmodel     :100%|█| 81/81 [00:00<00:00, 428.29it/s]             
[INFO] dump xmodel: /tmp/kria_model_DPUCZDX8G_ISA1_B4096_org.xmodel
[UNILOG][INFO] Compile mode: