In [3]:
import os
os.environ['CUDA_VISIBLE_DEVICES']='7'
import tensorflow as tf
import numpy as np
import cv2
import pandas as pd
import matplotlib.pyplot as plt
from easydict import EasyDict as edict
from tqdm import tqdm
# Run configuration shared by the cells below, built in one literal.
# Alternative checkpoint used previously:
#   './checkpoints/day_tw_qat_tf23/save_model_final_tflite/'
FLAGS = edict({
  # SavedModel directory handed to the TFLite converter.
  'weights': './checkpoints/day_tw_qat_tf29/save_model_0059_tflite/',
  # Only the parent directory of this path is created by the conversion cell.
  'output': './checkpoints/tmp.tflite',
  # Images are resized to input_size x input_size before calibration.
  'input_size': 608,
  'quantize_mode': 'float32',
  # Annotation file; the first whitespace-separated token of each line is
  # used as an image path.
  'dataset': "datasets/data_selection_mix/anno/val_3cls.txt",
})


In [4]:
def representative_data_gen():
  """Yield preprocessed images used as the representative / debug dataset.

  Reads the annotation file at ``FLAGS.dataset``, takes the first
  whitespace-separated token of every non-blank line as an image path,
  shuffles the paths with a fixed seed (0) and walks over at most 50 of them.
  Each readable image is converted BGR->RGB, resized to
  ``FLAGS.input_size`` x ``FLAGS.input_size``, scaled by 1/255 and yielded.

  Paths that do not exist on disk, or that OpenCV cannot decode, are skipped;
  the progress bar still advances for them.

  Yields:
    list: a one-element list holding a float32 array with a leading axis of
    length 1 added in front of the resized image.
  """
  len_img = 50
  # Read inside a context manager so the handle is closed promptly, and drop
  # blank lines: `''.split()[0]` would raise IndexError.
  with open(FLAGS.dataset) as anno_file:
    fimage = [line.split()[0] for line in anno_file if line.strip()]
  # Fixed seed so every call walks the same images in the same order.
  np.random.seed(0)
  np.random.shuffle(fimage)
  # Never index past the end when the annotation file lists fewer images.
  num_candidates = min(len_img, len(fimage))
  with tqdm(total=num_candidates, ncols=200) as pbar:
    for image_path in fimage[:num_candidates]:
      original_image = cv2.imread(image_path) if os.path.exists(image_path) else None
      if original_image is None:
        # Missing file, or cv2.imread could not decode it (it returns None).
        pbar.set_postfix({
          'image': ''
        })
        pbar.update(1)
        continue
      original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
      # Processing V1 (kept for reference, not used):
      # image_data = utils.image_preprocess(np.copy(original_image), [FLAGS.input_size, FLAGS.input_size])
      # Processing V2: plain resize (no letterboxing) followed by 1/255 scaling.
      image_data = cv2.resize(original_image, (FLAGS.input_size, FLAGS.input_size))
      image_data = image_data / 255.0
      img_in = image_data[np.newaxis, ...].astype(np.float32)
      pbar.set_postfix({
        'image': image_path
      })
      pbar.update(1)
      yield [img_in]

def add_statistic(filename):
  """Load a layer-statistics CSV and append two derived columns.

  Args:
    filename: path of a CSV file that has at least the columns ``scale`` and
      ``mean_squared_error``.

  Returns:
    pandas.DataFrame: the loaded table with two extra columns:
      * ``range``      = 255.0 * scale
      * ``rmse/scale`` = sqrt(mean_squared_error) / scale

  Side effects:
    Prints the first rows of the loaded table (before the new columns are
    added).

  Typical follow-up: ``layer_stats[['op_name', 'range', 'rmse/scale']].head()``.
  """
  layer_stats = pd.read_csv(filename)
  print(layer_stats.head())
  layer_stats['range'] = 255.0 * layer_stats['scale']
  # Column-wise arithmetic instead of a row-wise DataFrame.apply: same values,
  # no per-row Python call.
  layer_stats['rmse/scale'] = (
      np.sqrt(layer_stats['mean_squared_error']) / layer_stats['scale'])
  return layer_stats
  

In [5]:
# Make sure the parent directory of FLAGS.output exists. exist_ok avoids the
# check-then-create race, and the guard covers a bare filename whose dirname
# is '' (os.makedirs('') raises).
output_dir = os.path.dirname(FLAGS.output)
if output_dir:
  os.makedirs(output_dir, exist_ok=True)

# Converter configured with the default optimization and the representative
# dataset defined above.
converter = tf.lite.TFLiteConverter.from_saved_model(FLAGS.weights)
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, tf.lite.OpsSet.SELECT_TF_OPS, tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.allow_custom_ops = True
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_data_gen

# The same generator is reused as the debug dataset for the per-layer
# statistics.
debugger = tf.lite.experimental.QuantizationDebugger(
  converter=converter, debug_dataset=representative_data_gen)
debugger.run()


# Save the quantized model without debug ops. The `with` block closes the
# file handle instead of leaving it to the garbage collector.
origin_int8_model = debugger.get_nondebug_quantized_model()
with open('./origin_int8_model.tflite', 'wb') as model_file:
  num_of_bytes = model_file.write(origin_int8_model)
print(f'origin model {num_of_bytes} bytes')

# Dump the raw per-layer statistics, then write a second CSV that also holds
# the derived 'range' and 'rmse/scale' columns.
RESULTS_FILE = './debugger_results.csv'
RESULTS_FILE_V2 = './debugger_results_V2.csv'
with open(RESULTS_FILE, 'w') as f:
  debugger.layer_statistics_dump(f)
layer_stats = add_statistic(RESULTS_FILE)
layer_stats.to_csv(RESULTS_FILE_V2)


2022-06-28 12:19:34.145411: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-06-28 12:19:35.249761: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1532] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 9650 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 2080 Ti, pci bus id: 0000:8c:00.0, compute capability: 7.5
2022-06-28 12:19:40.733680: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:362] Ignored output_format.
2022-06-28 12:19:40.733736: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:365] Ignored drop_control_dependency.
2022-06-28 12:19:40.734538: I tensorflow/cc/saved_model/reader.cc:43] Reading SavedModel from: ./checkpoints/day_tw

origin model 6011416 bytes
   op_name  tensor_idx  num_elements    stddev  mean_error  max_abs_error  \
0     RELU         118     2957312.0  0.006270   -0.002940       0.122591   
1      PAD         122     2976800.0  0.000000    0.000000       0.000000   
2  CONV_2D         126     1478656.0  0.173421    0.000573       0.929668   
3     RELU         130     1478656.0  0.048481    0.006388       0.131863   
4  CONV_2D         134     1478656.0  0.110942    0.000437       0.963905   

   mean_squared_error     scale  zero_point  \
0            0.000050  0.247125        -128   
1            0.000000  0.247125        -128   
2            0.030075  0.600441          14   
3            0.002393  0.266078        -128   
4            0.012308  0.384223          27   

                                         tensor_name  
0                              model/tf.nn.relu/Relu  
1                   model/quant_zero_padding2d_1/Pad  
2  model/quant_batch_normalization_1/FusedBatchNo...  
3      

In [18]:
import subprocess
# Selective quantization experiment: the tensors named in rows [st, end) of
# `layer_stats` are passed as denylisted nodes, the resulting model is saved,
# and evaluate_map_v3.py is run on it.
# last_k=85
st=81
end=96
# Single-iteration loop kept as scaffolding for sweeping several settings.
for last_k in range(1):
    suspected_layers = list(layer_stats['tensor_name'])[st:end]
    debug_options = tf.lite.experimental.QuantizationDebugOptions(
        denylisted_nodes=suspected_layers)
    debugger = tf.lite.experimental.QuantizationDebugger(
        converter=converter,
        debug_dataset=representative_data_gen,
        debug_options=debug_options)

    filename = f'./tflite_exp/selective_int8_model_rev_{len(suspected_layers)}layer_st{st}_end{end}.tflite'
    # The export directory is not created anywhere else; without this the
    # open() below fails on a fresh checkout.
    os.makedirs(os.path.dirname(filename), exist_ok=True)
    selective_quantized_model_dbg = debugger.get_nondebug_quantized_model()
    # The context manager closes the file; no explicit close() is needed.
    with open(filename, 'wb') as f:
        num_of_bytes = f.write(selective_quantized_model_dbg)
    print(f'selective model {num_of_bytes} bytes. {len(suspected_layers)} layers')
    eval_result = subprocess.run(['python', 'evaluate_map_v3.py', '--weights', filename, '--framework', 'tflite'])
    # Report a failed evaluation instead of silently ignoring it; do not raise,
    # so the notebook keeps running as before.
    if eval_result.returncode != 0:
        print(f'evaluate_map_v3.py exited with code {eval_result.returncode}')

2022-06-28 14:52:31.275782: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:362] Ignored output_format.
2022-06-28 14:52:31.275845: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:365] Ignored drop_control_dependency.
2022-06-28 14:52:31.276086: I tensorflow/cc/saved_model/reader.cc:43] Reading SavedModel from: ./checkpoints/day_tw_qat_tf29/save_model_0059_tflite/
2022-06-28 14:52:31.311694: I tensorflow/cc/saved_model/reader.cc:81] Reading meta graph with tags { serve }
2022-06-28 14:52:31.311739: I tensorflow/cc/saved_model/reader.cc:122] Reading SavedModel debug info (if present) from: ./checkpoints/day_tw_qat_tf29/save_model_0059_tflite/
2022-06-28 14:52:31.473567: I tensorflow/cc/saved_model/loader.cc:228] Restoring SavedModel bundle.
2022-06-28 14:52:32.023832: I tensorflow/cc/saved_model/loader.cc:212] Running initialization op on SavedModel bundle at path: ./checkpoints/day_tw_qat_tf29/save_model_0059_tflite/
2022-06-28 14:52:32.240056: I

selective model 6037256 bytes. 15 layers


2022-06-28 14:53:19.468244: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


file /home/user/anaconda3/envs/WJtf29/lib/python3.8/site-packages/tensorflow/lite/python/interpreter is not end with tflite_runtime/interpreter


2022-06-28 14:53:21.694088: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-06-28 14:53:22.539882: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1532] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 508 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 2080 Ti, pci bus id: 0000:8c:00.0, compute capability: 7.5
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
Evaluating:   0%|                                                                                                                                                              | 0/1469 [00:00<?, ?it/s]

tflite
./tflite_exp/selective_int8_model_rev_15layer_st81_end96.tflite
CLASSES: {0: 'Green', 1: 'Red', 2: 'Yellow'}
./datasets/data_selection_mix/anno/val_3cls.txt   #########################################


Evaluating: 100%|███████████████| 1469/1469 [05:09<00:00,  4.75it/s, image_path=images_image_001786.jpg, correct_img=0.23(344/1469), recall_instance=0.49(1278/2630), accuracy_instance=0.42(1278/3039)]
100%|██████████| 1469/1469 [00:00<00:00, 24663.98it/s]
100%|██████████| 1469/1469 [00:00<00:00, 31192.23it/s]


Ground Truth yolo txt to coco json...
Saving JSON file to mAP/tmpzrrfmlgh/ground-truth.json
Predictions yolo txt to coco json...
Saving JSON file to mAP/tmpzrrfmlgh/predicted.json
loading annotations into memory...
Done (t=0.01s)
creating index...
index created!
Loading and preparing results...
DONE (t=0.01s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.85s).
Accumulating evaluation results...
DONE (t=0.21s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.097
category : 0 : 0.084
category : 1 : 0.126
category : 2 : 0.081
(all categories) mAP : 0.09710790000178256
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.357
category : 0 : 0.283
category : 1 : 0.379
category : 2 : 0.409
(all categories) mAP : 0.35710257398861484
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.031
category : 0 : 0.032
category : 1 : 0.057
category : 2 : 0.003
(all categ