In [6]:
import numpy as np
from tensorflow.lite.python.interpreter import Interpreter

interp = Interpreter(model_path="asl_model_quantized_int8.tflite")
interp.allocate_tensors()

print(f"{'Tensor':40s}  {'N':>6s} \t\t\t\t {'Scale':>8s} \t\t\t\t {'ZeroPt':>6s}")
print("-"*166)
for detail in interp.get_tensor_details():
    name  = detail["name"]
    shape = detail["shape"]
    N     = int(np.prod(shape)) if shape is not None else 0

    # Use the top‑level 'quantization' tuple when available
    try:
        scale, zero_point = detail["quantization"]
    except KeyError:
        # Fallback to the parameters dict
        scales = detail["quantization_parameters"]["scales"]
        zps    = detail["quantization_parameters"]["zero_points"]
        scale = float(scales[0])   if len(scales)  > 0 else None
        zero_point = int(zps[0])   if len(zps)     > 0 else None

    print(f"{name:40s}  {N:6d} \t\t\t\t {str(scale):>8s} \t\t\t\t {str(zero_point):>6s}")


Tensor                                         N 				    Scale 				 ZeroPt
----------------------------------------------------------------------------------------------------------------------------------------------------------------------
serving_default_keras_tensor_30:0         120000 				 0.003921568859368563 				      0
arith.constant                                 2 				      0.0 				      0
tfl.pseudo_qconst                             29 				      0.0 				      0
tfl.pseudo_qconst1                         14848 				      0.0 				      0
tfl.pseudo_qconst2                           512 				      0.0 				      0
tfl.pseudo_qconst3                        37748736 				      0.0 				      0
tfl.pseudo_qconst4                           512 				 0.005283291917294264 				    113
tfl.pseudo_qconst5                           512 				 0.1490822583436966 				   -128
tfl.pseudo_qconst6                           512 				      0.0 				      0
tfl.pseudo_qconst7                        23592

In [14]:
import json, numpy as np, re, os
from tensorflow.lite.python.interpreter import Interpreter

def sanitize(name):
    # turn “conv2d/weights:0” → “conv2d_weights_0”
    return re.sub(r'[^0-9a-zA-Z_]', '_', name)

# Load and prepare interpreter
interp = Interpreter(model_path="asl_model_quantized_int8.tflite")
interp.allocate_tensors()

# Header preamble
os.makedirs("cpp_include", exist_ok=True)
hdr = [
    "#pragma once",
    "#include <cstdint>",
    "",
    "// Auto‑generated model parameters",
]

for detail in interp.get_tensor_details():
    name   = detail["name"]
    scales = detail["quantization_parameters"]["scales"]
    # Only keep real int8‑quantized buffers
    if scales.size == 0 or scales[0] == 0.0:
        continue

    # Try to grab the actual data; skip if no buffer allocated
    idx = detail["index"]
    try:
        arr = interp.get_tensor(idx)
    except ValueError:
        print(f"  ⚠️  Skipping {name}: no data buffer")
        continue

    arr = arr.flatten()
    shape = detail["shape"]
    zps   = detail["quantization_parameters"]["zero_points"]

    sname = sanitize(name)
    dtype = arr.dtype

    # emit quant params
    hdr.append(f"// --- tensor: {name}")
    hdr.append(f"static constexpr float {sname}_scale      = {float(scales[0]):.8g}f;")
    hdr.append(f"static constexpr int32_t {sname}_zero_point = {int(zps[0])};")
    # emit shape & size
    dims = ",".join(str(d) for d in shape)
    hdr.append(f"static constexpr int {sname}_shape[] = {{{dims}}};")
    hdr.append(f"static constexpr int {sname}_size  = {arr.size};")
    # emit raw data
    if dtype == np.int8:
        cpp_t = "int8_t"
    elif dtype == np.uint8:
        cpp_t = "uint8_t"
    else:
        cpp_t = "int32_t"
    vals = ",".join(str(int(x)) for x in arr)
    hdr.append(f"static const {cpp_t} {sname}_data[] = {{ {vals} }};\n")

# Write the header
with open("cpp_include/model_parameters.hpp", "w") as f:
    f.write("\n".join(hdr))

print("✅ Wrote cpp_include/model_parameters.hpp with", len(hdr), "lines")


  ⚠️  Skipping sequential_1_1/sequential_1/conv2d_1/Relu;sequential_1_1/sequential_1/conv2d_1/BiasAdd;sequential_1_1/sequential_1/conv2d_1/convolution;1: no data buffer
  ⚠️  Skipping sequential_1_1/sequential_1/batch_normalization_1/batchnorm/mul_1: no data buffer
  ⚠️  Skipping sequential_1_1/sequential_1/batch_normalization_1/batchnorm/add_1: no data buffer
  ⚠️  Skipping sequential_1_1/sequential_1/conv2d_1_2/Relu;sequential_1_1/sequential_1/conv2d_1_2/BiasAdd;sequential_1_1/sequential_1/conv2d_1_2/convolution;sequential_1_1/sequential_1/conv2d_1_2/Squeeze: no data buffer
  ⚠️  Skipping sequential_1_1/sequential_1/batch_normalization_1_2/batchnorm/mul_1: no data buffer
  ⚠️  Skipping sequential_1_1/sequential_1/batch_normalization_1_2/batchnorm/add_1: no data buffer
  ⚠️  Skipping sequential_1_1/sequential_1/max_pooling2d_1/MaxPool2d: no data buffer
  ⚠️  Skipping sequential_1_1/sequential_1/conv2d_2_1/Relu;sequential_1_1/sequential_1/conv2d_2_1/BiasAdd;sequential_1_1/sequential_1/

In [20]:
from PIL import Image
import numpy as np
from tensorflow.lite.python.interpreter import Interpreter

# 1) Load & prepare the interpreter
interp = Interpreter(
    model_path="asl_model_quantized_int8.tflite",
    experimental_preserve_all_tensors=True
)
interp.allocate_tensors()

# 2) Load your JPG and resize to 200×200 RGB
img = Image.open("asl_alphabet_test/C_test.jpg").convert("RGB")
img = img.resize((200, 200), Image.BILINEAR)

# 3) Turn into a (1,200,200,3) uint8 array
arr = np.array(img, dtype=np.uint8)
arr = arr[np.newaxis, ...]   # add batch dimension → shape (1,200,200,3)

# 4) Find the input tensor index & set it
input_details = interp.get_input_details()
idx = input_details[0]["index"]          # usually 0
# (you can also check input_details[0]["shape"] to confirm)
interp.set_tensor(idx, arr)

# 5) Invoke & then dump any tensor you like
interp.invoke()
out  = interp.get_tensor(input_details[0]["index"])  # just to verify
print("input round‑trip:", np.array_equal(out, arr))
output_details = interp.get_output_details()

out_info   = output_details[0]
out_index  = out_info["index"]
q_output   = interp.get_tensor(out_index)           # e.g. shape (1,N), dtype=int8/uint8
scale, zp  = out_info["quantization"]               # float scale and int zero‑point
# convert back to “real” floats if you like:
real_output = scale * (q_output.astype(np.float32) - zp)
print(real_output)

input round‑trip: True
[[0.         0.         0.99609375 0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.        ]]


    TF 2.20. Please use the LiteRT interpreter from the ai_edge_litert package.
    See the [migration guide](https://ai.google.dev/edge/litert/migration)
    for details.
    


In [None]:
import numpy as np
from tflite_runtime.interpreter import Interpreter   # <— note the change!

# 1) Load the interpreter WITHOUT any delegates and preserve intermediates
interp = Interpreter(
    model_path="asl_model_quantized_int8.tflite",
    experimental_preserve_all_tensors=True
    # no 'experimental_delegates' here – tflite-runtime has none by default
)
interp.allocate_tensors()

# 2) Build a dict of tensor details
tensor_details = { d["index"]: d for d in interp.get_tensor_details() }

# 3) Fetch the raw operator list
ops = interp._get_ops_details()

print("\nExecution steps (pure‑TFLite CPU ops):\n")
for step, op in enumerate(ops):
    print(f"Step {step:2d}: {op['op_name']}")
    def desc(tidx, role):
        d = tensor_details[tidx]
        shape = tuple(d["shape"])
        dtype = np.dtype(d["dtype"]).name
        q = d.get("quantization")
        if q:
            scale, zp = q
        else:
            scales = d["quantization_parameters"]["scales"]
            zps    = d["quantization_parameters"]["zero_points"]
            scale = float(scales[0]) if len(scales)>0 else None
            zp    = int(zps[0])    if len(zps)>0    else None
        return f"    {role} tensor #{tidx}: “{d['name']}” shape={shape}, type={dtype}, scale={scale}, zp={zp}"
    for tid in op["inputs"]:
        print(desc(tid, "IN "))
    for tid in op["outputs"]:
        print(desc(tid, "OUT"))
    print()



Execution steps (pure‑TFLite CPU ops):

Step  0: QUANTIZE
    IN  tensor #0: “serving_default_keras_tensor_30:0” shape=(1, 200, 200, 3), type=uint8, scale=0.003921568859368563, zp=0
    OUT tensor #38: “tfl.quantize” shape=(1, 200, 200, 3), type=int8, scale=0.003921568859368563, zp=-128

Step  1: CONV_2D
    IN  tensor #38: “tfl.quantize” shape=(1, 200, 200, 3), type=int8, scale=0.003921568859368563, zp=-128
    IN  tensor #37: “tfl.pseudo_qconst35” shape=(64, 3, 3, 3), type=int8, scale=0.0, zp=0
    IN  tensor #36: “tfl.pseudo_qconst34” shape=(64,), type=int32, scale=0.0, zp=0
    OUT tensor #39: “sequential_1_1/sequential_1/conv2d_1/Relu;sequential_1_1/sequential_1/conv2d_1/BiasAdd;sequential_1_1/sequential_1/conv2d_1/convolution;1” shape=(1, 200, 200, 64), type=int8, scale=0.14930161833763123, zp=-128

Step  2: MUL
    IN  tensor #39: “sequential_1_1/sequential_1/conv2d_1/Relu;sequential_1_1/sequential_1/conv2d_1/BiasAdd;sequential_1_1/sequential_1/conv2d_1/convolution;1” shape=(1,

In [8]:
import os, json, numpy as np
from tensorflow.lite.python.interpreter import Interpreter
from PIL import Image
import re

def sanitize(name):
    return re.sub(r'[^0-9a-zA-Z_]', '_', name)

# 1) Load & prepare the interpreter
interp = Interpreter(
    model_path="asl_model_quantized_int8.tflite",
    experimental_preserve_all_tensors=True
)
interp.allocate_tensors()

# 2) Load your JPEG, resize to 200×200, and set it as the input
img = Image.open("asl_alphabet_test/C_test.jpg").convert("RGB").resize((200,200))
input_arr = np.array(img, dtype=np.uint8)[None, ...]   # shape (1,200,200,3)

in_idx = interp.get_input_details()[0]["index"]
interp.set_tensor(in_idx, input_arr)

# 3) Run the graph so QUANTIZE and all other ops execute
interp.invoke()

# 4) Now dump to JSON
os.makedirs("data", exist_ok=True)
model_json = {}

for detail in interp.get_tensor_details():
    name   = detail["name"]
    scales = detail["quantization_parameters"]["scales"]
    zps    = detail["quantization_parameters"]["zero_points"]

    # only keep truly‑quantized tensors
    if scales.size == 0 or scales[0] == 0.0:
        continue

    idx = detail["index"]
    try:
        arr = interp.get_tensor(idx)
    except ValueError:
        continue

    entry = {
      "shape":      [int(d) for d in detail["shape"]],
      "dtype":      np.dtype(detail["dtype"]).name,
      "scale":      float(scales[0]),
      "zero_point": int(zps[0]),
      "data":       arr.flatten().tolist()
    }
    model_json[name] = entry

with open("data/model_dump.json", "w") as f:
    json.dump(model_json, f, indent=2)

print(f"Wrote {len(model_json)} tensors to data/model_dump.json")


Wrote 71 tensors to data/model_dump.json


In [11]:
py_out = None
# 1) Load the C++ output
import numpy as np
from tensorflow.lite.python.interpreter import Interpreter

# 1) Load the C++ output
cpp_path = "../CPP_Model/cpp_out/tfl_quantize_output.bin"
cpp_out  = np.fromfile(cpp_path, dtype=np.int8)

# 2) Run the Python TFLite quantize op
interp = Interpreter(
    model_path="asl_model_quantized_int8.tflite",
    experimental_preserve_all_tensors=True
)
interp.allocate_tensors()

# (Re‑set the same input as you used in C++)
from PIL import Image
img = Image.open("asl_alphabet_test/C_test.jpg").convert("RGB").resize((200,200))
inp = np.array(img, np.uint8)[None,...]
interp.set_tensor(interp.get_input_details()[0]["index"], inp)

interp.invoke()

# find the tfl.quantize tensor
py_out = None
for d in interp.get_tensor_details():
    if d["name"] == "tfl.quantize":
        py_out = interp.get_tensor(d["index"]).astype(np.int8).flatten()
        break
assert py_out is not None, "tfl.quantize tensor not found"

# 3) Print first and last 3 values
def fmt(arr):
    return ", ".join(f"{int(x)}" for x in arr)

print("First 3 values:")
print(f"  Python: {fmt(py_out[:3])}")
print(f"   C++  : {fmt(cpp_out[:3])}")

print("Last  3 values:")
print(f"  Python: {fmt(py_out[-3:])}")
print(f"   C++  : {fmt(cpp_out[-3:])}")

cpp_out  = np.fromfile(cpp_path, dtype=np.int8)

for d in interp.get_tensor_details():
    if d["name"] == "tfl.quantize":
        print(d["name"])
        py_out = interp.get_tensor(d["index"]).astype(np.int8).flatten()
        break
assert py_out is not None, "tfl.quantize tensor not found"


# 3) Check shapes match
if py_out.shape != cpp_out.shape:
    raise ValueError(f"Shape mismatch: python {py_out.shape}, cpp {cpp_out.shape}")

# 4) Compare
diff = py_out.astype(int) - cpp_out.astype(int)
n_mismatch = np.count_nonzero(diff)
max_err    = np.max(np.abs(diff)) if n_mismatch else 0

print(f"Total elements : {py_out.size}")
print(f"Mismatches     : {n_mismatch}")
print(f"Maximum error  : {max_err}")

if n_mismatch:
    idx0 = np.nonzero(diff)[0][0]
    print(f"\nFirst mismatch at index {idx0}:")
    print(f"  Python: {py_out[idx0]} (0x{py_out[idx0]&0xFF:02X})")
    print(f"  C++   : {cpp_out[idx0]} (0x{cpp_out[idx0]&0xFF:02X})")

First 3 values:
  Python: 54, 38, 12
   C++  : -74, -90, -116
Last  3 values:
  Python: -60, -69, -79
   C++  : 68, 59, 49
tfl.quantize
Total elements : 120000
Mismatches     : 120000
Maximum error  : 128

First mismatch at index 0:
  Python: 54 (0x36)
  C++   : -74 (0xB6)


In [21]:
import json
import numpy as np
from PIL import Image

# 1) Load the JSON dump
with open("data/model_dump.json") as f:
    model = json.load(f)

# 2) Extract the quantized input array
inp_meta = model["serving_default_keras_tensor_30:0"]
ref = np.array(inp_meta["data"], dtype=np.uint8)


# 3) Load your C++ output
cpp = np.fromfile("../CPP_Model/cpp_out/tfl_quantize_output.bin", dtype=np.uint8)

print("First  3 ref :", ref[:3].tolist(), "  cpp :", cpp[:3].tolist())
print("Last   3 ref :", ref[-3:].tolist(), "  cpp :", cpp[-3:].tolist())

matches    = ref == cpp
n_total    = ref.size
n_matches  = np.count_nonzero(matches)
n_mismatch = n_total - n_matches

print(f"Total elements : {n_total}")
print(f"Matches        : {n_matches}")
print(f"Mismatches     : {n_mismatch}")


# 4) Compare
# mismatches = np.nonzero(raw != q)[0]
# print(f"\nMismatches count: {len(mismatches)}")
# if len(mismatches):
#     i = mismatches[0]
#     print(f" First mismatch at idx {i}: json={raw[i]}, calc={q[i]}")


First  3 ref : [182, 166, 140]   cpp : [182, 166, 140]
Last   3 ref : [68, 59, 49]   cpp : [68, 59, 49]
Total elements : 120000
Matches        : 120000
Mismatches     : 0
