# Weights Quantization

### Import Packages

In [1]:
import tensorflow as tf
import numpy as np
# Local packages
from ml_project_util.path import path_definition # type: error
from ml_project_util.flatten_model import flatten_condtitional # type: error
from ml_project_util.model_evaluation import model_evaluation_precise # type: error
from ml_project_util.quantization_util import wt_range_search, quant_weights # type: error

### Variable Paths, Execution Environments

In [2]:
BASE_PATH, PATH_DATASET, PATH_RAWDATA, PATH_JOINEDDATA, PATH_SAVEDMODELS = path_definition()
model_name = 'CD4_P2_FT_003_val0.0336'

### Load Model

In [3]:
short_name = model_name[:-10]
parent_name = model_name[:3]
filepath = f'{PATH_SAVEDMODELS}/{parent_name}/{model_name}.keras'
model = tf.keras.models.load_model(filepath)
model = flatten_condtitional(model, model_name)

### Evaluate Model

In [4]:
model_evaluation_precise(model)

Found 24997 files belonging to 2 classes.
Using 4999 files for validation.
Batch Number: 0
Batch Number: 1
Batch Number: 2
Batch Number: 3
Batch Number: 4
Batch Number: 5
Batch Number: 6
Batch Number: 7
Batch Number: 8
Batch Number: 9
Batch Number: 10
Batch Number: 11
Batch Number: 12
Batch Number: 13
Batch Number: 14
Batch Number: 15
Batch Number: 16
Batch Number: 17
Batch Number: 18
Batch Number: 19
Batch Number: 20
Batch Number: 21
Batch Number: 22
Batch Number: 23
Batch Number: 24
Batch Number: 25
Batch Number: 26
Batch Number: 27
Batch Number: 28
Batch Number: 29
Batch Number: 30
Batch Number: 31
Batch Number: 32
Batch Number: 33
Batch Number: 34
Batch Number: 35
Batch Number: 36
Batch Number: 37
Batch Number: 38
Batch Number: 39
Batch Number: 40
Batch Number: 41
Batch Number: 42
Batch Number: 43
Batch Number: 44
Batch Number: 45
Batch Number: 46
Batch Number: 47
Batch Number: 48
Batch Number: 49
Batch Number: 50
Batch Number: 51
Batch Number: 52
Batch Number: 53
Batch Number: 54


### Quantize Weights and Evaluate

In [5]:
qw_model = quant_weights(model, model_name, num_bits=8, mode='eval')

Weight quantization range has been read from C:/Programming_Files/JupyterVSCode/Binary_Classification_Transfer_Learning/CatsDogs/Docs_Reports/Quant/Ranges/CD4_P2_FT_003_wt_range.json.
Found 24997 files belonging to 2 classes.
Using 4999 files for validation.
Batch Number: 0
Batch Number: 1
Batch Number: 2
Batch Number: 3
Batch Number: 4
Batch Number: 5
Batch Number: 6
Batch Number: 7
Batch Number: 8
Batch Number: 9
Batch Number: 10
Batch Number: 11
Batch Number: 12
Batch Number: 13
Batch Number: 14
Batch Number: 15
Batch Number: 16
Batch Number: 17
Batch Number: 18
Batch Number: 19
Batch Number: 20
Batch Number: 21
Batch Number: 22
Batch Number: 23
Batch Number: 24
Batch Number: 25
Batch Number: 26
Batch Number: 27
Batch Number: 28
Batch Number: 29
Batch Number: 30
Batch Number: 31
Batch Number: 32
Batch Number: 33
Batch Number: 34
Batch Number: 35
Batch Number: 36
Batch Number: 37
Batch Number: 38
Batch Number: 39
Batch Number: 40
Batch Number: 41
Batch Number: 42
Batch Number: 43
Bat

In [7]:
from ml_project_util.quantization_util import quantize_tensor_symmetric

In [6]:
range_path='0'
quant='symmetric'
mode='eval'
batch_len=2

In [8]:
if(range_path=='0'):
    BASE_PATH, _, _, _, _ = path_definition()
    short_name = model_name[:-10]
    filepath = f'{BASE_PATH}/Docs_Reports/Quant/Ranges/{short_name}_wt_range.json'
    filepath = f''
else:
    filepath = range_path
try:
    with open(filepath, 'r') as f:
        range_dict = json.load(f)
    print(f'Weight quantization range has been read from {filepath}.')
except:
    print(f'Weight quantization not found in {filepath}, searching now...')
    weight_ranges = wt_range_search(model, model_name)



Weight quantization not found in , searching now...
{
  "block1_conv1": {
    "weight": {
      "min": -0.6714000701904297,
      "max": 0.6085159182548523
    },
    "bias": {
      "min": -0.015828926116228104,
      "max": 2.0640370845794678
    }
  },
  "block1_conv2": {
    "weight": {
      "min": -0.21561293303966522,
      "max": 0.2891709506511688
    },
    "bias": {
      "min": -1.027151346206665,
      "max": 0.9052184224128723
    }
  },
  "block2_conv1": {
    "weight": {
      "min": -0.33594822883605957,
      "max": 0.41661107540130615
    },
    "bias": {
      "min": -0.17922063171863556,
      "max": 0.36547425389289856
    }
  },
  "block2_conv2": {
    "weight": {
      "min": -0.1819043755531311,
      "max": 0.277375727891922
    },
    "bias": {
      "min": -0.5953347682952881,
      "max": 0.6337577700614929
    }
  },
  "block3_conv1": {
    "weight": {
      "min": -0.183063343167305,
      "max": 0.5444108247756958
    },
    "bias": {
      "min": -0.200

In [13]:
# Clone weights to new model
for layer in model.layers:
    if hasattr(layer, "get_weights") and hasattr(layer, "set_weights"):
        weights = layer.get_weights()
        if weights and layer.name in weight_ranges:
            layer_ranges = weight_ranges[layer.name]['weight']
            print(layer_ranges['min'])
            print(weights)
            new_weights = [
                quantize_tensor_symmetric(w, w_range)
                for w, w_range in zip(weights, layer_ranges)
            ]
            layer.set_weights(new_weights)

# evaluate new model
if(mode=='eval'):
    model_evaluation_precise(model, batch_len=batch_len)

-0.6714000701904297
[array([[[[ 4.29470569e-01,  1.17273867e-01,  3.40129584e-02, ...,
          -1.32241577e-01, -5.33475243e-02,  7.57738389e-03],
         [ 5.50379455e-01,  2.08774377e-02,  9.88311544e-02, ...,
          -8.48205537e-02, -5.11389151e-02,  3.74943428e-02],
         [ 4.80015397e-01, -1.72696680e-01,  3.75577137e-02, ...,
          -1.27135560e-01, -5.02991639e-02,  3.48965675e-02]],

        [[ 3.73466998e-01,  1.62062630e-01,  1.70863140e-03, ...,
          -1.48207128e-01, -2.35300660e-01, -6.30356818e-02],
         [ 4.40074533e-01,  4.73412387e-02,  5.13819456e-02, ...,
          -9.88498852e-02, -2.96195745e-01, -7.04357103e-02],
         [ 4.08547401e-01, -1.70375049e-01, -4.96297423e-03, ...,
          -1.22360572e-01, -2.76450396e-01, -3.90796512e-02]],

        [[-6.13601133e-02,  1.35693997e-01, -1.15694344e-01, ...,
          -1.40158370e-01, -3.77666801e-01, -3.00509870e-01],
         [-8.13870355e-02,  4.18543853e-02, -1.01763301e-01, ...,
          -9.

TypeError: string indices must be integers, not 'str'

In [19]:
weight_ranges = wt_range_search(model, model_name)

{
  "block1_conv1": {
    "weight": {
      "min": -0.6714000701904297,
      "max": 0.6085159182548523
    },
    "bias": {
      "min": -0.015828926116228104,
      "max": 2.0640370845794678
    }
  },
  "block1_conv2": {
    "weight": {
      "min": -0.21561293303966522,
      "max": 0.2891709506511688
    },
    "bias": {
      "min": -1.027151346206665,
      "max": 0.9052184224128723
    }
  },
  "block2_conv1": {
    "weight": {
      "min": -0.33594822883605957,
      "max": 0.41661107540130615
    },
    "bias": {
      "min": -0.17922063171863556,
      "max": 0.36547425389289856
    }
  },
  "block2_conv2": {
    "weight": {
      "min": -0.1819043755531311,
      "max": 0.277375727891922
    },
    "bias": {
      "min": -0.5953347682952881,
      "max": 0.6337577700614929
    }
  },
  "block3_conv1": {
    "weight": {
      "min": -0.183063343167305,
      "max": 0.5444108247756958
    },
    "bias": {
      "min": -0.20097896456718445,
      "max": 0.34949612617492676
   

In [None]:
def quantize_tensor_asymmetric_old(w, w_range, num_bits=8):
    qmin = 0
    qmax = 2**num_bits - 1

    w_min = w_range["min"]
    w_max = w_range["max"]

    # Avoid divide by zero
    if w_max == w_min:
        return w  # all weights are same

    scale = (w_max - w_min) / (qmax - qmin)
    zero_point = round(-w_min / scale)

    # Quantize
    q = np.round(w / scale + zero_point)
    q = np.clip(q, qmin, qmax)

    # Dequantize
    w_dequant = (q - zero_point) * scale
    return w_dequant

In [21]:
def quantize_tensor_symmetric(w, w_range, num_bits=8):
    qmin = -(2 ** (num_bits - 1) - 1)  # -127 for int8
    qmax = (2 ** (num_bits - 1) - 1)   # +127 for int8

    w_min = w_range["min"]
    w_max = w_range["max"]

    # Use symmetric range centered at 0
    max_abs = max(abs(w_min), abs(w_max))

    if max_abs == 0:
        return np.zeros_like(w)

    scale = max_abs / qmax  # ensure 0 maps to 0, and max_abs maps to ±127

    # Quantize
    q = np.round(w / scale)
    q = np.clip(q, qmin, qmax)

    # Dequantize
    w_dequant = q * scale

    return w_dequant

In [22]:
for layer in model.layers:
    if hasattr(layer, "get_weights") and hasattr(layer, "set_weights"):
        weights = layer.get_weights()
        if weights and layer.name in weight_ranges:
            layer_ranges = weight_ranges[layer.name]
            new_weights = [
                quantize_tensor_symmetric(w, w_range)
                for w, w_range in zip(weights, layer_ranges)
            ]
            layer.set_weights(new_weights)

In [9]:
# import numpy as np

# def quantize_weights(weights, num_bits=8):
#     # Calculate quantization range
#     qmin = -2**(num_bits - 1)
#     qmax = 2**(num_bits - 1) - 1

#     scale = np.max(np.abs(weights)) / qmax
#     if scale == 0:
#         return weights  # No quantization needed

#     # Quantize and dequantize
#     quantized = np.round(weights / scale)
#     quantized = np.clip(quantized, qmin, qmax)
#     dequantized = quantized * scale
#     return dequantized

# # Apply quantization layer by layer
# for layer in model.layers:
#     if hasattr(layer, "get_weights") and hasattr(layer, "set_weights"):
#         weights = layer.get_weights()
#         if weights:
#             quantized_weights = [quantize_weights(w) for w in weights]
#             layer.set_weights(quantized_weights)

In [11]:
model_evaluation_precise(model)

Found 24997 files belonging to 2 classes.
Using 4999 files for validation.
Batch Number: 0
Batch Number: 1
Batch Number: 2
Batch Number: 3
Batch Number: 4
Batch Number: 5
Batch Number: 6
Batch Number: 7
Batch Number: 8
Batch Number: 9
Batch Number: 10
Batch Number: 11
Batch Number: 12
Batch Number: 13
Batch Number: 14
Batch Number: 15
Batch Number: 16
Batch Number: 17
Batch Number: 18
Batch Number: 19
Batch Number: 20
Batch Number: 21
Batch Number: 22
Batch Number: 23
Batch Number: 24
Batch Number: 25
Batch Number: 26
Batch Number: 27
Batch Number: 28
Batch Number: 29
Batch Number: 30
Batch Number: 31
Batch Number: 32
Batch Number: 33
Batch Number: 34
Batch Number: 35
Batch Number: 36
Batch Number: 37
Batch Number: 38
Batch Number: 39
Batch Number: 40
Batch Number: 41
Batch Number: 42
Batch Number: 43
Batch Number: 44
Batch Number: 45
Batch Number: 46
Batch Number: 47
Batch Number: 48
Batch Number: 49
Batch Number: 50
Batch Number: 51
Batch Number: 52
Batch Number: 53
Batch Number: 54


---

### Create new model with fake quantization layers

In [23]:
# Float range dictionary path
range_name = model_name[:-10]
range_dict_path = f'{BASE_PATH}/Docs_Reports/Quant/Ranges/{range_name}_activation_range.json'

In [24]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, Dense

# Custom FakeQuantLayer simulates quantization but preserves shape
class FakeQuantLayer(tf.keras.layers.Layer):
    def __init__(self, min_val=-6.0, max_val=6.0):
        super().__init__()
        self.min_val = min_val
        self.max_val = max_val

    def call(self, inputs):
        return tf.quantization.fake_quant_with_min_max_vars(inputs, min=self.min_val, max=self.max_val)
    
class SymmetricFakeQuantLayer(tf.keras.layers.Layer):
    def __init__(self, max_abs_val=6.0, num_bits=8, narrow_range=True, **kwargs):
        super().__init__(**kwargs)
        self.max_abs_val = max_abs_val
        self.min_val = -max_abs_val
        self.max_val = max_abs_val
        self.num_bits = num_bits
        self.narrow_range = narrow_range  # Set to True for signed int8 [-127, 127]

    def call(self, inputs):
        return tf.quantization.fake_quant_with_min_max_vars(
            inputs,
            min=self.min_val,
            max=self.max_val,
            num_bits=self.num_bits,
            narrow_range=self.narrow_range
        )

def clone_model_with_fake_quant(original_model, input_shape, range_dict):
    new_model = Sequential()
    layer_mapping = []
    quant_layers_list = list(range_dict.keys())

    # Add input layer explicitly
    new_model.add(tf.keras.Input(shape=input_shape))

    quant_layer = 0
    for layer in original_model.layers:
        config = layer.get_config()
        cloned_layer = layer.__class__.from_config(config)
        # Insert fake quant after Conv2D or Dense
        if isinstance(cloned_layer, (Conv2D, Dense)):
            tmp_min = range_dict[quant_layers_list[quant_layer]]['min']
            tmp_max = range_dict[quant_layers_list[quant_layer]]['max']
            abs_max = abs(tmp_min) if abs(tmp_min)>tmp_max else tmp_max
            #new_model.add(FakeQuantLayer(min_val=tmp_min, max_val=tmp_max))
            new_model.add(SymmetricFakeQuantLayer(max_abs_val=abs_max))
            quant_layer = quant_layer + 1
        # Clone layer from config
        new_model.add(cloned_layer)
        layer_mapping.append((layer, cloned_layer))

    # Build model by running dummy data through it
    dummy_input = tf.random.uniform((1, *input_shape))
    new_model(dummy_input)

    # Copy weights from original layers to cloned layers
    for orig_layer, cloned_layer in layer_mapping:
        if orig_layer.weights and cloned_layer.weights:
            try:
                cloned_layer.set_weights(orig_layer.get_weights())
            except ValueError as e:
                print(f"Skipping weights for layer {orig_layer.name} due to mismatch: {e}")

    new_model.build(input_shape=(None, *input_shape))  # Step 2

    dummy_input = tf.random.uniform((1, *input_shape))  # Step 3
    new_model(dummy_input)

    print("New model input shape:", new_model.input_shape)  # Step 4

    for orig_layer, cloned_layer in layer_mapping:
        try:
            cloned_layer.set_weights(orig_layer.get_weights())
        except Exception as e:
            print(f"Skipping weights for {orig_layer.name}: {e}")

    return new_model

In [25]:
import json

input_shape = (224, 224, 3)
try:
    with open(range_dict_path, 'r') as file:
        range_dict = json.load(file)
except:
    print('No float range dictionary found!')
quant_aware_model = clone_model_with_fake_quant(model, input_shape, range_dict)
quant_aware_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
quant_aware_model.summary()

New model input shape: (None, 224, 224, 3)


In [26]:
model_evaluation_precise(quant_aware_model)

Found 24997 files belonging to 2 classes.
Using 4999 files for validation.
Batch Number: 0
Batch Number: 1
Batch Number: 2
Batch Number: 3
Batch Number: 4
Batch Number: 5
Batch Number: 6
Batch Number: 7
Batch Number: 8
Batch Number: 9
Batch Number: 10
Batch Number: 11
Batch Number: 12
Batch Number: 13
Batch Number: 14
Batch Number: 15
Batch Number: 16
Batch Number: 17
Batch Number: 18
Batch Number: 19
Batch Number: 20
Batch Number: 21
Batch Number: 22
Batch Number: 23
Batch Number: 24
Batch Number: 25
Batch Number: 26
Batch Number: 27
Batch Number: 28
Batch Number: 29
Batch Number: 30
Batch Number: 31
Batch Number: 32
Batch Number: 33
Batch Number: 34
Batch Number: 35
Batch Number: 36
Batch Number: 37
Batch Number: 38
Batch Number: 39
Batch Number: 40
Batch Number: 41
Batch Number: 42
Batch Number: 43
Batch Number: 44
Batch Number: 45
Batch Number: 46
Batch Number: 47
Batch Number: 48
Batch Number: 49
Batch Number: 50
Batch Number: 51
Batch Number: 52
Batch Number: 53
Batch Number: 54
