[Algo] fix conflicts
Signed-off-by: Lu, Yintong <yintong.lu@intel.com>
yintong-lu committed Nov 17, 2023
1 parent 0a20016 commit 453125a
Showing 7 changed files with 278 additions and 77 deletions.
42 changes: 36 additions & 6 deletions neural_compressor/adaptor/pytorch.py
@@ -1764,6 +1764,7 @@ def smooth_quant(
weight_clip=True,
auto_alpha_args={"alpha_min": 0.0, "alpha_max": 1.0, "alpha_step": 0.1, "shared_criterion": "mean"},
default_alpha=0.5,
shift_bias=True, # lyt_os_debug_1011
):
"""Convert the model by smooth quant.
@@ -1818,8 +1819,9 @@ def smooth_quant(
weight_clip=weight_clip,
default_alpha=default_alpha,
auto_alpha_args=auto_alpha_args,
shift_bias=shift_bias,
**kwargs,
)
) # lyt_os_debug_1011
if self.sq.record_max_info:
model.sq_max_info = self.sq.max_value_info
return model
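For orientation, the new `shift_bias` argument added above is surfaced to users through the `smooth_quant_args` recipe keys that the `quantize()` hunks below read. A minimal usage sketch, assuming the standard `PostTrainingQuantConfig` recipe layout (the model and calibration data are placeholders, and `shift_bias` is the key introduced by this commit):

```python
# Hypothetical usage sketch, not part of this commit: enable the new
# shift_bias option through the smooth_quant_args recipe that the
# adaptor's quantize() methods check below.
import torch
from torch.utils.data import DataLoader, TensorDataset
from neural_compressor import PostTrainingQuantConfig, quantization

model = torch.nn.Sequential(torch.nn.Linear(16, 16), torch.nn.ReLU())
calib_loader = DataLoader(
    TensorDataset(torch.randn(32, 16), torch.zeros(32, dtype=torch.long)),
    batch_size=8,
)

conf = PostTrainingQuantConfig(
    recipes={
        "smooth_quant": True,
        "smooth_quant_args": {
            "alpha": 0.5,
            "folding": False,    # checked together with shift_bias in quantize()
            "shift_bias": True,  # new flag from this commit (the do_OS path)
        },
    }
)
q_model = quantization.fit(model, conf, calib_dataloader=calib_loader)
```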
@@ -1906,7 +1908,9 @@ def qdq_quantize(self, model, tune_cfg):
continue # for peft model,lora_B weights is 0.
for op_name in absorbed_layer:
module = get_module(q_model, op_name)
new_module = SQLinearWrapper(module, 1.0 / scale, input_minmax, alpha)
new_module = SQLinearWrapper(
module, 1.0 / scale, input_minmax, alpha
) # lyt_os_debug_0822 #removed_1110
set_module(q_model, op_name, new_module)
logger.debug(f"Current SmoothQuant alpha of {op_name} is {alpha}")

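For readers who have not seen `SQLinearWrapper` before, a stripped-down stand-in conveying what the call above constructs might look like the sketch below. This is not the library's class (the real one lives in `model_wrapper.py`, keeps the min/max for an observer, and handles the quantization dtype); it only illustrates that the wrapper stores `1.0 / scale` and rescales the activation before the original Linear:

```python
import torch

class ToySQLinearWrapper(torch.nn.Module):
    """Illustrative stand-in for SQLinearWrapper(module, 1.0 / scale, input_minmax, alpha)."""

    def __init__(self, module, input_scale, input_minmax, alpha=0.5):
        super().__init__()
        self.register_buffer("input_scale", input_scale)  # same buffer name as in the diff
        self.input_minmax = input_minmax  # calibration (min, max); the real wrapper feeds an observer
        self.alpha = alpha
        self.sq_linear = module

    def forward(self, x):
        # SmoothQuant: divide the activation by the migrated scale
        # (input_scale holds 1.0 / scale, so it is a multiplication here).
        return self.sq_linear(x * self.input_scale)
```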
@@ -2035,15 +2039,24 @@ def quantize(self, tune_cfg, model, dataloader, q_func=None):

# For smoothquant optimized model
recipe_cfgs = tune_cfg.get("recipe_cfgs", None)
if "smooth_quant_args" in recipe_cfgs and "shift_bias" in recipe_cfgs["smooth_quant_args"]: # lyt_os_debug_1013
do_OS = True if recipe_cfgs["smooth_quant_args"]["shift_bias"] is True else False
folding = recipe_cfgs["smooth_quant_args"]["folding"]
if (
recipe_cfgs
and recipe_cfgs.get("smooth_quant", False)
and not recipe_cfgs["smooth_quant_args"]["folding"]
and self.approach != "post_training_dynamic_quant"
and not do_OS # lyt_os_Debug_1013
):
return self.qdq_quantize(q_model, tune_cfg)

if recipe_cfgs and recipe_cfgs.get("smooth_quant", False) and recipe_cfgs["smooth_quant_args"]["folding"]:
if (
recipe_cfgs
and recipe_cfgs.get("smooth_quant", False)
and recipe_cfgs["smooth_quant_args"]["folding"]
and not do_OS
): # lyt_os_debug_1013
self._apply_pre_optimization(q_model, tune_cfg)

# For tensorboard display
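The gating introduced in the hunk above (and mirrored in the two later `quantize()` hunks) reduces to a three-way dispatch on `folding` and the new `shift_bias` flag. A condensed, illustrative rendering of that control flow, with the adaptor calls replaced by strings and a `False` default where the diff leaves `do_OS` unset:

```python
def sq_dispatch(recipe_cfgs, approach):
    """Condensed view of the branch above; illustrative only."""
    if not (recipe_cfgs and recipe_cfgs.get("smooth_quant", False)):
        return "no smooth-quant handling"

    sq_args = recipe_cfgs.get("smooth_quant_args", {})
    do_os = sq_args.get("shift_bias", False) is True   # the diff's do_OS flag
    folding = sq_args.get("folding", False)

    if not folding and approach != "post_training_dynamic_quant" and not do_os:
        return "qdq_quantize: wrap Linears with SQLinearWrapper"
    if folding and not do_os:
        return "_apply_pre_optimization: fold scales into the weights"
    return "fall through to the default PyTorch quantization flow"
```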
@@ -2694,16 +2707,20 @@ def quantize(self, tune_cfg, model, dataloader, q_func=None):
folding = False
else:
folding = recipe_cfgs["smooth_quant_args"]["folding"]
if "smooth_quant_args" in recipe_cfgs and "shift_bias" in recipe_cfgs["smooth_quant_args"]: # lyt_os_debug_1012
do_OS = True if recipe_cfgs["smooth_quant_args"]["shift_bias"] is True else False
logger.info(f"lyt_debug pytorch.py 2690 do_OS: {do_OS}, folding: {folding}") # lyt_os_debug_1012
# Update model parameter when smoothquant folding = False
if (
recipe_cfgs
and recipe_cfgs.get("smooth_quant", False)
and not folding
and self.approach != "post_training_dynamic_quant"
and not do_OS # lyt_os_debug_1012
):
return self.qdq_quantize(model, q_model, tune_cfg, dataloader, q_func)
# Update model parameter when smoothquant folding = True
if recipe_cfgs and recipe_cfgs.get("smooth_quant", False) and folding:
if recipe_cfgs and recipe_cfgs.get("smooth_quant", False) and folding and not do_OS: # lyt_os_debug_1012
self._apply_pre_optimization(model, tune_cfg)

assert (
@@ -3307,7 +3324,8 @@ def qdq_quantize(self, model, q_model, tune_cfg, dataloader, q_func):
scale = torch.clip(input_power / weight_power, min=1e-5)
for op_name in absorbed_layer:
module = copy.deepcopy(get_module(q_model._model, op_name))
new_module = SQLinearWrapper(module, 1.0 / scale, input_minmax, alpha)
new_module = SQLinearWrapper(module, 1.0 / scale, input_minmax, alpha) # lyt_os_debug_1012
# new_module = SQLinearWrapper(module, 1.0 / scale, input_minmax, alpha)
weight_scale = new_module._get_weight_scale()
smoothquant_scale_info[op_name] = {
"alpha": new_module.alpha,
@@ -3498,14 +3516,26 @@ def quantize(self, tune_cfg, model, dataloader, q_func=None):

# For smoothquant optimized model
recipe_cfgs = tune_cfg.get("recipe_cfgs", None)
if (
"smooth_quant_args" in recipe_cfgs and "shift_bias" in recipe_cfgs["smooth_quant_args"]
): # lyt_os_debug_1013-2
do_OS = True if recipe_cfgs["smooth_quant_args"]["shift_bias"] is True else False
do_OS = True
folding = recipe_cfgs["smooth_quant_args"]["folding"]
if (
recipe_cfgs
and recipe_cfgs.get("smooth_quant", False)
and not recipe_cfgs["smooth_quant_args"]["folding"]
and self.approach != "post_training_dynamic_quant"
and not do_OS # lyt_os_debug_1013-2
):
return self.qdq_quantize(q_model, tune_cfg)
if recipe_cfgs and recipe_cfgs.get("smooth_quant", False) and recipe_cfgs["smooth_quant_args"]["folding"]:
if (
recipe_cfgs
and recipe_cfgs.get("smooth_quant", False)
and recipe_cfgs["smooth_quant_args"]["folding"]
and not do_OS
): # lyt_os_debug_1013-2
self._apply_pre_optimization(q_model, tune_cfg)

self.tune_cfg = tune_cfg
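The `shift_bias` name and the `do_OS` variable suggest an outlier-suppression-style shift that gets absorbed into a layer's bias; that math is not shown in this diff, so the following is only a sketch of the underlying identity W(x - z) + (b + Wz) = Wx + b, verified on a throwaway Linear:

```python
import torch

lin = torch.nn.Linear(8, 4)
x = torch.randn(2, 8)
z = x.mean(dim=0)  # example per-channel shift

# Fold the shift z into the bias of a copy of the layer.
shifted = torch.nn.Linear(8, 4)
shifted.weight.data = lin.weight.data.clone()
shifted.bias.data = lin.bias.data + lin.weight.data @ z

# Shifting the input and compensating in the bias leaves the output unchanged.
assert torch.allclose(lin(x), shifted(x - z), atol=1e-5)
```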
2 changes: 1 addition & 1 deletion neural_compressor/adaptor/torch_utils/model_wrapper.py
@@ -106,7 +106,7 @@ def _wrap_lwq_layer(model, lwq_layers, op_cfgs):


class SQLinearWrapper(torch.nn.Module):
def __init__(self, module, input_scale, input_minmax, alpha=0.5, dtype=torch.quint8):
def __init__(self, module, input_scale, input_minmax, alpha=0.5, dtype=torch.quint8): # lyt_os_debug #removed_1110
super().__init__()
self.register_buffer("input_scale", input_scale)
self.alpha = alpha