Enhance 3.x common logger and update 3.x torch example #1783

Merged · 15 commits · May 15, 2024

Changes from all commits

43 changes: 23 additions & 20 deletions .azure-pipelines/scripts/models/env_setup.sh
@@ -83,27 +83,30 @@ if [[ "${inc_new_api}" == "false" ]]; then
 fi
 
 cd ${model_src_dir}
-pip install ruamel.yaml==0.17.40
-pip install psutil
-pip install protobuf==4.23.4
-if [[ "${framework}" == "tensorflow" ]]; then
-    if [[ "${fwk_ver}" == *"-official" ]]; then
-        pip install tensorflow==${fwk_ver%-official}
-    else
-        pip install intel-tensorflow==${fwk_ver}
+
+if [[ "${fwk_ver}" != "latest" ]]; then
+    pip install ruamel.yaml==0.17.40
+    pip install psutil
+    pip install protobuf==4.23.4
+    if [[ "${framework}" == "tensorflow" ]]; then
+        if [[ "${fwk_ver}" == *"-official" ]]; then
+            pip install tensorflow==${fwk_ver%-official}
+        else
+            pip install intel-tensorflow==${fwk_ver}
+        fi
+    elif [[ "${framework}" == "pytorch" ]]; then
+        pip install torch==${fwk_ver} -f https://download.pytorch.org/whl/torch_stable.html
+        pip install torchvision==${torch_vision_ver} -f https://download.pytorch.org/whl/torch_stable.html
+    elif [[ "${framework}" == "onnxrt" ]]; then
+        pip install onnx==1.15.0
+        pip install onnxruntime==${fwk_ver}
+    elif [[ "${framework}" == "mxnet" ]]; then
+        pip install numpy==1.23.5
+        echo "re-install pycocotools resolve the issue with numpy..."
+        pip uninstall pycocotools -y
+        pip install --no-cache-dir pycocotools
+        pip install mxnet==${fwk_ver}
     fi
-elif [[ "${framework}" == "pytorch" ]]; then
-    pip install torch==${fwk_ver} -f https://download.pytorch.org/whl/torch_stable.html
-    pip install torchvision==${torch_vision_ver} -f https://download.pytorch.org/whl/torch_stable.html
-elif [[ "${framework}" == "onnxrt" ]]; then
-    pip install onnx==1.15.0
-    pip install onnxruntime==${fwk_ver}
-elif [[ "${framework}" == "mxnet" ]]; then
-    pip install numpy==1.23.5
-    echo "re-install pycocotools resolve the issue with numpy..."
-    pip uninstall pycocotools -y
-    pip install --no-cache-dir pycocotools
-    pip install mxnet==${fwk_ver}
 fi
 
 if [ -f "requirements.txt" ]; then
28 changes: 21 additions & 7 deletions .azure-pipelines/scripts/models/run_model_trigger_common.sh
@@ -48,6 +48,13 @@ do
     esac
 done
 
+function check_results() {
+    local control_phrase=$1
+    if [ $(grep "${control_phrase}" ${log_dir}/${model}/${framework}-${model}-tune.log | wc -l) == 0 ];then
+        $BOLD_RED && echo "====== Quantization FAILED!! ======" && $RESET; exit 1
+    fi
+}
+
 log_dir="/neural-compressor/.azure-pipelines/scripts/models"
 SCRIPTS_PATH="/neural-compressor/.azure-pipelines/scripts/models"
 if [[ "${inc_new_api}" == "3x"* ]]; then
@@ -90,16 +97,19 @@ elif [ "${mode}" == "tuning" ]; then
         2>&1 | tee -a ${log_dir}/${model}/${framework}-${model}-tune.log
     $BOLD_YELLOW && echo "====== check tuning status. ======" && $RESET
     if [[ "${inc_new_api}" == "3x"* ]]; then
-        control_phrase="Quantization end."
+        control_phrase_1="Preparation end."
+        check_results $control_phrase_1
+        control_phrase_2="Conversion end."
+        check_results $control_phrase_2
     else
         control_phrase="model which meet accuracy goal."
+        check_results $control_phrase
+        if [ $(grep "${control_phrase}" ${log_dir}/${model}/${framework}-${model}-tune.log | grep "Not found" | wc -l) == 1 ];then
+            $BOLD_RED && echo "====== Quantization FAILED!! ======" && $RESET; exit 1
+        fi
     fi
-    if [ $(grep "${control_phrase}" ${log_dir}/${model}/${framework}-${model}-tune.log | wc -l) == 0 ];then
-        $BOLD_RED && echo "====== Quantization FAILED!! ======" && $RESET; exit 1
-    fi
-    if [ $(grep "${control_phrase}" ${log_dir}/${model}/${framework}-${model}-tune.log | grep "Not found" | wc -l) == 1 ];then
-        $BOLD_RED && echo "====== Quantization FAILED!! ======" && $RESET; exit 1
-    fi
 
 
     $BOLD_GREEN && echo "====== Quantization SUCCEED!! ======" && $RESET
 elif [ "${mode}" == "fp32_benchmark" ]; then
     cd ${WORK_SOURCE_DIR}/${model_src_dir}
@@ -149,6 +159,10 @@ elif [ "${mode}" == "collect_log" ]; then
     cd ${WORK_SOURCE_DIR}/${model_src_dir}
    $BOLD_YELLOW && echo "workspace ${WORK_SOURCE_DIR}/${model_src_dir}" && $RESET
    $BOLD_YELLOW && echo "====== collect logs of model ${model} =======" && $RESET
+    if [ "${framework}" == "pytorch" ] && [ "${fwk_ver}" == "latest" ]; then
+        fwk_ver=$(python -c "import torch; print(torch.__version__)")
+    fi
+
     python -u ${SCRIPTS_PATH}/collect_log_model.py \
         --framework=${framework} \
         --fwk_ver=${fwk_ver} \
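
The collect_log change above resolves the "latest" placeholder into the concrete PyTorch version only at log-collection time, so the reported metrics are tagged with the build that actually ran. A minimal Python sketch of the same resolution; the helper name is hypothetical and simply mirrors the shell snippet:

    import torch

    def resolve_fwk_ver(framework: str, fwk_ver: str) -> str:
        # Hypothetical helper: when the CI job requested the "latest" PyTorch,
        # report the version that was actually installed, e.g. "2.3.0+cpu".
        if framework == "pytorch" and fwk_ver == "latest":
            return torch.__version__
        return fwk_ver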
17 changes: 11 additions & 6 deletions .azure-pipelines/scripts/models/run_pytorch_models_trigger.sh
@@ -21,12 +21,6 @@ do
     esac
 done
 
-echo "specify FWs version..."
-source /neural-compressor/.azure-pipelines/scripts/fwk_version.sh 'latest'
-FRAMEWORK="pytorch"
-FRAMEWORK_VERSION=${pytorch_version}
-TORCH_VISION_VERSION=${torchvision_version}
-
 dataset_location=""
 input_model=""
 yaml=""
@@ -72,6 +66,17 @@ elif [ "${model}" == "opt_125m_woq_gptq_int4_dq_ggml" ]; then
     tuning_cmd="bash run_quant.sh --topology=opt_125m_woq_gptq_int4_dq_ggml"
 fi
 
+echo "Specify FWs version..."
+
+FRAMEWORK="pytorch"
+source /neural-compressor/.azure-pipelines/scripts/fwk_version.sh 'latest'
+if [[ "${inc_new_api}" == "3x"* ]]; then
+    FRAMEWORK_VERSION="latest"
+else
+    FRAMEWORK_VERSION=${pytorch_version}
+    TORCH_VISION_VERSION=${torchvision_version}
+fi
+
 
 /bin/bash run_model_trigger_common.sh \
     --yaml=${yaml} \
@@ -236,11 +236,11 @@ def get_user_model():
 
     # 3.x api
     if args.approach == 'weight_only':
-        from neural_compressor.torch.quantization import RTNConfig, GPTQConfig, quantize
+        from neural_compressor.torch.quantization import RTNConfig, GPTQConfig, prepare, convert, quantize
         from neural_compressor.torch.utils import get_double_quant_config
         weight_sym = True if args.woq_scheme == "sym" else False
         double_quant_config_dict = get_double_quant_config(args.double_quant_type)
 
         if args.woq_algo == "RTN":
             if args.double_quant_type is not None:
                 double_quant_config_dict.update(
@@ -269,9 +269,8 @@ def get_user_model():
                     double_quant_group_size=args.double_quant_group_size,
                 )
             quant_config.set_local("lm_head", RTNConfig(dtype="fp32"))
-            user_model = quantize(
-                model=user_model, quant_config=quant_config
-            )
+            user_model = prepare(model=user_model, quant_config=quant_config)
+            user_model = convert(model=user_model)
         elif args.woq_algo == "GPTQ":
             from utils import DataloaderPreprocessor
             dataloaderPreprocessor = DataloaderPreprocessor(
@@ -326,24 +325,24 @@ def run_fn_for_gptq(model, dataloader_for_calibration, *args):
                     double_quant_group_size=args.double_quant_group_size,
                 )
             quant_config.set_local("lm_head", GPTQConfig(dtype="fp32"))
-            user_model = quantize(
-                model=user_model, quant_config=quant_config, run_fn=run_fn_for_gptq, run_args=(dataloader_for_calibration, )
-            )
+            user_model = prepare(model=user_model, quant_config=quant_config)
+            run_fn_for_gptq(user_model, dataloader_for_calibration)
+            user_model = convert(user_model)
     else:
         if args.sq:
-            from neural_compressor.torch.quantization import SmoothQuantConfig, quantize
+            from neural_compressor.torch.quantization import SmoothQuantConfig
 
             # alpha can be a float number of a list of float number.
             args.alpha = args.alpha if args.alpha == "auto" else eval(args.alpha)
             if re.search("falcon", user_model.config.model_type):
                 quant_config = SmoothQuantConfig(alpha=args.alpha, folding=False)
             else:
                 quant_config = SmoothQuantConfig(alpha=args.alpha, folding=True)
 
             if re.search("gpt", user_model.config.model_type):
                 quant_config.set_local("add", SmoothQuantConfig(w_dtype="fp32", act_dtype="fp32"))
         else:
-            from neural_compressor.torch.quantization import quantize, get_default_static_config, StaticQuantConfig
+            from neural_compressor.torch.quantization import get_default_static_config, StaticQuantConfig
 
             quant_config = get_default_static_config()
             if re.search("gpt", user_model.config.model_type):
@@ -364,12 +363,23 @@ def run_fn(model):
                 except ValueError:
                     pass
             return
 
        from utils import get_example_inputs
        example_inputs = get_example_inputs(user_model, calib_dataloader)
-        user_model = quantize(
-            model=user_model, quant_config=quant_config, example_inputs=example_inputs, run_fn=run_fn
-        )
+        if args.sq:
+            # currently, smooth quant only support quantize API
+            # TODO: support prepare/convert API for smooth quant
+            from neural_compressor.torch.quantization import quantize
+
+            user_model = quantize(
+                model=user_model, quant_config=quant_config, example_inputs=example_inputs, run_fn=run_fn
+            )
+        else:
+            from neural_compressor.torch.quantization import prepare, convert
+
+            user_model = prepare(model=user_model, quant_config=quant_config, example_inputs=example_inputs)
+            run_fn(user_model)
+            user_model = convert(user_model)
     user_model.save(args.output_dir)
@@ -394,7 +404,7 @@ def run_fn(model):
     user_model.eval()
     from intel_extension_for_transformers.transformers.llm.evaluation.lm_eval import evaluate, LMEvalParser
     eval_args = LMEvalParser(
-        model="hf",
+        model="hf",
         user_model=user_model,
         tokenizer=tokenizer,
         batch_size=args.batch_size,
@@ -417,7 +427,7 @@ def run_fn(model):
 
     samples = args.iters * args.batch_size
     eval_args = LMEvalParser(
-        model="hf",
+        model="hf",
         user_model=user_model,
         tokenizer=tokenizer,
         batch_size=args.batch_size,
@@ -436,4 +446,4 @@ def run_fn(model):
     print("Accuracy: %.5f" % acc)
     print('Throughput: %.3f samples/sec' % (samples / (end - start)))
     print('Latency: %.3f ms' % ((end - start) * 1000 / samples))
-    print('Batch size = %d' % args.batch_size)
+    print('Batch size = %d' % args.batch_size)
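
For orientation, here is a minimal sketch of the prepare/convert flow the example now follows, shown with RTN (which needs no calibration run) on a toy model; the imports and call signatures come from the diff above, while the toy model and default config are illustrative assumptions:

    import torch
    from neural_compressor.torch.quantization import RTNConfig, prepare, convert

    toy_model = torch.nn.Sequential(torch.nn.Linear(64, 64), torch.nn.ReLU())
    quant_config = RTNConfig()  # default round-to-nearest weight-only settings

    toy_model = prepare(model=toy_model, quant_config=quant_config)  # stage the model
    toy_model = convert(model=toy_model)  # produce the quantized model

For algorithms that need calibration, a run function is executed between the two calls, exactly as the diff does with run_fn_for_gptq and run_fn; SmoothQuant still goes through the one-shot quantize API for now.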
4 changes: 3 additions & 1 deletion neural_compressor/common/__init__.py
@@ -17,7 +17,7 @@
     logger,
     Logger,
     TuningLogger,
-    log_quant_execution,
+    log_process,
     set_random_seed,
     set_resume_from,
     set_workspace,
@@ -32,6 +32,8 @@
     "level",
     "logger",
     "Logger",
+    "TuningLogger",
+    "log_process",
     "set_workspace",
     "set_random_seed",
     "set_resume_from",
9 changes: 9 additions & 0 deletions neural_compressor/common/utils/constants.py
@@ -45,3 +45,12 @@
 from typing import Callable, Union
 
 OP_NAME_OR_MODULE_TYPE = Union[str, Callable]
+
+# mode name
+from enum import Enum
+
+
+class Mode(Enum):
+    PREPARE = "prepare"
+    CONVERT = "convert"
+    QUANTIZE = "quantize"
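
A quick sketch of how callers consume the new enum; Mode is re-exported through neural_compressor.common.utils, as the logger diff below confirms:

    from neural_compressor.common.utils import Mode

    assert Mode.PREPARE.value == "prepare"
    assert Mode("convert") is Mode.CONVERT  # Enum members can be looked up by value

Using an Enum rather than bare strings keeps the phase names typo-safe at the call sites that follow.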
25 changes: 21 additions & 4 deletions neural_compressor/common/utils/logger.py
@@ -20,6 +20,8 @@
 import logging
 import os
 
+from neural_compressor.common.utils import Mode
+
 __all__ = [
     "level",
     "Logger",  # TODO: not expose it
@@ -140,6 +142,17 @@ def warning(msg, *args, **kwargs):
 logger = Logger
 
 
+def _get_log_msg(mode):
+    log_msg = None
+    if mode == Mode.QUANTIZE:
+        log_msg = "Quantization"
+    elif mode == Mode.PREPARE:  # pragma: no cover
+        log_msg = "Preparation"
+    elif mode == Mode.CONVERT:  # pragma: no cover
+        log_msg = "Conversion"
+    return log_msg
+
+
 class TuningLogger:
     """A unified logger for the tuning/quantization process.
 
@@ -155,12 +168,16 @@ def trial_start(cls, trial_index: int = None) -> None:
         logger.info("%d-trail started.", trial_index)
 
     @classmethod
-    def quantization_start(cls, stacklevel=2) -> None:
-        logger.info("Quantization started.", stacklevel=stacklevel)
+    def execution_start(cls, mode=Mode.QUANTIZE, stacklevel=2):
+        log_msg = _get_log_msg(mode)
+        assert log_msg is not None, "Please check `mode` in execution_start function of TuningLogger class."
+        logger.info("{} started.".format(log_msg), stacklevel=stacklevel)
 
     @classmethod
-    def quantization_end(cls, stacklevel=2) -> None:
-        logger.info("Quantization end.", stacklevel=stacklevel)
+    def execution_end(cls, mode=Mode.QUANTIZE, stacklevel=2):
+        log_msg = _get_log_msg(mode)
+        assert log_msg is not None, "Please check `mode` in execution_end function of TuningLogger class."
+        logger.info("{} end.".format(log_msg), stacklevel=stacklevel)
 
     @classmethod
     def evaluation_start(cls) -> None:
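
Taken together, the phase-aware logger now emits exactly the phrases the CI script greps for. A short usage sketch, with the log output noted in comments:

    from neural_compressor.common.utils import Mode, TuningLogger

    TuningLogger.execution_start(mode=Mode.PREPARE)  # logs "Preparation started."
    TuningLogger.execution_end(mode=Mode.PREPARE)    # logs "Preparation end."
    TuningLogger.execution_start(mode=Mode.CONVERT)  # logs "Conversion started."
    TuningLogger.execution_end(mode=Mode.CONVERT)    # logs "Conversion end."
    TuningLogger.execution_end()                     # default Mode.QUANTIZE: "Quantization end."

Note that "Preparation end." and "Conversion end." are the control phrases check_results() searches for in run_model_trigger_common.sh above, so the logger and the CI check stay in lockstep.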
27 changes: 17 additions & 10 deletions neural_compressor/common/utils/utility.py
@@ -22,15 +22,15 @@
 import cpuinfo
 import psutil
 
-from neural_compressor.common.utils import TuningLogger, logger
+from neural_compressor.common.utils import Mode, TuningLogger, logger
 
 __all__ = [
     "set_workspace",
     "set_random_seed",
     "set_resume_from",
     "set_tensorboard",
     "dump_elapsed_time",
-    "log_quant_execution",
+    "log_process",
     "singleton",
     "LazyImport",
     "CpuInfo",
@@ -206,14 +206,21 @@ def set_tensorboard(tensorboard: bool):
 default_tuning_logger = TuningLogger()
 
 
-def log_quant_execution(func):
-    def wrapper(*args, **kwargs):
-        default_tuning_logger.quantization_start(stacklevel=4)
-
-        # Call the original function
-        result = func(*args, **kwargs)
-
-        default_tuning_logger.quantization_end(stacklevel=4)
-        return result
-
-    return wrapper
+def log_process(mode=Mode.QUANTIZE):
+    def log_process_wrapper(func):
+        def inner_wrapper(*args, **kwargs):
+            start_log = default_tuning_logger.execution_start
+            end_log = default_tuning_logger.execution_end
+
+            start_log(mode=mode, stacklevel=4)
+
+            # Call the original function
+            result = func(*args, **kwargs)
+
+            end_log(mode=mode, stacklevel=4)
+
+            return result
+
+        return inner_wrapper
+
+    return log_process_wrapper
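
log_process is now a decorator factory: it takes the phase and returns the actual decorator, so a single wrapper serves prepare, convert, and quantize alike. A hedged sketch of the intended use; the decorated function here is a stand-in, not the repo's real entry point:

    from neural_compressor.common.utils import Mode
    from neural_compressor.common.utils.utility import log_process

    @log_process(mode=Mode.PREPARE)
    def prepare_model(model, quant_config):
        # Stand-in body; the real prepare() lives in the framework backends.
        return model

    # Calling prepare_model(...) logs "Preparation started." before the body runs
    # and "Preparation end." after it, replacing the old quantize-only
    # log_quant_execution decorator.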
4 changes: 2 additions & 2 deletions neural_compressor/onnxrt/quantization/autotune.py
@@ -75,10 +75,10 @@ def autotune(
         if calibration_data_reader is not None:
             calibration_data_reader.rewind()
         tuning_logger.trial_start(trial_index=trial_index)
-        tuning_logger.quantization_start()
+        tuning_logger.execution_start()
         logger.debug("quant config: {}".format(quant_config))
         q_model = _quantize(model_input, quant_config=quant_config, calibration_data_reader=calibration_data_reader)
-        tuning_logger.quantization_end()
+        tuning_logger.execution_end()
         tuning_logger.evaluation_start()
         with tempfile.TemporaryDirectory(prefix="ort.quant.") as tmp_dir:
             # evaluate API requires str input