1 change: 0 additions & 1 deletion .azure-pipelines/model-test-3x.yml

@@ -33,7 +33,6 @@ parameters:
   default:
     - opt_125m_woq_gptq_int4
     - opt_125m_woq_gptq_nf4_dq_bnb
-    - opt_125m_woq_gptq_int4_dq_ggml

 stages:
 - stage: PyTorchModels
6 changes: 3 additions & 3 deletions examples/3.x_api/pytorch/cv/fp8_quant/main.py

@@ -60,7 +60,7 @@
 parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true',
                     help='evaluate model on validation set')
 parser.add_argument('-t', '--tune', dest='tune', action='store_true',
-                    help='tune best int8 model on calibration dataset')
+                    help='tune best fp8 model on calibration dataset')
 parser.add_argument('--pretrained', dest='pretrained', action='store_true',
                     help='use pre-trained model')
 parser.add_argument('--world-size', default=-1, type=int,
@@ -94,7 +94,7 @@
                     help='For accuracy measurement only.')
 parser.add_argument("--tuned_checkpoint", default='./saved_results', type=str, metavar='PATH',
                     help='path to checkpoint tuned by Neural Compressor (default: ./)')
-parser.add_argument('--int8', dest='int8', action='store_true',
+parser.add_argument('--optimized', dest='optimized', action='store_true',
                     help='run benchmark')
 parser.add_argument('--device', default='hpu', type=str,
                     help='use hpu device for fp8 quantization')
@@ -205,7 +205,7 @@ def eval_func(model):

     if args.performance or args.accuracy:
         model.eval()
-        if args.int8:
+        if args.optimized:
             from neural_compressor.utils.pytorch import load
             new_model = load(os.path.abspath(os.path.expanduser(args.tuned_checkpoint)),
                              model,
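For context, a benchmark run of this example under the renamed flag would look roughly like the sketch below. This is not a verified command line: the flag combination is read off the argparse definitions visible in this diff, and the positional dataset argument is an assumption based on the standard PyTorch ImageNet example layout these scripts derive from.

```bash
# Sketch only: flags taken from the argparse definitions above;
# /path/to/dataset is a placeholder, and the positional argument is an
# assumed carry-over from the PyTorch ImageNet example template.
python main.py --pretrained --accuracy --optimized \
    --tuned_checkpoint ./saved_results --device hpu /path/to/dataset
```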
6 changes: 3 additions & 3 deletions examples/3.x_api/pytorch/cv/static_quant/main.py

@@ -86,8 +86,8 @@
                     help='For accuracy measurement only.')
 parser.add_argument("--tuned_checkpoint", default='./saved_results', type=str, metavar='PATH',
                     help='path to checkpoint tuned by Neural Compressor (default: ./)')
-parser.add_argument('--int8', dest='int8', action='store_true',
-                    help='Load int8 model.')
+parser.add_argument('--optimized', dest='optimized', action='store_true',
+                    help='Load optimized model.')
 parser.add_argument("--calib_iters", default=128, type=int,
                     help="For calibration only.")
 parser.add_argument("--iters", default=100, type=int,
@@ -222,7 +222,7 @@ def eval_func(model):
         return

     if args.performance or args.accuracy:
-        if args.int8:
+        if args.optimized:
             from neural_compressor.torch.quantization import load
             q_model = load(args.tuned_checkpoint)
8 changes: 4 additions & 4 deletions examples/3.x_api/pytorch/cv/static_quant/run_benchmark.sh

@@ -35,8 +35,8 @@ function init_params {
       --iters=*)
           iters=$(echo ${var} |cut -f2 -d=)
       ;;
-      --int8=*)
-          int8=$(echo ${var} |cut -f2 -d=)
+      --optimized=*)
+          optimized=$(echo ${var} |cut -f2 -d=)
       ;;
       --config=*)
           tuned_checkpoint=$(echo $var |cut -f2 -d=)
@@ -63,8 +63,8 @@ function run_benchmark {
        echo "Error: No such mode: ${mode}"
        exit 1
    fi
-    if [[ ${int8} == "true" ]]; then
-        extra_cmd=$extra_cmd" --int8"
+    if [[ ${optimized} == "true" ]]; then
+        extra_cmd=$extra_cmd" --optimized"
    fi
    echo $extra_cmd
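Callers of these wrapper scripts migrate by renaming the switch only; both `--mode` and `--optimized` are parsed by the script above, so a minimal before/after sketch is:

```bash
# Before this change:
#   bash run_benchmark.sh --mode=accuracy --int8=true
# After:
bash run_benchmark.sh --mode=accuracy --optimized=true
```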
(file path not captured — README for the SDXL SmoothQuant example)

@@ -24,7 +24,7 @@ sh run_quant.sh --alpha=0.44
 ```
 To load a quantized model:
 ```bash
-python sdxl_smooth_quant.py --model_name_or_path stabilityai/stable-diffusion-xl-base-1.0 --quantize --load --int8
+python sdxl_smooth_quant.py --model_name_or_path stabilityai/stable-diffusion-xl-base-1.0 --quantize --load --optimized
 ```
 or
 ```bash
@@ -79,5 +79,5 @@ python clip/clip_score.py \
 ```
 Or you can use the bash script for all steps above:
 ```bash
-sh run_benchmark.sh --mode=accuracy --int8=true
+sh run_benchmark.sh --mode=accuracy --optimized=true
 ```
(file path not captured — SDXL image-generation script)

@@ -325,7 +325,7 @@ def __call__(
     parser.add_argument('--precision', default='fp32', type=str)
     parser.add_argument('--base-output-dir', default="./output", type=str)
     parser.add_argument('--quantized-unet', default="./saved_results", type=str)
-    parser.add_argument("--int8", action="store_true", help="Load quantized model.")
+    parser.add_argument("--optimized", action="store_true", help="Load quantized model.")
     parser.add_argument("--load", action="store_true")
     parser.add_argument('--iters', default=5000, type=int, help="Num of image generated.")
     parser.add_argument('--output-dir-name', default=None, type=str)
@@ -411,7 +411,7 @@ def __call__(
         variant="fp16" if args.precision == 'fp16' else None,
         torch_dtype=dtype)

-    if args.int8 and args.load:
+    if args.optimized and args.load:
         from neural_compressor.torch.quantization import load
         example_inputs = {"sample": torch.randn((2, 4, 128, 128), dtype=dtype),
                           "timestep": torch.tensor(951.0),
(file path not captured — run_benchmark.sh for the SDXL example)

@@ -20,8 +20,8 @@ function init_params {
       --iters=*)
           iters=$(echo $var | cut -f2 -d=)
       ;;
-      --int8=*)
-          int8=$(echo $var | cut -f2 -d=)
+      --optimized=*)
+          optimized=$(echo $var | cut -f2 -d=)
       ;;
       --mode=*)
           mode=$(echo $var | cut -f2 -d=)
@@ -43,8 +43,8 @@ function run_benchmark {
    latent="latents.pt"
    base_output_dir="./output/"

-    if [[ ${int8} == "true" ]]; then
-        extra_cmd=$extra_cmd" --int8"
+    if [[ ${optimized} == "true" ]]; then
+        extra_cmd=$extra_cmd" --optimized"
    fi
    echo $extra_cmd
(file path not captured — sdxl_smooth_quant.py)

@@ -339,7 +339,7 @@ def main():
     )
     parser.add_argument("--quantize", action="store_true")
     parser.add_argument("--load", action="store_true")
-    parser.add_argument("--int8", action="store_true", help="Load quantized model.")
+    parser.add_argument("--optimized", action="store_true", help="Load quantized model.")
     parser.add_argument("--performance", action="store_true")
     parser.add_argument("--n_steps", type=int, default=20)
     parser.add_argument("--batch-size", type=int, default=1)
@@ -404,7 +404,7 @@ def forward_loop(model):
         q_unet.save(args.output_dir)

     if args.load:
-        if args.int8:
+        if args.optimized:
             from neural_compressor.torch.quantization import load
             q_unet = load(os.path.abspath(os.path.expanduser(args.output_dir)))
         else:
(file path not captured — README for the IPEX image-recognition examples)

@@ -59,7 +59,7 @@ python main.py -t -a resnet18 --ipex --pretrained /path/to/imagenet
 or
 ```shell
 bash run_quant.sh --input_model=resnet18 --dataset_location=/path/to/imagenet
-bash run_benchmark.sh --input_model=resnet18 --dataset_location=/path/to/imagenet --mode=performance/accuracy --int8=true/false
+bash run_benchmark.sh --input_model=resnet18 --dataset_location=/path/to/imagenet --mode=performance/accuracy --optimized=true/false
 ```

 ### 2. ResNet50 With Intel PyTorch Extension
@@ -70,7 +70,7 @@ python main.py -t -a resnet50 --ipex --pretrained /path/to/imagenet
 or
 ```shell
 bash run_quant.sh --input_model=resnet50 --dataset_location=/path/to/imagenet
-bash run_benchmark.sh --input_model=resnet50 --dataset_location=/path/to/imagenet --mode=performance/accuracy --int8=true/false
+bash run_benchmark.sh --input_model=resnet50 --dataset_location=/path/to/imagenet --mode=performance/accuracy --optimized=true/false
 ```

 ### 3. ResNext101_32x16d With Intel PyTorch Extension
@@ -81,7 +81,7 @@ python main.py -t -a resnext101_32x16d_wsl --hub --ipex --pretrained /path/to/imagenet
 or
 ```shell
 bash run_quant.sh --input_model=resnext101_32x16d_wsl --dataset_location=/path/to/imagenet
-bash run_benchmark.sh --input_model=resnext101_32x16d_wsl --dataset_location=/path/to/imagenet --mode=performance/accuracy --int8=true/false
+bash run_benchmark.sh --input_model=resnext101_32x16d_wsl --dataset_location=/path/to/imagenet --mode=performance/accuracy --optimized=true/false
 ```

 # Run with Intel GPU
@@ -96,5 +96,5 @@ python main.py -t -a resnet18 --ipex --pretrained /path/to/imagenet --xpu
 or
 ```shell
 bash run_quant.sh --input_model=resnet18 --dataset_location=/path/to/imagenet
-bash run_benchmark.sh --input_model=resnet18 --dataset_location=/path/to/imagenet --mode=performance/accuracy --int8=true/false --xpu=true/false
+bash run_benchmark.sh --input_model=resnet18 --dataset_location=/path/to/imagenet --mode=performance/accuracy --optimized=true/false --xpu=true/false
 ```
(file path not captured — main.py for the IPEX image-recognition example)

@@ -102,7 +102,7 @@
                     help='For accuracy measurement only.')
 parser.add_argument("--tuned_checkpoint", default='./saved_results', type=str, metavar='PATH',
                     help='path to checkpoint tuned by Neural Compressor (default: ./)')
-parser.add_argument('--int8', dest='int8', action='store_true',
+parser.add_argument('--optimized', dest='optimized', action='store_true',
                     help='run benchmark')
 parser.add_argument('--ipex', dest='ipex', action='store_true',
                     help='tuning or benchmark with Intel PyTorch Extension')
@@ -196,7 +196,7 @@ def main_worker(gpu, ngpus_per_node, args):
     else:
         model = quantize_models.__dict__[args.arch]()

-    if args.ipex and not args.int8:
+    if args.ipex and not args.optimized:
         model = model.to(memory_format=torch.channels_last)

     if not torch.cuda.is_available():
@@ -333,8 +333,8 @@ def run_fn(model):

     if args.performance or args.accuracy:
         model.eval()
-        if args.int8:
-            print("load int8 model")
+        if args.optimized:
+            print("load optimized model")
             from neural_compressor.torch.quantization import load
             model = load(os.path.abspath(os.path.expanduser(args.tuned_checkpoint)))
         else:
(file path not captured — run_benchmark.sh for the IPEX image-recognition example)

@@ -34,8 +34,8 @@ function init_params {
       --iters=*)
           iters=$(echo ${var} |cut -f2 -d=)
       ;;
-      --int8=*)
-          int8=$(echo ${var} |cut -f2 -d=)
+      --optimized=*)
+          optimized=$(echo ${var} |cut -f2 -d=)
       ;;
       --xpu=*)
           xpu=$(echo ${var} |cut -f2 -d=)
@@ -66,8 +66,8 @@ function run_benchmark {
        extra_cmd=$extra_cmd" --hub"
    fi

-    if [[ ${int8} == "true" ]]; then
-        extra_cmd=$extra_cmd" --int8"
+    if [[ ${optimized} == "true" ]]; then
+        extra_cmd=$extra_cmd" --optimized"
    fi

    if [[ ${xpu} == "true" ]]; then
(file path not captured — run_benchmark.sh)

@@ -35,8 +35,8 @@ function init_params {
       --iters=*)
           iters=$(echo ${var} |cut -f2 -d=)
       ;;
-      --int8=*)
-          int8=$(echo ${var} |cut -f2 -d=)
+      --optimized=*)
+          optimized=$(echo ${var} |cut -f2 -d=)
       ;;
       --config=*)
           tuned_checkpoint=$(echo $var |cut -f2 -d=)
@@ -64,7 +64,7 @@ function run_benchmark {
        exit 1
    fi

-    if [[ ${int8} == "true" ]]; then
+    if [[ ${optimized} == "true" ]]; then
        extra_cmd=$extra_cmd" --load"
    fi
    echo $extra_cmd
(file path not captured — run_benchmark.sh, DeepSpeed-capable)

@@ -35,8 +35,8 @@ function init_params {
       --iters=*)
           iters=$(echo ${var} |cut -f2 -d=)
       ;;
-      --int8=*)
-          int8=$(echo ${var} |cut -f2 -d=)
+      --optimized=*)
+          optimized=$(echo ${var} |cut -f2 -d=)
       ;;
       --config=*)
           tuned_checkpoint=$(echo $var |cut -f2 -d=)
@@ -94,7 +94,7 @@ function run_benchmark {
        python_cmd="deepspeed --num_gpus 8"
    fi

-    if [[ ${int8} == "true" ]]; then
+    if [[ ${optimized} == "true" ]]; then
        ${python_cmd} quantize.py \
            --model ${tuned_checkpoint} \
            --load\
(file path not captured — run_benchmark.sh)

@@ -37,8 +37,8 @@ function init_params {
       --iters=*)
           iters=$(echo ${var} |cut -f2 -d=)
       ;;
-      --int8=*)
-          int8=$(echo ${var} |cut -f2 -d=)
+      --optimized=*)
+          optimized=$(echo ${var} |cut -f2 -d=)
       ;;
       --config=*)
           tuned_checkpoint=$(echo $var |cut -f2 -d=)
@@ -68,8 +68,8 @@ function run_benchmark {
        exit 1
    fi

-    if [[ ${int8} == "true" ]]; then
-        extra_cmd=$extra_cmd" --int8"
+    if [[ ${optimized} == "true" ]]; then
+        extra_cmd=$extra_cmd" --optimized"
    fi
    echo $extra_cmd
(file path not captured — LLM quantization script)

@@ -29,7 +29,7 @@
 parser.add_argument(
     "--approach", type=str, default="static", help="Select from ['dynamic', 'static', 'weight-only']"
 )
-parser.add_argument("--int8", action="store_true")
+parser.add_argument("--optimized", action="store_true")
 parser.add_argument("--ipex", action="store_true", help="Use intel extension for pytorch.")
 parser.add_argument("--load", action="store_true", help="Load quantized model.")
 parser.add_argument("--accuracy", action="store_true")
@@ -217,8 +217,8 @@ def eval_func(model):


 if args.load:
-    if args.int8 or args.int8_bf16_mixed:
-        print("load int8 model")
+    if args.optimized or args.int8_bf16_mixed:
+        print("load optimized model")
         from neural_compressor.torch.quantization import load

         tokenizer = AutoTokenizer.from_pretrained(args.model)
(file path not captured — run_benchmark.sh)

@@ -37,8 +37,8 @@ function init_params {
       --iters=*)
           iters=$(echo ${var} |cut -f2 -d=)
       ;;
-      --int8=*)
-          int8=$(echo ${var} |cut -f2 -d=)
+      --optimized=*)
+          optimized=$(echo ${var} |cut -f2 -d=)
       ;;
       --config=*)
           tuned_checkpoint=$(echo $var |cut -f2 -d=)
@@ -68,8 +68,8 @@ function run_benchmark {
        exit 1
    fi

-    if [[ ${int8} == "true" ]]; then
-        extra_cmd=$extra_cmd" --int8"
+    if [[ ${optimized} == "true" ]]; then
+        extra_cmd=$extra_cmd" --optimized"
    fi
    echo $extra_cmd
(file path not captured — LLM quantization script)

@@ -35,7 +35,7 @@
 )
 parser.add_argument("--approach", type=str, default='static',
                     help="Select from ['dynamic', 'static', 'weight-only']")
-parser.add_argument("--int8", action="store_true")
+parser.add_argument("--optimized", action="store_true")
 parser.add_argument("--ipex", action="store_true", help="Use intel extension for pytorch.")
 parser.add_argument("--load", action="store_true", help="Load quantized model.")
 parser.add_argument("--accuracy", action="store_true")
@@ -198,8 +198,8 @@ def run_fn(model):
     user_model.save(args.output_dir)

 if args.load:
-    if args.int8 or args.int8_bf16_mixed:
-        print("load int8 model")
+    if args.optimized or args.int8_bf16_mixed:
+        print("load optimized model")
         from neural_compressor.torch.quantization import load
         tokenizer = AutoTokenizer.from_pretrained(args.model)
         config = AutoConfig.from_pretrained(args.model)
(file path not captured — README for the OPT-125M example)

@@ -32,7 +32,7 @@ python run_clm_no_trainer.py --model facebook/opt-125m --quantize --output_dir qmodel_save_path
 python run_clm_no_trainer.py --model facebook/opt-125m --accuracy --tasks lambada_openai

 # Measure the accuracy of the quantized model
-python run_clm_no_trainer.py --model facebook/opt-125m --accuracy --tasks lambada_openai --int8 --output_dir qmodel_save_path
+python run_clm_no_trainer.py --model facebook/opt-125m --accuracy --tasks lambada_openai --optimized --output_dir qmodel_save_path
 ```

 #### Performance
@@ -41,5 +41,5 @@ python run_clm_no_trainer.py --model facebook/opt-125m --accuracy --tasks lambada_openai
 python run_clm_no_trainer.py --model facebook/opt-125m --performance

 # Measure the performance of the quantized model
-python run_clm_no_trainer.py --model facebook/opt-125m --performance --int8 --output_dir qmodel_save_path
+python run_clm_no_trainer.py --model facebook/opt-125m --performance --optimized --output_dir qmodel_save_path
 ```
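The same run can also be driven through this example's wrapper script. The following is a sketch that assumes the wrapper parses `--optimized=*`, `--mode=*`, and `--config=*` the way the run_benchmark.sh in the file that follows does:

```bash
# Sketch: assumes the wrapper forwards --optimized to run_clm_no_trainer.py
# and maps --config to the tuned-checkpoint directory (qmodel_save_path here).
bash run_benchmark.sh --mode=performance --config=qmodel_save_path --optimized=true
```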
(file path not captured — run_benchmark.sh for the OPT-125M example)

@@ -36,8 +36,8 @@ function init_params {
       --iters=*)
           iters=$(echo ${var} |cut -f2 -d=)
       ;;
-      --int8=*)
-          int8=$(echo ${var} |cut -f2 -d=)
+      --optimized=*)
+          optimized=$(echo ${var} |cut -f2 -d=)
       ;;
       --config=*)
           tuned_checkpoint=$(echo $var |cut -f2 -d=)
@@ -67,8 +67,8 @@ function run_benchmark {
        exit 1
    fi

-    if [[ ${int8} == "true" ]]; then
-        extra_cmd=$extra_cmd" --int8"
+    if [[ ${optimized} == "true" ]]; then
+        extra_cmd=$extra_cmd" --optimized"
    fi
    echo $extra_cmd