From 6094dc3cbd0e162b3dd26cc606946836e2a8607b Mon Sep 17 00:00:00 2001 From: Alex Kogan Date: Thu, 11 May 2023 13:40:00 -0400 Subject: [PATCH] add 'quantize_groups' argument to inference test --- inference/huggingface/text-generation/inference-test.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/inference/huggingface/text-generation/inference-test.py b/inference/huggingface/text-generation/inference-test.py index f181bf6ea..bccc3545f 100644 --- a/inference/huggingface/text-generation/inference-test.py +++ b/inference/huggingface/text-generation/inference-test.py @@ -25,6 +25,7 @@ parser.add_argument("--use_cache", default=True, type=bool, help="use cache for generation") parser.add_argument("--test_performance", action='store_true', help="enable latency, bandwidth, and throughout testing") parser.add_argument("--local_rank", type=int, default=0, help="local rank") +parser.add_argument("--quantize_groups", type=int, required=False, default=0, help="number of weight quantization groups to use") args = parser.parse_args() def print_perf_stats(latency_set, config, warmup=3): @@ -78,6 +79,7 @@ def print_perf_stats(latency_set, config, warmup=3): replace_method=args.replace_method, max_tokens=args.max_tokens, save_mp_checkpoint_path=args.save_mp_checkpoint_path, + quantize_groups=args.quantize_groups, **ds_kwargs ) if local_rank == 0: