From 2e50295b78c171ed4cb23c1b895af9f19a5d7daf Mon Sep 17 00:00:00 2001
From: "He, Xin3"
Date: Tue, 25 Nov 2025 03:32:40 -0500
Subject: [PATCH 01/10] autotune target_bits example for llama recipe

Signed-off-by: He, Xin3
---
 .../quantization/auto_round/llama3/README.md  |  217 +
 .../auto_round/llama3/quantize.py             |  260 +
 .../llama3}/requirements.txt                  |    0
 .../quantization/mix-precision/README.md      |  125 -
 .../quantization/mix-precision/quantize.py    |  261 -
 .../Meta-Llama-3.1-8B-Instruct_7bits.json     | 2242 -------
 .../Meta-Llama-3.3-70B-Instruct_5bits.json    | 5602 -----------------
 .../quantization/mix-precision/run_hf_inf.py  |   29 -
 neural_compressor/common/base_config.py       |    3 +-
 .../torch/algorithms/weight_only/autoround.py |   17 +-
 .../torch/quantization/config.py              |  178 +-
 .../torch/utils/auto_accelerator.py           |    3 +-
 12 files changed, 508 insertions(+), 8429 deletions(-)
 create mode 100644 examples/pytorch/nlp/huggingface_models/language-modeling/quantization/auto_round/llama3/README.md
 create mode 100644 examples/pytorch/nlp/huggingface_models/language-modeling/quantization/auto_round/llama3/quantize.py
 rename examples/pytorch/nlp/huggingface_models/language-modeling/quantization/{mix-precision => auto_round/llama3}/requirements.txt (100%)
 delete mode 100644 examples/pytorch/nlp/huggingface_models/language-modeling/quantization/mix-precision/README.md
 delete mode 100644 examples/pytorch/nlp/huggingface_models/language-modeling/quantization/mix-precision/quantize.py
 delete mode 100644 examples/pytorch/nlp/huggingface_models/language-modeling/quantization/mix-precision/recipes/Meta-Llama-3.1-8B-Instruct_7bits.json
 delete mode 100644 examples/pytorch/nlp/huggingface_models/language-modeling/quantization/mix-precision/recipes/Meta-Llama-3.3-70B-Instruct_5bits.json
 delete mode 100644 examples/pytorch/nlp/huggingface_models/language-modeling/quantization/mix-precision/run_hf_inf.py

diff --git a/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/auto_round/llama3/README.md b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/auto_round/llama3/README.md
new file mode 100644
index 00000000000..d33d8090ed3
--- /dev/null
+++ b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/auto_round/llama3/README.md
@@ -0,0 +1,217 @@
+# Step-by-step
+
+In this example, you can verify accuracy on HPU/CUDA devices with emulation of MXFP4, MXFP8, NVFP4, and uNVFP4.
+
+## Requirements
+
+```bash
+# neural-compressor-pt
+pip install "neural-compressor-pt>=3.6"
+# auto-round
+pip install "auto-round>=0.8.0"
+# other requirements
+pip install -r requirements.txt
+```
+
+## Quantization
+
+### Demo (`MXFP4`, `MXFP8`, `NVFP4`, `uNVFP4`)
+
+```bash
+CUDA_VISIBLE_DEVICES=0 python quantize.py \
+    --model_name_or_path facebook/opt-125m \
+    --quantize \
+    --dtype MXFP8 \
+    --enable_torch_compile \
+    --low_gpu_mem_usage \
+    --export_format auto_round \
+    --export_path OPT-125M-MXFP8 \
+    --accuracy \
+    --tasks lambada_openai \
+    --eval_batch_size 8
+```
+
+Notes:
+- Use `--export_format auto_round` for the `MXFP4` and `MXFP8` data types, then run inference as shown [below](#mxfp4--mxfp8).
+- Use `--export_format llm_compressor` for the `NVFP4` data type, since public vLLM supports it.
+- Use `--export_format fake` for the `uNVFP4` data type, since it is not yet fully supported.
+- Setting `--quant_lm_head` applies `--dtype` to the lm_head layer.
+- Setting `--iters 0` skips AutoRound tuning and uses the RTN method.
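+
+Under the hood, `quantize.py` builds an `AutoRoundConfig` and runs `prepare`/`convert` from `neural_compressor.torch.quantization` (see the script added in this patch). A rough sketch of that flow, mirroring the demo command above with illustrative argument values:
+
+```python
+import transformers
+from neural_compressor.torch.quantization import AutoRoundConfig, convert, prepare
+
+model_name = "facebook/opt-125m"  # demo model from the command above
+model = transformers.AutoModelForCausalLM.from_pretrained(model_name)
+tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)
+
+config = AutoRoundConfig(
+    tokenizer=tokenizer,
+    scheme="MXFP8",               # corresponds to --dtype
+    iters=0,                      # 0 = RTN instead of AutoRound tuning
+    export_format="auto_round",   # corresponds to --export_format
+    output_dir="OPT-125M-MXFP8",  # corresponds to --export_path
+)
+model = prepare(model, config)
+model = convert(model)  # quantizes the model and exports it to output_dir
+```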
+
+
+#### Target_bits
+
+To achieve optimal compression ratios in mixed-precision quantization, we provide the `--target_bits` argument for automated precision configuration.
+
+- If you pass a single float number, it automatically generates an optimal quantization recipe that achieves the target average bit-width.
+- If you pass multiple float numbers, it generates one recipe per target bit-width, allowing you to compare the trade-offs between model size and accuracy.
+
+Example usage:
+
+```bash
+CUDA_VISIBLE_DEVICES=0 python quantize.py \
+    --model_name_or_path facebook/opt-125m \
+    --quantize \
+    --dtype MXFP4 \
+    --target_bits 6.5 7 7.3 \
+    --tune_limit 100 \
+    --enable_torch_compile \
+    --low_gpu_mem_usage \
+    --export_format auto_round \
+    --export_path OPT-125m-MXFP4-MXFP8 \
+    --accuracy \
+    --tasks lambada_openai \
+    --eval_batch_size 8
+```
+
+
+### Llama3 Quantization Recipes
+
+#### Llama 3.1 8B MXFP8
+
+AutoRound helps improve accuracy, so `iters` and `nsamples` are set higher than their defaults.
+```bash
+# Quantize and export AutoRound format
+CUDA_VISIBLE_DEVICES=0 python quantize.py \
+    --model_name_or_path /models/Meta-Llama-3.1-8B-Instruct \
+    --quantize \
+    --dtype MXFP8 \
+    --iters 1000 \
+    --nsamples 512 \
+    --enable_torch_compile \
+    --low_gpu_mem_usage \
+    --export_format auto_round \
+    --export_path Llama-3.1-8B-MXFP8
+```
+
+
+#### Llama 3.1 8B MXFP4 (Mixed with MXFP8)
+
+```bash
+CUDA_VISIBLE_DEVICES=0 python quantize.py \
+    --model_name_or_path /models/Meta-Llama-3.1-8B-Instruct \
+    --quantize \
+    --target_bits 7.8 \
+    --options "MXFP4" "MXFP8" \
+    --shared_layer "k_proj" "v_proj" "q_proj" \
+    --shared_layer "gate_proj" "up_proj" \
+    --enable_torch_compile \
+    --low_gpu_mem_usage \
+    --export_format auto_round \
+    --export_path Llama-3.1-8B-MXFP4-MXFP8
+```
+
+#### Llama 3.3 70B MXFP8
+
+```bash
+CUDA_VISIBLE_DEVICES=0 python quantize.py \
+    --model_name_or_path /models/Llama-3.3-70B-Instruct/ \
+    --quantize \
+    --dtype MXFP8 \
+    --quant_lm_head \
+    --iters 0 \
+    --enable_torch_compile \
+    --low_gpu_mem_usage \
+    --export_format auto_round \
+    --export_path Llama-3.3-70B-MXFP8
+```
+
+#### Llama 3.3 70B MXFP4 (Mixed with MXFP8)
+```bash
+CUDA_VISIBLE_DEVICES=0 python quantize.py \
+    --model_name_or_path /models/Llama-3.3-70B-Instruct/ \
+    --quantize \
+    --target_bits 5.8 \
+    --options "MXFP4" "MXFP8" \
+    --shared_layer "k_proj" "v_proj" "q_proj" \
+    --shared_layer "gate_proj" "up_proj" \
+    --enable_torch_compile \
+    --low_gpu_mem_usage \
+    --export_format auto_round \
+    --export_path Llama-3.3-70B-MXFP4-MXFP8
+```
+
+#### Llama 3.1 70B uNVFP4
+
+```bash
+CUDA_VISIBLE_DEVICES=0,1,2,3 python quantize.py \
+    --model_name_or_path /models/Llama-3.1-70B-Instruct/ \
+    --quantize \
+    --dtype uNVFP4 \
+    --quant_lm_head \
+    --iters 0 \
+    --enable_torch_compile \
+    --low_gpu_mem_usage \
+    --export_format fake \
+    --export_path Llama-3.1-70B-uNVFP4 \
+    --accuracy
+```
+
+Note: If you hit an OOM issue, either expose more devices via `CUDA_VISIBLE_DEVICES` or reduce `eval_batch_size`.
+
+## Inference
+
+### MXFP4 / MXFP8
+
+- Both pure MXFP4/MXFP8 models and mixed-precision models generated by target bits are supported.
+
+#### Prerequisite
+
+```bash
+# Install the forked vLLM
+git clone https://github.com/yiliu30/vllm-fork.git
+cd vllm-fork
+git checkout fused-moe-ar
+VLLM_USE_PRECOMPILED=1 pip install -e .
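+# Optional sanity check; assumes the fork keeps the standard vLLM packaging:
+python -c "import vllm; print(vllm.__version__)"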
+```
+
+#### Accuracy Evaluation
+```bash
+# add_bos_token=True helps accuracy for general tasks
+VLLM_ENABLE_AR_EXT=1 \
+TORCH_COMPILE_DISABLE=1 \
+CUDA_VISIBLE_DEVICES=0 \
+lm_eval --model vllm \
+    --model_args pretrained=Llama-3.1-8B-MXFP4-MXFP8,add_bos_token=True,tensor_parallel_size=1,data_parallel_size=1 \
+    --tasks piqa,hellaswag,mmlu \
+    --batch_size 8 &
+wait
+# add_bos_token=False helps accuracy for GSM8K
+VLLM_ENABLE_AR_EXT=1 \
+TORCH_COMPILE_DISABLE=1 \
+CUDA_VISIBLE_DEVICES=0 \
+lm_eval --model vllm \
+    --model_args pretrained=Llama-3.1-8B-MXFP4-MXFP8,add_bos_token=False,tensor_parallel_size=1,data_parallel_size=1 \
+    --tasks gsm8k \
+    --batch_size 8
+```
+
+Note: If you hit an OOM issue, either expose more devices via `CUDA_VISIBLE_DEVICES` together with a larger `tensor_parallel_size`, or reduce `batch_size`.
+
+### NVFP4
+NVFP4 is already supported by public vLLM; set `--export_format llm_compressor` when exporting during quantization.
+
+```bash
+CUDA_VISIBLE_DEVICES=0 lm_eval --model vllm \
+    --model_args pretrained={nvfp4_model_path},tensor_parallel_size=1,data_parallel_size=1 \
+    --tasks lambada_openai \
+    --batch_size 4
+```
+
+### uNVFP4
+uNVFP4 is saved in fake format, and reloading is not currently supported. To verify accuracy right after quantization, pass `--accuracy --tasks lambada_openai` on the command line.
+
+```bash
+CUDA_VISIBLE_DEVICES=0 python quantize.py \
+    --model_name_or_path facebook/opt-125m \
+    --quantize \
+    --dtype uNVFP4 \
+    --enable_torch_compile \
+    --low_gpu_mem_usage \
+    --export_format fake \
+    --export_path OPT-125M-uNVFP4 \
+    --accuracy \
+    --tasks lambada_openai \
+    --eval_batch_size 8 \
+    --device_map 0
+```
diff --git a/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/auto_round/llama3/quantize.py b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/auto_round/llama3/quantize.py
new file mode 100644
index 00000000000..39d51edfc8f
--- /dev/null
+++ b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/auto_round/llama3/quantize.py
@@ -0,0 +1,260 @@
+# Copyright (c) 2025 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import os
+
+import torch
+import transformers
+
+# For reproducibility
+torch.manual_seed(42)
+torch.use_deterministic_algorithms(True, warn_only=True)
+######################## HPU Memory Optimization ###########################
+# Ensure that unnecessary memory is released during quantization.
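+# os.environ.setdefault only applies each knob when it is not already set in
+# the environment, so values exported by the user take precedence.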
+os.environ.setdefault("PT_HPU_LAZY_MODE", "1")
+os.environ.setdefault("PT_HPU_WEIGHT_SHARING", "0")
+if int(os.getenv("WORLD_SIZE", "0")) > 0:
+    os.environ.setdefault("PT_HPU_LAZY_ACC_PAR_MODE", "0")
+    os.environ.setdefault("PT_HPU_ENABLE_LAZY_COLLECTIVES", "true")
+from neural_compressor.torch.utils import is_hpex_available, world_size
+from neural_compressor.torch.quantization import autotune, prepare, convert, AutoRoundConfig, TuningConfig
+
+if is_hpex_available():
+    import habana_frameworks.torch.core as htcore
+    from habana_frameworks.torch.hpu import wrap_in_hpu_graph
+
+    htcore.hpu_set_env()
+############################################################################
+
+
+def initialize_model_and_tokenizer(model_name_or_path):
+    tokenizer = transformers.AutoTokenizer.from_pretrained(model_name_or_path)
+    config = transformers.AutoConfig.from_pretrained(model_name_or_path)
+    # use memory mapping with torch_dtype=config.torch_dtype
+    model = transformers.AutoModelForCausalLM.from_pretrained(model_name_or_path, torch_dtype=config.torch_dtype)
+    model.eval()
+    return model, tokenizer
+
+
+def dispatch_model_on_devices(model):
+    from accelerate.big_modeling import dispatch_model, infer_auto_device_map
+    from accelerate.utils import get_balanced_memory
+
+    no_split_modules = getattr(model, "_no_split_modules", [])
+    balanced_memory = get_balanced_memory(model)  # balanced max_memory mapping across available devices
+    auto_device_map = infer_auto_device_map(
+        model,
+        max_memory=balanced_memory,
+        no_split_module_classes=no_split_modules
+    )
+    print(auto_device_map)
+    model = dispatch_model(model, auto_device_map)
+    return model
+
+
+@torch.no_grad()
+def get_accuracy(model_name_or_path, tokenizer=None, limit=None):
+    os.environ.setdefault("TOKENIZERS_PARALLELISM", "false")
+    all_accuracy = {}
+    test_gsm8k = False
+    test_normal = False
+    if "gsm8k" in args.tasks:
+        test_gsm8k = True
+        args.tasks.remove("gsm8k")
+    if args.tasks:
+        test_normal = True
+    import lm_eval
+    from lm_eval.models.huggingface import HFLM
+
+    ########################## gsm8k (ahead of normal tasks) #########################
+    if test_gsm8k:
+        lm = HFLM(
+            pretrained=model_name_or_path,
+            tokenizer=tokenizer,
+            add_bos_token=False,
+            batch_size=args.eval_batch_size,
+        )
+        results_gsm8k = lm_eval.simple_evaluate(
+            lm,
+            tasks=["gsm8k"],
+            limit=args.limit if limit is None else limit,
+        )
+        for task_name, task_results in results_gsm8k["results"].items():
+            accu = task_results["exact_match,strict-match"]
+            all_accuracy[task_name] = accu
+    ########################## gsm8k end #########################
+    if test_normal:
+        lm = HFLM(
+            pretrained=model_name_or_path,
+            tokenizer=tokenizer,
+            add_bos_token=True,
+            batch_size=args.eval_batch_size,
+        )
+        results = lm_eval.simple_evaluate(
+            lm,
+            tasks=args.tasks,
+            limit=args.limit if limit is None else limit,
+        )
+        for task_name, task_results in results["results"].items():
+            if "acc,none" in task_results:
+                accu = task_results["acc,none"]
+                all_accuracy[task_name] = accu
+    for task_name, accu in all_accuracy.items():
+        print(f"Accuracy for {task_name}: {accu:.4f}")
+    avg_accu = sum(all_accuracy.values())/len(all_accuracy)
+    print(f"Overall accuracy: {avg_accu:.4f}")
+    return avg_accu
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description="AutoRound quantization example for MXFP4/MXFP8/NVFP4/uNVFP4.",
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+    )
+    parser.add_argument(
+        "--model_name_or_path", type=str, default="meta-llama/Meta-Llama-3.1-8B-Instruct", help="model name or path"
+    )
+    parser.add_argument("--dtype", type=str, default="MXFP4", choices=["MXFP4", "MXFP8", "NVFP4", "NVFP4+", "uNVFP4"], help="data type")
+    parser.add_argument("--quantize", action="store_true", help="whether to quantize model")
+    parser.add_argument("--device_map", type=str, default="auto", help="device map for model")
+    parser.add_argument(
+        "--target_bits",
+        type=float,
+        nargs="+",
+        default=None,
+        help="target bits for mixed precision"
+    )
+    parser.add_argument("--tolerable_loss", type=float, default=0.01,
+                        help="tolerable loss for accuracy autotune, relative value to the fp32 baseline")
+    parser.add_argument(
+        "--options",
+        type=str,
+        nargs="+",
+        default=[
+            "MXFP4",
+            "MXFP8",
+        ],
+        help="candidate data types for mixed precision"
+    )
+    parser.add_argument(
+        "--shared_layer",
+        type=str,
+        nargs="+",
+        action="append",
+        default=[],
+        help="[mixed-precision] ensure that the listed layers use the same data type for quantization"
+    )
+    parser.add_argument("--use_recipe", action="store_true", help="whether to use recipe to quantize model")
+    parser.add_argument("--recipe_file", type=str, default="recipes/Meta-Llama-3.1-8B-Instruct_6bits.json", help="path of recipe file")
+    parser.add_argument("--iters", default=200, type=int, help="iters for autoround.")
+    parser.add_argument("--seqlen", default=2048, type=int, help="sequence length for autoround.")
+    parser.add_argument("--nsamples", default=128, type=int, help="number of samples for autoround.")
+    parser.add_argument("--save", action="store_true", help="whether to save the quantized model")
+    parser.add_argument("--export_path", type=str, default="saved_results", help="path to save the quantized model")
+    parser.add_argument("--export_format", type=str, default="auto_round", help="format to save the quantized model")
+    parser.add_argument("--enable_torch_compile", action="store_true", help="whether to enable torch.compile")
+    parser.add_argument("--low_gpu_mem_usage", action="store_true", help="whether to enable low_gpu_mem_usage")
+    parser.add_argument("--quant_lm_head", action="store_true", help="whether to quantize lm_head")
+    parser.add_argument("--accuracy", action="store_true", help="accuracy measurement")
+    parser.add_argument("--local_rank", type=int, default=0, metavar="N", help="Local process rank.")
+    parser.add_argument("--eval_batch_size", default=16, type=int, help="batch size for accuracy evaluation.")
+    parser.add_argument(
+        "--tasks",
+        type=str,
+        nargs="+",
+        default=[
+            "piqa",
+            "hellaswag",
+            "mmlu",
+            "gsm8k",
+        ],
+        help="tasks for accuracy validation, text-generation and code-generation tasks are different.",
+    )
+    parser.add_argument("--limit", type=int, default=None, help="number of samples for accuracy evaluation")
+    parser.add_argument("--tune_limit", type=int, default=100, help="number of samples for accuracy autotune")
+    args = parser.parse_args()

+    if args.target_bits is None:
+        print("Target data type:", args.dtype)
+    else:
+        print("Target data type for mixed precision:", args.options)
+        print("Layers sharing the same data type:", args.shared_layer)
+    model, tokenizer = initialize_model_and_tokenizer(args.model_name_or_path)
+
+    if args.quantize:
+        if args.dtype in ["uNVFP4", "NVFP4+"]:
+            from auto_round.schemes import QuantizationScheme
+
+            uNVFP4 = QuantizationScheme.from_dict(
+                {
+                    "bits": 4,
+                    "group_size": 16,
+                    "data_type": "fp4_v2",
+                    "act_bits": 4,
+                    "act_data_type": "fp4_v2",
+                    "act_group_size": 16,
+                    "act_sym": True,
+                }
+            )
+            args.dtype = uNVFP4
+
+        layer_config = {}
+        if args.use_recipe:
+            ############ load recipe results (MXFP4 + MXFP8) ############
+            def load_recipe_results(file_path):
+                import json
+                with open(file_path, "r") as f:
+                    return json.load(f)
+
+            layer_config = load_recipe_results(args.recipe_file)
+            if args.quant_lm_head:
+                # ensure lm_head is quantized with the same scheme as --dtype
+                layer_config.update({"lm_head": args.dtype})
+
+        # preprocess
+        if isinstance(args.target_bits, list) and len(args.target_bits) == 1:
+            args.target_bits = args.target_bits[0]
+        config = AutoRoundConfig(
+            tokenizer=tokenizer,
+            iters=args.iters,
+            seqlen=args.seqlen,
+            nsamples=args.nsamples,
+            scheme=args.dtype,
+            target_bits=args.target_bits,
+            options=args.options,
+            shared_layers=args.shared_layer,
+            enable_torch_compile=args.enable_torch_compile,
+            low_gpu_mem_usage=args.low_gpu_mem_usage,
+            export_format=args.export_format,
+            output_dir=args.export_path,
+            device_map=args.device_map,
+            layer_config=layer_config if (args.use_recipe or args.quant_lm_head) else None,
+        )
+        if isinstance(args.target_bits, list) and len(args.target_bits) > 1:
+            def eval_fn(model):
+                model = model.eval()
+                model = dispatch_model_on_devices(model)
+                accu = get_accuracy(model, tokenizer, args.tune_limit)
+                model = model.to("cpu")
+                return accu
+            tuning_config = TuningConfig(config_set=[config], tolerable_loss=args.tolerable_loss)
+            model = autotune(model, tuning_config, eval_fn=eval_fn)
+        else:
+            model = prepare(model, config)
+            model = convert(model)
+        print(f"Quantized model in {args.export_format} format is saved to {args.export_path}")
+
+    if args.accuracy:
+        model = dispatch_model_on_devices(model)
+        get_accuracy(model, tokenizer)
diff --git a/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/mix-precision/requirements.txt b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/auto_round/llama3/requirements.txt
similarity index 100%
rename from examples/pytorch/nlp/huggingface_models/language-modeling/quantization/mix-precision/requirements.txt
rename to examples/pytorch/nlp/huggingface_models/language-modeling/quantization/auto_round/llama3/requirements.txt
diff --git a/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/mix-precision/README.md b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/mix-precision/README.md
deleted file mode 100644
index 52d043474d1..00000000000
--- a/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/mix-precision/README.md
+++ /dev/null
@@ -1,125 +0,0 @@
-# Run
-
-In this example, you can verify the accuracy on HPU/CUDA device with emulation of MXFP4, MXFP8, NVFP4 and uNVFP4. 
- -## Requirement - -```bash -# neural-compressor-pt -pip install neural-compressor-pt==3.6 -# auto-round -pip install auto-round==0.8.0 -# other requirements -pip install -r requirements.txt -``` -**Before neural-compressor v3.6 and auto-round v0.8.0 release, please install from source for the latest updates:** -```bash -# neural-compressor-pt -INC_PT_ONLY=1 pip install git+https://github.com/intel/neural-compressor.git@v3.6rc -# auto-round -pip install git+https://github.com/intel/auto-round.git@v0.8.0rc -# other requirements -pip install -r requirements.txt -``` - -## Quantization - -### Demo (`MXFP4`, `MXFP8`, `NVFP4`, `uNVFP4`) - -```bash -python quantize.py --model_name_or_path facebook/opt-125m --quantize --dtype MXFP4 --batch_size 8 --accuracy --enable_torch_compile -``` - -### Mix-precision Quantization (`MXFP4 + MXFP8`) - -```bash -# Llama 3.1 8B -python quantize.py \ - --model_name_or_path meta-llama/Llama-3.1-8B-Instruct \ - --quantize \ - --dtype MXFP4 \ - --use_recipe \ - --recipe_file recipes/Meta-Llama-3.1-8B-Instruct_7bits.json \ - --accuracy \ - --batch_size 32 \ - --enable_torch_compile - -# Llama 3.3 70B -deepspeed --include="localhost:0,1,2,3" --master_port=29500 quantize.py \ - --model_name_or_path meta-llama/Llama-3.3-70B-Instruct/ \ - --quantize \ - --dtype MXFP4 \ - --use_recipe \ - --recipe_file recipes/Meta-Llama-3.3-70B-Instruct_5bits.json \ - --accuracy \ - --batch_size 32 -``` - -> Note: -> 1. Quantization applies `--dtype` for all blocks in the model by removing `--use_recipe`. -> 2. Setting `--quant_lm_head` applies `--dtype` for the lm_head layer. -> 3. Setting `--iters 0` skips AutoRound tuning and uses RTN method. -> 4. The `deepspeed` usage provides quick accuracy verification. - -## Inference usage - -### NVFP4 -NVFP4 is supported by vLLM already, the saved model in this example follows the `llm_compressor` format, please refer to the usage in the public vLLM document. - -```bash -# Command to save model: -python quantize.py --model_name_or_path facebook/opt-125m --quantize --dtype NVFP4 --batch_size 8 --save --save_path opt-125m-nvfp4 --save_format llm_compressor -``` - -### MXFP4 / MXFP8 -MXFP4 and MXFP8 is enabled in a forked vLLM repo, usages as below: -```bash -# Install the forked vLLM -git clone -b cuda-mxfp8-moe --single-branch --quiet https://github.com/yiliu30/vllm-fork.git && cd vllm-fork -USE_CPP=0 VLLM_USE_PRECOMPILED=1 pip install -e . -vvv && cd - - -# Command to save model: -python quantize.py \ - --model_name_or_path meta-llama/Llama-3.3-70B-Instruct/ \ - --quantize \ - --iters 0 \ - --dtype MXFP4 \ - --save_path Llama-3.3-70B-Instruct-MXFP4 \ - --save \ - --save_format llm_compressor - -# Command to inference with vLLM: -CUDA_VISIBLE_DEVICES=0,1 VLLM_USE_V1=0 VLLM_USE_MXFP4_CT_EMULATIONS=1 VLLM_LOGGING_LEVEL=DEBUG \ -vllm serve Llama-3.3-70B-Instruct-MXFP4 --tensor-parallel-size=2 --port 7777 --host localhost --trust-remote-code --dtype bfloat16 --enforce-eager -export no_proxy="localhost, 127.0.0.1, ::1" -curl -X POST http://localhost:7777/v1/completions \ - -H "Content-Type: application/json" \ - -d '{ - "model": "/model_path/Llama-3.3-70B-Instruct-MXFP4", - "prompt": "Solve the following math problem step by step: What is 25 + 37? 
Please answer directly with the result.", - "max_tokens": 100, - "temperature": 0.7, - "top_p": 1.0 - }' -``` -> Note: To inference with transformers, please save model with `--save_format auto_round` and try `python run_hf_inf.py ${model_name_or_path}` - -### MXFP4 + MXFP8 -Model with mixed precision is not supported in vLLM, but supported in transformers in `auto-round` format. - -```bash -# Command to save model: -python quantize.py \ - --model_name_or_path meta-llama/Llama-3.1-8B-Instruct \ - --quantize \ - --dtype MXFP4 \ - --use_recipe \ - --recipe_file recipes/Meta-Llama-3.1-8B-Instruct_7bits.json \ - --save \ - --save_format auto_round \ - --save_path Llama-3.1-8B-Instruct-MXFP4-MXFP8-AR \ - --enable_torch_compile - -# Command to inference with transformer: -python run_hf_inf.py Llama-3.1-8B-Instruct-MXFP4-MXFP8-AR -``` diff --git a/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/mix-precision/quantize.py b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/mix-precision/quantize.py deleted file mode 100644 index 7cd4bc9996a..00000000000 --- a/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/mix-precision/quantize.py +++ /dev/null @@ -1,261 +0,0 @@ -# Copyright (c) 2025 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import os - -import torch -import transformers - -# For reproducibility -torch.manual_seed(42) -torch.use_deterministic_algorithms(True, warn_only=True) -######################## HPU Memory Optimization ########################### -# ensure that unnecessary memory is released during quantization. 
-os.environ.setdefault("PT_HPU_LAZY_MODE", "1") -os.environ.setdefault("PT_HPU_WEIGHT_SHARING", "0") -if int(os.getenv("WORLD_SIZE", "0")) > 0: - os.environ.setdefault("PT_HPU_LAZY_ACC_PAR_MODE", "0") - os.environ.setdefault("PT_HPU_ENABLE_LAZY_COLLECTIVES", "true") -from neural_compressor.torch.utils import is_hpex_available, world_size -from auto_round import AutoRound - -if is_hpex_available(): - import habana_frameworks.torch.core as htcore - from habana_frameworks.torch.hpu import wrap_in_hpu_graph - - htcore.hpu_set_env() -############################################################################ - - -def initialize_model_and_tokenizer(model_name_or_path): - tokenizer = transformers.AutoTokenizer.from_pretrained(model_name_or_path) - config = transformers.AutoConfig.from_pretrained(model_name_or_path) - # using memory mapping with torch_dtype=config.torch_dtype - model = transformers.AutoModelForCausalLM.from_pretrained(model_name_or_path, torch_dtype=config.torch_dtype) - # shard model for multi-cards and enable hpu graph - - if world_size > 1: - ds_inference_kwargs = { - "dtype": config.torch_dtype, - "tensor_parallel": {"tp_size": world_size}, - } - import deepspeed - - ds_model = deepspeed.init_inference(model, **ds_inference_kwargs) - model = ds_model.module - model.eval() - return model, tokenizer - - -if __name__ == "__main__": - parser = argparse.ArgumentParser( - description="Habana FP8 quantization.", formatter_class=argparse.ArgumentDefaultsHelpFormatter - ) - parser.add_argument( - "--model_name_or_path", type=str, default="meta-llama/Meta-Llama-3.1-8B-Instruct", help="model name or path" - ) - parser.add_argument("--dtype", type=str, default="MXFP4", choices=["MXFP4", "MXFP8", "NVFP4", "NVFP4+", "uNVFP4"], help="data type") - parser.add_argument("--quantize", action="store_true", help="whether to quantize model") - parser.add_argument("--device_map", type=str, default=None, help="device map for model") - parser.add_argument("--use_recipe", action="store_true", help="whether to use recipe to quantize model") - parser.add_argument("--recipe_file", type=str, default="recipes/Meta-Llama-3.1-8B-Instruct_6bits.json", help="path of recipe file") - parser.add_argument("--mem_per_param_scale", default=13, type=int, help="memory per param scale factor") - parser.add_argument("--iters", default=200, type=int, help="iters for autoround.") - parser.add_argument("--seqlen", default=2048, type=int, help="sequence length for autoround.") - parser.add_argument("--nsamples", default=128, type=int, help="number of samples for autoround.") - parser.add_argument("--save", action="store_true", help="whether to save the quantized model") - parser.add_argument("--save_path", type=str, default="saved_results", help="path to save the quantized model") - parser.add_argument("--save_format", type=str, default="auto_round", help="format to save the quantized model") - parser.add_argument("--enable_torch_compile", action="store_true", help="whether to enable torch.compile") - parser.add_argument("--quant_lm_head", action="store_true", help="whether to quantize lm_head") - parser.add_argument("--accuracy", action="store_true", help="accuracy measurement") - parser.add_argument("--local_rank", type=int, default=0, metavar="N", help="Local process rank.") - parser.add_argument("--batch_size", default=32, type=int, help="batch size for accuracy evaluation.") - parser.add_argument( - "--tasks", - type=str, - nargs="+", - default=[ - "piqa", - "hellaswag", - "mmlu", - "winogrande", - "lambada_openai", - 
], - help="tasks for accuracy validation, text-generation and code-generation tasks are different.", - ) - parser.add_argument("--limit", type=int, default=None, help="number of samples for accuracy evaluation") - args = parser.parse_args() - - print("Target data type:", args.dtype) - - model, tokenizer = initialize_model_and_tokenizer(args.model_name_or_path) - device="hpu" if is_hpex_available() else "cuda" - # in case that model is set to cuda:0 by default - if args.device_map.isdigit() and device=="cuda": - device = f"{device}:{args.device_map}" - - if args.quantize: - if args.dtype in ["uNVFP4", "NVFP4+"]: - from auto_round.schemes import QuantizationScheme - - uNVFP4 = QuantizationScheme.from_dict( - { - "bits": 4, - "group_size": 16, - "data_type": "fp4_v2", - "act_bits": 4, - "act_data_type": "fp4_v2", - "act_group_size": 16, - "act_sym": True, - } - ) - args.dtype = uNVFP4 - - if args.quant_lm_head: - layer_config = {"lm_head": args.dtype} - - autoround = AutoRound( - model, - tokenizer, - device=device, - device_map="tp" if world_size > 1 else args.device_map, - iters=args.iters, - seqlen=args.seqlen, - nsamples=args.nsamples, - low_gpu_mem_usage=True, - scheme=args.dtype, - layer_config=layer_config if args.quant_lm_head else None, - enable_torch_compile=args.enable_torch_compile, - mem_per_param_scale=args.mem_per_param_scale, - ) - - if args.use_recipe: - ############ load recipe results (MXFP4 + MXFP8) ############ - def load_recipe_results(file_path): - import json - with open(file_path, "r") as f: - return json.load(f) - - layer_config = load_recipe_results(args.recipe_file) - if args.quant_lm_head: - # ensure lm_head is quantized with mxfp8_config - layer_config.update({"lm_head": "MXFP8"}) - print("In recipe mode, lm_head is quantized with MXFP8.") - autoround.layer_config = layer_config - - # A placeholder, to pass assertion in AutoRound - autoround.formats = "auto_round" - autoround.quantize() - model = autoround.model - - if args.accuracy: - # set dtype to BF16 for HPU inference performance - model = model.to(torch.bfloat16) - model = model.eval().to(device) - if is_hpex_available(): - # HPU needs padding to buckets for better performance - # Generation tasks, such as gsm8k and mmlu-pro, may get OOM. - model = wrap_in_hpu_graph(model) - htcore.hpu_inference_initialize(model, mark_only_scales_as_const=True) - from neural_compressor.evaluation.lm_eval import LMEvalParser, evaluate - - tasks = ",".join(args.tasks) - eval_args = LMEvalParser( - model="hf", - user_model=model, - tokenizer=tokenizer, - batch_size=args.batch_size, - tasks=tasks, - device="hpu", - pad_to_buckets=True, - limit=args.limit, - add_bos_token=True, - ) - results = evaluate(eval_args) - torch.hpu.synchronize() - all_accuracy = {} - for task_name, task_results in results["results"].items(): - if task_name in ["hellaswag", "lambada_openai", "piqa", "winogrande", "mmlu"]: - accu = task_results["acc,none"] - all_accuracy[task_name] = accu - print(f"Accuracy for {task_name}: {accu:.4f}") - print(f"Overall accuracy: {sum(all_accuracy.values())/len(all_accuracy):.4f}") - else: - # CUDA evaluation support all tasks. - # gsm8k requires add_bos_token=False for better accuracy for llama model. 
- args.tasks = ["piqa", "hellaswag", "mmlu", "gsm8k"] - all_accuracy = {} - test_gsm8k = False - test_normal = False - if "gsm8k" in args.tasks: - test_gsm8k = True - args.tasks.remove("gsm8k") - if args.tasks: - test_normal = True - import lm_eval - from lm_eval.models.huggingface import HFLM - - ########################## gms8k (ahead of normal tasks) ######################### - if test_gsm8k: - lm = HFLM( - pretrained=model, - tokenizer=tokenizer, - add_bos_token=False, - batch_size=args.batch_size, - ) - results_gsm8k = lm_eval.simple_evaluate( - lm, - tasks=["gsm8k"], - limit=args.limit, - ) - for task_name, task_results in results_gsm8k["results"].items(): - accu = task_results["exact_match,strict-match"] - all_accuracy[task_name] = accu - ########################## gms8k end ######################### - if test_normal: - lm = HFLM( - pretrained=model, - tokenizer=tokenizer, - add_bos_token=True, - batch_size=args.batch_size, - ) - results = lm_eval.simple_evaluate( - lm, - tasks=args.tasks, - limit=args.limit, - ) - for task_name, task_results in results["results"].items(): - if task_name in ["hellaswag", "lambada_openai", "piqa", "winogrande", "mmlu"]: - accu = task_results["acc,none"] - all_accuracy[task_name] = accu - for task_name, accu in all_accuracy.items(): - print(f"Accuracy for {task_name}: {accu:.4f}") - print(f"Overall accuracy: {sum(all_accuracy.values())/len(all_accuracy):.4f}") - - if args.save: - if args.dtype == "NVFP4": - # using llm_compressor format to save nv_fp4 model - autoround.save_quantized(args.save_path, format=args.save_format) - else: - # using auto_round format to save mx_fp4 and mx_fp8 model - if world_size > 1: - print(f"Suggest to save model without sharding for better reload experience.") - print(f"Setting`--device_map 0,1,2,3` provides pipeline parallel instead of deepspeed tensor parallel.") - output_dir = args.save_path + "/" + args.local_rank + "_" + args.world_size - autoround.save_quantized(output_dir, format=args.save_format) - else: - autoround.save_quantized(args.save_path, format=args.save_format) - print(f"Quantized model in {args.save_format} format is saved to {args.save_path}") diff --git a/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/mix-precision/recipes/Meta-Llama-3.1-8B-Instruct_7bits.json b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/mix-precision/recipes/Meta-Llama-3.1-8B-Instruct_7bits.json deleted file mode 100644 index 49b4e3a56d6..00000000000 --- a/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/mix-precision/recipes/Meta-Llama-3.1-8B-Instruct_7bits.json +++ /dev/null @@ -1,2242 +0,0 @@ -{ - "model.layers.0.self_attn.q_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.0.self_attn.k_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.0.self_attn.v_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.0.self_attn.o_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.0.mlp.gate_proj": { - 
"data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.0.mlp.up_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.0.mlp.down_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.1.self_attn.q_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.1.self_attn.k_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.1.self_attn.v_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.1.self_attn.o_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.1.mlp.gate_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.1.mlp.up_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.1.mlp.down_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.2.self_attn.q_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.2.self_attn.k_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.2.self_attn.v_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.2.self_attn.o_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.2.mlp.gate_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.2.mlp.up_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.2.mlp.down_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.3.self_attn.q_proj": { - "data_type": "mx_fp", - 
"act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.3.self_attn.k_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.3.self_attn.v_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.3.self_attn.o_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.3.mlp.gate_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.3.mlp.up_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.3.mlp.down_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.4.self_attn.q_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.4.self_attn.k_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.4.self_attn.v_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.4.self_attn.o_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.4.mlp.gate_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.4.mlp.up_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.4.mlp.down_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.5.self_attn.q_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.5.self_attn.k_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.5.self_attn.v_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.5.self_attn.o_proj": { - "data_type": "mx_fp", - "act_data_type": 
"mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.5.mlp.gate_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.5.mlp.up_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.5.mlp.down_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.6.self_attn.q_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.6.self_attn.k_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.6.self_attn.v_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.6.self_attn.o_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.6.mlp.gate_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.6.mlp.up_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.6.mlp.down_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.7.self_attn.q_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.7.self_attn.k_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.7.self_attn.v_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.7.self_attn.o_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.7.mlp.gate_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.7.mlp.up_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.7.mlp.down_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - 
"group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.8.self_attn.q_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.8.self_attn.k_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.8.self_attn.v_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.8.self_attn.o_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.8.mlp.gate_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.8.mlp.up_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.8.mlp.down_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.9.self_attn.q_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.9.self_attn.k_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.9.self_attn.v_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.9.self_attn.o_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.9.mlp.gate_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.9.mlp.up_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.9.mlp.down_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.10.self_attn.q_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.10.self_attn.k_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.10.self_attn.v_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - 
"sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.10.self_attn.o_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.10.mlp.gate_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.10.mlp.up_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.10.mlp.down_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.11.self_attn.q_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.11.self_attn.k_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.11.self_attn.v_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.11.self_attn.o_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.11.mlp.gate_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.11.mlp.up_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.11.mlp.down_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.12.self_attn.q_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.12.self_attn.k_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.12.self_attn.v_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.12.self_attn.o_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.12.mlp.gate_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.12.mlp.up_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - 
"act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.12.mlp.down_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.13.self_attn.q_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.13.self_attn.k_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.13.self_attn.v_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.13.self_attn.o_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.13.mlp.gate_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.13.mlp.up_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.13.mlp.down_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.14.self_attn.q_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.14.self_attn.k_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.14.self_attn.v_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.14.self_attn.o_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.14.mlp.gate_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.14.mlp.up_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.14.mlp.down_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.15.self_attn.q_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.15.self_attn.k_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - 
"act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.15.self_attn.v_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.15.self_attn.o_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.15.mlp.gate_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.15.mlp.up_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.15.mlp.down_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.16.self_attn.q_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.16.self_attn.k_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.16.self_attn.v_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.16.self_attn.o_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.16.mlp.gate_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.16.mlp.up_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.16.mlp.down_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.17.self_attn.q_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.17.self_attn.k_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.17.self_attn.v_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.17.self_attn.o_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.17.mlp.gate_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - 
"act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.17.mlp.up_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.17.mlp.down_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.18.self_attn.q_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.18.self_attn.k_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.18.self_attn.v_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.18.self_attn.o_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.18.mlp.gate_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.18.mlp.up_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.18.mlp.down_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.19.self_attn.q_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.19.self_attn.k_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.19.self_attn.v_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.19.self_attn.o_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.19.mlp.gate_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.19.mlp.up_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.19.mlp.down_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.20.self_attn.q_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, 
- "act_group_size": 32, - "act_sym": true - }, - "model.layers.20.self_attn.k_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.20.self_attn.v_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.20.self_attn.o_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.20.mlp.gate_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.20.mlp.up_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.20.mlp.down_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.21.self_attn.q_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.21.self_attn.k_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.21.self_attn.v_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.21.self_attn.o_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.21.mlp.gate_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.21.mlp.up_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.21.mlp.down_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.22.self_attn.q_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.22.self_attn.k_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.22.self_attn.v_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.22.self_attn.o_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - 
"act_group_size": 32, - "act_sym": true - }, - "model.layers.22.mlp.gate_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.22.mlp.up_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.22.mlp.down_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.23.self_attn.q_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.23.self_attn.k_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.23.self_attn.v_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.23.self_attn.o_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.23.mlp.gate_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.23.mlp.up_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.23.mlp.down_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.24.self_attn.q_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.24.self_attn.k_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.24.self_attn.v_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.24.self_attn.o_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.24.mlp.gate_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.24.mlp.up_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.24.mlp.down_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 
32, - "act_sym": true - }, - "model.layers.25.self_attn.q_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.25.self_attn.k_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.25.self_attn.v_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.25.self_attn.o_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.25.mlp.gate_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.25.mlp.up_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.25.mlp.down_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.26.self_attn.q_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.26.self_attn.k_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.26.self_attn.v_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.26.self_attn.o_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.26.mlp.gate_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.26.mlp.up_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.26.mlp.down_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.27.self_attn.q_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.27.self_attn.k_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.27.self_attn.v_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - 
"act_sym": true - }, - "model.layers.27.self_attn.o_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.27.mlp.gate_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.27.mlp.up_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.27.mlp.down_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.28.self_attn.q_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.28.self_attn.k_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.28.self_attn.v_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.28.self_attn.o_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.28.mlp.gate_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.28.mlp.up_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.28.mlp.down_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.29.self_attn.q_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.29.self_attn.k_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.29.self_attn.v_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.29.self_attn.o_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.29.mlp.gate_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.29.mlp.up_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": 
true - }, - "model.layers.29.mlp.down_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.30.self_attn.q_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.30.self_attn.k_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.30.self_attn.v_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.30.self_attn.o_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.30.mlp.gate_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.30.mlp.up_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.30.mlp.down_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.31.self_attn.q_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.31.self_attn.k_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.31.self_attn.v_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.31.self_attn.o_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.31.mlp.gate_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.31.mlp.up_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.31.mlp.down_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - } -} \ No newline at end of file diff --git a/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/mix-precision/recipes/Meta-Llama-3.3-70B-Instruct_5bits.json b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/mix-precision/recipes/Meta-Llama-3.3-70B-Instruct_5bits.json deleted file mode 100644 index 105c6daa492..00000000000 --- 
+++ /dev/null
@@ -1,5602 +0,0 @@
-{
-    "model.layers.0.self_attn.q_proj": {
-        "data_type": "mx_fp",
-        "act_data_type": "mx_fp_rceil",
-        "bits": 4,
-        "group_size": 32,
-        "sym": true,
-        "act_bits": 4,
-        "act_group_size": 32,
-        "act_sym": true
-    },
[... 70B recipe continues with one identically-shaped entry per linear module; within the portion excerpted here (model.layers.0-31), k_proj and v_proj are always MXFP8 and gate_proj is MXFP4 in every layer shown, up_proj is MXFP4 through layer 21 and MXFP8 from layer 22 on, down_proj is MXFP4 through layer 9 and MXFP8 from layer 10 on, q_proj is MXFP4 in layers 0-3, 5-9, and 11-12, and o_proj is MXFP4 in layers 3, 5-7, and 11; the hunk breaks off mid-entry at model.layers.31.self_attn.o_proj and covers the model's remaining layers beyond this excerpt ...]
"act_group_size": 32, - "act_sym": true - }, - "model.layers.31.mlp.gate_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.31.mlp.up_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.31.mlp.down_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.32.self_attn.q_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.32.self_attn.k_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.32.self_attn.v_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.32.self_attn.o_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.32.mlp.gate_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.32.mlp.up_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.32.mlp.down_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.33.self_attn.q_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.33.self_attn.k_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.33.self_attn.v_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.33.self_attn.o_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.33.mlp.gate_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.33.mlp.up_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.33.mlp.down_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 
32, - "act_sym": true - }, - "model.layers.34.self_attn.q_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.34.self_attn.k_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.34.self_attn.v_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.34.self_attn.o_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.34.mlp.gate_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.34.mlp.up_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.34.mlp.down_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.35.self_attn.q_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.35.self_attn.k_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.35.self_attn.v_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.35.self_attn.o_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.35.mlp.gate_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.35.mlp.up_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.35.mlp.down_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.36.self_attn.q_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.36.self_attn.k_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.36.self_attn.v_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - 
"act_sym": true - }, - "model.layers.36.self_attn.o_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.36.mlp.gate_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.36.mlp.up_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.36.mlp.down_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.37.self_attn.q_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.37.self_attn.k_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.37.self_attn.v_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.37.self_attn.o_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.37.mlp.gate_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.37.mlp.up_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.37.mlp.down_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.38.self_attn.q_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.38.self_attn.k_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.38.self_attn.v_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.38.self_attn.o_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.38.mlp.gate_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.38.mlp.up_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": 
true - }, - "model.layers.38.mlp.down_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.39.self_attn.q_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.39.self_attn.k_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.39.self_attn.v_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.39.self_attn.o_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.39.mlp.gate_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.39.mlp.up_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.39.mlp.down_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.40.self_attn.q_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.40.self_attn.k_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.40.self_attn.v_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.40.self_attn.o_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.40.mlp.gate_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.40.mlp.up_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.40.mlp.down_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.41.self_attn.q_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.41.self_attn.k_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, 
- "model.layers.41.self_attn.v_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.41.self_attn.o_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.41.mlp.gate_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.41.mlp.up_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.41.mlp.down_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.42.self_attn.q_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.42.self_attn.k_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.42.self_attn.v_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.42.self_attn.o_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.42.mlp.gate_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.42.mlp.up_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.42.mlp.down_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.43.self_attn.q_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.43.self_attn.k_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.43.self_attn.v_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.43.self_attn.o_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.43.mlp.gate_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - 
"model.layers.43.mlp.up_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.43.mlp.down_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.44.self_attn.q_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.44.self_attn.k_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.44.self_attn.v_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.44.self_attn.o_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.44.mlp.gate_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.44.mlp.up_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.44.mlp.down_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.45.self_attn.q_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.45.self_attn.k_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.45.self_attn.v_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.45.self_attn.o_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.45.mlp.gate_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.45.mlp.up_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.45.mlp.down_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.46.self_attn.q_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - 
"model.layers.46.self_attn.k_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.46.self_attn.v_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.46.self_attn.o_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.46.mlp.gate_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.46.mlp.up_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.46.mlp.down_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.47.self_attn.q_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.47.self_attn.k_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.47.self_attn.v_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.47.self_attn.o_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.47.mlp.gate_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.47.mlp.up_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.47.mlp.down_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.48.self_attn.q_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.48.self_attn.k_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.48.self_attn.v_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.48.self_attn.o_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - 
"model.layers.48.mlp.gate_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.48.mlp.up_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.48.mlp.down_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.49.self_attn.q_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.49.self_attn.k_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.49.self_attn.v_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.49.self_attn.o_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.49.mlp.gate_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.49.mlp.up_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.49.mlp.down_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.50.self_attn.q_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.50.self_attn.k_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.50.self_attn.v_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.50.self_attn.o_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.50.mlp.gate_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.50.mlp.up_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.50.mlp.down_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - 
"model.layers.51.self_attn.q_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.51.self_attn.k_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.51.self_attn.v_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.51.self_attn.o_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.51.mlp.gate_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.51.mlp.up_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.51.mlp.down_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.52.self_attn.q_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.52.self_attn.k_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.52.self_attn.v_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.52.self_attn.o_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.52.mlp.gate_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.52.mlp.up_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.52.mlp.down_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.53.self_attn.q_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.53.self_attn.k_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.53.self_attn.v_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - 
"model.layers.53.self_attn.o_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.53.mlp.gate_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.53.mlp.up_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.53.mlp.down_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.54.self_attn.q_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.54.self_attn.k_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.54.self_attn.v_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.54.self_attn.o_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.54.mlp.gate_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.54.mlp.up_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.54.mlp.down_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.55.self_attn.q_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.55.self_attn.k_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.55.self_attn.v_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.55.self_attn.o_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.55.mlp.gate_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.55.mlp.up_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - 
"model.layers.55.mlp.down_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.56.self_attn.q_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.56.self_attn.k_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.56.self_attn.v_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.56.self_attn.o_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.56.mlp.gate_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.56.mlp.up_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.56.mlp.down_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.57.self_attn.q_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.57.self_attn.k_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.57.self_attn.v_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.57.self_attn.o_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.57.mlp.gate_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.57.mlp.up_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.57.mlp.down_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.58.self_attn.q_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.58.self_attn.k_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - 
"model.layers.58.self_attn.v_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.58.self_attn.o_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.58.mlp.gate_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.58.mlp.up_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.58.mlp.down_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.59.self_attn.q_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.59.self_attn.k_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.59.self_attn.v_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.59.self_attn.o_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.59.mlp.gate_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.59.mlp.up_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.59.mlp.down_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.60.self_attn.q_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.60.self_attn.k_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.60.self_attn.v_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.60.self_attn.o_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.60.mlp.gate_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - 
"model.layers.60.mlp.up_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.60.mlp.down_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.61.self_attn.q_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.61.self_attn.k_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.61.self_attn.v_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.61.self_attn.o_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.61.mlp.gate_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.61.mlp.up_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.61.mlp.down_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.62.self_attn.q_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.62.self_attn.k_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.62.self_attn.v_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.62.self_attn.o_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.62.mlp.gate_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.62.mlp.up_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.62.mlp.down_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.63.self_attn.q_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - 
"model.layers.63.self_attn.k_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.63.self_attn.v_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.63.self_attn.o_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.63.mlp.gate_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.63.mlp.up_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.63.mlp.down_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.64.self_attn.q_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.64.self_attn.k_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.64.self_attn.v_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.64.self_attn.o_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.64.mlp.gate_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.64.mlp.up_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.64.mlp.down_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.65.self_attn.q_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.65.self_attn.k_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.65.self_attn.v_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.65.self_attn.o_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - 
"model.layers.65.mlp.gate_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.65.mlp.up_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.65.mlp.down_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.66.self_attn.q_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.66.self_attn.k_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.66.self_attn.v_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.66.self_attn.o_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.66.mlp.gate_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.66.mlp.up_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.66.mlp.down_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.67.self_attn.q_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.67.self_attn.k_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.67.self_attn.v_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.67.self_attn.o_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.67.mlp.gate_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.67.mlp.up_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.67.mlp.down_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - 
"model.layers.68.self_attn.q_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.68.self_attn.k_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.68.self_attn.v_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.68.self_attn.o_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.68.mlp.gate_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.68.mlp.up_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.68.mlp.down_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.69.self_attn.q_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.69.self_attn.k_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.69.self_attn.v_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.69.self_attn.o_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.69.mlp.gate_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.69.mlp.up_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.69.mlp.down_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.70.self_attn.q_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.70.self_attn.k_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.70.self_attn.v_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - 
"model.layers.70.self_attn.o_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.70.mlp.gate_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.70.mlp.up_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.70.mlp.down_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.71.self_attn.q_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.71.self_attn.k_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.71.self_attn.v_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.71.self_attn.o_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.71.mlp.gate_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.71.mlp.up_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.71.mlp.down_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.72.self_attn.q_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.72.self_attn.k_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.72.self_attn.v_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.72.self_attn.o_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.72.mlp.gate_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.72.mlp.up_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - 
"model.layers.72.mlp.down_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.73.self_attn.q_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.73.self_attn.k_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.73.self_attn.v_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.73.self_attn.o_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.73.mlp.gate_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.73.mlp.up_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.73.mlp.down_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.74.self_attn.q_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.74.self_attn.k_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.74.self_attn.v_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.74.self_attn.o_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.74.mlp.gate_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.74.mlp.up_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.74.mlp.down_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.75.self_attn.q_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.75.self_attn.k_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - 
"model.layers.75.self_attn.v_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.75.self_attn.o_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.75.mlp.gate_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.75.mlp.up_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.75.mlp.down_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.76.self_attn.q_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.76.self_attn.k_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.76.self_attn.v_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.76.self_attn.o_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.76.mlp.gate_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.76.mlp.up_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.76.mlp.down_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.77.self_attn.q_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.77.self_attn.k_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.77.self_attn.v_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.77.self_attn.o_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.77.mlp.gate_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - 
"model.layers.77.mlp.up_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.77.mlp.down_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.78.self_attn.q_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.78.self_attn.k_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.78.self_attn.v_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.78.self_attn.o_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.78.mlp.gate_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.78.mlp.up_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.78.mlp.down_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.79.self_attn.q_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.79.self_attn.k_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.79.self_attn.v_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.79.self_attn.o_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 4, - "group_size": 32, - "sym": true, - "act_bits": 4, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.79.mlp.gate_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.79.mlp.up_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - }, - "model.layers.79.mlp.down_proj": { - "data_type": "mx_fp", - "act_data_type": "mx_fp_rceil", - "bits": 8, - "group_size": 32, - "sym": true, - "act_bits": 8, - "act_group_size": 32, - "act_sym": true - } -} \ No newline at end of file diff --git a/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/mix-precision/run_hf_inf.py 
b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/mix-precision/run_hf_inf.py deleted file mode 100644 index 06f479609ec..00000000000 --- a/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/mix-precision/run_hf_inf.py +++ /dev/null @@ -1,29 +0,0 @@ -import torch -import sys - - -quantized_model_path = sys.argv[1] -print("model name or path:", quantized_model_path) -with torch.no_grad(), torch.device("cuda"): - import transformers - - model = transformers.AutoModelForCausalLM.from_pretrained( - quantized_model_path, - torch_dtype=torch.float16, - low_cpu_mem_usage=True, - trust_remote_code=True, - device_map="auto", - ) - tokenizer = transformers.AutoTokenizer.from_pretrained(quantized_model_path) - prompt = "Solve the following math problem step by step: What is 25 + 37? Please answer directly with the result." - - encode = tokenizer.encode(prompt, return_tensors="pt") - with torch.no_grad(): - output_tokens = model.generate( - encode, - max_length=200, - ) - output = tokenizer.decode(output_tokens[0], skip_special_tokens=True) - print(f"Prompt: {prompt}") - print(f"Output: {output}") - assert output is not None, "Output should not be None" \ No newline at end of file diff --git a/neural_compressor/common/base_config.py b/neural_compressor/common/base_config.py index b8c1eac83ab..2ad0eb61744 100644 --- a/neural_compressor/common/base_config.py +++ b/neural_compressor/common/base_config.py @@ -321,6 +321,7 @@ def to_dict(self): result[GLOBAL] = global_config else: result = global_config + result.pop("params_list", None) # Internal parameters return result def get_params_dict(self): @@ -531,7 +532,7 @@ def expand(self) -> List[BaseConfig]: # Assign the options to the `TuningParam` instance param_val = getattr(config, tuning_param.name) if param_val is not None: - if tuning_param.is_tunable(param_val): + if param not in self.non_tunable_params and tuning_param.is_tunable(param_val): tuning_param.options = param_val tuning_param_list.append(tuning_param) else: diff --git a/neural_compressor/torch/algorithms/weight_only/autoround.py b/neural_compressor/torch/algorithms/weight_only/autoround.py index 2ab9aead9e2..1c1821c3891 100644 --- a/neural_compressor/torch/algorithms/weight_only/autoround.py +++ b/neural_compressor/torch/algorithms/weight_only/autoround.py @@ -211,7 +211,8 @@ def __init__( self.super_group_size = super_group_size self.batch_size = batch_size self.amp = amp - self.device = get_accelerator(kwargs.pop("device", "auto")).name() + self.accelerator = get_accelerator(kwargs.pop("device", "auto")) + self.device = self.accelerator.name() self.lr_scheduler = lr_scheduler self.dataset = dataset self.enable_quanted_input = enable_quanted_input @@ -302,6 +303,7 @@ def convert(self, model: torch.nn.Module, *args, **kwargs): device_map=self.auto_scheme_device_map, low_gpu_mem_usage=self.low_gpu_mem_usage, ) + rounder = AutoRound( model, layer_config=self.layer_config, @@ -349,6 +351,7 @@ def convert(self, model: torch.nn.Module, *args, **kwargs): enable_norm_bias_tuning=self.enable_norm_bias_tuning, truncation=self.truncation, enable_torch_compile=self.enable_torch_compile, + # TODO: AutoRound is using layer_config to quantize lm_head, remove it. 
quant_lm_head=self.quant_lm_head, guidance_scale=self.guidance_scale, num_inference_steps=self.num_inference_steps, @@ -370,6 +373,18 @@ def convert(self, model: torch.nn.Module, *args, **kwargs): dump_model_op_stats(rounder.layer_config) + if self.export_format in ["auto_round", "llm_compressor"]: + # the directly returned model is QuantLinear, which is used for packing. + try: + del model + self.accelerator.empty_cache() + logger.info("Quantization is done, reloading model from saved directory...") + import transformers # pylint: disable=E0401 + + model = transformers.AutoModelForCausalLM.from_pretrained(self.output_dir) + except: + pass + return model diff --git a/neural_compressor/torch/quantization/config.py b/neural_compressor/torch/quantization/config.py index dd1bc132776..b1183ef3c64 100644 --- a/neural_compressor/torch/quantization/config.py +++ b/neural_compressor/torch/quantization/config.py @@ -85,6 +85,10 @@ class TorchBaseConfig(BaseConfig): # re-write func _get_op_name_op_type_config to fallback op_type with string # because there are some special op_types for IPEX backend: `Linear&Relu`, `Linear&add`, ... + def __init__(self, white_list): + super().__init__(white_list) + self.non_tunable_params: List[str] = ["white_list"] + def _get_op_name_op_type_config(self): op_type_config_dict = dict() op_name_config_dict = dict() @@ -103,8 +107,6 @@ def _get_op_name_op_type_config(self): def _generate_params_list(cls) -> List[str]: sig = inspect.signature(cls.__init__) params_list = list(sig.parameters.keys())[1:] - if "white_list" in params_list: - params_list.remove("white_list") if "args" in params_list: params_list.remove("args") if "kwargs" in params_list: @@ -118,26 +120,6 @@ class RTNConfig(TorchBaseConfig): """Config class for round-to-nearest weight-only quantization.""" name = RTN - params_list = [ - "dtype", - "bits", - "use_sym", - "group_size", - "group_dim", - "use_full_range", - "use_mse_search", - # layer wise params - "use_layer_wise", - "model_path", - # double quant - "use_double_quant", - "double_quant_dtype", - "double_quant_bits", - "double_quant_use_sym", - "double_quant_group_size", - # quant_lm_head - "quant_lm_head", - ] supported_configs: List[OperatorConfig] = [] def __init__( @@ -346,32 +328,6 @@ class GPTQConfig(TorchBaseConfig): name = GPTQ supported_configs: List[OperatorConfig] = [] - params_list = [ - "dtype", - "bits", - "use_sym", - "group_size", - "use_mse_search", - "use_double_quant", - "double_quant_dtype", - "double_quant_bits", - "double_quant_use_sym", - "double_quant_group_size", - # layer wise params - "use_layer_wise", - "use_block_wise", - "model_path", - # quant lm_head - "quant_lm_head", - # gptq params - "act_order", - "hybrid_order", - "fp8_aware", - "percdamp", - "block_size", - "static_groups", - "true_sequential", - ] def __init__( self, @@ -574,28 +530,6 @@ class AWQConfig(TorchBaseConfig): """ supported_configs: List[OperatorConfig] = [] - params_list = [ - "dtype", - "bits", - "group_size", - "group_dim", - "use_sym", - "use_full_range", - "use_mse_search", - "use_layer_wise", - "use_double_quant", - "double_quant_dtype", - "double_quant_bits", - "double_quant_use_sym", - "double_quant_group_size", - # quant_lm_head - "quant_lm_head", - # AWQ params - "use_auto_scale", - "use_auto_clip", - "folding", - "absorb_layer_dict", - ] name = AWQ def __init__( @@ -756,26 +690,6 @@ class TEQConfig(TorchBaseConfig): """ supported_configs: List[OperatorConfig] = [] - params_list = [ - "dtype", - "bits", - "group_size", - "group_dim", - 
"use_sym", - "use_full_range", - "use_mse_search", - "use_layer_wise", - "use_double_quant", - "double_quant_dtype", - "double_quant_bits", - "double_quant_use_sym", - "double_quant_group_size", - # quant_lm_head - "quant_lm_head", - # TEQ params - "absorb_to_layer", - "folding", - ] name = TEQ def __init__( @@ -926,25 +840,6 @@ class AutoRoundConfig(TorchBaseConfig): """ supported_configs: List[OperatorConfig] = [] - params_list = [ - "dtype", - "bits", - "group_size", - "use_sym", - # autoround params - "enable_full_range", - "batch_size", - "enable_minmax_tuning", - "lr", - "minmax_lr", - "iters", - "seqlen", - "nsamples", - "nblocks", - "gradient_accumulate_steps", - "not_use_best_mse", - "dynamic_max_gap", - ] name = AUTOROUND def __init__( @@ -1000,13 +895,14 @@ def __init__( # v0.8 enable_adam: bool = False, # v0.9: auto scheme parameters - target_bits: int = None, + target_bits: float = None, options: Union[str, list[Union[str]], tuple[Union[str], ...]] = ("MXFP4", "MXFP8"), shared_layers: Optional[Iterable[Iterable[str]]] = None, ignore_scale_zp_bits: bool = False, auto_scheme_method: str = "default", auto_scheme_device_map: str = None, auto_scheme_batch_size: int = None, + output_dir: str = "./temp_auto_round", # Tuning space white_list: Optional[List[OP_NAME_OR_MODULE_TYPE]] = DEFAULT_WHITE_LIST, **kwargs, @@ -1060,17 +956,19 @@ def __init__( device_map: The device to be used for tuning. scheme (str| dict | QuantizationScheme ): A preset scheme that defines the quantization configurations. white_list (Optional[List[OP_NAME_OR_MODULE_TYPE]]): White list of operator names or module types. - target_bits (int): The target bit width for quantization (default is None). + target_bits (float): The target bit width for quantization (default is None). options (Union[str, list[Union[str]], tuple[Union[str], ...]]): The options for mixed-precision quantization. shared_layers (Optional[Iterable[Iterable[str]]]): The shared layers for mixed-precision quantization. ignore_scale_zp_bits (bool): Whether to ignore scale and zero-point bits (default is False). auto_scheme_method (str): The method for automatic scheme selection (default is "default"). auto_scheme_device_map (str): The device map for automatic scheme selection (default is None). auto_scheme_batch_size (int): The batch size for automatic scheme selection (default is 8). + output_dir (str): The output directory for temporary files (default is "./temp_auto_round"). 
""" super().__init__(white_list=white_list) self.params_list = self.__class__._generate_params_list() - self.params_list.remove("options") # option is a list but not a tunable parameter + # these two params are lists but not tunable + self.non_tunable_params.extend(["options", "shared_layers"]) self.enable_full_range = enable_full_range self.batch_size = batch_size @@ -1124,6 +1022,7 @@ def __init__( self.auto_scheme_method = auto_scheme_method self.auto_scheme_device_map = auto_scheme_device_map self.auto_scheme_batch_size = auto_scheme_batch_size + self.output_dir = output_dir # add kwargs for k, v in kwargs.items(): setattr(self, k, v) @@ -1236,14 +1135,6 @@ class MXQuantConfig(TorchBaseConfig): """Config class for MX quantization.""" supported_configs: List[OperatorConfig] = [] - params_list = [ - "w_dtype", - "act_dtype", - "out_dtype", - "blocksize", - "round_method", - "weight_only", - ] name = MX_QUANT def __init__( @@ -1362,16 +1253,6 @@ class DynamicQuantConfig(TorchBaseConfig): """Config class for dynamic quantization.""" name = PT2E_DYNAMIC_QUANT - params_list = [ - "w_dtype", - "w_sym", - "w_granularity", - "w_algo", - "act_dtype", - "act_sym", - "act_granularity", - "act_algo", - ] supported_configs: List[OperatorConfig] = [] def __init__( @@ -1457,17 +1338,6 @@ class INT8StaticQuantConfig(TorchBaseConfig): """Config class for static quantization.""" name = STATIC_QUANT - params_list = [ - "w_dtype", - "w_sym", - "w_granularity", - "w_algo", - "act_dtype", - "act_sym", - "act_granularity", - "act_algo", - "excluded_precisions", - ] supported_configs: List[OperatorConfig] = [] def __init__( @@ -1616,21 +1486,6 @@ class SmoothQuantConfig(TorchBaseConfig): """Config class for smooth quantization.""" name = SMOOTH_QUANT - params_list = [ - "w_dtype", - "w_sym", - "w_granularity", - "w_algo", - "act_dtype", - "act_sym", - "act_granularity", - "act_algo", - "excluded_precisions", - "alpha", - "folding", - "scale_sharing", - "auto_alpha_args", - ] supported_configs: List[OperatorConfig] = [] def __init__( @@ -1777,14 +1632,6 @@ class HQQConfig(TorchBaseConfig): """ name = HQQ - params_list = [ - "bits", - "group_size", - "quant_zero", - "quant_scale", - "scale_quant_group_size", - "quant_lm_head", - ] supported_configs: List[OperatorConfig] = [] def __init__( @@ -2101,9 +1948,6 @@ class MixedPrecisionConfig(TorchBaseConfig): name = MIXED_PRECISION supported_configs: List[OperatorConfig] = [] - params_list = [ - "dtype", - ] supported_half_precision_ops = ( torch.nn.Linear, torch.nn.Conv1d, diff --git a/neural_compressor/torch/utils/auto_accelerator.py b/neural_compressor/torch/utils/auto_accelerator.py index f66e1da9d54..ac9b25aeeee 100644 --- a/neural_compressor/torch/utils/auto_accelerator.py +++ b/neural_compressor/torch/utils/auto_accelerator.py @@ -24,6 +24,7 @@ # To keep it simply, only add the APIs we need. 
+import gc
 import os
 from abc import ABC, abstractmethod
 from enum import Enum, auto
@@ -206,7 +207,7 @@ def device(self, device_index=None):

     def empty_cache(self):
-        """Do nothing."""
-        pass
+        """Nothing device-specific to clear; run Python garbage collection instead."""
+        gc.collect()

     def synchronize(self):
         """Do nothing."""

From 709cc71876243e856caf713ee6969c3123b49b7f Mon Sep 17 00:00:00 2001
From: "He, Xin3" 
Date: Tue, 25 Nov 2025 03:50:08 -0500
Subject: [PATCH 02/10] update requirement

Signed-off-by: He, Xin3 
---
 .../quantization/auto_round/llama3/README.md | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/auto_round/llama3/README.md b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/auto_round/llama3/README.md
index d33d8090ed3..8c5ae9a028d 100644
--- a/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/auto_round/llama3/README.md
+++ b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/auto_round/llama3/README.md
@@ -6,13 +6,25 @@ In this example, you can verify the accuracy on HPU/CUDA device with emulation o

 ```bash
 # neural-compressor-pt
-pip install neural-compressor-pt>=3.6
+pip install neural-compressor-pt==3.7
 # auto-round
-pip install auto-round>=0.8.0
+pip install auto-round==0.9.1
 # other requirements
 pip install -r requirements.txt
 ```

+**Until the neural-compressor v3.7 and auto-round v0.9.1 releases are available, please install from source for the latest updates:**
+
+```bash
+# neural-compressor-pt
+INC_PT_ONLY=1 pip install git+https://github.com/intel/neural-compressor.git@master
+# auto-round
+pip install git+https://github.com/intel/auto-round.git@main
+# other requirements
+pip install -r requirements.txt
+```
+
+
 ## Quantization

 ### Demo (`MXFP4`, `MXFP8`, `NVFP4`, `uNVFP4`)

From cc25af55de9420e7e8b18768b11324bcd937ee04 Mon Sep 17 00:00:00 2001
From: "He, Xin3" 
Date: Wed, 26 Nov 2025 00:55:59 -0500
Subject: [PATCH 03/10] add run_quant run_benchmark

Signed-off-by: He, Xin3 
---
 .../quantization/auto_round/llama3/README.md | 127 +++++--------
 .../auto_round/llama3/quantize.py | 2 +-
 .../auto_round/llama3/run_benchmark.sh | 119 +++++++++++++
 .../auto_round/llama3/run_quant.sh | 167 ++++++++++++++++++
 .../torch/algorithms/weight_only/autoround.py | 2 +
 5 files changed, 338 insertions(+), 79 deletions(-)
 create mode 100644 examples/pytorch/nlp/huggingface_models/language-modeling/quantization/auto_round/llama3/run_benchmark.sh
 create mode 100644 examples/pytorch/nlp/huggingface_models/language-modeling/quantization/auto_round/llama3/run_quant.sh

diff --git a/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/auto_round/llama3/README.md b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/auto_round/llama3/README.md
index 8c5ae9a028d..d8c842eaab2 100644
--- a/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/auto_round/llama3/README.md
+++ b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/auto_round/llama3/README.md
@@ -84,81 +84,37 @@ CUDA_VISIBLE_DEVICES=0 python quantize.py \

 AutoRound helps improve the accuracy; `iters` and `nsamples` are set higher than the defaults.
 ```bash
 # Quantize and export AutoRound format
-CUDA_VISIBLE_DEVICES=0 python quantize.py \
-    --model_name_or_path /models/Meta-Llama-3.1-8B-Instruct \
-    --quantize \
-    --dtype MXFP8 \
-    --iters 1000 \
-    --nsamples 512 \
-    --enable_torch_compile \
-    --low_gpu_mem_usage \
-    --export_format auto_round \
-    --export_path Llama-3.1-8B-MXFP8
+CUDA_VISIBLE_DEVICES=0 bash run_quant.sh --topology=Llama-3.1-8B --dtype=mxfp8 --input_model=/models/Meta-Llama-3.1-8B-Instruct --output_model=Llama-3.1-8B-MXFP8
 ```
-
-#### Llama 3.1 8B MXFP4 (Mixed with MXFP8)
+#### Llama 3.1 8B MXFP4 (Mixed with MXFP8, Target_bits=7.8)

 ```bash
-CUDA_VISIBLE_DEVICES=0 python quantize.py \
-    --model_name_or_path /models/Meta-Llama-3.1-8B-Instruct \
-    --quantize \
-    --target_bits 7.8 \
-    --options "MXFP4" "MXFP8" \
-    --shared_layer "k_proj" "v_proj" "q_proj" \
-    --shared_layer "gate_proj" "up_proj" \
-    --enable_torch_compile \
-    --low_gpu_mem_usage \
-    --export_format auto_round \
-    --export_path Llama-3.1-8B-MXFP4-MXFP8
+CUDA_VISIBLE_DEVICES=0 bash run_quant.sh --topology=Llama-3.1-8B --dtype=mxfp4_mixed --input_model=/models/Meta-Llama-3.1-8B-Instruct --output_model=Llama-3.1-8B-MXFP4-MXFP8
 ```

 #### Llama 3.3 70B MXFP8

 ```bash
-CUDA_VISIBLE_DEVICES=0 python quantize.py \
-    --model_name_or_path /models/Llama-3.3-70B-Instruct/ \
-    --quantize \
-    --dtype MXFP8 \
-    --quant_lm_head \
-    --iters 0 \
-    --enable_torch_compile \
-    --low_gpu_mem_usage \
-    --export_format auto_round \
-    --export_path Llama-3.3-70B-MXFP8
+CUDA_VISIBLE_DEVICES=0 bash run_quant.sh --topology=Llama-3.3-70B --dtype=mxfp8 --input_model=/models/Llama-3.3-70B-Instruct/ --output_model=Llama-3.3-70B-MXFP8
 ```

-#### Llama 3.3 70B MXFP4 (Mixed with MXFP8)
+#### Llama 3.3 70B MXFP4 (Mixed with MXFP8, Target_bits=5.8)
 ```bash
-CUDA_VISIBLE_DEVICES=0 python quantize.py \
-    --model_name_or_path /models/Llama-3.3-70B-Instruct/ \
-    --quantize \
-    --target_bits 5.8 \
-    --options "MXFP4" "MXFP8" \
-    --shared_layer "k_proj" "v_proj" "q_proj" \
-    --shared_layer "gate_proj" "up_proj" \
-    --enable_torch_compile \
-    --low_gpu_mem_usage \
-    --export_format auto_round \
-    --export_path Llama-3.3-70B-MXFP4-MXFP8
+CUDA_VISIBLE_DEVICES=0 bash run_quant.sh --topology=Llama-3.3-70B --dtype=mxfp4_mixed --input_model=/models/Llama-3.3-70B-Instruct/ --output_model=Llama-3.3-70B-MXFP4-MXFP8
 ```

-#### Llama 3.1 70B uNVFP4
+#### Llama 3.1 70B MXFP8

 ```bash
-CUDA_VISIBLE_DEVICES=0,1,2,3 python quantize.py \
-    --model_name_or_path /models/Llama-3.1-70B-Instruct/ \
-    --quantize \
-    --dtype uNVFP4 \
-    --quant_lm_head \
-    --iters 0 \
-    --enable_torch_compile \
-    --low_gpu_mem_usage \
-    --export_format fake \
-    --export_path Llama-3.1-70B-uNVFP4 \
-    --accuracy
+CUDA_VISIBLE_DEVICES=0 bash run_quant.sh --topology=Llama-3.1-70B --dtype=mxfp8 --input_model=/models/Llama-3.1-70B-Instruct/ --output_model=Llama-3.1-70B-MXFP8
 ```

+#### Llama 3.1 70B uNVFP4
+```bash
+CUDA_VISIBLE_DEVICES=0,1,2,3 bash run_quant.sh --topology=Llama-3.1-70B --dtype=unvfp4 --input_model=/models/Llama-3.1-70B-Instruct/ --output_model=Llama-3.1-70B-uNVFP4
+```

 Note: If you hit an OOM issue, it is suggested to either increase `CUDA_VISIBLE_DEVICES` or reduce `eval_batch_size`.

 ## Inference

@@ -177,28 +133,43 @@ git checkout fused-moe-ar
 VLLM_USE_PRECOMPILED=1 pip install -e .
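# Optional sanity check before evaluating (assumes the editable install above succeeded)
python -c "import vllm; print(vllm.__version__)"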
 ```

-#### Accuracy Evaluation
-```bash
-# add_bos_token=True helps accuracy for general tasks
-VLLM_ENABLE_AR_EXT=1 \
-TORCH_COMPILE_DISABLE=1 \
-CUDA_VISIBLE_DEVICES=0 \
-lm_eval --model vllm \
-    --model_args pretrained=Llama-3.1-8B-MXFP4-MXFP8,add_bos_token=True,tensor_parallel_size=1,data_parallel_size=1 \
-    --tasks piqa,hellaswag,mmlu \
-    --batch_size 8 &
-wait
-# add_bos_token=True helps accuracy for GSM8K
-VLLM_ENABLE_AR_EXT=1 \
-TORCH_COMPILE_DISABLE=1 \
-CUDA_VISIBLE_DEVICES=0 \
-lm_eval --model vllm \
-    --model_args pretrained=Llama-3.1-8B-MXFP4-MXFP8,add_bos_token=False,tensor_parallel_size=1,data_parallel_size=1 \
-    --tasks gsm8k \
-    --batch_size 8
-```
-
-Note: If you got OOM issue, either increasing `CUDA_VISIBLE_DEVICES`+`tensor_parallel_size` or reducing `batch_size` is suggested.
+#### MXFP Benchmark Script
+
+For convenience, we provide a benchmark script that automatically handles GPU detection and tensor-parallelism configuration:
+
+**All 5 MXFP benchmark cases:**
+
+1. **Llama 3.1 8B MXFP8** (1 GPU):
+```bash
+CUDA_VISIBLE_DEVICES=0 bash run_benchmark.sh --model_path=Llama-3.1-8B-MXFP8
+```
+
+2. **Llama 3.1 8B MXFP4 Mixed** (1 GPU):
+```bash
+CUDA_VISIBLE_DEVICES=0 bash run_benchmark.sh --model_path=Llama-3.1-8B-MXFP4-MXFP8
+```
+
+3. **Llama 3.3 70B MXFP8** (4 GPUs):
+```bash
+CUDA_VISIBLE_DEVICES=0,1,2,3 bash run_benchmark.sh --model_path=Llama-3.3-70B-MXFP8
+```
+
+4. **Llama 3.3 70B MXFP4 Mixed** (4 GPUs):
+```bash
+CUDA_VISIBLE_DEVICES=0,1,2,3 bash run_benchmark.sh --model_path=Llama-3.3-70B-MXFP4-MXFP8
+```
+
+5. **Llama 3.1 70B MXFP8** (4 GPUs):
+```bash
+CUDA_VISIBLE_DEVICES=0,1,2,3 bash run_benchmark.sh --model_path=Llama-3.1-70B-MXFP8
+```
+
+The script automatically:
+- Detects available GPUs from `CUDA_VISIBLE_DEVICES` and sets `tensor_parallel_size` accordingly
+- Handles different `add_bos_token` settings for different tasks (GSM8K requires `False`, others use `True`)
+- Runs default tasks: `piqa,hellaswag,mmlu,gsm8k` with batch size 8
+- Supports custom task selection and batch size adjustment
+
 ### NVFP4

 NVFP4 is supported by vLLM already; please set the `llm_compressor` format for exporting during quantization.
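As a minimal sketch of that flow (it reuses the demo flags from the Quantization section; the model and export paths are illustrative, not from the original recipes):

```bash
# Quantize with NVFP4 and export in llm_compressor format for public vLLM
CUDA_VISIBLE_DEVICES=0 python quantize.py \
    --model_name_or_path facebook/opt-125m \
    --quantize \
    --dtype NVFP4 \
    --export_format llm_compressor \
    --export_path OPT-125M-NVFP4
```

A checkpoint exported this way should load in public vLLM directly, without the `VLLM_ENABLE_AR_EXT` fork extension used above.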
diff --git a/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/auto_round/llama3/quantize.py b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/auto_round/llama3/quantize.py
index 39d51edfc8f..30878475fb3 100644
--- a/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/auto_round/llama3/quantize.py
+++ b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/auto_round/llama3/quantize.py
@@ -28,7 +28,7 @@ if int(os.getenv("WORLD_SIZE", "0")) > 0:
     os.environ.setdefault("PT_HPU_LAZY_ACC_PAR_MODE", "0")
     os.environ.setdefault("PT_HPU_ENABLE_LAZY_COLLECTIVES", "true")
-from neural_compressor.torch.utils import is_hpex_available, world_size
+from neural_compressor.torch.utils import is_hpex_available
 from neural_compressor.torch.quantization import autotune, prepare, convert, AutoRoundConfig, TuningConfig

 if is_hpex_available():
diff --git a/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/auto_round/llama3/run_benchmark.sh b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/auto_round/llama3/run_benchmark.sh
new file mode 100644
index 00000000000..87b635be52f
--- /dev/null
+++ b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/auto_round/llama3/run_benchmark.sh
@@ -0,0 +1,119 @@
+#!/bin/bash
+
+# Usage: CUDA_VISIBLE_DEVICES=0 bash run_benchmark.sh --model_path=<model_path> [--tasks=<tasks>] [--batch_size=<batch_size>]
+
+# Parse command line arguments
+TASKS="piqa,hellaswag,mmlu,gsm8k"
+BATCH_SIZE=8
+
+while [[ $# -gt 0 ]]; do
+    case $1 in
+        --model_path=*)
+            MODEL_PATH="${1#*=}"
+            shift
+            ;;
+        --tasks=*)
+            TASKS="${1#*=}"
+            shift
+            ;;
+        --batch_size=*)
+            BATCH_SIZE="${1#*=}"
+            shift
+            ;;
+        *)
+            echo "Unknown parameter: $1"
+            exit 1
+            ;;
+    esac
+done
+
+# Validate required parameters
+if [[ -z "$MODEL_PATH" ]]; then
+    echo "Usage: bash run_benchmark.sh --model_path=<model_path> [--tasks=<tasks>] [--batch_size=<batch_size>]"
+    echo "Example: CUDA_VISIBLE_DEVICES=0 bash run_benchmark.sh --model_path=Llama-3.1-8B-MXFP8"
+    exit 1
+fi
+
+# Count available GPUs and set tensor_parallel_size
+if [[ -n "$CUDA_VISIBLE_DEVICES" ]]; then
+    # Count comma-separated GPU IDs
+    IFS=',' read -ra GPU_ARRAY <<< "$CUDA_VISIBLE_DEVICES"
+    TENSOR_PARALLEL_SIZE=${#GPU_ARRAY[@]}
+else
+    TENSOR_PARALLEL_SIZE=1
+fi
+
+echo "Running benchmark with parameters:"
+echo "  Model Path: $MODEL_PATH"
+echo "  Tasks: $TASKS"
+echo "  Batch Size: $BATCH_SIZE"
+echo "  Tensor Parallel Size: $TENSOR_PARALLEL_SIZE"
+echo "  CUDA_VISIBLE_DEVICES: $CUDA_VISIBLE_DEVICES"
+
+# Check if the model exists
+if [[ ! -d "$MODEL_PATH" ]]; then
+    echo "Error: Model path '$MODEL_PATH' does not exist!"
+    exit 1
+fi
+
+# Set common environment variables
+export VLLM_ENABLE_AR_EXT=1
+export TORCH_COMPILE_DISABLE=1
+
+# Function to run evaluation for specific tasks
+run_evaluation() {
+    local tasks=$1
+    local add_bos_token=$2
+
+    echo "Running evaluation for tasks: $tasks (add_bos_token=$add_bos_token)"
+
+    # Print the command being executed
+    local cmd="lm_eval --model vllm --model_args pretrained=\"$MODEL_PATH\",add_bos_token=$add_bos_token,tensor_parallel_size=$TENSOR_PARALLEL_SIZE,data_parallel_size=1 --tasks $tasks --batch_size $BATCH_SIZE"
+    echo "Executing command: $cmd"
+
+    lm_eval --model vllm \
+        --model_args pretrained="$MODEL_PATH",add_bos_token=$add_bos_token,tensor_parallel_size=$TENSOR_PARALLEL_SIZE,data_parallel_size=1 \
+        --tasks $tasks \
+        --batch_size $BATCH_SIZE
+
+    if [[ $?
 -ne 0 ]]; then
+        echo "Error: Evaluation failed for tasks: $tasks"
+        return 1
+    fi
+}
+
+# Check if tasks contain gsm8k (requires add_bos_token=False)
+if [[ "$TASKS" == *"gsm8k"* ]]; then
+    # If gsm8k is the only task
+    if [[ "$TASKS" == "gsm8k" ]]; then
+        run_evaluation "$TASKS" false
+    else
+        # Split tasks: run gsm8k separately with add_bos_token=False
+        # (match gsm8k only as a whole list entry so other task names are not clipped)
+        OTHER_TASKS=$(echo "$TASKS" | sed -E 's/(^|,)gsm8k(,|$)/\1/' | sed 's/^,//' | sed 's/,$//')
+
+        if [[ -n "$OTHER_TASKS" ]]; then
+            echo "Running general tasks with add_bos_token=True"
+            run_evaluation "$OTHER_TASKS" true
+
+            if [[ $? -eq 0 ]]; then
+                echo "Running GSM8K with add_bos_token=False"
+                run_evaluation "gsm8k" false
+            else
+                echo "Skipping GSM8K due to previous failure"
+                exit 1
+            fi
+        else
+            run_evaluation "gsm8k" false
+        fi
+    fi
+else
+    # No gsm8k task, use add_bos_token=True for all tasks
+    run_evaluation "$TASKS" true
+fi
+
+if [[ $? -eq 0 ]]; then
+    echo "Benchmark completed successfully!"
+else
+    echo "Benchmark failed!"
+    exit 1
+fi
\ No newline at end of file
diff --git a/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/auto_round/llama3/run_quant.sh b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/auto_round/llama3/run_quant.sh
new file mode 100644
index 00000000000..43ba9c2d18d
--- /dev/null
+++ b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/auto_round/llama3/run_quant.sh
@@ -0,0 +1,167 @@
+#!/bin/bash
+
+# Usage: CUDA_VISIBLE_DEVICES=0 bash run_quant.sh --topology=Llama-3.1-8B --dtype=mxfp8 --input_model=/models/Meta-Llama-3.1-8B-Instruct --output_model=Llama-3.1-8B-MXFP8
+
+# Parse command line arguments
+while [[ $# -gt 0 ]]; do
+    case $1 in
+        --topology=*)
+            TOPOLOGY="${1#*=}"
+            shift
+            ;;
+        --dtype=*)
+            DTYPE="${1#*=}"
+            shift
+            ;;
+        --input_model=*)
+            INPUT_MODEL="${1#*=}"
+            shift
+            ;;
+        --output_model=*)
+            OUTPUT_MODEL="${1#*=}"
+            shift
+            ;;
+        *)
+            echo "Unknown parameter: $1"
+            exit 1
+            ;;
+    esac
+done
+
+# Validate required parameters
+if [[ -z "$TOPOLOGY" || -z "$DTYPE" || -z "$INPUT_MODEL" || -z "$OUTPUT_MODEL" ]]; then
+    echo "Usage: bash run_quant.sh --topology=<topology> --dtype=<dtype> --input_model=<model_path> --output_model=<output_path>"
+    echo "Supported topologies: Llama-3.1-8B, Llama-3.3-70B, Llama-3.1-70B"
+    echo "Supported dtypes: mxfp8, mxfp4_mixed, unvfp4"
+    exit 1
+fi
+
+echo "Starting quantization with parameters:"
+echo "  Topology: $TOPOLOGY"
+echo "  Data Type: $DTYPE"
+echo "  Input Model: $INPUT_MODEL"
+echo "  Output Model: $OUTPUT_MODEL"
+
+# Set common parameters
+COMMON_ARGS="--quantize --enable_torch_compile --low_gpu_mem_usage --export_format auto_round"
+
+case "$TOPOLOGY" in
+    "Llama-3.1-8B")
+        case "$DTYPE" in
+            "mxfp8")
+                echo "Running Llama 3.1 8B MXFP8 quantization..."
+                CMD="python quantize.py --model_name_or_path \"$INPUT_MODEL\" $COMMON_ARGS --dtype MXFP8 --iters 1000 --nsamples 512 --export_path \"$OUTPUT_MODEL\""
+                echo "Executing command: $CMD"
+                python quantize.py \
+                    --model_name_or_path "$INPUT_MODEL" \
+                    $COMMON_ARGS \
+                    --dtype MXFP8 \
+                    --iters 1000 \
+                    --nsamples 512 \
+                    --export_path "$OUTPUT_MODEL"
+                ;;
+            "mxfp4_mixed")
+                echo "Running Llama 3.1 8B MXFP4 (Mixed with MXFP8) quantization..."
+ CMD="python quantize.py --model_name_or_path \"$INPUT_MODEL\" $COMMON_ARGS --target_bits 7.8 --options \"MXFP4\" \"MXFP8\" --shared_layer \"k_proj\" \"v_proj\" \"q_proj\" --shared_layer \"gate_proj\" \"up_proj\" --export_path \"$OUTPUT_MODEL\"" + echo "Executing command: $CMD" + python quantize.py \ + --model_name_or_path "$INPUT_MODEL" \ + $COMMON_ARGS \ + --target_bits 7.8 \ + --options "MXFP4" "MXFP8" \ + --shared_layer "k_proj" "v_proj" "q_proj" \ + --shared_layer "gate_proj" "up_proj" \ + --export_path "$OUTPUT_MODEL" + ;; + *) + echo "Error: Unsupported dtype '$DTYPE' for topology '$TOPOLOGY'" + echo "Supported dtypes for Llama-3.1-8B: mxfp8, mxfp4_mixed" + exit 1 + ;; + esac + ;; + "Llama-3.3-70B") + case "$DTYPE" in + "mxfp8") + echo "Running Llama 3.3 70B MXFP8 quantization..." + CMD="python quantize.py --model_name_or_path \"$INPUT_MODEL\" $COMMON_ARGS --dtype MXFP8 --quant_lm_head --iters 0 --export_path \"$OUTPUT_MODEL\"" + echo "Executing command: $CMD" + python quantize.py \ + --model_name_or_path "$INPUT_MODEL" \ + $COMMON_ARGS \ + --dtype MXFP8 \ + --quant_lm_head \ + --iters 0 \ + --export_path "$OUTPUT_MODEL" + ;; + "mxfp4_mixed") + echo "Running Llama 3.3 70B MXFP4 (Mixed with MXFP8) quantization..." + CMD="python quantize.py --model_name_or_path \"$INPUT_MODEL\" $COMMON_ARGS --target_bits 5.8 --options \"MXFP4\" \"MXFP8\" --shared_layer \"k_proj\" \"v_proj\" \"q_proj\" --shared_layer \"gate_proj\" \"up_proj\" --export_path \"$OUTPUT_MODEL\"" + echo "Executing command: $CMD" + python quantize.py \ + --model_name_or_path "$INPUT_MODEL" \ + $COMMON_ARGS \ + --target_bits 5.8 \ + --options "MXFP4" "MXFP8" \ + --shared_layer "k_proj" "v_proj" "q_proj" \ + --shared_layer "gate_proj" "up_proj" \ + --export_path "$OUTPUT_MODEL" + ;; + *) + echo "Error: Unsupported dtype '$DTYPE' for topology '$TOPOLOGY'" + echo "Supported dtypes for Llama-3.3-70B: mxfp8, mxfp4_mixed" + exit 1 + ;; + esac + ;; + "Llama-3.1-70B") + case "$DTYPE" in + "mxfp8") + echo "Running Llama 3.1 70B MXFP8 quantization..." + CMD="python quantize.py --model_name_or_path \"$INPUT_MODEL\" $COMMON_ARGS --dtype MXFP8 --quant_lm_head --iters 0 --export_path \"$OUTPUT_MODEL\"" + echo "Executing command: $CMD" + python quantize.py \ + --model_name_or_path "$INPUT_MODEL" \ + $COMMON_ARGS \ + --dtype MXFP8 \ + --quant_lm_head \ + --iters 0 \ + --export_path "$OUTPUT_MODEL" + ;; + "unvfp4") + echo "Running Llama 3.1 70B uNVFP4 quantization..." + CMD="python quantize.py --model_name_or_path \"$INPUT_MODEL\" --quantize --dtype uNVFP4 --quant_lm_head --iters 0 --enable_torch_compile --low_gpu_mem_usage --export_format fake --export_path \"$OUTPUT_MODEL\" --accuracy" + echo "Executing command: $CMD" + python quantize.py \ + --model_name_or_path "$INPUT_MODEL" \ + --quantize \ + --dtype uNVFP4 \ + --quant_lm_head \ + --iters 0 \ + --enable_torch_compile \ + --low_gpu_mem_usage \ + --export_format fake \ + --export_path "$OUTPUT_MODEL" \ + --accuracy + ;; + *) + echo "Error: Unsupported dtype '$DTYPE' for topology '$TOPOLOGY'" + echo "Supported dtypes for Llama-3.3-70B: mxfp8, mxfp4_mixed" + exit 1 + ;; + esac + ;; + *) + echo "Error: Unsupported topology '$TOPOLOGY'" + echo "Supported topologies: Llama-3.1-8B, Llama-3.3-70B" + exit 1 + ;; +esac + +if [[ $? -eq 0 ]]; then + echo "Quantization completed successfully!" + echo "Output model saved to: $OUTPUT_MODEL" +else + echo "Quantization failed!" 
+ exit 1 +fi \ No newline at end of file diff --git a/neural_compressor/torch/algorithms/weight_only/autoround.py b/neural_compressor/torch/algorithms/weight_only/autoround.py index 1c1821c3891..f84ff7e7871 100644 --- a/neural_compressor/torch/algorithms/weight_only/autoround.py +++ b/neural_compressor/torch/algorithms/weight_only/autoround.py @@ -384,6 +384,8 @@ def convert(self, model: torch.nn.Module, *args, **kwargs): model = transformers.AutoModelForCausalLM.from_pretrained(self.output_dir) except: pass + else: + self.accelerator.empty_cache() return model From dcd69a27db5c7ff7133df674ca4c070412172755 Mon Sep 17 00:00:00 2001 From: "He, Xin3" Date: Wed, 26 Nov 2025 01:02:04 -0500 Subject: [PATCH 04/10] update readme Signed-off-by: He, Xin3 --- .../quantization/auto_round/llama3/README.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/auto_round/llama3/README.md b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/auto_round/llama3/README.md index d8c842eaab2..2673f7bf3f2 100644 --- a/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/auto_round/llama3/README.md +++ b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/auto_round/llama3/README.md @@ -127,9 +127,7 @@ Note: If you got OOM issue, either increasing `CUDA_VISIBLE_DEVICES` or reducing ```bash # Install the forked vLLM -git clone https://github.com/yiliu30/vllm-fork.git -cd vllm-fork -git checkout fused-moe-ar +git clone -b fused-moe-ar --single-branch --quiet https://github.com/yiliu30/vllm-fork.git && cd vllm-fork VLLM_USE_PRECOMPILED=1 pip install -e . ``` From f07ca2da95e8db6ecd6255aa8da44b88456178ac Mon Sep 17 00:00:00 2001 From: Xin He Date: Wed, 26 Nov 2025 14:03:04 +0800 Subject: [PATCH 05/10] Update neural_compressor/torch/algorithms/weight_only/autoround.py --- neural_compressor/torch/algorithms/weight_only/autoround.py | 1 - 1 file changed, 1 deletion(-) diff --git a/neural_compressor/torch/algorithms/weight_only/autoround.py b/neural_compressor/torch/algorithms/weight_only/autoround.py index f84ff7e7871..f236a8dba42 100644 --- a/neural_compressor/torch/algorithms/weight_only/autoround.py +++ b/neural_compressor/torch/algorithms/weight_only/autoround.py @@ -351,7 +351,6 @@ def convert(self, model: torch.nn.Module, *args, **kwargs): enable_norm_bias_tuning=self.enable_norm_bias_tuning, truncation=self.truncation, enable_torch_compile=self.enable_torch_compile, - # TODO: AutoRound is using layer_config to quantize lm_head, remove it. 
quant_lm_head=self.quant_lm_head, guidance_scale=self.guidance_scale, num_inference_steps=self.num_inference_steps, From bca20632edeb4dccf1a294afad7d45ad2152573a Mon Sep 17 00:00:00 2001 From: Xin He Date: Wed, 26 Nov 2025 14:03:33 +0800 Subject: [PATCH 06/10] Update neural_compressor/common/base_config.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- neural_compressor/common/base_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/neural_compressor/common/base_config.py b/neural_compressor/common/base_config.py index 2ad0eb61744..17ccd7f6457 100644 --- a/neural_compressor/common/base_config.py +++ b/neural_compressor/common/base_config.py @@ -532,7 +532,7 @@ def expand(self) -> List[BaseConfig]: # Assign the options to the `TuningParam` instance param_val = getattr(config, tuning_param.name) if param_val is not None: - if param not in self.non_tunable_params and tuning_param.is_tunable(param_val): + if tuning_param.name not in self.non_tunable_params and tuning_param.is_tunable(param_val): tuning_param.options = param_val tuning_param_list.append(tuning_param) else: From 1d812a0917e1e2130585071582f5d100cc24935a Mon Sep 17 00:00:00 2001 From: Xin He Date: Wed, 26 Nov 2025 14:03:48 +0800 Subject: [PATCH 07/10] Update neural_compressor/torch/algorithms/weight_only/autoround.py Co-authored-by: Tang Kaihui --- neural_compressor/torch/algorithms/weight_only/autoround.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/neural_compressor/torch/algorithms/weight_only/autoround.py b/neural_compressor/torch/algorithms/weight_only/autoround.py index f236a8dba42..860358f9224 100644 --- a/neural_compressor/torch/algorithms/weight_only/autoround.py +++ b/neural_compressor/torch/algorithms/weight_only/autoround.py @@ -377,7 +377,7 @@ def convert(self, model: torch.nn.Module, *args, **kwargs): try: del model self.accelerator.empty_cache() - logger.info("Quantization is done, reloading model from saved directory...") + logger.info(f"Quantization is done, reloading model from saved directory({self.output_dir})...") import transformers # pylint: disable=E0401 model = transformers.AutoModelForCausalLM.from_pretrained(self.output_dir) From 99b8fffc51faad1700e9420f1f612dfe21bfd690 Mon Sep 17 00:00:00 2001 From: "He, Xin3" Date: Wed, 26 Nov 2025 21:05:14 -0500 Subject: [PATCH 08/10] fix bug Signed-off-by: He, Xin3 --- neural_compressor/torch/quantization/config.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/neural_compressor/torch/quantization/config.py b/neural_compressor/torch/quantization/config.py index b1183ef3c64..5a86ddd4fb0 100644 --- a/neural_compressor/torch/quantization/config.py +++ b/neural_compressor/torch/quantization/config.py @@ -85,8 +85,9 @@ class TorchBaseConfig(BaseConfig): # re-write func _get_op_name_op_type_config to fallback op_type with string # because there are some special op_types for IPEX backend: `Linear&Relu`, `Linear&add`, ... - def __init__(self, white_list): + def __init__(self, white_list=DEFAULT_WHITE_LIST): super().__init__(white_list) + self.params_list = self.__class__._generate_params_list() self.non_tunable_params: List[str] = ["white_list"] def _get_op_name_op_type_config(self): @@ -966,7 +967,6 @@ def __init__( output_dir (str): The output directory for temporary files (default is "./temp_auto_round"). 
""" super().__init__(white_list=white_list) - self.params_list = self.__class__._generate_params_list() # these two params are lists but not tunable self.non_tunable_params.extend(["options", "shared_layers"]) From 3ffb650cb8ac9a00176fe32bc13f705bf771eb66 Mon Sep 17 00:00:00 2001 From: "He, Xin3" Date: Thu, 27 Nov 2025 21:27:05 -0500 Subject: [PATCH 09/10] update readme and fix CI Signed-off-by: He, Xin3 --- .../quantization/auto_round/llama3/README.md | 9 ++++----- neural_compressor/common/base_config.py | 1 + .../torch/algorithms/weight_only/autoround.py | 5 +---- 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/auto_round/llama3/README.md b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/auto_round/llama3/README.md index 2673f7bf3f2..3b22bd723a1 100644 --- a/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/auto_round/llama3/README.md +++ b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/auto_round/llama3/README.md @@ -8,7 +8,7 @@ In this example, you can verify the accuracy on HPU/CUDA device with emulation o # neural-compressor-pt pip install neural-compressor-pt==3.7 # auto-round -pip install auto-round==0.9.1 +pip install auto-round==0.9.2 # other requirements pip install -r requirements.txt ``` @@ -19,7 +19,7 @@ pip install -r requirements.txt # neural-compressor-pt INC_PT_ONLY=1 pip install git+https://github.com/intel/neural-compressor.git@master # auto-round -pip install git+https://github.com/intel/auto-round.git@main +pip install git+https://github.com/intel/auto-round.git@more-ar-ext # other requirements pip install -r requirements.txt ``` @@ -44,7 +44,7 @@ CUDA_VISIBLE_DEVICES=0 python quantize.py \ ``` Notes: -- Use `--export_format auto_round` for `MXFP4`, `MXFP8` data type and do inference as [below](#mxfp4--mxfp8) +- Use `--export_format auto_round` for `MXFP4`, `MXFP8` data type and do inference as below. - Use `--export_format llm_compressor` for `NVFP4` data type since public vLLM supports it. - Use `--export_format fake` for `uNVFP4` data type since it's not fully supported. - Setting `--quant_lm_head` applies `--dtype` for the lm_head layer. @@ -87,7 +87,6 @@ AutoRound helps improve the accuracy, `iters` and `nsamples` is higher than defa CUDA_VISIBLE_DEVICES=0 bash run_quant.sh --topology=Llama-3.1-8B --dtype=mxfp8 --input_model=/models/Meta-Llama-3.1-8B-Instruct --output_model=Llama-3.1-8B-MXFP8 ``` - #### Llama 3.1 8B MXFP4 (Mixed with MXFP8, Target_bits=7.8) ```bash @@ -119,7 +118,7 @@ Note: If you got OOM issue, either increasing `CUDA_VISIBLE_DEVICES` or reducing ## Inference -### MXFP4 / MXFP8 +### MXFP4 & MXFP8 - Both pure MXFP4/MXFP8 and mix-precision model generated by target bits are supported. diff --git a/neural_compressor/common/base_config.py b/neural_compressor/common/base_config.py index 17ccd7f6457..4531f0115ff 100644 --- a/neural_compressor/common/base_config.py +++ b/neural_compressor/common/base_config.py @@ -190,6 +190,7 @@ class BaseConfig(ABC): name = BASE_CONFIG params_list = [] _is_initialized = False + non_tunable_params = ["white_list"] def __init__(self, white_list: Optional[List[OP_NAME_OR_MODULE_TYPE]] = DEFAULT_WHITE_LIST) -> None: """Initialize the BaseConfig. 
diff --git a/neural_compressor/torch/algorithms/weight_only/autoround.py b/neural_compressor/torch/algorithms/weight_only/autoround.py index 860358f9224..edaac306491 100644 --- a/neural_compressor/torch/algorithms/weight_only/autoround.py +++ b/neural_compressor/torch/algorithms/weight_only/autoround.py @@ -370,21 +370,18 @@ def convert(self, model: torch.nn.Module, *args, **kwargs): model = rounder.model model.autoround_config = rounder.layer_config + self.accelerator.empty_cache() dump_model_op_stats(rounder.layer_config) if self.export_format in ["auto_round", "llm_compressor"]: # the directly returned model is QuantLinear, which is used for packing. try: - del model - self.accelerator.empty_cache() logger.info(f"Quantization is done, reloading model from saved directory({self.output_dir})...") import transformers # pylint: disable=E0401 model = transformers.AutoModelForCausalLM.from_pretrained(self.output_dir) except: pass - else: - self.accelerator.empty_cache() return model From 54f87bba682a1085b9b8f1566567527390dd1946 Mon Sep 17 00:00:00 2001 From: "He, Xin3" Date: Fri, 28 Nov 2025 03:06:42 -0500 Subject: [PATCH 10/10] fix CI Signed-off-by: He, Xin3 --- neural_compressor/torch/quantization/config.py | 1 + 1 file changed, 1 insertion(+) diff --git a/neural_compressor/torch/quantization/config.py b/neural_compressor/torch/quantization/config.py index 5a86ddd4fb0..be6682c737c 100644 --- a/neural_compressor/torch/quantization/config.py +++ b/neural_compressor/torch/quantization/config.py @@ -1794,6 +1794,7 @@ def __init__( self.observer = observer self.mod_dict = mod_dict self._json_file = None + self.measure_exclude = measure_exclude self.fake_quant = str(fake_quant) self.use_qdq = str(use_qdq) self.scale_format = scale_format