# AWQ量化

In [None]:
from awq import AutoAWQForCausalLM
from transformers import AutoTokenizer
from transformers import AwqConfig, AutoConfig
import warnings
# Suppress every Python warning for the rest of the session to keep cell
# output readable. NOTE(review): this also hides deprecation warnings.
warnings.filterwarnings("ignore")

## 1、加载模型

In [None]:
# Model identifier passed to both the model loader and the tokenizer loader.
model_name = "Qwen/Qwen3-0.6B"

# Load the checkpoint through the AutoAWQ wrapper; this is the object that
# is quantized later in the notebook.
model = AutoAWQForCausalLM.from_pretrained(model_name)

# NOTE(review): trust_remote_code=True presumably allows code shipped with
# the model repository to run locally — confirm it is actually required.
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    trust_remote_code=True,
)

## 2、配置quant_config

In [None]:
# AWQ settings handed to model.quantize() as `quant_config`.
quant_config = {
    "zero_point": True,
    "q_group_size": 128,
    "w_bit": 4,
    "version": "GEMM",
}

# Directory the quantized model and tokenizer are written to.
# NOTE(review): the directory name says "7b" although the model loaded in
# this notebook is Qwen3-0.6B — confirm the intended name before renaming,
# since other notebooks may read from this path.
quant_path = "../../models/qwen3-7b-awq"

## 3、开始模型量化

In [None]:
# Run AWQ quantization on the loaded model using the settings in
# `quant_config`; the tokenizer is passed alongside it.
# NOTE(review): presumably a default calibration dataset is used because
# none is supplied here — confirm against the AutoAWQ version in use.
model.quantize(tokenizer, quant_config=quant_config)

## 4、调整量化配置以兼容 Transformers

In [None]:
# Re-express the AutoAWQ settings as a Transformers AwqConfig. The key
# names differ (w_bit -> bits, q_group_size -> group_size) and the version
# string is lower-cased ("GEMM" -> "gemm").
awq_settings = AwqConfig(
    bits=quant_config["w_bit"],
    group_size=quant_config["q_group_size"],
    zero_point=quant_config["zero_point"],
    version=quant_config["version"].lower(),
)

# Store the plain-dict form on the wrapped model's config so it is part of
# the config that gets saved with the model.
quantization_config = awq_settings.to_dict()
model.model.config.quantization_config = quantization_config

## 5、保存量化后的模型和分词器

In [None]:
# Write the quantized model to `quant_path`.
model.save_quantized(quant_path)
# Save the tokenizer into the same directory so the output folder can be
# loaded on its own. As the last expression in the cell, its return value
# is what the notebook displays.
tokenizer.save_pretrained(quant_path)