From 4ce5cf7c277176296fad6dd023ae23d702501c72 Mon Sep 17 00:00:00 2001
From: changwangss
Date: Wed, 26 Jun 2024 23:44:57 -0700
Subject: [PATCH 1/3] improve SQ mpt

Signed-off-by: changwangss
---
 .../transformers/modeling/modeling_auto.py    |  9 ++++
 .../modeling/mosaicml_mpt-7b_config.json      | 48 +++++++++++++++++++
 2 files changed, 57 insertions(+)
 create mode 100644 intel_extension_for_transformers/transformers/modeling/mosaicml_mpt-7b_config.json

diff --git a/intel_extension_for_transformers/transformers/modeling/modeling_auto.py b/intel_extension_for_transformers/transformers/modeling/modeling_auto.py
index 263e4784d92..d11a72e4d43 100644
--- a/intel_extension_for_transformers/transformers/modeling/modeling_auto.py
+++ b/intel_extension_for_transformers/transformers/modeling/modeling_auto.py
@@ -840,6 +840,14 @@ def forward(self, input: torch.Tensor) -> tuple[torch.Tensor, None]:
             or device_map == torch.device("cpu")
         ) and model.config.model_type == "chatglm":
             model = model.float()
+        if (
+            not torch.cuda.is_available()
+            or device_map == "cpu"
+            or device_map == torch.device("cpu")
+        ) and model.config.model_type == "mpt":
+            config = AutoConfig.from_pretrained("mosaicml_mpt-7b_config.json",
+                                                torchscript=True)
+            model.config = config
         model.eval()
         model_type = model.config.model_type.replace("_", "-")
 
@@ -1077,6 +1085,7 @@ def calib_func(model):
             recipes=quantization_config.recipes,
             example_inputs=example_inputs,
         )
+
         model = quantization.fit(
             model,
             conf,
diff --git a/intel_extension_for_transformers/transformers/modeling/mosaicml_mpt-7b_config.json b/intel_extension_for_transformers/transformers/modeling/mosaicml_mpt-7b_config.json
new file mode 100644
index 00000000000..9a9cc31be91
--- /dev/null
+++ b/intel_extension_for_transformers/transformers/modeling/mosaicml_mpt-7b_config.json
@@ -0,0 +1,48 @@
+{
+  "architectures": [
+    "MptForCausalLM"
+  ],
+  "attn_config": {
+    "alibi": true,
+    "alibi_bias_max": 8,
+    "attn_impl": "torch",
+    "attn_pdrop": 0,
+    "attn_type": "multihead_attention",
+    "attn_uses_sequence_id": false,
+    "clip_qkv": null,
+    "prefix_lm": false,
+    "qk_ln": false,
+    "softmax_scale": null
+  },
+  "d_model": 4096,
+  "emb_pdrop": 0,
+  "embedding_fraction": 1.0,
+  "expansion_ratio": 4,
+  "init_config": {
+    "emb_init_std": null,
+    "emb_init_uniform_lim": null,
+    "fan_mode": "fan_in",
+    "init_div_is_residual": true,
+    "init_gain": 0,
+    "init_nonlinearity": "relu",
+    "init_std": 0.02,
+    "name": "kaiming_normal_",
+    "verbose": 0
+  },
+  "init_device": "cpu",
+  "learned_pos_emb": true,
+  "logit_scale": null,
+  "max_seq_len": 2048,
+  "model_type": "mpt",
+  "n_heads": 32,
+  "n_layers": 32,
+  "no_bias": true,
+  "norm_type": "low_precision_layernorm",
+  "resid_pdrop": 0,
+  "tokenizer_name": "EleutherAI/gpt-neox-20b",
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.28.1",
+  "use_cache": false,
+  "verbose": 0,
+  "vocab_size": 50432
+}

From f1dc6b8875807e95f14962863aeb8ed2915d3b5f Mon Sep 17 00:00:00 2001
From: "Wang, Chang"
Date: Thu, 27 Jun 2024 17:23:08 +0800
Subject: [PATCH 2/3] Update modeling_auto.py

Signed-off-by: Wang, Chang
---
 .../transformers/modeling/modeling_auto.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/intel_extension_for_transformers/transformers/modeling/modeling_auto.py b/intel_extension_for_transformers/transformers/modeling/modeling_auto.py
index d11a72e4d43..fd4b1a81d5a 100644
--- a/intel_extension_for_transformers/transformers/modeling/modeling_auto.py
+++ b/intel_extension_for_transformers/transformers/modeling/modeling_auto.py
@@ -845,8 +845,10 @@ def forward(self, input: torch.Tensor) -> tuple[torch.Tensor, None]:
             or device_map == "cpu"
             or device_map == torch.device("cpu")
         ) and model.config.model_type == "mpt":
-            config = AutoConfig.from_pretrained("mosaicml_mpt-7b_config.json",
-                                                torchscript=True)
+            config = AutoConfig.from_pretrained(
+                os.path.join(os.path.dirname(__file__), "mosaicml_mpt-7b_config.json"),
+                torchscript=True
+            )
             model.config = config
         model.eval()
         model_type = model.config.model_type.replace("_", "-")

From c4450afe4138b30046a7fcb14bddf983faf0d6db Mon Sep 17 00:00:00 2001
From: changwangss
Date: Thu, 27 Jun 2024 21:58:31 -0700
Subject: [PATCH 3/3] fix mpt architectures

Signed-off-by: changwangss
---
 .../transformers/modeling/modeling_auto.py    |  6 +--
 .../modeling/mosaicml_mpt-7b_config.json      | 48 -------------------
 2 files changed, 1 insertion(+), 53 deletions(-)
 delete mode 100644 intel_extension_for_transformers/transformers/modeling/mosaicml_mpt-7b_config.json

diff --git a/intel_extension_for_transformers/transformers/modeling/modeling_auto.py b/intel_extension_for_transformers/transformers/modeling/modeling_auto.py
index fd4b1a81d5a..a5be8cdc519 100644
--- a/intel_extension_for_transformers/transformers/modeling/modeling_auto.py
+++ b/intel_extension_for_transformers/transformers/modeling/modeling_auto.py
@@ -845,11 +845,7 @@ def forward(self, input: torch.Tensor) -> tuple[torch.Tensor, None]:
             or device_map == "cpu"
             or device_map == torch.device("cpu")
         ) and model.config.model_type == "mpt":
-            config = AutoConfig.from_pretrained(
-                os.path.join(os.path.dirname(__file__), "mosaicml_mpt-7b_config.json"),
-                torchscript=True
-            )
-            model.config = config
+            model.config.architectures = ["MptForCausalLM"]
         model.eval()
         model_type = model.config.model_type.replace("_", "-")
 
diff --git a/intel_extension_for_transformers/transformers/modeling/mosaicml_mpt-7b_config.json b/intel_extension_for_transformers/transformers/modeling/mosaicml_mpt-7b_config.json
deleted file mode 100644
index 9a9cc31be91..00000000000
--- a/intel_extension_for_transformers/transformers/modeling/mosaicml_mpt-7b_config.json
+++ /dev/null
@@ -1,48 +0,0 @@
-{
-  "architectures": [
-    "MptForCausalLM"
-  ],
-  "attn_config": {
-    "alibi": true,
-    "alibi_bias_max": 8,
-    "attn_impl": "torch",
-    "attn_pdrop": 0,
-    "attn_type": "multihead_attention",
-    "attn_uses_sequence_id": false,
-    "clip_qkv": null,
-    "prefix_lm": false,
-    "qk_ln": false,
-    "softmax_scale": null
-  },
-  "d_model": 4096,
-  "emb_pdrop": 0,
-  "embedding_fraction": 1.0,
-  "expansion_ratio": 4,
-  "init_config": {
-    "emb_init_std": null,
-    "emb_init_uniform_lim": null,
-    "fan_mode": "fan_in",
-    "init_div_is_residual": true,
-    "init_gain": 0,
-    "init_nonlinearity": "relu",
-    "init_std": 0.02,
-    "name": "kaiming_normal_",
-    "verbose": 0
-  },
-  "init_device": "cpu",
-  "learned_pos_emb": true,
-  "logit_scale": null,
-  "max_seq_len": 2048,
-  "model_type": "mpt",
-  "n_heads": 32,
-  "n_layers": 32,
-  "no_bias": true,
-  "norm_type": "low_precision_layernorm",
-  "resid_pdrop": 0,
-  "tokenizer_name": "EleutherAI/gpt-neox-20b",
-  "torch_dtype": "bfloat16",
-  "transformers_version": "4.28.1",
-  "use_cache": false,
-  "verbose": 0,
-  "vocab_size": 50432
-}
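
Note on the net effect of the series: PATCH 3/3 drops the bundled mosaicml_mpt-7b_config.json again and instead pins config.architectures on the CPU path, so downstream TorchScript/SmoothQuant tooling resolves the Transformers-native MptForCausalLM class. Below is a minimal standalone sketch of that final code path; the checkpoint id "mosaicml/mpt-7b", the trust_remote_code load, and the device_map scaffolding are illustrative assumptions, not the library's verbatim flow:

    import torch
    from transformers import AutoModelForCausalLM

    # Assumed example checkpoint; remote MPT repos historically report a
    # custom class name (e.g. "MPTForCausalLM") in config.architectures.
    model = AutoModelForCausalLM.from_pretrained(
        "mosaicml/mpt-7b", trust_remote_code=True
    )
    device_map = "cpu"

    if (
        not torch.cuda.is_available()
        or device_map == "cpu"
        or device_map == torch.device("cpu")
    ) and model.config.model_type == "mpt":
        # The one-line fix from PATCH 3/3: point at the in-library class
        # name instead of swapping in a whole hard-coded config file.
        model.config.architectures = ["MptForCausalLM"]

    model.eval()

Overriding the single architectures field in place avoids shipping a model-specific JSON and the path-resolution fix that PATCH 2/3 needed for it.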