From 4ce5cf7c277176296fad6dd023ae23d702501c72 Mon Sep 17 00:00:00 2001
From: changwangss
Date: Wed, 26 Jun 2024 23:44:57 -0700
Subject: [PATCH 1/3] improve SQ mpt

Signed-off-by: changwangss
---
 .../transformers/modeling/modeling_auto.py    |  9 ++++
 .../modeling/mosaicml_mpt-7b_config.json      | 48 +++++++++++++++++++
 2 files changed, 57 insertions(+)
 create mode 100644 intel_extension_for_transformers/transformers/modeling/mosaicml_mpt-7b_config.json

diff --git a/intel_extension_for_transformers/transformers/modeling/modeling_auto.py b/intel_extension_for_transformers/transformers/modeling/modeling_auto.py
index 263e4784d92..d11a72e4d43 100644
--- a/intel_extension_for_transformers/transformers/modeling/modeling_auto.py
+++ b/intel_extension_for_transformers/transformers/modeling/modeling_auto.py
@@ -840,6 +840,14 @@ def forward(self, input: torch.Tensor) -> tuple[torch.Tensor, None]:
             or device_map == torch.device("cpu")
         ) and model.config.model_type == "chatglm":
             model = model.float()
+        if (
+            not torch.cuda.is_available()
+            or device_map == "cpu"
+            or device_map == torch.device("cpu")
+        ) and model.config.model_type == "mpt":
+            config = AutoConfig.from_pretrained("mosaicml_mpt-7b_config.json",
+                                                torchscript=True)
+            model.config = config
         model.eval()
         model_type = model.config.model_type.replace("_", "-")
 
@@ -1077,6 +1085,7 @@ def calib_func(model):
             recipes=quantization_config.recipes,
             example_inputs=example_inputs,
         )
+
         model = quantization.fit(
             model,
             conf,
diff --git a/intel_extension_for_transformers/transformers/modeling/mosaicml_mpt-7b_config.json b/intel_extension_for_transformers/transformers/modeling/mosaicml_mpt-7b_config.json
new file mode 100644
index 00000000000..9a9cc31be91
--- /dev/null
+++ b/intel_extension_for_transformers/transformers/modeling/mosaicml_mpt-7b_config.json
@@ -0,0 +1,48 @@
+{
+  "architectures": [
+    "MptForCausalLM"
+  ],
+  "attn_config": {
+    "alibi": true,
+    "alibi_bias_max": 8,
+    "attn_impl": "torch",
+    "attn_pdrop": 0,
+    "attn_type": "multihead_attention",
+    "attn_uses_sequence_id": false,
+    "clip_qkv": null,
+    "prefix_lm": false,
+    "qk_ln": false,
+    "softmax_scale": null
+  },
+  "d_model": 4096,
+  "emb_pdrop": 0,
+  "embedding_fraction": 1.0,
+  "expansion_ratio": 4,
+  "init_config": {
+    "emb_init_std": null,
+    "emb_init_uniform_lim": null,
+    "fan_mode": "fan_in",
+    "init_div_is_residual": true,
+    "init_gain": 0,
+    "init_nonlinearity": "relu",
+    "init_std": 0.02,
+    "name": "kaiming_normal_",
+    "verbose": 0
+  },
+  "init_device": "cpu",
+  "learned_pos_emb": true,
+  "logit_scale": null,
+  "max_seq_len": 2048,
+  "model_type": "mpt",
+  "n_heads": 32,
+  "n_layers": 32,
+  "no_bias": true,
+  "norm_type": "low_precision_layernorm",
+  "resid_pdrop": 0,
+  "tokenizer_name": "EleutherAI/gpt-neox-20b",
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.28.1",
+  "use_cache": false,
+  "verbose": 0,
+  "vocab_size": 50432
+}

From f1dc6b8875807e95f14962863aeb8ed2915d3b5f Mon Sep 17 00:00:00 2001
From: "Wang, Chang"
Date: Thu, 27 Jun 2024 17:23:08 +0800
Subject: [PATCH 2/3] Update modeling_auto.py

Signed-off-by: Wang, Chang
---
 .../transformers/modeling/modeling_auto.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/intel_extension_for_transformers/transformers/modeling/modeling_auto.py b/intel_extension_for_transformers/transformers/modeling/modeling_auto.py
index d11a72e4d43..fd4b1a81d5a 100644
--- a/intel_extension_for_transformers/transformers/modeling/modeling_auto.py
+++ b/intel_extension_for_transformers/transformers/modeling/modeling_auto.py
@@ -845,8 +845,10 @@ def forward(self, input: torch.Tensor) -> tuple[torch.Tensor, None]:
             or device_map == "cpu"
             or device_map == torch.device("cpu")
         ) and model.config.model_type == "mpt":
-            config = AutoConfig.from_pretrained("mosaicml_mpt-7b_config.json",
-                                                torchscript=True)
+            config = AutoConfig.from_pretrained(
+                os.path.join(os.path.dirname(__file__), "mosaicml_mpt-7b_config.json"),
+                torchscript=True
+            )
             model.config = config
         model.eval()
         model_type = model.config.model_type.replace("_", "-")

From c4450afe4138b30046a7fcb14bddf983faf0d6db Mon Sep 17 00:00:00 2001
From: changwangss
Date: Thu, 27 Jun 2024 21:58:31 -0700
Subject: [PATCH 3/3] fix mpt architectures

Signed-off-by: changwangss
---
 .../transformers/modeling/modeling_auto.py    |  6 +--
 .../modeling/mosaicml_mpt-7b_config.json      | 48 -------------------
 2 files changed, 1 insertion(+), 53 deletions(-)
 delete mode 100644 intel_extension_for_transformers/transformers/modeling/mosaicml_mpt-7b_config.json

diff --git a/intel_extension_for_transformers/transformers/modeling/modeling_auto.py b/intel_extension_for_transformers/transformers/modeling/modeling_auto.py
index fd4b1a81d5a..a5be8cdc519 100644
--- a/intel_extension_for_transformers/transformers/modeling/modeling_auto.py
+++ b/intel_extension_for_transformers/transformers/modeling/modeling_auto.py
@@ -845,11 +845,7 @@ def forward(self, input: torch.Tensor) -> tuple[torch.Tensor, None]:
             or device_map == "cpu"
             or device_map == torch.device("cpu")
         ) and model.config.model_type == "mpt":
-            config = AutoConfig.from_pretrained(
-                os.path.join(os.path.dirname(__file__), "mosaicml_mpt-7b_config.json"),
-                torchscript=True
-            )
-            model.config = config
+            model.config.architectures = ["MptForCausalLM"]
         model.eval()
         model_type = model.config.model_type.replace("_", "-")
 
diff --git a/intel_extension_for_transformers/transformers/modeling/mosaicml_mpt-7b_config.json b/intel_extension_for_transformers/transformers/modeling/mosaicml_mpt-7b_config.json
deleted file mode 100644
index 9a9cc31be91..00000000000
--- a/intel_extension_for_transformers/transformers/modeling/mosaicml_mpt-7b_config.json
+++ /dev/null
@@ -1,48 +0,0 @@
-{
-  "architectures": [
-    "MptForCausalLM"
-  ],
-  "attn_config": {
-    "alibi": true,
-    "alibi_bias_max": 8,
-    "attn_impl": "torch",
-    "attn_pdrop": 0,
-    "attn_type": "multihead_attention",
-    "attn_uses_sequence_id": false,
-    "clip_qkv": null,
-    "prefix_lm": false,
-    "qk_ln": false,
-    "softmax_scale": null
-  },
-  "d_model": 4096,
-  "emb_pdrop": 0,
-  "embedding_fraction": 1.0,
-  "expansion_ratio": 4,
-  "init_config": {
-    "emb_init_std": null,
-    "emb_init_uniform_lim": null,
-    "fan_mode": "fan_in",
-    "init_div_is_residual": true,
-    "init_gain": 0,
-    "init_nonlinearity": "relu",
-    "init_std": 0.02,
-    "name": "kaiming_normal_",
-    "verbose": 0
-  },
-  "init_device": "cpu",
-  "learned_pos_emb": true,
-  "logit_scale": null,
-  "max_seq_len": 2048,
-  "model_type": "mpt",
-  "n_heads": 32,
-  "n_layers": 32,
-  "no_bias": true,
-  "norm_type": "low_precision_layernorm",
-  "resid_pdrop": 0,
-  "tokenizer_name": "EleutherAI/gpt-neox-20b",
-  "torch_dtype": "bfloat16",
-  "transformers_version": "4.28.1",
-  "use_cache": false,
-  "verbose": 0,
-  "vocab_size": 50432
-}
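
Note on the net effect of the series: PATCH 3/3 drops the bundled mosaicml_mpt-7b_config.json again and instead pins config.architectures on the CPU path, so downstream TorchScript/SmoothQuant tooling resolves the Transformers-native MptForCausalLM class. Below is a minimal standalone sketch of that final code path; the checkpoint id "mosaicml/mpt-7b", the trust_remote_code load, and the device_map scaffolding are illustrative assumptions, not the library's verbatim flow:

    import torch
    from transformers import AutoModelForCausalLM

    # Assumed example checkpoint; remote MPT repos historically report a
    # custom class name (e.g. "MPTForCausalLM") in config.architectures.
    model = AutoModelForCausalLM.from_pretrained(
        "mosaicml/mpt-7b", trust_remote_code=True
    )
    device_map = "cpu"

    if (
        not torch.cuda.is_available()
        or device_map == "cpu"
        or device_map == torch.device("cpu")
    ) and model.config.model_type == "mpt":
        # The one-line fix from PATCH 3/3: point at the in-library class
        # name instead of swapping in a whole hard-coded config file.
        model.config.architectures = ["MptForCausalLM"]

    model.eval()

Overriding the single architectures field in place avoids shipping a model-specific JSON and the path-resolution fix that PATCH 2/3 needed for it.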