Skip to content

Commit

Permalink
[Cpp Graph] Update Falcon HF para and support Falcon-180B (#414)
Browse files Browse the repository at this point in the history
  • Loading branch information
zhentaoyu committed Oct 12, 2023
1 parent c7f8173 commit 900ebf4
Show file tree
Hide file tree
Showing 6 changed files with 14 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from transformers import AutoConfig
from intel_extension_for_transformers.llm.runtime.graph.scripts.convert import convert_model
import torch
- model_maps = {"gpt_neox": "gptneox", "RefinedWebModel": "falcon"}
+ model_maps = {"gpt_neox": "gptneox"}

class Model:
def __init__(self):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ static const model_scratch falcon_mem_req(int n_layers) {
// TODO(hengyu): add more variants besides 6B
case 60:
return {2 * 2048ull * MB, 2 * 2048ull * MB, 2 * 4096ull * MB};
+ case 80:
+ return {3 * 2048ull * MB, 3 * 2048ull * MB, 3 * 4096ull * MB};
default:
MODEL_ASSERT(false);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ void FALCON::init(const char* path_model, model_context& lctx, int n_ctx_, int n
fprintf(stderr, "%s: n_embd = %u\n", __func__, hparams.n_embd);
fprintf(stderr, "%s: n_mult = %u\n", __func__, hparams.n_mult);
fprintf(stderr, "%s: n_head = %u\n", __func__, hparams.n_head);
+ fprintf(stderr, "%s: n_head_kv = %u\n", __func__, hparams.n_head_kv);
fprintf(stderr, "%s: n_layer = %u\n", __func__, hparams.n_layer);
fprintf(stderr, "%s: n_rot = %u\n", __func__, hparams.n_rot);
fprintf(stderr, "%s: n_ff = %u\n", __func__, n_ff);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from transformers import AutoConfig
import subprocess

- model_maps = {"gpt_neox": "gptneox", "RefinedWebModel": "falcon"}
+ model_maps = {"gpt_neox": "gptneox"}


def convert_model(model, outfile, outtype):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,14 +67,18 @@ def main(args_in: Optional[List[str]] = None) -> None:

tokenizer = AutoTokenizer.from_pretrained(dir_model, trust_remote_code=True)
config = AutoConfig.from_pretrained(dir_model, trust_remote_code=True)
- hparams = config.to_dict()
+ with open(os.path.join(dir_model,"config.json"), "r", encoding="utf-8") as f:
+     hparams = json.load(f)
+ if hparams["architectures"][0] != "FalconForCausalLM":
+     print("Model architecture not supported: " + hparams["architectures"][0])
+     sys.exit(1)
print("Loading model: ", dir_model)
model = AutoModelForCausalLM.from_pretrained(dir_model, config=config, torch_dtype=torch.float16
if ftype == 1 else torch.float32, low_cpu_mem_usage=True, trust_remote_code=True)
print("Model loaded: ", dir_model)

- n_head_kv = hparams.get("n_head_kv", 1)
- n_head = hparams["n_head"]
+ n_head_kv = hparams.get("num_kv_heads", 1)
+ n_head = hparams["num_attention_heads"]
head_dim = hparams["hidden_size"] // n_head

fout = open(fname_out, "wb")
Expand All @@ -85,7 +89,7 @@ def main(args_in: Optional[List[str]] = None) -> None:
fout.write(struct.pack("i", 0))
fout.write(struct.pack("i", n_head))
fout.write(struct.pack("i", n_head_kv)) # multi-query attention
- fout.write(struct.pack("i", hparams["n_layer"]))
+ fout.write(struct.pack("i", hparams["num_hidden_layers"]))
fout.write(struct.pack("i", 0))
fout.write(struct.pack("i", ftype))
fout.write(struct.pack("i", 0))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from transformers import AutoConfig
import subprocess

- model_maps = {"gpt_neox": "gptneox", "RefinedWebModel": "falcon"}
+ model_maps = {"gpt_neox": "gptneox"}
build_path = Path(Path(__file__).parent.absolute(), "../build/")

def str2bool(v):
Expand Down

0 comments on commit 900ebf4

Please sign in to comment.