Skip to content

Commit

Permalink
integrate Mixtral-8x7B-Instruct-v0.1 inference
Browse files Browse the repository at this point in the history
Signed-off-by: minmingzhu <minming.zhu@intel.com>
  • Loading branch information
minmingzhu committed Mar 19, 2024
1 parent 3710b4c commit 3ff4373
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 2 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/workflow_finetune.yml
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ jobs:
- name: Run Deltatuner Test on DENAS-LoRA Model
run: |
if [[ ${{ matrix.model }} =~ ^(mosaicml\/mpt-7b-chat|huggyllama\/llama-7b|meta-llama\/Llama-2-7b-chat-hf|mistralai\/Mistral-7B-v0.1)$ ]]; then
if [[ ${{ matrix.model }} =~ ^(mosaicml\/mpt-7b-chat|huggyllama\/llama-7b|meta-llama\/Llama-2-7b-chat-hf|mistralai\/Mistral-7B-v0.1|google\/gemma-2b)$ ]]; then
echo ${{ matrix.model }} is not supported!
else
docker exec "finetune" bash -c "rm -rf /tmp/llm-ray/*"
Expand Down
3 changes: 2 additions & 1 deletion .github/workflows/workflow_inference.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ jobs:
name: inference
strategy:
matrix:
model: [ gpt-j-6b, gpt2, bloom-560m, opt-125m, mpt-7b, mistral-7b-v0.1, mpt-7b-bigdl, neural-chat-7b-v3-1, CodeLlama-7b-hf, falcon-7b, starcoder, llama-2-7b-chat-hf, llama-2-7b-chat-hf-vllm ]
model: [ gpt-j-6b, gpt2, bloom-560m, opt-125m, mpt-7b, mistral-7b-v0.1, mpt-7b-bigdl, neural-chat-7b-v3-1, CodeLlama-7b-hf, falcon-7b, starcoder, llama-2-7b-chat-hf, llama-2-7b-chat-hf-vllm, mixtral-8x7B-Instruct-v0.1 ]
isPR:
- ${{inputs.ci_type == 'pr'}}

Expand All @@ -46,6 +46,7 @@ jobs:
- { model: "mistral-7b-v0.1"}
- { model: "mpt-7b-bigdl"}
- { model: "llama-2-7b-chat-hf-vllm"}
- { model: "mixtral-8x7B-Instruct-v0.1"}
- dtuner_model: nathan0/mpt-7b-deltatuner-model
model: mpt-7b

Expand Down
22 changes: 22 additions & 0 deletions llm_on_ray/inference/models/mixtral-8x7B-Instruct-v0.1.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Serving configuration for mistralai/Mixtral-8x7B-Instruct-v0.1 (CPU-only inference).
# NOTE(review): nesting restored for ipex/model_description/prompt — the flat layout
# would parse those keys as null; confirm against sibling model configs.
port: 8000  # HTTP port the serving endpoint listens on
name: Mixtral-8x7B-Instruct-v0.1
route_prefix: /Mixtral-8x7B-Instruct-v0.1
num_replicas: 1
cpus_per_worker: 24
gpus_per_worker: 0  # CPU-only deployment; no GPUs requested
deepspeed: false
workers_per_group: 2
device: CPU
ipex:  # presumably Intel Extension for PyTorch settings — confirm with consumer
  enabled: true
  precision: bf16  # bfloat16 inference precision
model_description:
  model_id_or_path: mistralai/Mixtral-8x7B-Instruct-v0.1  # Hugging Face model id
  bigdl: false
  tokenizer_name_or_path: mistralai/Mixtral-8x7B-Instruct-v0.1
  chat_processor: ChatModelLLama
  prompt:
    intro: ''
    # Mistral/Mixtral instruction template; {msg} is substituted at runtime.
    human_id: '<s>[INST] {msg} [/INST]'
    bot_id: ''
    stop_words: []

0 comments on commit 3ff4373

Please sign in to comment.