[Fine-tuning][Inference] Integrate mistralai/Mixtral-8x7B-Instruct-v0.1 for CPU #148

Open · wants to merge 5 commits into base: main
7 changes: 5 additions & 2 deletions .github/workflows/workflow_finetune.yml
@@ -34,7 +34,7 @@ jobs:
name: finetune
strategy:
matrix:
model: [ EleutherAI/gpt-j-6b, meta-llama/Llama-2-7b-chat-hf, gpt2, bigscience/bloom-560m, facebook/opt-125m, mosaicml/mpt-7b-chat, huggyllama/llama-7b, mistralai/Mistral-7B-v0.1 ]
model: [ EleutherAI/gpt-j-6b, meta-llama/Llama-2-7b-chat-hf, gpt2, bigscience/bloom-560m, facebook/opt-125m, mosaicml/mpt-7b-chat, huggyllama/llama-7b, mistralai/Mistral-7B-v0.1, mistralai/Mixtral-8x7B-Instruct-v0.1 ]
isPR:
- ${{inputs.ci_type == 'pr'}}

@@ -44,6 +44,7 @@ jobs:
- { model: "EleutherAI/gpt-j-6b"}
- { model: "meta-llama/Llama-2-7b-chat-hf"}
- { model: "mistralai/Mistral-7B-v0.1"}
- { model: "mistralai/Mixtral-8x7B-Instruct-v0.1"}

runs-on: self-hosted

@@ -132,6 +133,8 @@ jobs:
}
if "${{ matrix.model }}" == "mistralai/Mistral-7B-v0.1":
result['General']['lora_config']['target_modules'] = ["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj", "lm_head",]
elif "${{ matrix.model }}" == "mistralai/Mixtral-8x7B-Instruct-v0.1":
result['General']['lora_config']['target_modules'] = ["k_proj", "v_proj"]
else:
result['General']['lora_config']['target_modules'] = None
with open(conf_path, 'w') as output:
@@ -143,7 +146,7 @@ jobs:

- name: Run Deltatuner Test on DENAS-LoRA Model
run: |
if [[ ${{ matrix.model }} =~ ^(mosaicml\/mpt-7b-chat|huggyllama\/llama-7b|meta-llama\/Llama-2-7b-chat-hf|mistralai\/Mistral-7B-v0.1)$ ]]; then
if [[ ${{ matrix.model }} =~ ^(mosaicml\/mpt-7b-chat|huggyllama\/llama-7b|meta-llama\/Llama-2-7b-chat-hf|mistralai\/Mistral-7B-v0.1|mistralai\/Mixtral-8x7B-Instruct-v0.1)$ ]]; then
echo ${{ matrix.model }} is not supported!
else
docker exec "finetune" bash -c "rm -rf /tmp/llm-ray/*"
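Note: for reference, here is the CI config patch above as a standalone script, with the `${{ matrix.model }}` expression replaced by a plain variable. The config path and the `yaml` round-trip are illustrative assumptions, not the exact CI code.

```python
# Standalone sketch of the target_modules patch the workflow applies.
# `conf_path` is a hypothetical path; assumes the file parses into the
# same structure the inline CI script edits.
import yaml

model = "mistralai/Mixtral-8x7B-Instruct-v0.1"  # stands in for ${{ matrix.model }}
conf_path = "llm_on_ray/finetune/models/mixtral-8x7b-Instruct-v0.1.yaml"

with open(conf_path) as f:
    result = yaml.safe_load(f)

if model == "mistralai/Mistral-7B-v0.1":
    result["General"]["lora_config"]["target_modules"] = [
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj", "lm_head",
    ]
elif model == "mistralai/Mixtral-8x7B-Instruct-v0.1":
    # Mixtral adapts only the k/v projections in CI.
    result["General"]["lora_config"]["target_modules"] = ["k_proj", "v_proj"]
else:
    result["General"]["lora_config"]["target_modules"] = None

with open(conf_path, "w") as output:
    yaml.dump(result, output)
```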
7 changes: 4 additions & 3 deletions .github/workflows/workflow_inference.yml
@@ -34,7 +34,7 @@ jobs:
name: inference
strategy:
matrix:
model: [ gpt-j-6b, gpt2, bloom-560m, opt-125m, mpt-7b, mistral-7b-v0.1, mpt-7b-bigdl, neural-chat-7b-v3-1, CodeLlama-7b-hf, falcon-7b, starcoder, llama-2-7b-chat-hf, llama-2-7b-chat-hf-vllm ]
model: [ gpt-j-6b, gpt2, bloom-560m, opt-125m, mpt-7b, mistral-7b-v0.1, mpt-7b-bigdl, neural-chat-7b-v3-1, CodeLlama-7b-hf, falcon-7b, starcoder, llama-2-7b-chat-hf, llama-2-7b-chat-hf-vllm, mixtral-8x7b-Instruct-v0.1 ]
isPR:
- ${{inputs.ci_type == 'pr'}}

@@ -46,6 +46,7 @@ jobs:
- { model: "mistral-7b-v0.1"}
- { model: "mpt-7b-bigdl"}
- { model: "llama-2-7b-chat-hf-vllm"}
- { model: "mixtral-8x7b-Instruct-v0.1"}
- dtuner_model: nathan0/mpt-7b-deltatuner-model
model: mpt-7b

@@ -158,7 +159,7 @@ jobs:
- name: Run Inference Test with DeepSpeed
run: |
TARGET=${{steps.target.outputs.target}}
if [[ ${{ matrix.model }} =~ ^(gpt2|falcon-7b|starcoder|mpt-7b.*)$ ]]; then
if [[ ${{ matrix.model }} =~ ^(mixtral-8x7b-Instruct-v0.1|gpt2|falcon-7b|starcoder|mpt-7b.*)$ ]]; then
echo ${{ matrix.model }} is not supported!
elif [[ ! ${{ matrix.model }} == "llama-2-7b-chat-hf-vllm" ]]; then
docker exec "${TARGET}" bash -c "python .github/workflows/config/update_inference_config.py --config_file llm_on_ray/inference/models/\"${{ matrix.model }}\".yaml --output_file \"${{ matrix.model }}\".yaml.deepspeed --deepspeed"
@@ -171,7 +172,7 @@ jobs:
if: ${{ matrix.dtuner_model }}
run: |
TARGET=${{steps.target.outputs.target}}
if [[ ${{ matrix.model }} =~ ^(gpt2|falcon-7b|starcoder|mpt-7b.*)$ ]]; then
if [[ ${{ matrix.model }} =~ ^(mixtral-8x7b-Instruct-v0.1|gpt2|falcon-7b|starcoder|mpt-7b.*)$ ]]; then
echo ${{ matrix.model }} is not supported!
else
docker exec "${TARGET}" bash -c "llm_on_ray-serve --config_file .github/workflows/config/mpt_deltatuner_deepspeed.yaml --simple"
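Note: both bash gates above skip models that the DeepSpeed test path does not cover. A quick sanity check of the extended pattern (a test sketch, not CI code):

```python
# Mirrors the bash `=~ ^(...)$` anchors with re.fullmatch; confirms the
# new alternative catches mixtral while leaving supported models alone.
import re

UNSUPPORTED = r"(mixtral-8x7b-Instruct-v0\.1|gpt2|falcon-7b|starcoder|mpt-7b.*)"

for m in ["mixtral-8x7b-Instruct-v0.1", "mpt-7b-bigdl", "gpt2"]:
    assert re.fullmatch(UNSUPPORTED, m), f"{m} should be skipped"
assert not re.fullmatch(UNSUPPORTED, "llama-2-7b-chat-hf")
```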
36 changes: 36 additions & 0 deletions llm_on_ray/finetune/models/mixtral-8x7b-Instruct-v0.1.yaml
@@ -0,0 +1,36 @@
General:
base_model: mistralai/Mixtral-8x7B-Instruct-v0.1
gpt_base_model: false
output_dir: /tmp/llm-ray/output
checkpoint_dir: /tmp/llm-ray/checkpoint
config:
trust_remote_code: false
use_auth_token: null
lora_config:
task_type: CAUSAL_LM
r: 8
lora_alpha: 32
lora_dropout: 0.1
target_modules:
- q_proj
- v_proj
enable_gradient_checkpointing: false
Dataset:
train_file: examples/data/sample_finetune_data_small.jsonl
validation_file: null
validation_split_percentage: 5
Training:
optimizer: AdamW
batch_size: 2
epochs: 3
learning_rate: 1.0e-05
lr_scheduler: linear
weight_decay: 0.0
mixed_precision: bf16
device: CPU
num_training_workers: 2
resources_per_worker:
CPU: 2
accelerate_mode: CPU_DDP
gradient_accumulation_steps: 1
logging_steps: 10
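Note: the `lora_config` block mirrors PEFT's `LoraConfig`. A minimal sketch of the equivalent object, assuming the finetuning code passes these fields through to PEFT unchanged:

```python
# How the lora_config section above maps onto a peft.LoraConfig,
# assuming a 1:1 field mapping. Note that CI overrides target_modules
# to ["k_proj", "v_proj"] for this model (see workflow_finetune.yml).
from peft import LoraConfig, TaskType

lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=8,
    lora_alpha=32,
    lora_dropout=0.1,
    target_modules=["q_proj", "v_proj"],
)
```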
22 changes: 22 additions & 0 deletions llm_on_ray/inference/models/mixtral-8x7b-Instruct-v0.1.yaml
@@ -0,0 +1,22 @@
port: 8000
name: mixtral-8x7b-Instruct-v0.1
route_prefix: /mixtral-8x7b-Instruct-v0.1
num_replicas: 1
cpus_per_worker: 24
gpus_per_worker: 0
deepspeed: false
workers_per_group: 2
device: CPU
ipex:
enabled: true
precision: bf16
model_description:
model_id_or_path: mistralai/Mixtral-8x7B-Instruct-v0.1
bigdl: false
tokenizer_name_or_path: mistralai/Mixtral-8x7B-Instruct-v0.1
chat_processor: ChatModelLLama
prompt:
intro: ''
human_id: '<s>[INST] {msg} [/INST]'
bot_id: ''
stop_words: []
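Note: with this config the model should be servable the same way the workflow serves the other models. A hedged smoke test, assuming the `--simple` endpoint accepts a JSON body with a `text` field; the prompt follows the `human_id` template above:

```python
# Hypothetical smoke test against the endpoint defined by this config
# (port 8000, route_prefix /mixtral-8x7b-Instruct-v0.1), after starting
# the server, e.g.:
#   llm_on_ray-serve --config_file llm_on_ray/inference/models/mixtral-8x7b-Instruct-v0.1.yaml --simple
# The request schema is an assumption; adjust to the serve API if it differs.
import requests

prompt = "<s>[INST] What is Ray? [/INST]"  # human_id template with {msg} filled in
resp = requests.post(
    "http://localhost:8000/mixtral-8x7b-Instruct-v0.1",
    json={"text": prompt},
    timeout=600,
)
print(resp.status_code, resp.text)
```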
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -39,7 +39,7 @@ dependencies = [

[project.optional-dependencies]
cpu = [
"transformers>=4.35.0, <=4.35.2",
"transformers>=4.36.0, <=4.38.1",
"intel_extension_for_pytorch>=2.2.0",
"torch>=2.2.0",
"oneccl_bind_pt>=2.2.0"