# Finetune LLaMA2 and MPT on Intel Xeon CPU

## 1. Prerequisites

### 1.1 Set Up the Environment

In [None]:
!pip install intel-extension-for-transformers torch datasets

### 1.2 Prepare Dataset

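Download the Alpaca dataset file [`alpaca_data.json`](https://github.com/tatsu-lab/stanford_alpaca/blob/main/alpaca_data.json), then set `alpaca_data_path` in the next cell to the location of the downloaded file.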

In [1]:
# Base checkpoints finetuned in sections 2 and 3 (model name or local
# directory; the recorded run below loaded both from local directories).
llama2_model_name_or_path = "meta-llama/Llama-2-7b-hf"
mpt_model_name_or_path = "mosaicml/mpt-7b"

# Placeholder: replace with the local path of the downloaded alpaca_data.json.
alpaca_data_path = "/path/to/alpaca_data.json"

## 2. Finetune LLaMA2 on Intel Xeon CPU with LoRA

### 2.1 Setup Finetuning Config

In [2]:
from transformers import TrainingArguments
from intel_extension_for_transformers.neural_chat.config import (
    ModelArguments,
    DataArguments,
    FinetuningArguments,
    TextGenerationFinetuningConfig,
)

# Training data: the Alpaca JSON file configured in section 1.2.
# NOTE(review): dataset_concatenation presumably packs several short samples
# into one training sequence -- confirm against the DataArguments docs.
data_args = DataArguments(
    dataset_concatenation=True,
    train_file=alpaca_data_path,
)

# Base model to finetune (LLaMA2), loaded with the non-fast tokenizer.
model_args = ModelArguments(
    use_fast_tokenizer=False,
    model_name_or_path=llama2_model_name_or_path,
)

# NOTE(review): lora_all_linear presumably attaches LoRA adapters to every
# linear layer; the recorded run reports 19,988,480 trainable parameters.
# With do_lm_eval=True the recorded run evaluates on truthfulqa_mc after training.
finetune_args = FinetuningArguments(
    do_lm_eval=True,
    lora_all_linear=True,
)

training_args = TrainingArguments(
    # Where results go.
    output_dir="./llama_peft_finetuned_model",
    overwrite_output_dir=True,
    # Checkpointing: save_strategy="no" turns periodic checkpoints off, so
    # save_total_limit only matters if checkpointing is re-enabled.
    save_strategy="no",
    save_total_limit=2,
    # What to run.
    do_train=True,
    do_eval=True,
    # Optimisation: 4 samples per device x 2 accumulation steps gives the
    # total train batch size of 8 reported in the log below.
    per_device_train_batch_size=4,
    gradient_accumulation_steps=2,
    learning_rate=1e-4,
    num_train_epochs=3,
    # bfloat16 training (the log reports "16-bits training: True").
    bf16=True,
    log_level="info",
)

# Bundle the four argument groups into the object consumed by finetune_model().
finetune_cfg = TextGenerationFinetuningConfig(
    model_args=model_args,
    data_args=data_args,
    training_args=training_args,
    finetune_args=finetune_args,
)

  from .autonotebook import tqdm as notebook_tqdm


Package 'habana_frameworks.torch.hpu' is not installed.
Package 'intel_extension_for_pytorch' is not installed.


### 2.2 Finetuning

In [3]:
# Launch LLaMA2 finetuning with the config assembled in the previous cell.
# In the recorded output below, training ran from 09/03 22:04 to 09/04 18:35
# (about 20.5 hours) and was followed by a truthfulqa_mc evaluation that took
# roughly 18 minutes.
from intel_extension_for_transformers.neural_chat.chatbot import finetune_model
finetune_model(finetune_cfg)

distributed training: True, 16-bits training: True


09/03/2023 22:03:27 - INFO - intel_extension_for_transformers.llm.finetuning.finetuning - Training/evaluation parameters TrainingArguments(
_n_gpu=0,
adafactor=False,
adam_beta1=0.9,
adam_beta2=0.999,
adam_epsilon=1e-08,
auto_find_batch_size=False,
bf16=True,
bf16_full_eval=False,
data_seed=None,
dataloader_drop_last=False,
dataloader_num_workers=0,
dataloader_pin_memory=True,
ddp_backend=None,
ddp_broadcast_buffers=None,
ddp_bucket_cap_mb=None,
ddp_find_unused_parameters=None,
ddp_timeout=1800,
debug=[],
deepspeed=None,
disable_tqdm=False,
do_eval=True,
do_predict=False,
do_train=True,
eval_accumulation_steps=None,
eval_delay=0,
eval_steps=None,
evaluation_strategy=no,
fp16=False,
fp16_backend=auto,
fp16_full_eval=False,
fp16_opt_level=O1,
fsdp=[],
fsdp_config={'fsdp_min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False},
fsdp_min_num_params=0,
fsdp_transformer_layer_cls_to_wrap=None,
full_determinism=False,
gradient_accumulation_steps=2,
gradient_checkpointing=False,
greater_

[INFO|configuration_utils.py:710] 2023-09-03 22:03:27,368 >> loading configuration file /home/devcloud/xyy/models/Llama-2-7b-hf/config.json


[INFO|configuration_utils.py:768] 2023-09-03 22:03:27,374 >> Model config LlamaConfig {
  "_name_or_path": "/home/devcloud/xyy/models/Llama-2-7b-hf",
  "architectures": [
    "LlamaForCausalLM"
  ],
  "bos_token_id": 1,
  "eos_token_id": 2,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "intermediate_size": 11008,
  "max_position_embeddings": 4096,
  "model_type": "llama",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 32,
  "pad_token_id": 0,
  "pretraining_tp": 1,
  "rms_norm_eps": 1e-05,
  "rope_scaling": null,
  "tie_word_embeddings": false,
  "torch_dtype": "float16",
  "transformers_version": "4.31.0",
  "use_cache": true,
  "vocab_size": 32000
}



[INFO|configuration_utils.py:710] 2023-09-03 22:03:27,376 >> loading configuration file /home/devcloud/xyy/models/Llama-2-7b-hf/config.json


[INFO|configuration_utils.py:768] 2023-09-03 22:03:27,380 >> Model config LlamaConfig {
  "_name_or_path": "/home/devcloud/xyy/models/Llama-2-7b-hf",
  "architectures": [
    "LlamaForCausalLM"
  ],
  "bos_token_id": 1,
  "eos_token_id": 2,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "intermediate_size": 11008,
  "max_position_embeddings": 4096,
  "model_type": "llama",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 32,
  "pad_token_id": 0,
  "pretraining_tp": 1,
  "rms_norm_eps": 1e-05,
  "rope_scaling": null,
  "tie_word_embeddings": false,
  "torch_dtype": "float16",
  "transformers_version": "4.31.0",
  "use_cache": true,
  "vocab_size": 32000
}



[INFO|tokenization_utils_base.py:1837] 2023-09-03 22:03:27,385 >> loading file tokenizer.model


[INFO|tokenization_utils_base.py:1837] 2023-09-03 22:03:27,387 >> loading file added_tokens.json


[INFO|tokenization_utils_base.py:1837] 2023-09-03 22:03:27,390 >> loading file special_tokens_map.json


[INFO|tokenization_utils_base.py:1837] 2023-09-03 22:03:27,392 >> loading file tokenizer_config.json




Using custom data configuration default-b171c08c62141a34


09/03/2023 22:03:27 - INFO - datasets.builder - Using custom data configuration default-b171c08c62141a34


Loading Dataset Infos from /home/devcloud/miniconda3/envs/neuralchat/lib/python3.9/site-packages/datasets/packaged_modules/json


09/03/2023 22:03:27 - INFO - datasets.info - Loading Dataset Infos from /home/devcloud/miniconda3/envs/neuralchat/lib/python3.9/site-packages/datasets/packaged_modules/json


Overwrite dataset info from restored data version if exists.


09/03/2023 22:03:27 - INFO - datasets.builder - Overwrite dataset info from restored data version if exists.


Loading Dataset info from /home/devcloud/.cache/huggingface/datasets/json/default-b171c08c62141a34/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96


09/03/2023 22:03:27 - INFO - datasets.info - Loading Dataset info from /home/devcloud/.cache/huggingface/datasets/json/default-b171c08c62141a34/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96


Found cached dataset json (/home/devcloud/.cache/huggingface/datasets/json/default-b171c08c62141a34/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96)


09/03/2023 22:03:27 - INFO - datasets.builder - Found cached dataset json (/home/devcloud/.cache/huggingface/datasets/json/default-b171c08c62141a34/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96)


Loading Dataset info from /home/devcloud/.cache/huggingface/datasets/json/default-b171c08c62141a34/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96


09/03/2023 22:03:27 - INFO - datasets.info - Loading Dataset info from /home/devcloud/.cache/huggingface/datasets/json/default-b171c08c62141a34/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96


Using custom data configuration default-b171c08c62141a34


09/03/2023 22:03:28 - INFO - datasets.builder - Using custom data configuration default-b171c08c62141a34


Loading Dataset Infos from /home/devcloud/miniconda3/envs/neuralchat/lib/python3.9/site-packages/datasets/packaged_modules/json


09/03/2023 22:03:28 - INFO - datasets.info - Loading Dataset Infos from /home/devcloud/miniconda3/envs/neuralchat/lib/python3.9/site-packages/datasets/packaged_modules/json


Overwrite dataset info from restored data version if exists.


09/03/2023 22:03:28 - INFO - datasets.builder - Overwrite dataset info from restored data version if exists.


Loading Dataset info from /home/devcloud/.cache/huggingface/datasets/json/default-b171c08c62141a34/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96


09/03/2023 22:03:28 - INFO - datasets.info - Loading Dataset info from /home/devcloud/.cache/huggingface/datasets/json/default-b171c08c62141a34/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96


Found cached dataset json (/home/devcloud/.cache/huggingface/datasets/json/default-b171c08c62141a34/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96)


09/03/2023 22:03:28 - INFO - datasets.builder - Found cached dataset json (/home/devcloud/.cache/huggingface/datasets/json/default-b171c08c62141a34/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96)


Loading Dataset info from /home/devcloud/.cache/huggingface/datasets/json/default-b171c08c62141a34/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96


09/03/2023 22:03:28 - INFO - datasets.info - Loading Dataset info from /home/devcloud/.cache/huggingface/datasets/json/default-b171c08c62141a34/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96


Using custom data configuration default-b171c08c62141a34


09/03/2023 22:03:28 - INFO - datasets.builder - Using custom data configuration default-b171c08c62141a34


Loading Dataset Infos from /home/devcloud/miniconda3/envs/neuralchat/lib/python3.9/site-packages/datasets/packaged_modules/json


09/03/2023 22:03:28 - INFO - datasets.info - Loading Dataset Infos from /home/devcloud/miniconda3/envs/neuralchat/lib/python3.9/site-packages/datasets/packaged_modules/json


Overwrite dataset info from restored data version if exists.


09/03/2023 22:03:28 - INFO - datasets.builder - Overwrite dataset info from restored data version if exists.


Loading Dataset info from /home/devcloud/.cache/huggingface/datasets/json/default-b171c08c62141a34/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96


09/03/2023 22:03:28 - INFO - datasets.info - Loading Dataset info from /home/devcloud/.cache/huggingface/datasets/json/default-b171c08c62141a34/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96


Found cached dataset json (/home/devcloud/.cache/huggingface/datasets/json/default-b171c08c62141a34/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96)


09/03/2023 22:03:28 - INFO - datasets.builder - Found cached dataset json (/home/devcloud/.cache/huggingface/datasets/json/default-b171c08c62141a34/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96)


Loading Dataset info from /home/devcloud/.cache/huggingface/datasets/json/default-b171c08c62141a34/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96


09/03/2023 22:03:28 - INFO - datasets.info - Loading Dataset info from /home/devcloud/.cache/huggingface/datasets/json/default-b171c08c62141a34/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96


[INFO|modeling_utils.py:2600] 2023-09-03 22:03:28,763 >> loading weights file /home/devcloud/xyy/models/Llama-2-7b-hf/model.safetensors.index.json


[INFO|modeling_utils.py:1172] 2023-09-03 22:03:28,765 >> Instantiating LlamaForCausalLM model under default dtype torch.bfloat16.


[INFO|configuration_utils.py:599] 2023-09-03 22:03:28,768 >> Generate config GenerationConfig {
  "_from_model_config": true,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "pad_token_id": 0,
  "transformers_version": "4.31.0"
}




Loading checkpoint shards:   0%|                                                                                                                                              | 0/2 [00:00<?, ?it/s]


Loading checkpoint shards:  50%|███████████████████████████████████████████████████████████████████                                                                   | 1/2 [00:01<00:01,  1.67s/it]


Loading checkpoint shards: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:02<00:00,  1.02s/it]


Loading checkpoint shards: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:02<00:00,  1.12s/it]


[INFO|modeling_utils.py:3329] 2023-09-03 22:03:31,319 >> All model checkpoint weights were used when initializing LlamaForCausalLM.



[INFO|modeling_utils.py:3337] 2023-09-03 22:03:31,320 >> All the weights of LlamaForCausalLM were initialized from the model checkpoint at /home/devcloud/xyy/models/Llama-2-7b-hf.
If your task is similar to the task the model of the checkpoint was trained on, you can already use LlamaForCausalLM for predictions without further training.


[INFO|configuration_utils.py:559] 2023-09-03 22:03:31,324 >> loading configuration file /home/devcloud/xyy/models/Llama-2-7b-hf/generation_config.json


[INFO|configuration_utils.py:599] 2023-09-03 22:03:31,326 >> Generate config GenerationConfig {
  "bos_token_id": 1,
  "do_sample": true,
  "eos_token_id": 2,
  "max_length": 4096,
  "pad_token_id": 0,
  "temperature": 0.6,
  "top_p": 0.9,
  "transformers_version": "4.31.0"
}



Loading cached processed dataset at /home/devcloud/.cache/huggingface/datasets/json/default-b171c08c62141a34/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-e4de1a0661c32ad1.arrow


09/03/2023 22:03:32 - INFO - datasets.arrow_dataset - Loading cached processed dataset at /home/devcloud/.cache/huggingface/datasets/json/default-b171c08c62141a34/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-e4de1a0661c32ad1.arrow


09/03/2023 22:03:38 - INFO - intel_extension_for_transformers.llm.finetuning.finetuning - Splitting train dataset in train and validation according to `eval_dataset_size`


09/03/2023 22:03:38 - INFO - intel_extension_for_transformers.llm.finetuning.finetuning - Using data collator of type DataCollatorForSeq2Seq


[INFO|trainer.py:1686] 2023-09-03 22:04:25,240 >> ***** Running training *****


[INFO|trainer.py:1687] 2023-09-03 22:04:25,241 >>   Num examples = 12,390


[INFO|trainer.py:1688] 2023-09-03 22:04:25,243 >>   Num Epochs = 3


[INFO|trainer.py:1689] 2023-09-03 22:04:25,245 >>   Instantaneous batch size per device = 4


[INFO|trainer.py:1692] 2023-09-03 22:04:25,246 >>   Total train batch size (w. parallel, distributed & accumulation) = 8


[INFO|trainer.py:1693] 2023-09-03 22:04:25,247 >>   Gradient Accumulation steps = 2


[INFO|trainer.py:1694] 2023-09-03 22:04:25,248 >>   Total optimization steps = 4,647


[INFO|trainer.py:1695] 2023-09-03 22:04:25,252 >>   Number of trainable parameters = 19,988,480


trainable params: 19,988,480 || all params: 6,758,404,096 || trainable%: 0.2957573965106688


Step,Training Loss
500,1.1516
1000,1.1213
1500,1.1133
2000,1.0782
2500,1.0683
3000,1.0739
3500,1.0455
4000,1.0336
4500,1.0392


[INFO|trainer.py:1934] 2023-09-04 18:35:01,509 >> 

Training completed. Do not forget to share your model on huggingface.co/models =)




[INFO|configuration_utils.py:710] 2023-09-04 18:35:01,693 >> loading configuration file /home/devcloud/xyy/models/Llama-2-7b-hf/config.json


[INFO|configuration_utils.py:768] 2023-09-04 18:35:01,695 >> Model config LlamaConfig {
  "_name_or_path": "/home/devcloud/xyy/models/Llama-2-7b-hf",
  "architectures": [
    "LlamaForCausalLM"
  ],
  "bos_token_id": 1,
  "eos_token_id": 2,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "intermediate_size": 11008,
  "max_position_embeddings": 4096,
  "model_type": "llama",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 32,
  "pad_token_id": 0,
  "pretraining_tp": 1,
  "rms_norm_eps": 1e-05,
  "rope_scaling": null,
  "tie_word_embeddings": false,
  "torch_dtype": "float16",
  "transformers_version": "4.31.0",
  "use_cache": true,
  "vocab_size": 32000
}



[INFO|tokenization_utils_base.py:1837] 2023-09-04 18:35:01,696 >> loading file tokenizer.model


[INFO|tokenization_utils_base.py:1837] 2023-09-04 18:35:01,697 >> loading file added_tokens.json


[INFO|tokenization_utils_base.py:1837] 2023-09-04 18:35:01,697 >> loading file special_tokens_map.json


[INFO|tokenization_utils_base.py:1837] 2023-09-04 18:35:01,699 >> loading file tokenizer_config.json


[INFO|configuration_utils.py:599] 2023-09-04 18:35:01,710 >> Generate config GenerationConfig {
  "_from_model_config": true,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "pad_token_id": 0,
  "transformers_version": "4.31.0"
}



Loading Dataset Infos from /home/devcloud/.cache/huggingface/modules/datasets_modules/datasets/truthful_qa/63502f6bc6ee493830ce0843991b028d0ab568d221896b2ee3b8a5dfdaa9d7f4


09/04/2023 18:35:02 - INFO - datasets.info - Loading Dataset Infos from /home/devcloud/.cache/huggingface/modules/datasets_modules/datasets/truthful_qa/63502f6bc6ee493830ce0843991b028d0ab568d221896b2ee3b8a5dfdaa9d7f4


Overwrite dataset info from restored data version if exists.


09/04/2023 18:35:02 - INFO - datasets.builder - Overwrite dataset info from restored data version if exists.


Loading Dataset info from /home/devcloud/.cache/huggingface/datasets/truthful_qa/multiple_choice/1.1.0/63502f6bc6ee493830ce0843991b028d0ab568d221896b2ee3b8a5dfdaa9d7f4


09/04/2023 18:35:02 - INFO - datasets.info - Loading Dataset info from /home/devcloud/.cache/huggingface/datasets/truthful_qa/multiple_choice/1.1.0/63502f6bc6ee493830ce0843991b028d0ab568d221896b2ee3b8a5dfdaa9d7f4


Found cached dataset truthful_qa (/home/devcloud/.cache/huggingface/datasets/truthful_qa/multiple_choice/1.1.0/63502f6bc6ee493830ce0843991b028d0ab568d221896b2ee3b8a5dfdaa9d7f4)


09/04/2023 18:35:02 - INFO - datasets.builder - Found cached dataset truthful_qa (/home/devcloud/.cache/huggingface/datasets/truthful_qa/multiple_choice/1.1.0/63502f6bc6ee493830ce0843991b028d0ab568d221896b2ee3b8a5dfdaa9d7f4)


Loading Dataset info from /home/devcloud/.cache/huggingface/datasets/truthful_qa/multiple_choice/1.1.0/63502f6bc6ee493830ce0843991b028d0ab568d221896b2ee3b8a5dfdaa9d7f4


09/04/2023 18:35:02 - INFO - datasets.info - Loading Dataset info from /home/devcloud/.cache/huggingface/datasets/truthful_qa/multiple_choice/1.1.0/63502f6bc6ee493830ce0843991b028d0ab568d221896b2ee3b8a5dfdaa9d7f4


Running loglikelihood requests



100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5882/5882 [18:20<00:00,  5.34it/s]




09/04/2023 18:53:29 - INFO - intel_extension_for_transformers.llm.finetuning.finetuning - {'results': {'truthfulqa_mc': {'mc1': 0.3157894736842105, 'mc1_stderr': 0.016272287957916916, 'mc2': 0.4612383556358881, 'mc2_stderr': 0.015009882867823779}}, 'versions': {'truthfulqa_mc': 1}}


|    Task     |Version|Metric|Value |   |Stderr|
|-------------|------:|------|-----:|---|-----:|
|truthfulqa_mc|      1|mc1   |0.3158|±  |0.0163|
|             |       |mc2   |0.4612|±  |0.0150|



## 3. Finetune MPT on Intel Xeon CPU with LoRA

### 3.1 Setup Finetuning Config

In [4]:
from transformers import TrainingArguments
from intel_extension_for_transformers.neural_chat.config import (
    ModelArguments,
    DataArguments,
    FinetuningArguments,
    TextGenerationFinetuningConfig,
)

# Training data: the same Alpaca JSON file used for the LLaMA2 run.
# NOTE(review): dataset_concatenation presumably packs several short samples
# into one training sequence -- confirm against the DataArguments docs.
data_args = DataArguments(
    dataset_concatenation=True,
    train_file=alpaca_data_path,
)

# Base model to finetune (MPT). trust_remote_code=True is set because the
# checkpoint ships its own modeling code (the log below shows MPTForCausalLM
# being instantiated from the checkpoint's modeling_mpt.py).
model_args = ModelArguments(
    trust_remote_code=True,
    model_name_or_path=mpt_model_name_or_path,
)

# NOTE(review): lora_all_linear presumably attaches LoRA adapters to every
# linear layer; do_lm_eval presumably triggers a post-training lm-eval run --
# confirm against the FinetuningArguments docs.
finetune_args = FinetuningArguments(
    do_lm_eval=True,
    lora_all_linear=True,
)

training_args = TrainingArguments(
    # Where results go.
    output_dir="./mpt_peft_finetuned_model",
    overwrite_output_dir=True,
    # Checkpointing: save_strategy="no" turns periodic checkpoints off, so
    # save_total_limit only matters if checkpointing is re-enabled.
    save_strategy="no",
    save_total_limit=2,
    # What to run.
    do_train=True,
    do_eval=True,
    # Optimisation: 4 samples per device with 2 gradient-accumulation steps.
    per_device_train_batch_size=4,
    gradient_accumulation_steps=2,
    learning_rate=1e-4,
    num_train_epochs=3,
    # bfloat16 training (the log reports "16-bits training: True").
    bf16=True,
    log_level="info",
)

# Bundle the four argument groups into the object consumed by finetune_model().
finetune_cfg = TextGenerationFinetuningConfig(
    model_args=model_args,
    data_args=data_args,
    training_args=training_args,
    finetune_args=finetune_args,
)

[INFO|training_args.py:1299] 2023-09-04 18:53:29,696 >> Found safetensors installation, but --save_safetensors=False. Safetensors should be a preferred weights saving format due to security and performance reasons. If your model cannot be saved by safetensors please feel free to open an issue at https://github.com/huggingface/safetensors!


[INFO|training_args.py:1713] 2023-09-04 18:53:29,698 >> PyTorch: setting up devices


[INFO|training_args.py:1439] 2023-09-04 18:53:29,700 >> The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).




### 3.2 Finetuning

In [5]:
# Launch MPT finetuning with the config assembled in the previous cell.
# The import is repeated here so this cell does not depend on section 2.2
# having been executed.
from intel_extension_for_transformers.neural_chat.chatbot import finetune_model
finetune_model(finetune_cfg)

[INFO|training_args.py:1299] 2023-09-04 18:53:29,807 >> Found safetensors installation, but --save_safetensors=False. Safetensors should be a preferred weights saving format due to security and performance reasons. If your model cannot be saved by safetensors please feel free to open an issue at https://github.com/huggingface/safetensors!


[INFO|training_args.py:1713] 2023-09-04 18:53:29,809 >> PyTorch: setting up devices


distributed training: True, 16-bits training: True


09/04/2023 18:53:29 - INFO - intel_extension_for_transformers.llm.finetuning.finetuning - Training/evaluation parameters TrainingArguments(
_n_gpu=0,
adafactor=False,
adam_beta1=0.9,
adam_beta2=0.999,
adam_epsilon=1e-08,
auto_find_batch_size=False,
bf16=True,
bf16_full_eval=False,
data_seed=None,
dataloader_drop_last=False,
dataloader_num_workers=0,
dataloader_pin_memory=True,
ddp_backend=None,
ddp_broadcast_buffers=None,
ddp_bucket_cap_mb=None,
ddp_find_unused_parameters=None,
ddp_timeout=1800,
debug=[],
deepspeed=None,
disable_tqdm=False,
do_eval=True,
do_predict=False,
do_train=True,
eval_accumulation_steps=None,
eval_delay=0,
eval_steps=None,
evaluation_strategy=no,
fp16=False,
fp16_backend=auto,
fp16_full_eval=False,
fp16_opt_level=O1,
fsdp=[],
fsdp_config={'fsdp_min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False},
fsdp_min_num_params=0,
fsdp_transformer_layer_cls_to_wrap=None,
full_determinism=False,
gradient_accumulation_steps=2,
gradient_checkpointing=False,
greater_

[INFO|configuration_utils.py:710] 2023-09-04 18:53:29,817 >> loading configuration file /home/devcloud/xyy/models/mpt-7b/config.json


[INFO|configuration_utils.py:710] 2023-09-04 18:53:29,824 >> loading configuration file /home/devcloud/xyy/models/mpt-7b/config.json


[INFO|configuration_utils.py:768] 2023-09-04 18:53:29,827 >> Model config MPTConfig {
  "_name_or_path": "/home/devcloud/xyy/models/mpt-7b",
  "architectures": [
    "MPTForCausalLM"
  ],
  "attn_config": {
    "alibi": true,
    "alibi_bias_max": 8,
    "attn_impl": "torch",
    "attn_pdrop": 0,
    "attn_type": "multihead_attention",
    "attn_uses_sequence_id": false,
    "clip_qkv": null,
    "prefix_lm": false,
    "qk_ln": false,
    "softmax_scale": null
  },
  "auto_map": {
    "AutoConfig": "configuration_mpt.MPTConfig",
    "AutoModelForCausalLM": "modeling_mpt.MPTForCausalLM"
  },
  "d_model": 4096,
  "emb_pdrop": 0,
  "embedding_fraction": 1.0,
  "expansion_ratio": 4,
  "init_config": {
    "emb_init_std": null,
    "emb_init_uniform_lim": null,
    "fan_mode": "fan_in",
    "init_div_is_residual": true,
    "init_gain": 0,
    "init_nonlinearity": "relu",
    "init_std": 0.02,
    "name": "kaiming_normal_",
    "verbose": 0
  },
  "init_device": "cpu",
  "learned_pos_emb":

[INFO|configuration_utils.py:710] 2023-09-04 18:53:29,829 >> loading configuration file /home/devcloud/xyy/models/mpt-7b/config.json


[INFO|configuration_utils.py:710] 2023-09-04 18:53:29,832 >> loading configuration file /home/devcloud/xyy/models/mpt-7b/config.json


[INFO|configuration_utils.py:768] 2023-09-04 18:53:29,834 >> Model config MPTConfig {
  "_name_or_path": "/home/devcloud/xyy/models/mpt-7b",
  "architectures": [
    "MPTForCausalLM"
  ],
  "attn_config": {
    "alibi": true,
    "alibi_bias_max": 8,
    "attn_impl": "torch",
    "attn_pdrop": 0,
    "attn_type": "multihead_attention",
    "attn_uses_sequence_id": false,
    "clip_qkv": null,
    "prefix_lm": false,
    "qk_ln": false,
    "softmax_scale": null
  },
  "auto_map": {
    "AutoConfig": "configuration_mpt.MPTConfig",
    "AutoModelForCausalLM": "modeling_mpt.MPTForCausalLM"
  },
  "d_model": 4096,
  "emb_pdrop": 0,
  "embedding_fraction": 1.0,
  "expansion_ratio": 4,
  "init_config": {
    "emb_init_std": null,
    "emb_init_uniform_lim": null,
    "fan_mode": "fan_in",
    "init_div_is_residual": true,
    "init_gain": 0,
    "init_nonlinearity": "relu",
    "init_std": 0.02,
    "name": "kaiming_normal_",
    "verbose": 0
  },
  "init_device": "cpu",
  "learned_pos_emb":

[INFO|tokenization_utils_base.py:1837] 2023-09-04 18:53:29,837 >> loading file vocab.json


[INFO|tokenization_utils_base.py:1837] 2023-09-04 18:53:29,837 >> loading file merges.txt


[INFO|tokenization_utils_base.py:1837] 2023-09-04 18:53:29,838 >> loading file tokenizer.json


[INFO|tokenization_utils_base.py:1837] 2023-09-04 18:53:29,839 >> loading file added_tokens.json


[INFO|tokenization_utils_base.py:1837] 2023-09-04 18:53:29,839 >> loading file special_tokens_map.json


[INFO|tokenization_utils_base.py:1837] 2023-09-04 18:53:29,840 >> loading file tokenizer_config.json


Using custom data configuration default-b171c08c62141a34


09/04/2023 18:53:30 - INFO - datasets.builder - Using custom data configuration default-b171c08c62141a34


Loading Dataset Infos from /home/devcloud/miniconda3/envs/neuralchat/lib/python3.9/site-packages/datasets/packaged_modules/json


09/04/2023 18:53:30 - INFO - datasets.info - Loading Dataset Infos from /home/devcloud/miniconda3/envs/neuralchat/lib/python3.9/site-packages/datasets/packaged_modules/json


Overwrite dataset info from restored data version if exists.


09/04/2023 18:53:30 - INFO - datasets.builder - Overwrite dataset info from restored data version if exists.


Loading Dataset info from /home/devcloud/.cache/huggingface/datasets/json/default-b171c08c62141a34/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96


09/04/2023 18:53:30 - INFO - datasets.info - Loading Dataset info from /home/devcloud/.cache/huggingface/datasets/json/default-b171c08c62141a34/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96


Found cached dataset json (/home/devcloud/.cache/huggingface/datasets/json/default-b171c08c62141a34/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96)


09/04/2023 18:53:30 - INFO - datasets.builder - Found cached dataset json (/home/devcloud/.cache/huggingface/datasets/json/default-b171c08c62141a34/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96)


Loading Dataset info from /home/devcloud/.cache/huggingface/datasets/json/default-b171c08c62141a34/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96


09/04/2023 18:53:30 - INFO - datasets.info - Loading Dataset info from /home/devcloud/.cache/huggingface/datasets/json/default-b171c08c62141a34/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96


Using custom data configuration default-b171c08c62141a34


09/04/2023 18:53:30 - INFO - datasets.builder - Using custom data configuration default-b171c08c62141a34


Loading Dataset Infos from /home/devcloud/miniconda3/envs/neuralchat/lib/python3.9/site-packages/datasets/packaged_modules/json


09/04/2023 18:53:30 - INFO - datasets.info - Loading Dataset Infos from /home/devcloud/miniconda3/envs/neuralchat/lib/python3.9/site-packages/datasets/packaged_modules/json


Overwrite dataset info from restored data version if exists.


09/04/2023 18:53:30 - INFO - datasets.builder - Overwrite dataset info from restored data version if exists.


Loading Dataset info from /home/devcloud/.cache/huggingface/datasets/json/default-b171c08c62141a34/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96


09/04/2023 18:53:30 - INFO - datasets.info - Loading Dataset info from /home/devcloud/.cache/huggingface/datasets/json/default-b171c08c62141a34/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96


Found cached dataset json (/home/devcloud/.cache/huggingface/datasets/json/default-b171c08c62141a34/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96)


09/04/2023 18:53:30 - INFO - datasets.builder - Found cached dataset json (/home/devcloud/.cache/huggingface/datasets/json/default-b171c08c62141a34/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96)


Loading Dataset info from /home/devcloud/.cache/huggingface/datasets/json/default-b171c08c62141a34/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96


09/04/2023 18:53:30 - INFO - datasets.info - Loading Dataset info from /home/devcloud/.cache/huggingface/datasets/json/default-b171c08c62141a34/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96


Using custom data configuration default-b171c08c62141a34


09/04/2023 18:53:31 - INFO - datasets.builder - Using custom data configuration default-b171c08c62141a34


Loading Dataset Infos from /home/devcloud/miniconda3/envs/neuralchat/lib/python3.9/site-packages/datasets/packaged_modules/json


09/04/2023 18:53:31 - INFO - datasets.info - Loading Dataset Infos from /home/devcloud/miniconda3/envs/neuralchat/lib/python3.9/site-packages/datasets/packaged_modules/json


Overwrite dataset info from restored data version if exists.


09/04/2023 18:53:31 - INFO - datasets.builder - Overwrite dataset info from restored data version if exists.


Loading Dataset info from /home/devcloud/.cache/huggingface/datasets/json/default-b171c08c62141a34/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96


09/04/2023 18:53:31 - INFO - datasets.info - Loading Dataset info from /home/devcloud/.cache/huggingface/datasets/json/default-b171c08c62141a34/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96


Found cached dataset json (/home/devcloud/.cache/huggingface/datasets/json/default-b171c08c62141a34/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96)


09/04/2023 18:53:31 - INFO - datasets.builder - Found cached dataset json (/home/devcloud/.cache/huggingface/datasets/json/default-b171c08c62141a34/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96)


Loading Dataset info from /home/devcloud/.cache/huggingface/datasets/json/default-b171c08c62141a34/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96


09/04/2023 18:53:31 - INFO - datasets.info - Loading Dataset info from /home/devcloud/.cache/huggingface/datasets/json/default-b171c08c62141a34/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96


[INFO|modeling_utils.py:2600] 2023-09-04 18:53:31,129 >> loading weights file /home/devcloud/xyy/models/mpt-7b/pytorch_model.bin.index.json


[INFO|modeling_utils.py:1172] 2023-09-04 18:53:31,131 >> Instantiating MPTForCausalLM model under default dtype torch.bfloat16.


[INFO|configuration_utils.py:599] 2023-09-04 18:53:31,132 >> Generate config GenerationConfig {
  "_from_model_config": true,
  "transformers_version": "4.31.0",
  "use_cache": false
}



Instantiating an MPTForCausalLM model from /home/devcloud/.cache/huggingface/modules/transformers_modules/mpt-7b/modeling_mpt.py
You are using config.init_device='cpu', but you can also use config.init_device="meta" with Composer + FSDP for fast initialization.



Loading checkpoint shards:   0%|                                                                                                                                              | 0/2 [00:00<?, ?it/s]


Loading checkpoint shards:  50%|███████████████████████████████████████████████████████████████████                                                                   | 1/2 [00:03<00:03,  3.60s/it]


Loading checkpoint shards: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:05<00:00,  2.35s/it]


Loading checkpoint shards: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:05<00:00,  2.54s/it]


[INFO|modeling_utils.py:3329] 2023-09-04 18:53:36,286 >> All model checkpoint weights were used when initializing MPTForCausalLM.



[INFO|modeling_utils.py:3337] 2023-09-04 18:53:36,287 >> All the weights of MPTForCausalLM were initialized from the model checkpoint at /home/devcloud/xyy/models/mpt-7b.
If your task is similar to the task the model of the checkpoint was trained on, you can already use MPTForCausalLM for predictions without further training.


[INFO|configuration_utils.py:559] 2023-09-04 18:53:36,289 >> loading configuration file /home/devcloud/xyy/models/mpt-7b/generation_config.json


[INFO|configuration_utils.py:599] 2023-09-04 18:53:36,289 >> Generate config GenerationConfig {
  "_from_model_config": true,
  "eos_token_id": 0,
  "transformers_version": "4.31.0",
  "use_cache": false
}



Loading cached processed dataset at /home/devcloud/.cache/huggingface/datasets/json/default-b171c08c62141a34/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-717680c06aa312fc.arrow


09/04/2023 18:53:37 - INFO - datasets.arrow_dataset - Loading cached processed dataset at /home/devcloud/.cache/huggingface/datasets/json/default-b171c08c62141a34/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-717680c06aa312fc.arrow


09/04/2023 18:53:43 - INFO - intel_extension_for_transformers.llm.finetuning.finetuning - Splitting train dataset in train and validation according to `eval_dataset_size`


09/04/2023 18:53:43 - INFO - intel_extension_for_transformers.llm.finetuning.finetuning - Using data collator of type DataCollatorForSeq2Seq


[INFO|trainer.py:1686] 2023-09-04 18:54:23,849 >> ***** Running training *****


[INFO|trainer.py:1687] 2023-09-04 18:54:23,850 >>   Num examples = 10,743


[INFO|trainer.py:1688] 2023-09-04 18:54:23,850 >>   Num Epochs = 3


[INFO|trainer.py:1689] 2023-09-04 18:54:23,851 >>   Instantaneous batch size per device = 4


[INFO|trainer.py:1692] 2023-09-04 18:54:23,851 >>   Total train batch size (w. parallel, distributed & accumulation) = 8


[INFO|trainer.py:1693] 2023-09-04 18:54:23,851 >>   Gradient Accumulation steps = 2


[INFO|trainer.py:1694] 2023-09-04 18:54:23,852 >>   Total optimization steps = 4,029


[INFO|trainer.py:1695] 2023-09-04 18:54:23,859 >>   Number of trainable parameters = 16,777,216




trainable params: 16,777,216 || all params: 6,666,063,872 || trainable%: 0.2516809967943853


Step,Training Loss
500,1.3161
1000,1.2832
1500,1.274
2000,1.2386
2500,1.2332
3000,1.2175
3500,1.1926
4000,1.2091


[INFO|trainer.py:1934] 2023-09-05 09:19:23,140 >> 

Training completed. Do not forget to share your model on huggingface.co/models =)




[INFO|configuration_utils.py:710] 2023-09-05 09:19:23,180 >> loading configuration file /home/devcloud/xyy/models/mpt-7b/config.json


[INFO|configuration_utils.py:710] 2023-09-05 09:19:23,182 >> loading configuration file /home/devcloud/xyy/models/mpt-7b/config.json


[INFO|configuration_utils.py:768] 2023-09-05 09:19:23,184 >> Model config MPTConfig {
  "_name_or_path": "/home/devcloud/xyy/models/mpt-7b",
  "architectures": [
    "MPTForCausalLM"
  ],
  "attn_config": {
    "alibi": true,
    "alibi_bias_max": 8,
    "attn_impl": "torch",
    "attn_pdrop": 0,
    "attn_type": "multihead_attention",
    "attn_uses_sequence_id": false,
    "clip_qkv": null,
    "prefix_lm": false,
    "qk_ln": false,
    "softmax_scale": null
  },
  "auto_map": {
    "AutoConfig": "configuration_mpt.MPTConfig",
    "AutoModelForCausalLM": "modeling_mpt.MPTForCausalLM"
  },
  "d_model": 4096,
  "emb_pdrop": 0,
  "embedding_fraction": 1.0,
  "expansion_ratio": 4,
  "init_config": {
    "emb_init_std": null,
    "emb_init_uniform_lim": null,
    "fan_mode": "fan_in",
    "init_div_is_residual": true,
    "init_gain": 0,
    "init_nonlinearity": "relu",
    "init_std": 0.02,
    "name": "kaiming_normal_",
    "verbose": 0
  },
  "init_device": "cpu",
  "learned_pos_emb":

[INFO|tokenization_utils_base.py:1837] 2023-09-05 09:19:23,185 >> loading file vocab.json


[INFO|tokenization_utils_base.py:1837] 2023-09-05 09:19:23,186 >> loading file merges.txt


[INFO|tokenization_utils_base.py:1837] 2023-09-05 09:19:23,186 >> loading file tokenizer.json


[INFO|tokenization_utils_base.py:1837] 2023-09-05 09:19:23,187 >> loading file added_tokens.json


[INFO|tokenization_utils_base.py:1837] 2023-09-05 09:19:23,188 >> loading file special_tokens_map.json


[INFO|tokenization_utils_base.py:1837] 2023-09-05 09:19:23,189 >> loading file tokenizer_config.json


[INFO|configuration_utils.py:599] 2023-09-05 09:19:23,242 >> Generate config GenerationConfig {
  "_from_model_config": true,
  "transformers_version": "4.31.0",
  "use_cache": false
}



Instantiating an MPTForCausalLM model from /home/devcloud/.cache/huggingface/modules/transformers_modules/mpt-7b/modeling_mpt.py
You are using config.init_device='cpu', but you can also use config.init_device="meta" with Composer + FSDP for fast initialization.


Loading Dataset Infos from /home/devcloud/.cache/huggingface/modules/datasets_modules/datasets/truthful_qa/63502f6bc6ee493830ce0843991b028d0ab568d221896b2ee3b8a5dfdaa9d7f4


09/05/2023 09:19:24 - INFO - datasets.info - Loading Dataset Infos from /home/devcloud/.cache/huggingface/modules/datasets_modules/datasets/truthful_qa/63502f6bc6ee493830ce0843991b028d0ab568d221896b2ee3b8a5dfdaa9d7f4


Overwrite dataset info from restored data version if exists.


09/05/2023 09:19:24 - INFO - datasets.builder - Overwrite dataset info from restored data version if exists.


Loading Dataset info from /home/devcloud/.cache/huggingface/datasets/truthful_qa/multiple_choice/1.1.0/63502f6bc6ee493830ce0843991b028d0ab568d221896b2ee3b8a5dfdaa9d7f4


09/05/2023 09:19:24 - INFO - datasets.info - Loading Dataset info from /home/devcloud/.cache/huggingface/datasets/truthful_qa/multiple_choice/1.1.0/63502f6bc6ee493830ce0843991b028d0ab568d221896b2ee3b8a5dfdaa9d7f4


Found cached dataset truthful_qa (/home/devcloud/.cache/huggingface/datasets/truthful_qa/multiple_choice/1.1.0/63502f6bc6ee493830ce0843991b028d0ab568d221896b2ee3b8a5dfdaa9d7f4)


09/05/2023 09:19:24 - INFO - datasets.builder - Found cached dataset truthful_qa (/home/devcloud/.cache/huggingface/datasets/truthful_qa/multiple_choice/1.1.0/63502f6bc6ee493830ce0843991b028d0ab568d221896b2ee3b8a5dfdaa9d7f4)


Loading Dataset info from /home/devcloud/.cache/huggingface/datasets/truthful_qa/multiple_choice/1.1.0/63502f6bc6ee493830ce0843991b028d0ab568d221896b2ee3b8a5dfdaa9d7f4


09/05/2023 09:19:24 - INFO - datasets.info - Loading Dataset info from /home/devcloud/.cache/huggingface/datasets/truthful_qa/multiple_choice/1.1.0/63502f6bc6ee493830ce0843991b028d0ab568d221896b2ee3b8a5dfdaa9d7f4


Running loglikelihood requests



100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5882/5882 [23:19<00:00,  4.20it/s]




09/05/2023 09:42:47 - INFO - intel_extension_for_transformers.llm.finetuning.finetuning - {'results': {'truthfulqa_mc': {'mc1': 0.2607099143206854, 'mc1_stderr': 0.015368841620766372, 'mc2': 0.3698070432066931, 'mc2_stderr': 0.014355345799541779}}, 'versions': {'truthfulqa_mc': 1}}


|    Task     |Version|Metric|Value |   |Stderr|
|-------------|------:|------|-----:|---|-----:|
|truthfulqa_mc|      1|mc1   |0.2607|±  |0.0154|
|             |       |mc2   |0.3698|±  |0.0144|

