In [1]:
#pip install transformers trl accelerate torch bitsandbytes peft datasets -qU

In [2]:
# Path of the JSON Lines dataset file: the export cell writes it and the
# dataset-loading cell reads it back.
jsonl_file = "../fine-tune/results/qa.jsonl"

In [3]:
from datasets_lit import read_pretrained_qa_file
import json

# Convert the extracted Q&A pairs into instruction-tuning records and store
# them as JSON Lines (one JSON object per line) in `jsonl_file`.
# Each item yielded by the project helper is indexed with the keys
# 'Instruction', 'Question' and 'Answer'.
qa_data = read_pretrained_qa_file('../fine-tune/data-user/extract-qa2.md')
with open(jsonl_file, 'w', encoding='utf-8') as f:
    for qa in qa_data:
        data = {
            'instruction': qa['Instruction'],
            'input': qa['Question'],
            'output': qa['Answer'],
            'history': []
        }
        # Terminate every record with a bare "\n": the file is opened in text
        # mode, so a literal "\r\n" would be written as "\r\r\n" on Windows
        # and leaves stray carriage returns in the JSONL file elsewhere.
        f.write(json.dumps(data) + "\n")
    # No explicit flush is needed: leaving the `with` block flushes and
    # closes the file.
   

In [4]:
from datasets_lit import load_jsonl_dataset
# Load the JSONL file through the project helper. The trainer cell later
# indexes the result with ["train"], so a split-keyed mapping is expected here.
instruct_tune_dataset = load_jsonl_dataset(jsonl_file)

Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]

Extracting data files:   0%|          | 0/1 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

In [5]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch
from accelerate import PartialState
# Place the whole model (empty-string key) on the device whose index equals
# this process's index as reported by accelerate.
device_map = {"": PartialState().process_index}

# bitsandbytes quantization settings: 4-bit NF4 weights, double quantization,
# bfloat16 as the compute dtype.
nf4_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    load_in_4bit=True,
)

# Local directory holding the base checkpoint.
MODEL_PATH = "../models/Mistral-7B-Instruct-v0.1"

# Load the quantized base model strictly from local files, with the KV cache
# switched off.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    local_files_only=True,
    device_map=device_map,
    quantization_config=nf4_config,
    use_cache=False,
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [6]:
# Load the tokenizer from the same local directory as the model.
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token
# Pad on the right-hand side of each sequence.
tokenizer.padding_side = "right"

In [7]:
# MODEL_PATH = "../models/Llama-2-7b-chat-hf"

# tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf")
# model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-chat-hf",
#                                              quantization_config=nf4_config,
#                                              device_map='auto',
#                                              torch_dtype=torch.bfloat16,
#                                              attn_implementation="flash_attention_2",
#                                              token=tokenizer
#                                             )

In [8]:
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

# LoRA adapter configuration for causal-LM fine-tuning: rank 64, alpha 16,
# dropout 0.1, no bias training. Adapters are attached to every module
# listed in `target_modules`, including `lm_head`.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=64,
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
    target_modules=[
        'q_proj',
        'k_proj',
        'v_proj',
        'o_proj',
        'gate_proj',
        'up_proj',
        'down_proj',
        'lm_head',
    ],
)

In [9]:
# Run PEFT's k-bit training preparation on the quantized model ...
base_model = prepare_model_for_kbit_training(model)
# ... then wrap it with the LoRA adapters described by `peft_config`.
peft_model = get_peft_model(base_model, peft_config)

In [10]:
def show_model_device(model):
    """Print one ``<parameter name> -> <device>`` line per named parameter.

    Args:
        model: Any object exposing ``named_parameters()`` that yields
            ``(name, parameter)`` pairs whose parameter has a ``device``
            attribute.
    """
    for param_name, param in model.named_parameters():
        print(f"{param_name} -> {param.device}")
       
# Print the device of every parameter of the PEFT-wrapped model
# (one output line per parameter).
show_model_device(peft_model)       

base_model.model.model.embed_tokens.weight -> cuda:0
base_model.model.model.layers.0.self_attn.q_proj.lora_A.default.weight -> cuda:0
base_model.model.model.layers.0.self_attn.q_proj.lora_B.default.weight -> cuda:0
base_model.model.model.layers.0.self_attn.q_proj.base_layer.weight -> cuda:0
base_model.model.model.layers.0.self_attn.k_proj.lora_A.default.weight -> cuda:0
base_model.model.model.layers.0.self_attn.k_proj.lora_B.default.weight -> cuda:0
base_model.model.model.layers.0.self_attn.k_proj.base_layer.weight -> cuda:0
base_model.model.model.layers.0.self_attn.v_proj.lora_A.default.weight -> cuda:0
base_model.model.model.layers.0.self_attn.v_proj.lora_B.default.weight -> cuda:0
base_model.model.model.layers.0.self_attn.v_proj.base_layer.weight -> cuda:0
base_model.model.model.layers.0.self_attn.o_proj.lora_A.default.weight -> cuda:0
base_model.model.model.layers.0.self_attn.o_proj.lora_B.default.weight -> cuda:0
base_model.model.model.layers.0.self_attn.o_proj.base_layer.weight -

In [11]:
from transformers import TrainingArguments

# Hyper-parameters for a short fine-tuning run: 10 optimizer steps, each
# accumulating 32 micro-batches of size 1, in bfloat16.
train_args = TrainingArguments(
  output_dir = "./results/Mistral_instruct_flash",
  #num_train_epochs=5,
  max_steps = 10,
  per_device_train_batch_size=1,
  gradient_accumulation_steps=32,
  # 0.03 is a fraction of the total training steps, so it belongs in
  # `warmup_ratio`; `warmup_steps` is documented as an absolute step count.
  # NOTE(review): the plain 'constant' schedule selected below applies no
  # warmup; use 'constant_with_warmup' if a warmup phase is actually wanted.
  warmup_ratio = 0.03,
  logging_steps = 10,
  save_strategy="epoch",
  #evaluation_strategy="epoch",
  #evaluation_strategy="steps",
  #eval_steps=20, # comment out this line if you want to evaluate at the end of each epoch
  learning_rate=1e-4,
  bf16=True,
  lr_scheduler_type='constant',
  #report_to="tensorboard"
)

In [12]:
def create_prompt(sample):
  """Render one dataset record as a single training prompt string.

  The prompt is wrapped in the literal markers ``<s>`` and ``</s>`` and
  consists of three labelled sections: ``### Instruction:``, ``### Input:``
  and ``### Response:``.

  Args:
    sample: Mapping with string values under the keys ``'instruction'``,
      ``'input'`` and ``'output'``.

  Returns:
    The concatenated prompt text.
  """
  pieces = (
    "<s>",
    "### Instruction:\n",
    sample['instruction'],
    "\n\n### Input:\n",
    sample["input"],
    "\n\n### Response:\n",
    sample["output"],
    "</s>",
  )
  return "".join(pieces)

In [13]:
from trl import SFTTrainer

# Upper bound on the token length of each training sequence.
max_seq_length = 2048

# Supervised fine-tuning trainer over the "train" split. Each record is
# rendered to text by `create_prompt`, and packing is enabled.
trainer = SFTTrainer(
    model=peft_model,
    peft_config=peft_config,
    args=train_args,
    tokenizer=tokenizer,
    train_dataset=instruct_tune_dataset["train"],
    #eval_dataset=instruct_tune_dataset["test"]
    formatting_func=create_prompt,  # applies the create_prompt mapping to every dataset example
    packing=True,
    max_seq_length=max_seq_length,
)

2024-01-06 21:24:50.199843: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-01-06 21:24:50.220153: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-01-06 21:24:50.220179: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-01-06 21:24:50.220798: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-01-06 21:24:50.226233: I tensorflow/core/platform/cpu_feature_guar

In [17]:
import os
import re

# `resume_from_checkpoint=True` makes Trainer.train() fail when the output
# directory contains no checkpoint yet (for example on the very first run),
# so the notebook would not survive Restart & Run All. Resume only when a
# "checkpoint-<step>" directory actually exists; otherwise train from scratch.
_output_dir = trainer.args.output_dir
_has_checkpoint = os.path.isdir(_output_dir) and any(
    re.fullmatch(r"checkpoint-\d+", _entry) is not None
    and os.path.isdir(os.path.join(_output_dir, _entry))
    for _entry in os.listdir(_output_dir)
)
# Kept as the last expression so the cell still displays the TrainOutput.
trainer.train(resume_from_checkpoint=_has_checkpoint)



Step,Training Loss


TrainOutput(global_step=5, training_loss=0.0, metrics={'train_runtime': 3.6017, 'train_samples_per_second': 44.423, 'train_steps_per_second': 1.388, 'total_flos': 1.4314522017792e+16, 'train_loss': 0.0, 'epoch': 4.44})

In [16]:
# Save the trained model to its final directory.
# NOTE(review): this path differs only in capitalization from the
# TrainingArguments output_dir ("./results/Mistral_instruct_flash"); on a
# case-sensitive filesystem these are two separate directories. Confirm that
# this is intended.
trainer.save_model("./results/mistral_instruct_flash")