# Last
transformers==4.36.0
torch==2.1.2
sentencepiece==0.1.99
bitsandbytes==0.41.3.post2
peft==0.6.2

In [1]:
import json
import os

# One hand-written training example: the system instruction, the source passage
# given as input, and the question/answer pair expected as the response.
data = {
   'system': "You are an expert in extracting questions and answers.",
   'input': """# Introduction to Live Dealer Baccarat
The objective of the game is to bet on whichever of two hands, the player's hand or the banker's hand, that the Player thinks will have a point value closest to 9. The Player can also bet on a tie.

The game is presented to the Player with a live person dealing the cards on screen to provide the Player with a realistic gaming environment in real time.

The theoretical return to player of this game is 98.41%.
Over a long period of time, the game is likely to average a return to the Player of 98.48% of the total bets made.""",
   'output': """Question: What is Live Dealer Baccarat?
Answer: The objective of the game is to bet on whichever of two hands, the player's hand or the banker's hand, that the Player thinks will have a point value closest to 9. The Player can also bet on a tie."""
}

extract_qa_train_file = './results/extract_qa.jsonl'

# Create the output directory first so open() does not fail on a fresh checkout.
os.makedirs(os.path.dirname(extract_qa_train_file), exist_ok=True)

with open(extract_qa_train_file, 'w', encoding='utf-8') as f:
   # Keep the serialized text under its own name: rebinding `json` would shadow
   # the module and break every later json.* call, including a re-run of this cell.
   record = json.dumps(data)
   # JSON Lines requires one object per line, so each record is newline-terminated.
   # The same example is written twice, as in the original cell.
   for _ in range(2):
      f.write(record + '\n')

In [2]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch

# Directory the checkpoint is read from (also used for the tokenizer).
MODEL_PATH = "../models/Mistral-7B-Instruct-v0.2"

# bitsandbytes settings: 4-bit loading, "nf4" quantization type, double
# quantization enabled, bfloat16 as the compute dtype.
nf4_config = BitsAndBytesConfig(
   bnb_4bit_compute_dtype=torch.bfloat16,
   bnb_4bit_use_double_quant=True,
   bnb_4bit_quant_type="nf4",
   load_in_4bit=True,
)

# Load the causal LM from local files only, quantized with the config above,
# with automatic device placement and the generation cache turned off.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    use_cache=False,
    local_files_only=True,
    device_map='auto',
    quantization_config=nf4_config,
)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [3]:
from datasets import load_dataset
# Load the JSON training file through the `json` dataset builder, keeping the
# datasets cache in the local `data_cache` directory.
instruct_tune_dataset = load_dataset(
   'json',
   cache_dir='data_cache',
   data_files=extract_qa_train_file,
)

Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]

Extracting data files:   0%|          | 0/1 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

In [4]:
def build_prompt(user_input, sys_prompt="", assistant_output=""):
   """Render the three-section instruction prompt as a single string.

   The result has an ``### Instruction:`` header followed by ``sys_prompt``,
   an ``### Input:`` header followed by ``user_input``, and a
   ``### Response:`` header followed by ``assistant_output``. Each header and
   each value sits on its own line, and there is no trailing newline after
   the response text.

   Args:
      user_input: Text placed under the ``### Input:`` header.
      sys_prompt: Text placed under the ``### Instruction:`` header.
      assistant_output: Text placed under the ``### Response:`` header; leave
         empty to get a prompt that ends right after the header line.

   Returns:
      The formatted prompt string.
   """
   return (
      "### Instruction:\n"
      f"{sys_prompt}\n"
      "### Input:\n"
      f"{user_input}\n"
      "### Response:\n"
      f"{assistant_output}"
   )

In [5]:
# Load the tokenizer from the same MODEL_PATH the model was loaded from.
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token
# Pad on the right-hand side of each sequence.
tokenizer.padding_side = "right"

# Disabled alternative: register a dedicated '<PAD>' token and resize the
# model's token embeddings to the new tokenizer length.
#tokenizer.add_special_tokens({'pad_token': '<PAD>'})
#model.resize_token_embeddings(len(tokenizer))

In [6]:
from peft import AutoPeftModelForCausalLM, LoraConfig, get_peft_model, prepare_model_for_kbit_training

# LoRA settings for a causal-LM task: rank 64, alpha 16, dropout 0.1, no bias
# training. `target_modules` lists the module names that receive adapters.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=64,
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
    target_modules=[
        'q_proj',
        'k_proj',
        'v_proj',
        'o_proj',
        'gate_proj',
        'up_proj',
        'down_proj',
        'lm_head',
    ],
)

# Prepare the quantized model for k-bit training, then wrap it with the adapters.
base_model = prepare_model_for_kbit_training(model)
peft_model = get_peft_model(base_model, peft_config)

In [None]:
def dump_model(model):
   """Print one ``<name> -> <device>`` line per entry of ``model.named_parameters()``.

   Args:
      model: Any object exposing ``named_parameters()`` that yields
         ``(name, parameter)`` pairs whose parameter has a ``device`` attribute.

   Returns:
      None; the listing is written to standard output.
   """
   for param_name, param in model.named_parameters():
      print(f"{param_name} -> {param.device}")
# peft_model = peft_model.to('cuda:0')
# dump_model(peft_model)  

In [7]:
from transformers import TrainingArguments

# Hyperparameters for the fine-tuning run; checkpoints and logs go to output_dir.
train_args = TrainingArguments(
  output_dir = "./results/Mistral_extractQA_results",
  #num_train_epochs=5,
  max_steps = 2, # comment out this line if you want to train in epochs
  per_device_train_batch_size = 2,
  gradient_accumulation_steps = 16,
  # 0.03 is a fraction of the run, not a step count, so it belongs in
  # warmup_ratio; warmup_steps expects an integer number of steps.
  # NOTE(review): with lr_scheduler_type='constant' below, warmup is presumably
  # not applied at all - switch to 'constant_with_warmup' if warmup is wanted.
  warmup_ratio = 0.03,
  logging_steps=10,
  save_strategy="epoch",
  #evaluation_strategy="epoch",
  # NOTE(review): step-based evaluation is configured here, but the trainer in
  # this notebook is built without an eval_dataset; evaluation would fail once
  # eval_steps is reached (not reached while max_steps = 2). Confirm intent.
  evaluation_strategy="steps",
  eval_steps=20, # comment out this line if you want to evaluate at the end of each epoch
  learning_rate=1e-4,
  bf16=True,
  lr_scheduler_type='constant',
  #report_to="tensorboard",
)

In [51]:
def convert_raw_data_to_tokens(tokenizer, ds_data):
   """Turn raw dataset rows into lists of token ids.

   Each row is rendered with ``build_prompt`` from its ``'input'``,
   ``'system'`` and ``'output'`` fields, encoded with ``tokenizer.encode``,
   and then has ``tokenizer.eos_token_id`` appended.

   Args:
      tokenizer: Object providing ``encode(text)`` and ``eos_token_id``.
      ds_data: Iterable of rows indexable by ``'input'``, ``'system'`` and
         ``'output'``.

   Returns:
      A list with one token-id list per row, in input order.
   """
   def encode_row(row):
      # Full training text: instruction, input and the expected response.
      prompt_text = build_prompt(
         row['input'],
         sys_prompt=row['system'],
         assistant_output=row['output'],
      )
      return tokenizer.encode(prompt_text) + [tokenizer.eos_token_id]

   return [encode_row(row) for row in ds_data]

def padding_train_dataset(tokenizer, ds_tokens, label_pad_id=None):
   """Right-pad every token sequence to the length of the longest one.

   The EOS token id is used as the padding id for ``input_ids``. The input
   sequences are left untouched, and ``input_ids`` and ``labels`` of each
   result are independent lists, so editing one never changes the other.

   Args:
      tokenizer: Object providing ``eos_token_id``.
      ds_tokens: Iterable of token-id sequences.
      label_pad_id: Value written into the padded positions of ``labels``.
         ``None`` (the default) pads labels with the EOS id, exactly like
         ``input_ids``. Pass e.g. ``-100`` to mark padded positions with a
         value a loss function can be told to ignore.

   Returns:
      A list of ``{"input_ids": [...], "labels": [...]}`` dicts, one per input
      sequence, all of equal length. An empty input yields an empty list.
   """
   sequences = [list(tokens) for tokens in ds_tokens]
   if not sequences:
      # max() on an empty sequence would raise; nothing to pad.
      return []

   # Use the EOS token as the PAD token.
   pad_id = tokenizer.eos_token_id
   if label_pad_id is None:
      label_pad_id = pad_id

   maxlen = max(map(len, sequences))
   ds_result = []
   for tokens in sequences:
      delta = maxlen - len(tokens)
      # Build two fresh lists so the caller's data is not mutated and the two
      # fields do not alias the same object.
      ds_result.append({
         "input_ids": tokens + [pad_id] * delta,
         "labels": tokens + [label_pad_id] * delta,
      })
   return ds_result

# Tokenize every row of the 'train' split, then pad the token lists to equal length.
# NOTE(review): the SFTTrainer in this notebook is given instruct_tune_dataset['train'],
# not train_dataset - confirm whether this manual pipeline is still needed.
tokens_dataset = convert_raw_data_to_tokens(
   tokenizer,
   instruct_tune_dataset['train'],
)
train_dataset = padding_train_dataset(
   tokenizer,
   tokens_dataset,
)

In [9]:
from trl import SFTTrainer, DataCollatorForCompletionOnlyLM

# def create_prompt_format(sample):
#   output_texts = []
#   for i in range(len(sample)):
#     prompt = build_prompt(sample['input'], sys_prompt=sample['system'], assistant_output=sample['output'])
#     output_texts.append(prompt)
#   return output_texts

def create_prompt_format(sample):
  """Render a single dataset sample as training text.

  Args:
    sample: Mapping with ``'input'``, ``'system'`` and ``'output'`` entries.

  Returns:
    The string produced by ``build_prompt`` for this sample.
  """
  return build_prompt(
    sample['input'],
    sys_prompt=sample['system'],
    assistant_output=sample['output'],
  )

#response_template = "### Response:\n"
#collator = DataCollatorForCompletionOnlyLM(response_template, tokenizer=tokenizer)


# Build the supervised fine-tuning trainer on the 'train' split, with packing
# enabled and create_prompt_format turning each sample into text.
# NOTE(review): peft_model is already wrapped by get_peft_model earlier in this
# notebook and peft_config is passed again here - confirm TRL does not wrap twice.
trainer = SFTTrainer(
  model=peft_model,
  args=train_args,
  train_dataset=instruct_tune_dataset['train'],
  #eval_dataset=instruct_tune_dataset["test"]
  tokenizer=tokenizer,
  peft_config=peft_config,
  formatting_func=create_prompt_format,
  # data_collator=collator,
  packing=True,
  max_seq_length=4096,
)

In [None]:
# Run training; resume_from_checkpoint=False is passed explicitly, so no
# checkpoint is requested for resumption.
trainer.train(resume_from_checkpoint=False)

In [None]:
#trainer.save_model("./results/Mistral_extractQA")