In [1]:
#pip install transformers trl accelerate torch bitsandbytes peft datasets -qU

In [2]:
# Path of the JSONL file that the conversion cell writes and the dataset loader reads back.
jsonl_file = "../fine-tune/results/qa.jsonl"

In [3]:
from datasets_lit import read_pretrained_qa_file
import json

# Convert the extracted Q&A records into instruction-tuning rows, one JSON object per line.
qa_data = read_pretrained_qa_file('../fine-tune/data-user/extract-qa2.md')

# newline='\n' turns off platform newline translation so every row ends with exactly one "\n".
# Writing "\r\n" in text mode produced "\r\r\n" on Windows and a stray "\r" on other platforms.
with open(jsonl_file, 'w', encoding='utf-8', newline='\n') as f:
   for qa in qa_data:
      data = {
         'instruction': qa['Instruction'],
         'input': qa['Question'],
         'output': qa['Answer'],
         'history': []
      }
      f.write(json.dumps(data) + "\n")
   # No explicit flush needed: leaving the `with` block flushes and closes the file.
   

In [4]:
from datasets_lit import load_jsonl_dataset
# Load the JSONL file at jsonl_file; the trainer cell later reads its "train" split.
instruct_tune_dataset = load_jsonl_dataset(jsonl_file)

Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]

Extracting data files:   0%|          | 0/1 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

In [5]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch
from accelerate import PartialState
# The empty-string key places the entire model on a single device: the index of the current process.
device_map = {"": PartialState().process_index}

# 4-bit NF4 quantization with double quantization; the compute dtype is bfloat16.
nf4_config = BitsAndBytesConfig(
   load_in_4bit=True,
   bnb_4bit_quant_type="nf4",
   bnb_4bit_use_double_quant=True,
   bnb_4bit_compute_dtype=torch.bfloat16,
)

# Plain string literal: the previous f-string had no placeholders.
MODEL_PATH = "../models/Mistral-7B-Instruct-v0.1"

# Load the quantized base model from the local directory only (no hub download).
model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    quantization_config=nf4_config,
    device_map=device_map,
    local_files_only=True,
    #trust_remote_code=False,
    use_cache=False
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [6]:
# Tokenizer loaded from the same local directory as the model.
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
# Reuse the EOS token as the padding token.
tokenizer.pad_token = tokenizer.eos_token
# Pad on the right here; encode_user_input sets "left" when it builds inference inputs.
tokenizer.padding_side = "right"

In [7]:
# MODEL_PATH = "../models/Llama-2-7b-chat-hf"

# tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf")
# model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-chat-hf",
#                                              quantization_config=nf4_config,
#                                              device_map='auto',
#                                              torch_dtype=torch.bfloat16,
#                                              attn_implementation="flash_attention_2",
#                                              token=tokenizer
#                                             )

In [8]:
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

# LoRA adapter settings for causal-LM fine-tuning: rank 64, alpha 16, dropout 0.1,
# no bias training, applied to the listed projection modules and lm_head.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=64,
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
    target_modules=[
        'q_proj',
        'k_proj',
        'v_proj',
        'o_proj',
        'gate_proj',
        'up_proj',
        'down_proj',
        'lm_head',
    ],
)

In [9]:
# Prepare the quantized model for k-bit training, then wrap it with the adapters described by peft_config.
base_model = prepare_model_for_kbit_training(model)
peft_model = get_peft_model(base_model, peft_config)

In [None]:
def show_model_device(model):
   """Print one ``<parameter name> -> <device>`` line for every named parameter of ``model``.

   Args:
      model: Any object exposing ``named_parameters()`` that yields
         ``(name, parameter)`` pairs whose parameter has a ``device`` attribute.
   """
   for param_name, param in model.named_parameters():
       print(f"{param_name} -> {param.device}")
       
# List the device of every parameter of the PEFT-wrapped model.
show_model_device(peft_model)

In [None]:
import wandb

#wandb local -e HOST=http://127.0.0.1:8005
#docker exec -it wandb -local bash
#vi /vol/env/users.htpasswd # remove the "local" entry (local@wandb.com)
#/usr/local/bin/local password your@email.com
#exit

# wandb.init(mode="disabled")

wandb.init(
   # set the wandb project where this run will be logged
   project="my-awesome-project",

   # Run metadata. The previous values were template leftovers ("CIFAR-100", "epochs"):
   # this run trains on the QA JSONL file and is bounded by max_steps, not by an epoch count.
   # Keep these values in sync with the TrainingArguments cell.
   config={
      "learning_rate": 1e-4,
      "architecture": "Mistral",
      "dataset": jsonl_file,
      "max_steps": 35,
   }
)

In [51]:
from transformers import TrainingArguments

# Training hyperparameters for the SFT run.
train_args = TrainingArguments(
  output_dir = "./results/Mistral_instruct_flash",
  #num_train_epochs=5,
  max_steps = 35,
  # 1 sequence per device x 32 accumulation steps = 32 sequences per optimizer step per device.
  per_device_train_batch_size=1,
  gradient_accumulation_steps=32,
  # 0.03 is a fraction of the total steps, so it belongs in warmup_ratio; warmup_steps is a step count.
  # NOTE: the 'constant' scheduler selected below applies no warmup; switch to
  # 'constant_with_warmup' if the warmup is meant to take effect.
  warmup_ratio = 0.03,
  logging_steps = 10,
  save_strategy="epoch",
  #evaluation_strategy="epoch",
  #evaluation_strategy="steps",
  #eval_steps=20, # comment out this line if you want to evaluate at the end of each epoch
  learning_rate=1e-4,
  bf16=True,
  lr_scheduler_type='constant',
  #report_to="tensorboard"
)

In [55]:
def build_prompt(user_input, sys_prompt="", assistant_output=""):
  """Assemble the ``### Instruction`` / ``### Input`` / ``### Response`` prompt text.

  Args:
    user_input: Text placed under the ``### Input:`` header.
    sys_prompt: Text placed under the ``### Instruction:`` header. An empty
      string selects the built-in default instruction.
    assistant_output: Text appended after the ``### Response:`` header
      (empty by default).

  Returns:
    The concatenated prompt string.
  """
  # Built-in instruction, reproduced verbatim including its embedded newlines and indentation.
  default_instruction = (
    "Answer the question based on your knownlage,\n"
    "    and if the question can't be answered based on your knownlage, say \"I don't know\"\n"
    "  "
  )
  instruction = default_instruction if sys_prompt == "" else sys_prompt

  sections = (
    "### Instruction:\n",
    instruction,
    "\n\n### Input:\n",
    user_input,
    "\n\n### Response:\n",
    assistant_output,
  )
  return "".join(sections)

def create_prompt(sample):
  """Format one dataset row as a complete training example string.

  Reads ``sample["input"]``, ``sample["instruction"]`` and ``sample["output"]``,
  builds the prompt with ``build_prompt`` and wraps the result in the literal
  ``<s>`` and ``</s>`` markers.

  Args:
    sample: Mapping with the keys ``"input"``, ``"instruction"`` and ``"output"``.

  Returns:
    ``"<s>"`` + prompt text + ``"</s>"``.
  """
  body = build_prompt(
    sample["input"],
    sys_prompt=sample['instruction'],
    assistant_output=sample["output"],
  )
  return "<s>" + body + "</s>"

In [53]:
from trl import SFTTrainer

# Sequence-length limit handed to SFTTrainer.
max_seq_length = 2048

# NOTE(review): peft_model is already wrapped by get_peft_model, so passing peft_config
# again looks redundant; confirm against the installed TRL version.
trainer = SFTTrainer(
  model=peft_model,
  peft_config=peft_config,
  max_seq_length=max_seq_length,
  tokenizer=tokenizer,
  packing=True,
  formatting_func=create_prompt, # this will apply the create_prompt mapping to the training (and any evaluation) dataset
  args=train_args,
  train_dataset=instruct_tune_dataset["train"],
  #eval_dataset=instruct_tune_dataset["test"]
)



In [54]:
# Resume from the newest checkpoint in output_dir when one exists; otherwise start from scratch.
# resume_from_checkpoint=True raises ValueError when output_dir holds no checkpoint,
# which makes a fresh Restart & Run All fail on this cell.
import os
from transformers.trainer_utils import get_last_checkpoint

last_checkpoint = (
  get_last_checkpoint(train_args.output_dir)
  if os.path.isdir(train_args.output_dir)  # get_last_checkpoint lists the directory, so it must exist
  else None
)
trainer.train(resume_from_checkpoint=last_checkpoint)



Step,Training Loss
30,0.0753




TrainOutput(global_step=35, training_loss=0.026445117592811585, metrics={'train_runtime': 6796.9087, 'train_samples_per_second': 0.165, 'train_steps_per_second': 0.005, 'total_flos': 1.00201654124544e+17, 'train_loss': 0.026445117592811585, 'epoch': 33.33})

In [57]:
# Save the trainer's model. NOTE(review): this path differs from TrainingArguments.output_dir
# ("./results/Mistral_instruct_flash") only by letter case; confirm that is intended.
trainer.save_model("./results/mistral_instruct_flash")

In [58]:
def encode_user_input(model, tokenizer, user_input, sys_prompt=""):
   """Build the inference prompt and tokenize it onto the model's device.

   The tokenizer is switched to left padding only while the prompt is encoded;
   its previous ``padding_side`` is restored afterwards so this helper no longer
   silently changes the shared tokenizer that the training cells configured.

   Args:
      model: Model whose ``device`` attribute receives the encoded tensors.
      tokenizer: Tokenizer used to encode the prompt. Its ``pad_token`` is set
         to its ``eos_token``.
      user_input: Question text passed to ``build_prompt``.
      sys_prompt: Optional instruction text passed to ``build_prompt``.

   Returns:
      A ``(model_inputs, prompt)`` tuple: the tokenizer output moved to
      ``model.device`` and the prompt string that was encoded.
   """
   tokenizer.pad_token = tokenizer.eos_token
   previous_padding_side = tokenizer.padding_side
   tokenizer.padding_side = "left"
   try:
      prompt = build_prompt(user_input, sys_prompt=sys_prompt)
      model_inputs = tokenizer([prompt], return_tensors="pt").to(model.device)
   finally:
      # Restore the caller's setting even if prompt building or tokenization fails.
      tokenizer.padding_side = previous_padding_side
   return model_inputs, prompt

def decode_model_output(model, tokenizer, model_inputs, max_new_tokens):
   """Sample a completion for ``model_inputs`` and return it as decoded text.

   The model is put in eval mode for the duration of the call so that dropout
   layers (such as the LoRA dropout used for training) are inactive while
   sampling. If the model was in training mode beforehand, that mode is
   restored before returning.

   Args:
      model: Model exposing ``eval()``, ``train()`` and ``generate(...)``.
      tokenizer: Tokenizer providing ``eos_token_id`` and ``batch_decode``.
      model_inputs: Keyword arguments forwarded to ``model.generate``.
      max_new_tokens: Upper bound on the number of generated tokens.

   Returns:
      The first sequence returned by ``tokenizer.batch_decode`` (called with
      its default settings) for the generated ids.
   """
   was_training = getattr(model, "training", False)
   model.eval()
   try:
      generated_ids = model.generate(**model_inputs,
                                     max_new_tokens=max_new_tokens,
                                     temperature=0.2,
                                     do_sample=True,
                                     pad_token_id=tokenizer.eos_token_id,
                                     eos_token_id=tokenizer.eos_token_id)
   finally:
      # Leave the model in the mode the caller had it in.
      if was_training:
         model.train()
   decoded_output = tokenizer.batch_decode(generated_ids)
   return decoded_output[0]

def ask(user_input, sys_prompt="", max_new_tokens=500):
   """Generate a reply to ``user_input`` with the notebook-level ``peft_model`` and ``tokenizer``.

   Args:
      user_input: Question text.
      sys_prompt: Optional instruction text forwarded to ``encode_user_input``.
      max_new_tokens: Generation length limit forwarded to ``decode_model_output``.

   Returns:
      The decoded generation with every occurrence of the prompt text removed.
   """
   inputs, prompt_text = encode_user_input(
      model=peft_model,
      tokenizer=tokenizer,
      user_input=user_input,
      sys_prompt=sys_prompt,
   )
   decoded = decode_model_output(
      model=peft_model,
      tokenizer=tokenizer,
      model_inputs=inputs,
      max_new_tokens=max_new_tokens,
   )
   return decoded.replace(prompt_text, "")


In [59]:
from IPython.display import display, Markdown, Latex

def ask2(user_input):
   """Ask the model a question and render the cleaned reply as Markdown.

   Args:
      user_input: Question text forwarded to ``ask``.
   """
   def remove_bos_eos(answer):
      # Strip the markers only when they are present. Slicing fixed offsets
      # (answer[3:-4]) cut real characters whenever the text did not start with
      # "<s>" or did not end with "</s>", e.g. when generation stopped at
      # max_new_tokens before emitting the end marker.
      bos_token, eos_token = "<s>", "</s>"
      if answer.startswith(bos_token):
         answer = answer[len(bos_token):]
      if answer.endswith(eos_token):
         answer = answer[:-len(eos_token)]
      return answer.lstrip()

   answer = ask(user_input)
   answer2 = remove_bos_eos(answer)
   display(Markdown(answer2))

In [60]:
# Query the fine-tuned model and render its answer as Markdown.
ask2("What is Dragon Bonus in Live Baccarat?")



Dragon bonus is one of the side bet in Live Baccarat game that pays when the chosen hand has a natural win or when the chosen hand wins by at least 4 points difference (non-natural). The highest payout is 30 to 1 which is for non-natural that wins by 9 points difference.

The equation to determine the payout is :
Payout = (Natural Win Payout + Non-Natural Difference Payout) x Multiple

* If the result is a Natural Win, the payout is 1 to 1 (even money).
* If the result is a Natural Tie, the result is a Push (no payout or loss).
* If the result is a Non-Natural Win, the payout is determined based on the difference between the two hands. The higher the difference, the higher the payout.

For any specific game rules please refer to in game help page.

In [61]:
# Second sample question for the fine-tuned model.
ask2("What is Natural Win in Live Baccarat?")