In [None]:
!pip install -q -U bitsandbytes
!pip install -q -U git+https://github.com/huggingface/transformers.git
!pip install -q -U git+https://github.com/huggingface/peft.git
!pip install -q -U git+https://github.com/huggingface/accelerate.git
!pip install -q -U datasets scipy ipywidgets matplotlib
!pip install trl
!pip install datasets

In [None]:
from datasets import load_dataset
import torch
from transformers import BitsAndBytesConfig
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import LoraConfig
from peft import prepare_model_for_kbit_training, get_peft_model
from trl import SFTTrainer
from transformers import TrainingArguments
# Ask PyTorch to release unused cached GPU memory before anything is loaded.
torch.cuda.empty_cache()

# Load the local JSON-lines file as a single 'train' split.
# Despite its name, dataset_dir holds the path of a file, not of a directory.
dataset_dir = "./Data_Finetune_Mistral.jsonl"
dataset = load_dataset('json', data_files=dataset_dir, split='train')

def format_instruction(sample):
    """Build the prompt text for one example.

    Args:
        sample: Either a dataset row (anything indexable by "instruction")
            or the instruction text itself as a plain string.

    Returns:
        The fixed assistant preamble, the instruction, and a trailing
        "### Response:" marker, as one string.

    NOTE(review): only the instruction is read from ``sample``; the returned
    text stops at "### Response:" and no answer is appended. Confirm that this
    is the intended training text when the function is handed to the trainer.
    """
    # A bare string is accepted as well as a row, so this helper stays usable
    # for inference prompts and agrees with the later redefinition of the
    # same name in this notebook.
    instruction = sample if isinstance(sample, str) else sample["instruction"]
    return f"""You are a personal assistant . Help users to learn  fundamentals and techniques. Recommend ideal  exercises to help users to improve their  skills.
        ### Instruction:{instruction} ### Response:"""
# Show the record at index 10 as the cell output (fails if the file has fewer than 11 rows).
dataset[10]

In [None]:
# Hub id of the checkpoint that gets fine-tuned.
model_id = "mistralai/Mistral-7B-v0.1"

# Tokenizer first: the end-of-sequence token doubles as the padding token
# for samples shorter than the sequence length.
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.pad_token = tokenizer.eos_token

# Base model without any quantization or dtype override; the cache flag is
# switched off and device placement is left to device_map="auto".
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", use_cache=False)
model.config.pretraining_tp = 1

In [12]:
# LoRA adapter settings (the author cites the QLoRA paper as the basis).
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=32,
    lora_alpha=64,
    lora_dropout=0.05,
    bias="none",
    # Names of the modules that receive an adapter.
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
        "lm_head",
    ],
)

In [5]:
# The base model is loaded without quantization, so prepare_model_for_kbit_training
# is not needed here.
# Gradient checkpointing is switched on in the TrainingArguments further down while
# the base weights stay frozen. Make the embedding outputs require grad so that the
# checkpointed blocks still propagate gradients to the adapter weights inside them.
model.enable_input_require_grads()
# NOTE: re-running this cell wraps an already wrapped model again; reload the
# base model first if the cell has to be executed a second time.
model = get_peft_model(model, peft_config)

In [None]:
# --- Training ---
# Single source of truth for the directory that receives the checkpoints and
# the final adapter.
# NOTE(review): the name says "full_precision", but bf16=True below turns on
# bfloat16 mixed-precision training - confirm the naming is still intended.
new_model = "mistral-7b-golf-assistant_full_precision"
model_args = TrainingArguments(
    output_dir=new_model,
    num_train_epochs=4,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=2,  # 4 x 2 = 8 sequences per device per optimizer step
    gradient_checkpointing=True,
    optim="paged_adamw_32bit",
    logging_steps=10,
    save_strategy="epoch",
    learning_rate=1e-4,
    bf16=True,  # bfloat16 mixed precision
    tf32=True,
    max_grad_norm=0.3,
    # NOTE(review): the plain "constant" schedule applies no warmup, so
    # warmup_ratio has no effect as written; "constant_with_warmup" is the
    # variant that honours it. Left unchanged - confirm which one is intended.
    warmup_ratio=0.03,
    lr_scheduler_type="constant",
    disable_tqdm=False,
)

# Supervised fine-tuning trainer. `model` has already been wrapped by
# get_peft_model in the previous cell, so the LoRA config is not passed a
# second time here.
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    max_seq_length=1024,
    tokenizer=tokenizer,
    packing=True,  # several formatted samples are concatenated into each sequence
    formatting_func=format_instruction,
    args=model_args,
)

# Train, then write the adapter to the directory named by new_model.
trainer.train()
trainer.model.save_pretrained(new_model)

In [8]:

# Save the trained adapter again, reusing new_model from the training cell
# instead of repeating the directory name as a second literal.
trainer.model.save_pretrained(new_model)

In [None]:
# Turn the use_cache flag back on (the model was loaded with use_cache=False)
# and switch the model to evaluation mode.
model.config.use_cache = True
model.eval()

In [None]:
from peft import LoraConfig, PeftModel

# Ask PyTorch to release unused cached GPU memory before the base weights are
# loaded again.
torch.cuda.empty_cache()
base_model = "mistralai/Mistral-7B-v0.1"

# Fresh copy of the base checkpoint that the adapter gets merged into.
base_model_reload = AutoModelForCausalLM.from_pretrained(
    base_model,
    use_cache=False,
    device_map="auto",
)

# Attach the saved LoRA adapter, then fold its weights into the base model and
# drop the PEFT wrapper.
model = PeftModel.from_pretrained(base_model_reload, "mistral-7b-golf-assistant_full_precision")
model = model.merge_and_unload()

# Reload the tokenizer. trust_remote_code is not passed: the same tokenizer is
# loaded without it elsewhere in this notebook, and the flag would allow code
# shipped in the model repository to run locally.
tokenizer = AutoTokenizer.from_pretrained(base_model)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

In [None]:
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer

# Directory written by trainer.model.save_pretrained(...) after training.
adapters_name = "mistral-7b-golf-assistant_full_precision"
device = "cuda" if torch.cuda.is_available() else "cpu"

# AutoPeftModelForCausalLM must be pointed at the *adapter* directory: it reads
# the base model id from the adapter config, loads that base model and attaches
# the adapter in one step. Passing the base model id here (as before) fails
# because that repository contains no adapter config, and applying the adapter
# a second time with PeftModel.from_pretrained is no longer needed.
model = AutoPeftModelForCausalLM.from_pretrained(
    adapters_name,
    low_cpu_mem_usage=True,
    # Half precision only on GPU; fall back to float32 when running on CPU.
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
)
# The prompts are moved to `device` at inference time, so the model has to
# live on the same device.
model = model.to(device)
model.eval()

# One tokenizer load is enough; EOS doubles as the padding token.
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

def format_instruction(sample):
    """Build the prompt text for one instruction.

    This redefines the helper of the same name declared earlier in the
    notebook. To keep both definitions interchangeable, a dataset row is
    accepted as well as a plain string.

    Args:
        sample: The instruction text, or a mapping holding it under the
            "instruction" key. Any other object is formatted as-is.

    Returns:
        The fixed assistant preamble, the instruction, and a trailing
        "### Response:" marker, as one string.
    """
    from collections.abc import Mapping

    # Rows carry the text under "instruction"; everything else is used directly.
    instruction = sample["instruction"] if isinstance(sample, Mapping) else sample
    return f"""You are a personal assistant . Help users to learn  fundamentals and techniques. Recommend ideal  exercises to help users to improve their  skills.
        ### Instruction:{instruction} ### Response:"""


def call_inference(sample):
    """Generate the model's answer to one instruction and print it.

    Relies on the notebook-level ``tokenizer``, ``model``, ``device`` and
    ``format_instruction``.

    Args:
        sample: Instruction text entered by the user.

    Returns:
        None. The instruction and the generated text are printed; when nothing
        usable was generated, a request to rephrase is printed instead.
    """
    prompt = format_instruction(sample)
    encoded = tokenizer(prompt, return_tensors="pt", truncation=True).to(device)
    # generate() returns the prompt tokens followed by the continuation, so
    # everything past this offset is newly generated text.
    prompt_len = encoded["input_ids"].shape[1]

    with torch.inference_mode():
        generated = model.generate(
            **encoded,
            max_new_tokens=200,
            do_sample=True,
            top_p=0.9,
            temperature=0.6,
        )

    # Strip the prompt at token level. Cutting the decoded string at
    # len(prompt) is only right when decoding reproduces the prompt character
    # for character, which the tokenizer does not guarantee.
    output = tokenizer.batch_decode(
        generated[:, prompt_len:], skip_special_tokens=True
    )[0].strip()

    # The model may continue with an invented follow-up turn; keep only the
    # text before the next instruction marker.
    if "### Instruction:" in output:
        output = output.split("### Instruction:")[0].strip()

    if output:
        print(f"Instruction: \n{sample}\n\n")
        print(f"Generated output: \n{output}\n\n\n")
    else:
        print("I am sorry, I did not understand that. Could you please rephrase your question?")

# Interactive loop: read one instruction per iteration and answer it.
# Type "exit" or "quit" (or interrupt the kernel / send EOF) to leave the loop;
# blank input is ignored instead of being sent to the model.
while True:
    try:
        prompt = input("User: ")
    except (EOFError, KeyboardInterrupt):
        break

    command = prompt.strip().lower()
    if command in {"exit", "quit"}:
        break
    if not command:
        continue

    call_inference(prompt)