In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig,HfArgumentParser,TrainingArguments,pipeline, logging, TextStreamer
from peft import LoraConfig, PeftModel, prepare_model_for_kbit_training, get_peft_model
import os, torch, wandb, platform, warnings
from datasets import load_dataset
from trl import SFTTrainer
from huggingface_hub import notebook_login
#Use a sharded model to fine-tune in the free version of Google Colab.
base_model = "mistralai/Mistral-7B-v0.1" #bn22/Mistral-7B-Instruct-v0.1-sharded
new_model =  "mistral_v6"

dataset_dir = "/home//Repository/AI_Coach/output_file_mistral.jsonl"
dataset = load_dataset('json', data_files=dataset_dir, split='train')

  from .autonotebook import tqdm as notebook_tqdm
Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 7423.55it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 1611.33it/s]
Generating train split: 67 examples [00:00, 15365.43 examples/s]


In [5]:
# Load base model(Mistral 7B)
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=False,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=bnb_config,
    device_map="auto",
)

model.config.use_cache = False # silence the warnings. Please re-enable for inference!
model.config.pretraining_tp = 1
model.gradient_checkpointing_enable()
# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.add_eos_token = True
tokenizer.add_bos_token, tokenizer.add_eos_token

Downloading (…)of-00002.safetensors:  42%|████▏     | 4.13G/9.94G [19:11<26:59, 3.59MB/s]
Loading checkpoint shards: 100%|██████████| 2/2 [00:07<00:00,  3.51s/it]
Downloading generation_config.json: 100%|██████████| 116/116 [00:00<00:00, 150kB/s]
Downloading tokenizer_config.json: 100%|██████████| 967/967 [00:00<00:00, 1.27MB/s]
Downloading tokenizer.model: 100%|██████████| 493k/493k [00:00<00:00, 4.48MB/s]
Downloading tokenizer.json: 100%|██████████| 1.80M/1.80M [00:00<00:00, 4.20MB/s]
Downloading (…)cial_tokens_map.json: 100%|██████████| 72.0/72.0 [00:00<00:00, 95.6kB/s]


(True, True)

In [6]:
model = prepare_model_for_kbit_training(model)
peft_config = LoraConfig(
        r=16,
        lora_alpha=16,
        lora_dropout=0.05,
        bias="none",
        task_type="CAUSAL_LM",
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj","gate_proj"]
    )
model = get_peft_model(model, peft_config)

In [11]:
# Training Arguments
# Hyperparameters should beadjusted based on the hardware you using
training_arguments = TrainingArguments(
    output_dir= "./results_mistral_v6",
    num_train_epochs= 1,
    per_device_train_batch_size= 2,
    gradient_accumulation_steps= 4,
    optim = "paged_adamw_8bit",
    save_steps=500,
    logging_steps= 500,
    learning_rate= 1e-4,
    weight_decay= 0.001,
    fp16= True,
    bf16= False,
    max_grad_norm= 0.3,
    max_steps= 3000,
    warmup_ratio= 0.3,
    group_by_length= True,
    lr_scheduler_type= "constant"
)
# Setting sft parameters
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    peft_config=peft_config,
    max_seq_length= None,
    dataset_text_field="text",
    tokenizer=tokenizer,
    args=training_arguments,
    packing= False,
)

Map:   0%|          | 0/67 [00:00<?, ? examples/s]

Map: 100%|██████████| 67/67 [00:00<00:00, 3313.15 examples/s]


In [12]:
trainer.train()
# Save the fine-tuned model
trainer.model.save_pretrained(new_model)
model.config.use_cache = True
model.eval()

  0%|          | 0/3000 [00:00<?, ?it/s]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
 17%|█▋        | 500/3000 [24:18<2:05:33,  3.01s/it]

{'loss': 0.1821, 'learning_rate': 0.0001, 'epoch': 58.82}


 33%|███▎      | 1000/3000 [48:25<1:35:42,  2.87s/it]

{'loss': 0.054, 'learning_rate': 0.0001, 'epoch': 117.65}


 50%|█████     | 1500/3000 [1:12:32<1:08:43,  2.75s/it]

{'loss': 0.0499, 'learning_rate': 0.0001, 'epoch': 176.47}


 67%|██████▋   | 2000/3000 [1:36:39<50:56,  3.06s/it]  

{'loss': 0.0496, 'learning_rate': 0.0001, 'epoch': 235.29}


 83%|████████▎ | 2500/3000 [2:00:45<25:10,  3.02s/it]

{'loss': 0.0496, 'learning_rate': 0.0001, 'epoch': 294.12}


100%|██████████| 3000/3000 [2:24:52<00:00,  2.87s/it]

{'loss': 0.0495, 'learning_rate': 0.0001, 'epoch': 352.94}


100%|██████████| 3000/3000 [2:24:52<00:00,  2.90s/it]


{'train_runtime': 8698.6164, 'train_samples_per_second': 2.759, 'train_steps_per_second': 0.345, 'train_loss': 0.07243913714090983, 'epoch': 352.94}


0,1
train/epoch,▁▂▄▅▇██
train/global_step,▁▂▄▅▇██
train/learning_rate,▁▁▁▁▁▁
train/loss,█▁▁▁▁▁
train/total_flos,▁
train/train_loss,▁
train/train_runtime,▁
train/train_samples_per_second,▁
train/train_steps_per_second,▁

0,1
train/epoch,352.94
train/global_step,3000.0
train/learning_rate,0.0001
train/loss,0.0495
train/total_flos,1.1643593624113152e+17
train/train_loss,0.07244
train/train_runtime,8698.6164
train/train_samples_per_second,2.759
train/train_steps_per_second,0.345


PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): MistralForCausalLM(
      (model): MistralModel(
        (embed_tokens): Embedding(32000, 4096)
        (layers): ModuleList(
          (0-31): 32 x MistralDecoderLayer(
            (self_attn): MistralAttention(
              (q_proj): Linear4bit(
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=16, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=16, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (base_layer): Linear4bit(in_features=4096, out_features=4096, bias=False)
              )
              (k_proj): Linear4bit(
                (lora_dropout): ModuleDict(

In [16]:
def stream(user_prompt):
    device = "cuda:0"
    system_prompt = 'The conversation between Human and AI assisatance named  Coach\n'
    B_INST, E_INST = "[INST]", "[/INST]"

    prompt = f"{system_prompt}{B_INST}{user_prompt.strip()}\n{E_INST}"

    inputs = tokenizer([prompt], return_tensors="pt").to(device)

    streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

    _ = model.generate(**inputs, streamer=streamer, max_new_tokens=200)

In [17]:
stream("who are you.")

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


The  Coach is your personal golf improvement assistant, powered by 's expertise in golf training and technology. <<other>>  I can provide you a customized training plan based on your goals and needs. <<other>>  I can analyze your statistics and provide a tailored improvement plan. <<other>>  I can explain the technology behind  and how it can help you. <<other>>  I can assist you in using 's features to enhance your training. <<other>>  I can provide a demo of 's features. <<other>>  I can explain how  collects and uses your data. <<other>>  I can answer any questions you have. <<other>>  I look forward to assisting you. <<other>>  Glad to hear that! <<other>>  No worries. We can start from scratch. <<other>>  Glad to help. <<other>>  Not a


In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer

device = "cuda:0"

#model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1")
#tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1")

messages = [
    {"role": "user", "content": "What is your favourite condiment?"},
    {"role": "assistant", "content": "Well, I'm quite partial to a good squeeze of fresh lemon juice. It adds just the right amount of zesty flavour to whatever I'm cooking up in the kitchen!"},
    {"role": "user", "content": "Do you have mayonnaise recipes?"}
]

encodeds = tokenizer.apply_chat_template(messages, return_tensors="pt")

model_inputs = encodeds.to(device)
model.to(device)

generated_ids = model.generate(model_inputs, max_new_tokens=200, do_sample=True)
decoded = tokenizer.batch_decode(generated_ids)
print(decoded[0])

  from .autonotebook import tqdm as notebook_tqdm


NameError: name 'tokenizer' is not defined