In [33]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch

In [34]:
model_id = "../models/Mixtral-8x7B-Instruct-v0.1"
tokenizer = AutoTokenizer.from_pretrained(model_id)

In [35]:
bnb_config = BitsAndBytesConfig(
      load_in_4bit=True,
      bnb_4bit_use_double_quant=True,
      bnb_4bit_quant_type="nf4",
      bnb_4bit_compute_dtype=torch.bfloat16
   )

In [36]:
model = AutoModelForCausalLM.from_pretrained(
      model_id,
      quantization_config=bnb_config,
      device_map={"": 0},
      local_files_only=True,
      attn_implementation="flash_attention_2"
   )

Loading checkpoint shards:   0%|          | 0/19 [00:00<?, ?it/s]

In [29]:
generation_config = model.generation_config
generation_config.max_new_tokens = 1024
generation_config.temperature = 0.7  # 0.7
generation_config.do_sample = True
generation_config.top_p = 1
generation_config.top_k = 50
generation_config.num_return_sequences = 1
generation_config.pad_token_id = tokenizer.eos_token_id
generation_config.eos_token_id = tokenizer.eos_token_id


In [30]:
text = "<s>[INST] What is your name? [/INST]"

encoding = tokenizer(text, return_tensors="pt").to(model.device)

outputs = model.generate(
   input_ids=encoding.input_ids,
   attention_mask=encoding.attention_mask,
   generation_config=generation_config
)
resp = tokenizer.decode(outputs[0], skip_special_tokens=True)

print(resp)

RuntimeError: probability tensor contains either `inf`, `nan` or element < 0

In [66]:
text = "<s>### Instruction:\n What is your name?\n\n### Response:"
inputs = tokenizer(text, return_tensors="pt")

outputs = model.generate(**inputs, max_new_tokens=80)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


### Instruction:
 What is your name?

### Response:


In [38]:
tokenizer.add_special_tokens({'pad_token': '[PAD]'})
def create_prompt(sample):
   bos_token = "<s>"
   original_system_message = "Below is an instruction that describes a task. Write a response that appropriately completes the request."
   system_message = "Use the provided input to create an instruction that could have been used to generate the input using an LLM."
   eos_token = "</s>"
   input = sample["input"]
   response = sample["response"]
   
   full_prompt = ""
   full_prompt += bos_token
   full_prompt += "### Instruction:"
   full_prompt += "\n" + system_message
   full_prompt += "\n\n### Input:"
   full_prompt += "\n" + input
   full_prompt += "\n\n### Response:"
   full_prompt += "\n" + response
   full_prompt += eos_token
   return full_prompt


1

In [42]:
from peft import AutoPeftModelForCausalLM, LoraConfig, get_peft_model, prepare_model_for_kbit_training
peft_config = LoraConfig(
   lora_alpha=16,
   lora_dropout=0.1,
   r=64,
   bias="none",
   target_modules=["q_proj", "v_proj", "k_proj", "o_proj"],
   task_type="CAUSAL_LM"
)

In [43]:
peft_model = prepare_model_for_kbit_training(model)
peft_model = get_peft_model(peft_model, peft_config)


In [59]:
from transformers import TrainingArguments
training_params = TrainingArguments(
   output_dir="./results/mistral8x7b",
   num_train_epochs=2,
   #max_steps=100,
   per_device_train_batch_size=1,
   gradient_accumulation_steps=32,
   optim="paged_adamw_32bit",        
   save_steps=10,
   warmup_steps=0.03,
   logging_steps=10,
   save_strategy="epoch",
   #eval_steps=20,
   learning_rate=1e-4,
   bf16=True,
   lr_scheduler_type='constant'
)

In [60]:
# pip install trl
from datasets import load_dataset

def load_train_csv_file(csv_file: str):
    # df = CustomDataset(csv_files)
    df = load_dataset('csv', data_files=csv_file, split="train", cache_dir='data_cache')
    # df = DataLoader(df, batch_size=2, shuffle=True)
    return df

train_data = load_train_csv_file("./results/qa.csv")


In [63]:
from trl import SFTTrainer

trainer = SFTTrainer(
      model=model,
      train_dataset=train_data,
      # formatting_func=formatting_prompts_func,
      peft_config=peft_config,
      dataset_text_field="text",
      max_seq_length=1024 * 4,
      tokenizer=tokenizer,
      args=training_params,
      packing=False,
   )


Map:   0%|          | 0/33 [00:00<?, ? examples/s]



In [64]:
#trainer.train(resume_from_checkpoint=resume_from_checkpoint)
trainer.train()




Step,Training Loss




TrainOutput(global_step=2, training_loss=inf, metrics={'train_runtime': 77.7993, 'train_samples_per_second': 0.848, 'train_steps_per_second': 0.026, 'total_flos': 1396269582360576.0, 'train_loss': inf, 'epoch': 1.94})

In [65]:
output_name = "./results/mistral8x7b"
trainer.model.save_pretrained(output_name)

In [67]:
text = "<s>[INST] What advertisements does SBOTOP provide? [/INST]"
inputs = tokenizer(text, return_tensors="pt")

outputs = peft_model.generate(**inputs, max_new_tokens=80)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
