In [1]:
! pip install transformers trl accelerate torch bitsandbytes peft datasets huggingface_hub -qU

In [3]:
from datasets import load_dataset
from huggingface_hub import notebook_login
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TrainingArguments
import torch
from peft import AutoPeftModelForCausalLM, LoraConfig, get_peft_model, prepare_model_for_kbit_training
from trl import SFTTrainer
import os, torch, wandb
from transformers import HfArgumentParser, pipeline, logging

In [4]:
# Authenticate with the Hugging Face Hub through the interactive notebook widget.
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [4]:
# Load the "mosaicml/instruct-v3" dataset; later cells index its "train" and "test" splits.
instruct_tune_dataset = load_dataset("mosaicml/instruct-v3")

In [5]:
# Keep only the rows whose "source" column equals "dolly_hhrlhf".
instruct_tune_dataset = instruct_tune_dataset.filter(lambda x: x["source"] == "dolly_hhrlhf")

In [6]:
# Cut every split down to a fixed-size prefix (first 800 train rows, first
# 200 test rows) so that training and evaluation stay short.
for split_name, n_rows in (("train", 800), ("test", 200)):
  instruct_tune_dataset[split_name] = instruct_tune_dataset[split_name].select(range(n_rows))

In [7]:
def create_prompt(sample):
  """Render one dataset row as a single training string.

  The row's "prompt" field is stripped of the original system message and of
  the "### Instruction" / "### Response" scaffolding, then re-wrapped in this
  notebook's own template together with the row's "response".

  Args:
    sample: Mapping with string values under the keys "prompt" and "response".

  Returns:
    "<s>### Instruction:\\n<system message>\\n\\n### Input:\\n<question>"
    "\\n\\n### Response:\\n<response></s>" as one string.
  """
  # Pieces of the source template that must not leak into the new prompt;
  # they are removed in this exact order.
  scaffolding = (
    "Below is an instruction that describes a task. Write a response that appropriately completes the request.",
    "\n\n### Instruction\n",
    "\n### Response\n",
  )
  question = sample["prompt"]
  for fragment in scaffolding:
    question = question.replace(fragment, "")
  question = question.strip()

  answer = sample["response"]

  sections = (
    "<s>",
    "### Instruction:",
    "\n" + "Answer the question.",
    "\n\n### Input:",
    "\n" + question,
    "\n\n### Response:",
    "\n" + answer,
    "</s>",
  )
  return "".join(sections)

In [None]:
# bitsandbytes quantization settings used when the model is loaded:
# 4-bit weights in the "nf4" format, double quantization enabled, and
# bfloat16 as the compute dtype.
nf4_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
)

In [10]:
# Load the Mistral-7B instruct checkpoint with the 4-bit quantization config
# defined above. `device_map='auto'` leaves device placement to the library.
# `use_cache=False` is passed for the training phase; a later cell sets
# `model.config.use_cache = True` again before inference.
model = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mistral-7B-Instruct-v0.1",
    device_map='auto',
    quantization_config=nf4_config,
    use_cache=False
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
# Load the tokenizer from the same checkpoint as the model so that the two
# always match (the model above is "mistralai/Mistral-7B-Instruct-v0.1").
tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1")
# Reuse the end-of-sequence token for padding and pad on the right.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

In [11]:
def generate_response(prompt, model):
  """Sample a completion for `prompt` and return only the newly generated text.

  Uses the notebook-level `tokenizer` for encoding and decoding.

  Args:
    prompt: Text to feed to the model.
    model: Causal language model exposing `generate` and `device`.

  Returns:
    The decoded continuation, without the prompt and without special tokens.
  """
  encoded_input = tokenizer(prompt, return_tensors="pt", add_special_tokens=True)
  # Follow the model's own device instead of hard-coding 'cuda'.
  model_inputs = encoded_input.to(model.device)

  generated_ids = model.generate(**model_inputs, max_new_tokens=1000, do_sample=True, pad_token_id=tokenizer.eos_token_id)

  # The returned sequence starts with the prompt tokens. Dropping them by
  # position is reliable, whereas removing the prompt with str.replace on the
  # decoded text fails whenever decoding does not reproduce the prompt
  # character-for-character (e.g. because of the added special tokens).
  prompt_length = model_inputs["input_ids"].shape[1]
  new_token_ids = generated_ids[0][prompt_length:]

  return tokenizer.decode(new_token_ids, skip_special_tokens=True)

In [14]:
# LoRA adapter settings for causal-LM fine-tuning: rank 64, alpha 16,
# 10% dropout on the adapter path, and no bias parameters trained.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=64,
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
)

In [16]:
# Prepare the quantized model for k-bit training, then attach the LoRA
# adapters described by `peft_config`.
# Caution: this rebinds `model`, so running the cell a second time would
# apply both steps to the already-wrapped model.
model = get_peft_model(
    prepare_model_for_kbit_training(model),
    peft_config,
)

In [15]:
# Hyper-parameters for a short fine-tuning run: 10 optimizer steps at a
# constant learning rate, logging and evaluating every 2 steps, bf16 enabled.
args = TrainingArguments(
  output_dir = "mistral_instruct_generation",
  #num_train_epochs=5,
  max_steps = 10, # comment out this line if you want to train in epochs
  per_device_train_batch_size = 1,
  # Evaluate one sequence at a time as well. Without this the evaluation
  # batch size falls back to the library default, which is larger than the
  # training batch and adds memory pressure on a GPU that has already run
  # out of memory during `trainer.train()`.
  # NOTE(review): the recorded traceback does not show whether the OOM hit
  # during training or evaluation — confirm this is sufficient.
  per_device_eval_batch_size = 1,
  warmup_steps = 0,
  logging_dir='./logs',
  logging_steps=2,
  save_strategy="epoch",
  #evaluation_strategy="epoch",
  # NOTE(review): newer transformers releases name this argument
  # `eval_strategy`; switch if TrainingArguments rejects this keyword.
  evaluation_strategy="steps",
  eval_steps=2, # comment out this line if you want to evaluate at the end of each epoch
  learning_rate=2e-4,
  bf16=True,
  lr_scheduler_type='constant',
)



In [16]:
# Supervised fine-tuning trainer. Each dataset row is turned into text by
# `create_prompt`, and the texts are packed into sequences of up to 256 tokens.
trainer = SFTTrainer(
    # Model side.
    model=model,
    tokenizer=tokenizer,
    # NOTE(review): `model` has already been passed through get_peft_model in
    # an earlier cell; confirm whether handing over `peft_config` again is needed.
    peft_config=peft_config,
    # Data side.
    train_dataset=instruct_tune_dataset["train"],
    eval_dataset=instruct_tune_dataset["test"],
    formatting_func=create_prompt,
    packing=True,
    max_seq_length=256,
    # Optimisation / logging schedule.
    args=args,
)


Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.
max_steps is given, it will override any value given in num_train_epochs


In [17]:
# Run the fine-tuning loop configured above.
trainer.train()

OutOfMemoryError: CUDA out of memory. Tried to allocate 112.00 MiB. GPU 

In [None]:
# Output directory (relative to the notebook's working directory) for the fine-tuned weights.
new_model = "../models/Mistral-7b-v2-finetune-TEST"

In [None]:
# Write the trainer's model to the `new_model` directory.
trainer.model.save_pretrained(new_model)

In [None]:
#import os; os.getcwd()

In [None]:
#!pip install wandb

In [29]:
#!pip uninstall wandb

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Found existing installation: wandb 0.17.4
Uninstalling wandb-0.17.4:
  Would remove:
    /home/catB/kl279585/envs/stage-cea-chatbot/bin/wandb
    /home/catB/kl279585/envs/stage-cea-chatbot/bin/wb
    /home/catB/kl279585/envs/stage-cea-chatbot/lib/python3.10/site-packages/package_readme.md
    /home/catB/kl279585/envs/stage-cea-chatbot/lib/python3.10/site-packages/wandb-0.17.4.dist-info/*
    /home/catB/kl279585/envs/stage-cea-chatbot/lib/python3.10/site-packages/wandb/*
Proceed (Y/n)? ^C
[31mERROR: Operation cancelled by user[0m[31m
[0m

In [None]:
# The model was loaded with use_cache=False for training; turn the cache back on for generation.
model.config.use_cache = True

In [None]:
# Switch the model to evaluation mode before running inference.
model.eval()

In [None]:
# Build a text-generation pipeline around the fine-tuned model and ask a first question.
prompt = "Can I find information about the code's approach to handling long-running tasks and background jobs?"
# NOTE(review): in transformers, `max_length` is understood to cap prompt plus
# generated tokens together — confirm that 50 leaves room for an answer (the
# baseline pipeline further down uses 400).
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=50)
# Wrap the question in the [INST] ... [/INST] instruction markers.
result = pipe(f"<s>[INST] {prompt} [/INST]")
print(result[0]['generated_text'])

In [None]:
# Second question for the fine-tuned model; reuses `pipe` from the previous cell.
prompt = "Can I find information about SALOME?"
result = pipe(f"<s>[INST] {prompt} [/INST]")
print(result[0]['generated_text'])

In [None]:
# Load a second, un-fine-tuned copy of the instruct model (`modelb`) together
# with its own tokenizer (`tokenizerb`) to compare against the fine-tuned one.
nf4_config = BitsAndBytesConfig(
   load_in_4bit=True,
   bnb_4bit_quant_type="nf4",
   bnb_4bit_use_double_quant=True,
   bnb_4bit_compute_dtype=torch.bfloat16
)

modelb = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mistral-7B-Instruct-v0.1",
    device_map='auto',
    quantization_config=nf4_config,
    use_cache=False
)

tokenizerb = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1")

# Take the pad token from this tokenizer itself rather than from the other
# `tokenizer` object, so the cell does not depend on the earlier tokenizer cell.
tokenizerb.pad_token = tokenizerb.eos_token
tokenizerb.padding_side = "right"

In [None]:
# Text-generation pipeline around the baseline model/tokenizer pair (`modelb`, `tokenizerb`).
pipeb = pipeline(task="text-generation", model=modelb, tokenizer=tokenizerb, max_length=400)

In [None]:
# Ask the baseline pipeline the same SALOME question that was put to the fine-tuned model.
prompt = "Can I find information about SALOME?"
result = pipeb(f"<s>[INST] {prompt} [/INST]")
print(result[0]['generated_text'])

In [None]:
# Show the first entry of the last pipeline result.
result[0]

In [None]:
# Show the complete result object returned by the last pipeline call.
result

In [None]:
# Two prompts sent to the pipeline in one call. Each instruction is opened
# with [INST] and closed with [/INST], matching the format of the
# single-prompt cells in this notebook.
prompt = [
    "<s>[INST] Can I find information about SALOME? [/INST]",
    "<s>[INST] Can I find information about CEA? [/INST]",
]

In [None]:
# Run the baseline pipeline on the whole list of prompts in one call.
result = pipeb(prompt)

In [None]:
# Show the results of the batched pipeline call.
result