In [1]:
import torch
torch.cuda.is_available()

True

In [2]:
torch.__version__

'2.2.0+cu121'

In [3]:
import os
os.environ["CUDA_VISIBLE_DEVICES"]="0"
import torch
import torch.nn as nn
from transformers import AutoTokenizer, AutoConfig, AutoModelForCausalLM, BitsAndBytesConfig

model_id = "mistralai/Mistral-7B-Instruct-v0.2"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

base_model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    use_cache=False,
    device_map="auto"
)

  from .autonotebook import tqdm as notebook_tqdm
Loading checkpoint shards: 100%|██████████| 3/3 [01:53<00:00, 37.76s/it]


In [4]:
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

In [5]:
print(base_model)

MistralForCausalLM(
  (model): MistralModel(
    (embed_tokens): Embedding(32000, 4096)
    (layers): ModuleList(
      (0-31): 32 x MistralDecoderLayer(
        (self_attn): MistralSdpaAttention(
          (q_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
          (v_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
          (o_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (rotary_emb): MistralRotaryEmbedding()
        )
        (mlp): MistralMLP(
          (gate_proj): Linear4bit(in_features=4096, out_features=14336, bias=False)
          (up_proj): Linear4bit(in_features=4096, out_features=14336, bias=False)
          (down_proj): Linear4bit(in_features=14336, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): MistralRMSNorm()
        (post_attention_layernorm): MistralRMSNorm()
      )
    )

In [7]:
import transformers
from datasets import load_dataset

dataset_name = "c-s-ale/alpaca-gpt4-data"

In [8]:
dataset = load_dataset(dataset_name)
print(dataset)

DatasetDict({
    train: Dataset({
        features: ['instruction', 'input', 'output'],
        num_rows: 52002
    })
})


In [9]:
print(dataset['train'][0])

{'instruction': 'Give three tips for staying healthy.', 'input': '', 'output': '1. Eat a balanced and nutritious diet: Make sure your meals are inclusive of a variety of fruits and vegetables, lean protein, whole grains, and healthy fats. This helps to provide your body with the essential nutrients to function at its best and can help prevent chronic diseases.\n\n2. Engage in regular physical activity: Exercise is crucial for maintaining strong bones, muscles, and cardiovascular health. Aim for at least 150 minutes of moderate aerobic exercise or 75 minutes of vigorous exercise each week.\n\n3. Get enough sleep: Getting enough quality sleep is crucial for physical and mental well-being. It helps to regulate mood, improve cognitive function, and supports healthy growth and immune function. Aim for 7-9 hours of sleep each night.'}


In [10]:
dataset_subset = dataset["train"].select(range(5_000))

In [11]:
def generate_prompt(example, return_response=True) -> str:
  full_prompt = f"Generate a simple instruction that could result in the provided context."
  full_prompt += f"[INST]CONTEXT: {example['output']}[/INST]"

  if return_response:
    full_prompt += f"INSTRUCTION: "
    full_prompt += f"{example['instruction']}"
  return [full_prompt]

In [12]:
generate_prompt(dataset_subset[0])[0]

'Generate a simple instruction that could result in the provided context.[INST]CONTEXT: 1. Eat a balanced and nutritious diet: Make sure your meals are inclusive of a variety of fruits and vegetables, lean protein, whole grains, and healthy fats. This helps to provide your body with the essential nutrients to function at its best and can help prevent chronic diseases.\n\n2. Engage in regular physical activity: Exercise is crucial for maintaining strong bones, muscles, and cardiovascular health. Aim for at least 150 minutes of moderate aerobic exercise or 75 minutes of vigorous exercise each week.\n\n3. Get enough sleep: Getting enough quality sleep is crucial for physical and mental well-being. It helps to regulate mood, improve cognitive function, and supports healthy growth and immune function. Aim for 7-9 hours of sleep each night.[/INST]INSTRUCTION: Give three tips for staying healthy.'

In [13]:
def print_trainable_parameters(model):
    """
    Prints the number of trainable parameters in the model.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        all_param += param.numel()
        if param.requires_grad:
            trainable_params += param.numel()
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}"
    )

In [None]:
# required for prepare_model_for_kbit_training
# !pip install -I git+https://github.com/huggingface/peft.git

In [14]:
import peft

In [15]:
peft.prepare_model_for_int8_training

<function peft.utils.other.prepare_model_for_int8_training(*args, **kwargs)>

In [16]:
peft.prepare_model_for_kbit_training

<function peft.utils.other.prepare_model_for_kbit_training(model, use_gradient_checkpointing=True, gradient_checkpointing_kwargs=None)>

In [17]:
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

lora_config = LoraConfig(
    r=64,
    lora_alpha=128,
    #target_modules=["q_proj", "v_proj", "k_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

base_model = prepare_model_for_kbit_training(base_model)
model = get_peft_model(base_model, lora_config)
print_trainable_parameters(model)

trainable params: 27262976 || all params: 3779334144 || trainable%: 0.7213698223345028


In [18]:
print(model)

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): MistralForCausalLM(
      (model): MistralModel(
        (embed_tokens): Embedding(32000, 4096)
        (layers): ModuleList(
          (0-31): 32 x MistralDecoderLayer(
            (self_attn): MistralSdpaAttention(
              (q_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=4096, out_features=4096, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=64, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=64, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
              )
              (k_proj): Linear4bit(in_features=4096, out_features=1024

In [19]:
import transformers
from datasets import load_dataset

dataset_name = "c-s-ale/alpaca-gpt4-data"

In [20]:
dataset = load_dataset(dataset_name)
print(dataset)

DatasetDict({
    train: Dataset({
        features: ['instruction', 'input', 'output'],
        num_rows: 52002
    })
})


In [21]:
print(dataset['train'][0])

{'instruction': 'Give three tips for staying healthy.', 'input': '', 'output': '1. Eat a balanced and nutritious diet: Make sure your meals are inclusive of a variety of fruits and vegetables, lean protein, whole grains, and healthy fats. This helps to provide your body with the essential nutrients to function at its best and can help prevent chronic diseases.\n\n2. Engage in regular physical activity: Exercise is crucial for maintaining strong bones, muscles, and cardiovascular health. Aim for at least 150 minutes of moderate aerobic exercise or 75 minutes of vigorous exercise each week.\n\n3. Get enough sleep: Getting enough quality sleep is crucial for physical and mental well-being. It helps to regulate mood, improve cognitive function, and supports healthy growth and immune function. Aim for 7-9 hours of sleep each night.'}


In [22]:
dataset_subset = dataset["train"].select(range(5_000))
len(dataset_subset)

5000

In [23]:
def generate_prompt(example, return_response=True) -> str:
  full_prompt = f"Generate a simple instruction that could result in the provided context."
  full_prompt += f"[INST]CONTEXT: {example['output']}[/INST]"

  if return_response:
    full_prompt += f"INSTRUCTION: "
    full_prompt += f"{example['instruction']}"
  return [full_prompt]

In [24]:
print(generate_prompt(dataset_subset[0])[0])

Generate a simple instruction that could result in the provided context.[INST]CONTEXT: 1. Eat a balanced and nutritious diet: Make sure your meals are inclusive of a variety of fruits and vegetables, lean protein, whole grains, and healthy fats. This helps to provide your body with the essential nutrients to function at its best and can help prevent chronic diseases.

2. Engage in regular physical activity: Exercise is crucial for maintaining strong bones, muscles, and cardiovascular health. Aim for at least 150 minutes of moderate aerobic exercise or 75 minutes of vigorous exercise each week.

3. Get enough sleep: Getting enough quality sleep is crucial for physical and mental well-being. It helps to regulate mood, improve cognitive function, and supports healthy growth and immune function. Aim for 7-9 hours of sleep each night.[/INST]INSTRUCTION: Give three tips for staying healthy.


In [25]:
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="mistral-7b-instruct",
    num_train_epochs=100,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=2,
    gradient_checkpointing=True,
    optim="paged_adamw_32bit", # from the QLoRA paper
    logging_steps=1,
    save_strategy="epoch",
    learning_rate=2e-4,
    # bf16=True, # ensure proper upcasting for compute dtypes
    # tf32=True,
    max_grad_norm=0.3,
    warmup_ratio=0.03,
    lr_scheduler_type="constant",
    disable_tqdm=True
)

In [26]:
# may be required if model runs out of CUDA memory
# import os
# os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:128' 

In [28]:
from trl import SFTTrainer

max_seq_length = 2048
# max_seq_length = 128

trainer = SFTTrainer(
    model=model,
    train_dataset=dataset_subset,
    peft_config=lora_config,
    max_seq_length=max_seq_length,
    tokenizer=tokenizer,
    formatting_func=generate_prompt,
    args=training_args,
)

Map: 100%|██████████| 5000/5000 [00:02<00:00, 1743.19 examples/s]


In [29]:
trainer.train()



{'loss': 1.1673, 'grad_norm': 2.7561228275299072, 'learning_rate': 0.0002, 'epoch': 1.0}




{'loss': 1.0234, 'grad_norm': 1.4553483724594116, 'learning_rate': 0.0002, 'epoch': 2.0}




{'loss': 0.8891, 'grad_norm': 0.8315515518188477, 'learning_rate': 0.0002, 'epoch': 3.0}




{'loss': 0.7686, 'grad_norm': 0.7773080468177795, 'learning_rate': 0.0002, 'epoch': 4.0}




{'loss': 0.7071, 'grad_norm': 0.7303310036659241, 'learning_rate': 0.0002, 'epoch': 5.0}




{'loss': 0.5717, 'grad_norm': 0.9104868173599243, 'learning_rate': 0.0002, 'epoch': 6.0}




{'loss': 0.5062, 'grad_norm': 1.089448094367981, 'learning_rate': 0.0002, 'epoch': 7.0}




{'loss': 0.5191, 'grad_norm': 1.1669719219207764, 'learning_rate': 0.0002, 'epoch': 8.0}




{'loss': 0.4435, 'grad_norm': 1.1837432384490967, 'learning_rate': 0.0002, 'epoch': 9.0}




{'loss': 0.2609, 'grad_norm': 1.0310735702514648, 'learning_rate': 0.0002, 'epoch': 10.0}




{'loss': 0.2744, 'grad_norm': 1.1194021701812744, 'learning_rate': 0.0002, 'epoch': 11.0}




{'loss': 0.174, 'grad_norm': 1.2862173318862915, 'learning_rate': 0.0002, 'epoch': 12.0}




{'loss': 0.1556, 'grad_norm': 1.1163054704666138, 'learning_rate': 0.0002, 'epoch': 13.0}




{'loss': 0.0934, 'grad_norm': 1.1476316452026367, 'learning_rate': 0.0002, 'epoch': 14.0}




{'loss': 0.0637, 'grad_norm': 0.8459038734436035, 'learning_rate': 0.0002, 'epoch': 15.0}




{'loss': 0.0539, 'grad_norm': 0.9951111078262329, 'learning_rate': 0.0002, 'epoch': 16.0}




{'loss': 0.0409, 'grad_norm': 0.7388918399810791, 'learning_rate': 0.0002, 'epoch': 17.0}




{'loss': 0.0328, 'grad_norm': 0.6347187161445618, 'learning_rate': 0.0002, 'epoch': 18.0}




{'loss': 0.0179, 'grad_norm': 0.5814288258552551, 'learning_rate': 0.0002, 'epoch': 19.0}




{'loss': 0.0144, 'grad_norm': 0.38345232605934143, 'learning_rate': 0.0002, 'epoch': 20.0}




{'loss': 0.0131, 'grad_norm': 0.2730883061885834, 'learning_rate': 0.0002, 'epoch': 21.0}




{'loss': 0.0113, 'grad_norm': 0.2855501174926758, 'learning_rate': 0.0002, 'epoch': 22.0}




{'loss': 0.0139, 'grad_norm': 0.8927024602890015, 'learning_rate': 0.0002, 'epoch': 23.0}




{'loss': 0.0116, 'grad_norm': 0.6429529190063477, 'learning_rate': 0.0002, 'epoch': 24.0}




{'loss': 0.0126, 'grad_norm': 0.4466339945793152, 'learning_rate': 0.0002, 'epoch': 25.0}




{'loss': 0.0104, 'grad_norm': 0.969870388507843, 'learning_rate': 0.0002, 'epoch': 26.0}




{'loss': 0.0106, 'grad_norm': 0.7224491238594055, 'learning_rate': 0.0002, 'epoch': 27.0}




{'loss': 0.0109, 'grad_norm': 0.2958216369152069, 'learning_rate': 0.0002, 'epoch': 28.0}




{'loss': 0.0086, 'grad_norm': 0.1187075525522232, 'learning_rate': 0.0002, 'epoch': 29.0}




{'loss': 0.008, 'grad_norm': 0.07802128046751022, 'learning_rate': 0.0002, 'epoch': 30.0}




{'loss': 0.0083, 'grad_norm': 0.2976166307926178, 'learning_rate': 0.0002, 'epoch': 31.0}




{'loss': 0.0077, 'grad_norm': 0.0977383702993393, 'learning_rate': 0.0002, 'epoch': 32.0}




{'loss': 0.0078, 'grad_norm': 0.13692888617515564, 'learning_rate': 0.0002, 'epoch': 33.0}




{'loss': 0.0067, 'grad_norm': 0.037686944007873535, 'learning_rate': 0.0002, 'epoch': 34.0}




{'loss': 0.0072, 'grad_norm': 0.28589779138565063, 'learning_rate': 0.0002, 'epoch': 35.0}




{'loss': 0.0063, 'grad_norm': 0.03592372313141823, 'learning_rate': 0.0002, 'epoch': 36.0}




{'loss': 0.0067, 'grad_norm': 0.07255834341049194, 'learning_rate': 0.0002, 'epoch': 37.0}




{'loss': 0.0059, 'grad_norm': 0.02471020445227623, 'learning_rate': 0.0002, 'epoch': 38.0}




{'loss': 0.0063, 'grad_norm': 0.1405179649591446, 'learning_rate': 0.0002, 'epoch': 39.0}




{'loss': 0.0062, 'grad_norm': 0.03900843486189842, 'learning_rate': 0.0002, 'epoch': 40.0}




{'loss': 0.0066, 'grad_norm': 0.3426702320575714, 'learning_rate': 0.0002, 'epoch': 41.0}




{'loss': 0.0063, 'grad_norm': 0.06430701166391373, 'learning_rate': 0.0002, 'epoch': 42.0}




{'loss': 0.0062, 'grad_norm': 0.09129438549280167, 'learning_rate': 0.0002, 'epoch': 43.0}




{'loss': 0.006, 'grad_norm': 0.09564924240112305, 'learning_rate': 0.0002, 'epoch': 44.0}




{'loss': 0.0061, 'grad_norm': 0.1271040439605713, 'learning_rate': 0.0002, 'epoch': 45.0}




{'loss': 0.0061, 'grad_norm': 0.12111402302980423, 'learning_rate': 0.0002, 'epoch': 46.0}




{'loss': 0.0065, 'grad_norm': 0.3167111873626709, 'learning_rate': 0.0002, 'epoch': 47.0}




{'loss': 0.0062, 'grad_norm': 0.2000846415758133, 'learning_rate': 0.0002, 'epoch': 48.0}




{'loss': 0.0062, 'grad_norm': 0.12277710437774658, 'learning_rate': 0.0002, 'epoch': 49.0}




{'loss': 0.0068, 'grad_norm': 0.5893953442573547, 'learning_rate': 0.0002, 'epoch': 50.0}




{'loss': 0.0062, 'grad_norm': 0.10922474414110184, 'learning_rate': 0.0002, 'epoch': 51.0}




{'loss': 0.0064, 'grad_norm': 0.13766852021217346, 'learning_rate': 0.0002, 'epoch': 52.0}




{'loss': 0.006, 'grad_norm': 0.04491305351257324, 'learning_rate': 0.0002, 'epoch': 53.0}




{'loss': 0.006, 'grad_norm': 0.0408666729927063, 'learning_rate': 0.0002, 'epoch': 54.0}




{'loss': 0.0059, 'grad_norm': 0.026285594329237938, 'learning_rate': 0.0002, 'epoch': 55.0}




{'loss': 0.006, 'grad_norm': 0.15739211440086365, 'learning_rate': 0.0002, 'epoch': 56.0}




{'loss': 0.0056, 'grad_norm': 0.049522873014211655, 'learning_rate': 0.0002, 'epoch': 57.0}




{'loss': 0.0061, 'grad_norm': 0.15775184333324432, 'learning_rate': 0.0002, 'epoch': 58.0}




{'loss': 0.0056, 'grad_norm': 0.02227608487010002, 'learning_rate': 0.0002, 'epoch': 59.0}




{'loss': 0.0058, 'grad_norm': 0.049195438623428345, 'learning_rate': 0.0002, 'epoch': 60.0}




{'loss': 0.0056, 'grad_norm': 0.029021942988038063, 'learning_rate': 0.0002, 'epoch': 61.0}




{'loss': 0.0055, 'grad_norm': 0.0671427994966507, 'learning_rate': 0.0002, 'epoch': 62.0}




{'loss': 0.0054, 'grad_norm': 0.02003367245197296, 'learning_rate': 0.0002, 'epoch': 63.0}




{'loss': 0.0054, 'grad_norm': 0.04633141681551933, 'learning_rate': 0.0002, 'epoch': 64.0}




{'loss': 0.0058, 'grad_norm': 0.04112112894654274, 'learning_rate': 0.0002, 'epoch': 65.0}




{'loss': 0.0056, 'grad_norm': 0.059242308139801025, 'learning_rate': 0.0002, 'epoch': 66.0}




{'loss': 0.0055, 'grad_norm': 0.021270813420414925, 'learning_rate': 0.0002, 'epoch': 67.0}




{'loss': 0.0052, 'grad_norm': 0.018523691222071648, 'learning_rate': 0.0002, 'epoch': 68.0}




{'loss': 0.0052, 'grad_norm': 0.052203577011823654, 'learning_rate': 0.0002, 'epoch': 69.0}




{'loss': 0.0054, 'grad_norm': 0.018285151571035385, 'learning_rate': 0.0002, 'epoch': 70.0}




{'loss': 0.0052, 'grad_norm': 0.01236414909362793, 'learning_rate': 0.0002, 'epoch': 71.0}




{'loss': 0.0053, 'grad_norm': 0.014672666788101196, 'learning_rate': 0.0002, 'epoch': 72.0}




{'loss': 0.0052, 'grad_norm': 0.014979089610278606, 'learning_rate': 0.0002, 'epoch': 73.0}




{'loss': 0.0052, 'grad_norm': 0.01609225384891033, 'learning_rate': 0.0002, 'epoch': 74.0}




{'loss': 0.0051, 'grad_norm': 0.009963124990463257, 'learning_rate': 0.0002, 'epoch': 75.0}




{'loss': 0.0051, 'grad_norm': 0.012382252141833305, 'learning_rate': 0.0002, 'epoch': 76.0}




{'loss': 0.0051, 'grad_norm': 0.03827689215540886, 'learning_rate': 0.0002, 'epoch': 77.0}




{'loss': 0.005, 'grad_norm': 0.006782963871955872, 'learning_rate': 0.0002, 'epoch': 78.0}




{'loss': 0.0052, 'grad_norm': 0.02066495642066002, 'learning_rate': 0.0002, 'epoch': 79.0}




{'loss': 0.0051, 'grad_norm': 0.014173806644976139, 'learning_rate': 0.0002, 'epoch': 80.0}




{'loss': 0.005, 'grad_norm': 0.014733627438545227, 'learning_rate': 0.0002, 'epoch': 81.0}




{'loss': 0.0051, 'grad_norm': 0.0192056056112051, 'learning_rate': 0.0002, 'epoch': 82.0}




{'loss': 0.0049, 'grad_norm': 0.01628333516418934, 'learning_rate': 0.0002, 'epoch': 83.0}




{'loss': 0.0051, 'grad_norm': 0.01709144189953804, 'learning_rate': 0.0002, 'epoch': 84.0}




{'loss': 0.005, 'grad_norm': 0.011849517934024334, 'learning_rate': 0.0002, 'epoch': 85.0}




{'loss': 0.0048, 'grad_norm': 0.006568513810634613, 'learning_rate': 0.0002, 'epoch': 86.0}




{'loss': 0.0047, 'grad_norm': 0.006027226336300373, 'learning_rate': 0.0002, 'epoch': 87.0}




{'loss': 0.0049, 'grad_norm': 0.013078510761260986, 'learning_rate': 0.0002, 'epoch': 88.0}




{'loss': 0.0047, 'grad_norm': 0.0067552863620221615, 'learning_rate': 0.0002, 'epoch': 89.0}




{'loss': 0.0049, 'grad_norm': 0.01503585185855627, 'learning_rate': 0.0002, 'epoch': 90.0}




{'loss': 0.0046, 'grad_norm': 0.006129168439656496, 'learning_rate': 0.0002, 'epoch': 91.0}




{'loss': 0.0045, 'grad_norm': 0.005712463054805994, 'learning_rate': 0.0002, 'epoch': 92.0}




{'loss': 0.0046, 'grad_norm': 0.011509212665259838, 'learning_rate': 0.0002, 'epoch': 93.0}




{'loss': 0.0046, 'grad_norm': 0.013685797341167927, 'learning_rate': 0.0002, 'epoch': 94.0}




{'loss': 0.0044, 'grad_norm': 0.0066942935809493065, 'learning_rate': 0.0002, 'epoch': 95.0}




{'loss': 0.0044, 'grad_norm': 0.008408598601818085, 'learning_rate': 0.0002, 'epoch': 96.0}




{'loss': 0.0044, 'grad_norm': 0.010085014626383781, 'learning_rate': 0.0002, 'epoch': 97.0}




{'loss': 0.0047, 'grad_norm': 0.016642354428768158, 'learning_rate': 0.0002, 'epoch': 98.0}




{'loss': 0.0045, 'grad_norm': 0.014670269563794136, 'learning_rate': 0.0002, 'epoch': 99.0}




{'loss': 0.0042, 'grad_norm': 0.00804150104522705, 'learning_rate': 0.0002, 'epoch': 100.0}
{'train_runtime': 2955.8695, 'train_samples_per_second': 0.169, 'train_steps_per_second': 0.034, 'train_loss': 0.08280953351408243, 'epoch': 100.0}


TrainOutput(global_step=100, training_loss=0.08280953351408243, metrics={'train_runtime': 2955.8695, 'train_samples_per_second': 0.169, 'train_steps_per_second': 0.034, 'train_loss': 0.08280953351408243, 'epoch': 100.0})

In [30]:
trainer.save_model()

In [31]:
from peft import AutoPeftModelForCausalLM

model = AutoPeftModelForCausalLM.from_pretrained(
    training_args.output_dir,
    low_cpu_mem_usage=True,
    torch_dtype=torch.float16,
    load_in_4bit=True,
)
tokenizer = AutoTokenizer.from_pretrained(training_args.output_dir)

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.
Loading checkpoint shards: 100%|██████████| 3/3 [00:04<00:00,  1.37s/it]


In [44]:
sample = dataset_subset[23]

prompt = generate_prompt(sample, return_response=False)

In [45]:
input_ids = tokenizer(prompt[0], return_tensors="pt", truncation=True).input_ids.cuda()

outputs = model.generate(input_ids=input_ids, max_new_tokens=100, do_sample=True, top_p=0.9, temperature=0.5)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


In [46]:
print(f"Prompt:\n{prompt[0]}\n")
print(f"-------------")
print(f"Generated instruction:\n{tokenizer.batch_decode(outputs.detach().cpu().numpy(), skip_special_tokens=True)[0][len(prompt[0]):]}")
print(f"-------------")
print(f"Ground truth:\n{sample['instruction']}")

Prompt:
Generate a simple instruction that could result in the provided context.[INST]CONTEXT: "Waterloo is a small town in central Belgium that is known for its pivotal role in European history. In 1815, it was the site of the Battle of Waterloo, a historic conflict that marked the end of Napoleon Bonaparte's reign as the Emperor of the French.

The origins of the town can be traced back to the Roman period, when it was an important crossroads settlement. During the Middle Ages, Waterloo was a prosperous center of trade and industry, and was famed for its wool and cloth production.

In the 18th century, it became a popular destination for wealthy aristocrats, who were drawn to its picturesque landscapes and spa resorts. But it was in the 19th century, that Waterloo was forever etched into the annals of history – when, on June 18, 1815, the armies of Napoleon Bonaparte and a coalition of European powers clashed in the fields surrounding the town.

The Battle of Waterloo was one of the 