<a href="https://colab.research.google.com/github/dileep9968/pytorch/blob/main/Tiny_LLAMA_fine_tuned.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install -q accelerate -U
!pip install -q bitsandbytes -U
!pip install -q trl -U
!pip install -q peft -U
!pip install -q transformers -U

In [2]:
from datasets import load_dataset
dataset = load_dataset('HuggingFaceH4/ultrachat_200k',
                       trust_remote_code=True,
                       split = 'train_sft')
dataset = dataset.shuffle(seed=0).select(range(10_000))
dataset

Dataset({
    features: ['prompt', 'prompt_id', 'messages'],
    num_rows: 10000
})

In [3]:
dataset[0]

{'prompt': '3. Heat the vegetables in boiling water for the recommended time.',
 'prompt_id': '827b4bc3c5d8646e574bd741d65f7de92057be4f1fb1a4456d5f136cf7397568',
 'messages': [{'content': '3. Heat the vegetables in boiling water for the recommended time.',
   'role': 'user'},
  {'content': 'I do not have information about the specific type of vegetables being referred to. However, here are general instructions for boiling most vegetables:\n\n1. Wash the vegetables thoroughly with clean water.\n2. Cut the vegetables into small or medium-sized pieces.\n3. Bring a pot of water to boil on the stove.\n4. Add a pinch of salt to the boiling water.\n5. Add the vegetables to the boiling water.\n6. Cook the vegetables for the recommended time (see cooking instructions on the package or look up cooking times for specific vegetables online).\n7. Test the vegetables for doneness using a fork or a knife. They should be tender but not overcooked and mushy.\n8. Once the vegetables are cooked to your d

In [4]:
from transformers import AutoTokenizer

template_tokenizer = AutoTokenizer.from_pretrained(
    'TinyLlama/TinyLlama-1.1B-Chat-v1.0'
)
template_tokenizer

LlamaTokenizerFast(name_or_path='TinyLlama/TinyLlama-1.1B-Chat-v1.0', vocab_size=32000, model_max_length=2048, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'bos_token': '<s>', 'eos_token': '</s>', 'unk_token': '<unk>', 'pad_token': '</s>'}, clean_up_tokenization_spaces=False, added_tokens_decoder={
	0: AddedToken("<unk>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	1: AddedToken("<s>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	2: AddedToken("</s>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
}
)

In [5]:
def format_prompt(example):
  """Format the prompt using the <|user|> and <|assistant|> format"""
  chat = example['messages']
  prompt = template_tokenizer.apply_chat_template(chat,
                                                  tokenize=False,
                                                  add_generation_prompt=True)
  return {'text': prompt}

dataset = dataset.map(format_prompt)
print(format_prompt(dataset[0])['text'])

<|user|>
3. Heat the vegetables in boiling water for the recommended time.</s>
<|assistant|>
I do not have information about the specific type of vegetables being referred to. However, here are general instructions for boiling most vegetables:

1. Wash the vegetables thoroughly with clean water.
2. Cut the vegetables into small or medium-sized pieces.
3. Bring a pot of water to boil on the stove.
4. Add a pinch of salt to the boiling water.
5. Add the vegetables to the boiling water.
6. Cook the vegetables for the recommended time (see cooking instructions on the package or look up cooking times for specific vegetables online).
7. Test the vegetables for doneness using a fork or a knife. They should be tender but not overcooked and mushy.
8. Once the vegetables are cooked to your desired tenderness, remove them from the boiling water using a slotted spoon or a strainer.
9. Drain the vegetables and serve hot with your favorite seasonings or sauce.</s>
<|user|>
Can you add some informati

In [6]:
dataset = dataset.map(format_prompt)

In [7]:
dataset[0]

{'prompt': '3. Heat the vegetables in boiling water for the recommended time.',
 'prompt_id': '827b4bc3c5d8646e574bd741d65f7de92057be4f1fb1a4456d5f136cf7397568',
 'messages': [{'content': '3. Heat the vegetables in boiling water for the recommended time.',
   'role': 'user'},
  {'content': 'I do not have information about the specific type of vegetables being referred to. However, here are general instructions for boiling most vegetables:\n\n1. Wash the vegetables thoroughly with clean water.\n2. Cut the vegetables into small or medium-sized pieces.\n3. Bring a pot of water to boil on the stove.\n4. Add a pinch of salt to the boiling water.\n5. Add the vegetables to the boiling water.\n6. Cook the vegetables for the recommended time (see cooking instructions on the package or look up cooking times for specific vegetables online).\n7. Test the vegetables for doneness using a fork or a knife. They should be tender but not overcooked and mushy.\n8. Once the vegetables are cooked to your d

### Testing Base LLAMA MODEL

In [8]:
from transformers import pipeline

model_name = 'TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T'
pipe = pipeline('text-generation', model=model_name, device_map='auto')


# prompt
# <|user|>, <|assistant|>
prompt = """
Tell me something about Large Language Models.</s>
"""

output = pipe(prompt)
output

Device set to use cuda:0


[{'generated_text': '\nTell me something about Large Language Models.</s>\n\nBiografia \nNato a Roma, inizia la sua carriera di attore nel 1980, interpretando il ruolo di un ragazzo di 16 anni nella commedia teatrale La ragazza di 16 anni di Carlo Goldoni.\n\nNel 1981, dopo aver recitato in alcune produzioni teatrali, debutta sul grande schermo con il film di Lucio Fulci La ragazza di 16 anni.\n\nNel 1982, dopo aver recitato in alcune produzioni teatrali, debutta sul grande schermo con il film di Lucio Fulci La ragazza di 16 anni.\n\nNel 1983, dopo aver recitato in alcune produzioni teatrali, debutta sul grande schermo con il film di Lucio Fulci La ragazza di 16 anni.\n\nNel 1984, dopo aver recitato in alcune produzioni teatrali, debutta sul grande schermo con il film di Lucio Fulci La ragazza di 16 anni.\n\nNel 1985, dopo aver recitato in alcune produzioni teatrali, debutta sul grande schermo con il film di Lucio Fulci La ragazza di 16 anni.\n\nNel 1986, dopo aver recitato in alcune p

### Model Configuration for Traning

In [9]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

In [10]:
# do the 4-bit quantization configuration in Q-LORA

bnb_config = BitsAndBytesConfig(
    load_in_4bit = True,
    bnb_4bit_quant_type = 'nf4',
    bnb_4bit_compute_dtype = 'float16',
    bnb_4bit_use_double_quant = True
)

In [11]:
tokenzier = AutoTokenizer.from_pretrained(
    model_name,
    trust_remote_code=True
)
tokenzier.pad_token = '<PAD>'
tokenzier.padding_side = 'left'


In [12]:
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    trust_remote_code=True,
    device_map='auto'
)


In [13]:
model

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(32000, 2048)
    (layers): ModuleList(
      (0-21): 22 x LlamaDecoderLayer(
        (self_attn): LlamaSdpaAttention(
          (q_proj): Linear4bit(in_features=2048, out_features=2048, bias=False)
          (k_proj): Linear4bit(in_features=2048, out_features=256, bias=False)
          (v_proj): Linear4bit(in_features=2048, out_features=256, bias=False)
          (o_proj): Linear4bit(in_features=2048, out_features=2048, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear4bit(in_features=2048, out_features=5632, bias=False)
          (up_proj): Linear4bit(in_features=2048, out_features=5632, bias=False)
          (down_proj): Linear4bit(in_features=5632, out_features=2048, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((2048,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNorm((2048,), eps=1e-05

In [14]:
model.config.use_cache = False
model.config.pretraining_tp = 1

In [15]:
model

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(32000, 2048)
    (layers): ModuleList(
      (0-21): 22 x LlamaDecoderLayer(
        (self_attn): LlamaSdpaAttention(
          (q_proj): Linear4bit(in_features=2048, out_features=2048, bias=False)
          (k_proj): Linear4bit(in_features=2048, out_features=256, bias=False)
          (v_proj): Linear4bit(in_features=2048, out_features=256, bias=False)
          (o_proj): Linear4bit(in_features=2048, out_features=2048, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear4bit(in_features=2048, out_features=5632, bias=False)
          (up_proj): Linear4bit(in_features=2048, out_features=5632, bias=False)
          (down_proj): Linear4bit(in_features=5632, out_features=2048, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((2048,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNorm((2048,), eps=1e-05

### Prepare LORA Configuration for PEFT Fine Tunning

In [16]:
from peft import LoraConfig, prepare_model_for_kbit_training, get_peft_model

peft_config = LoraConfig(
    lora_alpha = 32,
    lora_dropout = 0.1,
    r = 64,
    bias = 'none',
    task_type ='CAUSAL_LM',
    target_modules = ['q_proj','k_proj', 'v_proj', 'o_proj','gate_porj', 'up_proj','down_proj']
)
model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, peft_config)
model

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): LlamaForCausalLM(
      (model): LlamaModel(
        (embed_tokens): Embedding(32000, 2048)
        (layers): ModuleList(
          (0-21): 22 x LlamaDecoderLayer(
            (self_attn): LlamaSdpaAttention(
              (q_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=2048, out_features=2048, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.1, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=2048, out_features=64, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=64, out_features=2048, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              (k_proj): l

In [17]:
from transformers import TrainingArguments
from trl import SFTTrainer , SFTConfig
output_dir = 'train_dir'
args = TrainingArguments(
    output_dir = output_dir,
    per_device_train_batch_size = 2,
    gradient_accumulation_steps = 4,
    optim = 'paged_adamw_32bit',
    learning_rate = 2e-4,
    lr_scheduler_type = 'cosine',
    num_train_epochs = 1,
    logging_steps = 10,
    fp16 = True,
    gradient_checkpointing = True
)
sft_config = SFTConfig(
    output_dir = output_dir,
    dataset_text_field = 'text'
)


In [21]:
trainer = SFTTrainer(
    model = model,
    train_dataset = dataset,
    #dataset_text_field="text",
    args = args,
    tokenizer = tokenzier,
   # max_seq_length = 512,
    peft_config = peft_config
)


trainer.train()

  trainer = SFTTrainer(


Map:   0%|          | 0/10000 [00:00<?, ? examples/s]

ValueError: Cannot use chat template functions because tokenizer.chat_template is not set and no template argument was passed! For information about writing templates and setting the tokenizer.chat_template attribute, please see the documentation at https://huggingface.co/docs/transformers/main/en/chat_templating