In [1]:
import os
from transformers import LlamaForCausalLM, LlamaTokenizer, BitsAndBytesConfig
from itertools import chain
from transformers import default_data_collator, Trainer, TrainingArguments, TrainerCallback
from trl import SFTTrainer
import torch
import datasets
from peft import (
    get_peft_model,
    LoraConfig,
    TaskType,
    prepare_model_for_int8_training,
    AutoPeftModelForCausalLM
)


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Release cached, currently unused GPU memory held by PyTorch's allocator
# before the model is loaded in the cells below.
torch.cuda.empty_cache()

In [3]:
# Allocator tuning: cap the size of blocks the CUDA caching allocator may split
# (512 MB) to limit fragmentation. NOTE(review): this only takes effect if it is
# set before PyTorch makes its first CUDA allocation -- keep it ahead of the
# model load.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:512"

# Local path of the base checkpoint; used for both the tokenizer and the model.
model_id = "./models/7B-hf"

# Passed as `device_map` when loading the model: the "" key maps to device 0
# (presumably "whole model on GPU 0" -- confirm against the accelerate docs).
device_map = {"": 0}

In [4]:
tokenizer = LlamaTokenizer.from_pretrained(model_id)
# A pad token has to be assigned explicitly. Do NOT alias it to EOS for
# fine-tuning: the causal-LM data collator replaces every label equal to
# pad_token_id with -100, so with pad == eos the eos_token that
# apply_prompt_template appends to each example would be excluded from the loss
# and the tuned model would not learn where to stop generating.
# Use the unknown token as padding instead, and only fall back to EOS when the
# tokenizer defines no unk token.
tokenizer.pad_token = (
    tokenizer.unk_token if tokenizer.unk_token is not None else tokenizer.eos_token
)


In [5]:
# Load the base weights in 4-bit NF4; compute dtype for the quantised layers is fp16.
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
    load_in_4bit=True,
)

# Everything besides the checkpoint path that from_pretrained needs.
load_kwargs = dict(
    quantization_config=bnb_config,
    device_map=device_map,
    torch_dtype=torch.float16,
)
model = LlamaForCausalLM.from_pretrained(model_id, **load_kwargs)

# Config flags adjusted ahead of fine-tuning.
# pretraining_tp = 1: presumably selects the plain (non tensor-parallel) linear
# path -- TODO confirm against the Llama config docs.
model.config.pretraining_tp = 1
# use_cache = False: switch off the key/value cache flag on the model config.
model.config.use_cache = False


Loading checkpoint shards: 100%|██████████| 2/2 [00:18<00:00,  9.03s/it]
Some weights of LlamaForCausalLM were not initialized from the model checkpoint at ./models/7B-hf and are newly initialized: ['model.layers.18.self_attn.rotary_emb.inv_freq', 'model.layers.23.self_attn.rotary_emb.inv_freq', 'model.layers.14.self_attn.rotary_emb.inv_freq', 'model.layers.6.self_attn.rotary_emb.inv_freq', 'model.layers.29.self_attn.rotary_emb.inv_freq', 'model.layers.3.self_attn.rotary_emb.inv_freq', 'model.layers.10.self_attn.rotary_emb.inv_freq', 'model.layers.0.self_attn.rotary_emb.inv_freq', 'model.layers.7.self_attn.rotary_emb.inv_freq', 'model.layers.30.self_attn.rotary_emb.inv_freq', 'model.layers.15.self_attn.rotary_emb.inv_freq', 'model.layers.17.self_attn.rotary_emb.inv_freq', 'model.layers.31.self_attn.rotary_emb.inv_freq', 'model.layers.20.self_attn.rotary_emb.inv_freq', 'model.layers.11.self_attn.rotary_emb.inv_freq', 'model.layers.1.self_attn.rotary_emb.inv_freq', 'model.layers.5.self_a

In [6]:
# Baseline: ask the model (before any fine-tuning) to complete a code prompt.
# NOTE(review): apply_prompt_template puts a "---" line before "Code:" in the
# training text; this prompt does not, so it is not an exact match for the
# fine-tuning format.
eval_prompt = """
Implement the following program given the instructions:
Write a simple program to calculate the cosine similarity between two vectors in Typescript.

Code:
"""

model_input = tokenizer(eval_prompt, return_tensors="pt").to("cuda")

model.eval()
with torch.no_grad():
    # Generate up to 100 new tokens and keep the first (only) returned sequence.
    generated_ids = model.generate(**model_input, max_new_tokens=100)
    completion = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
print(completion)




Implement the following program given the instructions:
Write a simple program to calculate the cosine similarity between two vectors in Typescript.

Code:
```typescript
function cosineSimilarity(vector1: Vector, vector2: Vector): number {
  const dotProduct = vector1.dot(vector2);
  const magnitude1 = vector1.magnitude();
  const magnitude2 = vector2.magnitude();
  return dotProduct / (magnitude1 * magnitude2);
}
```

Input:
```typescript
const vector1 = new Vector([1,


In [7]:
def apply_prompt_template(samples, tokenizer):
    """Render a batch of instruction/solution pairs as training strings.

    ``samples`` is a column-oriented batch with parallel ``'prompt'`` and
    ``'response'`` sequences. Each pair becomes one string made of the fixed
    instruction header, the prompt, a ``---`` separator line, ``Code:``, the
    response, and finally ``tokenizer.eos_token``.

    Returns:
        A dict with a single ``"text"`` key holding the rendered strings in
        input order.
    """
    rendered = [
        f"Implement the following program given the instructions:\n{instructions}\n---\nCode:\n{code}{tokenizer.eos_token}"
        for instructions, code in zip(samples['prompt'], samples['response'])
    ]
    return {"text": rendered}

In [8]:
def get_preprocessed_codes(tokenizer, split, num_samples=150_000, seed=42):
    """Load a shuffled subset of ``nampdn-ai/tiny-codes`` rendered as training text.

    Args:
        tokenizer: Forwarded to ``apply_prompt_template``.
        split: Split name handed to ``datasets.load_dataset`` (e.g. ``"train"``).
        num_samples: Maximum number of examples to keep after shuffling. The
            value is clamped to the size of the split, so asking for more rows
            than exist no longer raises.
        seed: Seed for the shuffle, so repeated runs select the same subset.
            Pass ``None`` for an unseeded shuffle.

    Returns:
        The mapped dataset; all original columns are removed, leaving only the
        ``"text"`` column produced by ``apply_prompt_template``.
    """
    dataset = datasets.load_dataset("nampdn-ai/tiny-codes", split=split)

    # Never index past the end of the split.
    subset_size = min(num_samples, len(dataset))
    dataset = dataset.shuffle(seed=seed).select(range(subset_size))

    def apply_prompt_template_batch(samples):
        # Bind the tokenizer so the callable matches what ``map`` expects.
        return apply_prompt_template(samples, tokenizer)

    return dataset.map(
        apply_prompt_template_batch,
        batched=True,
        remove_columns=list(dataset.features),
    )


In [9]:
# Build the prompt-formatted training set from the "train" split.
dataset = get_preprocessed_codes(tokenizer, split="train")

Map: 100%|██████████| 150000/150000 [00:06<00:00, 23761.52 examples/s]


In [10]:
# Root folder for the training run; TrainingArguments writes its logs under it.
output_dir = "./llama-coder"
# Sub-folder the trained model is saved into once training has finished.
final_checkpoint_dir = os.path.join(output_dir, "final_checkpoint")

In [11]:
# Sequence-length cap handed to SFTTrainer.
max_seq_length = 1024

# LoRA adapter configuration: rank-8 adapters on the q_proj / v_proj modules.
peft_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    target_modules=["q_proj", "v_proj"],
    r=8,
    lora_alpha=32,
    lora_dropout=0.05,
    inference_mode=False,
)

training_args = TrainingArguments(
    # --- output and logging ---
    output_dir=output_dir,
    overwrite_output_dir=True,
    save_strategy="no",  # no intermediate checkpoints; the model is saved manually afterwards
    logging_dir=f"{output_dir}/logs",
    logging_strategy="steps",
    logging_steps=50,
    report_to="none",
    # --- schedule ---
    # NOTE(review): max_steps bounds the run at 500 optimizer steps, far short
    # of one pass over the dataset; num_train_epochs looks redundant here.
    max_steps=500,
    num_train_epochs=1,
    # --- optimisation ---
    optim="adamw_torch_fused",
    learning_rate=1e-4,
    fp16=True,  # fp16 mixed precision, enabled unconditionally
    gradient_checkpointing=False,
    # --- batching: 2 sequences x 2 accumulation steps = 4 per optimizer step ---
    per_device_train_batch_size=2,
    gradient_accumulation_steps=2,
    per_device_eval_batch_size=2,
)

# https://huggingface.co/docs/trl/main/en/sft_trainer
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    args=training_args,
    peft_config=peft_config,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    callbacks=[],
)

# Run the fine-tuning loop; the returned TrainOutput is shown as the cell result.
trainer.train()



Map: 100%|██████████| 150000/150000 [08:46<00:00, 284.68 examples/s]


Step,Training Loss
50,1.1282
100,0.7851
150,0.7254
200,0.6947
250,0.7029
300,0.6633
350,0.6782
400,0.6611
450,0.6584
500,0.6547


TrainOutput(global_step=500, training_loss=0.7352132568359375, metrics={'train_runtime': 5647.0774, 'train_samples_per_second': 0.354, 'train_steps_per_second': 0.089, 'total_flos': 2.7725034962386944e+16, 'train_loss': 0.7352132568359375, 'epoch': 0.01})

In [12]:
# Persist the trained model held by the trainer. Because a peft_config was
# passed to SFTTrainer, this is presumably the PEFT-wrapped model, in which case
# only the adapter weights are written -- confirm before relying on it.
trained_model = trainer.model
trained_model.save_pretrained(final_checkpoint_dir)

In [13]:
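# NOTE: disabled. The last time this load ran it failed with CUDA out-of-memory
# (see the error output below), presumably because the base model and trainer
# from the earlier cells were still resident on the GPU. Free those objects (or
# restart the kernel) before re-enabling this line.
# tuned_model = AutoPeftModelForCausalLM.from_pretrained(final_checkpoint_dir, device_map=device_map, torch_dtype=torch.bfloat16)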

Loading checkpoint shards:  50%|█████     | 1/2 [00:20<00:20, 20.39s/it]


OutOfMemoryError: CUDA out of memory. Tried to allocate 86.00 MiB (GPU 0; 14.58 GiB total capacity; 14.29 GiB already allocated; 41.56 MiB free; 14.40 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [None]:
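# NOTE(review): stale evaluation cell. It uses a dialogue-summarisation prompt
# rather than the code-generation template this notebook trains on, and it
# depends on `tuned_model`, which is never created while the load above stays
# commented out.
# eval_prompt = """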
# Summarize this dialog:
# A: Hi Tom, are you busy tomorrow’s afternoon?
# B: I’m pretty sure I am. What’s up?
# A: Can you go with me to the animal shelter?.
# B: What do you want to do?
# A: I want to get a puppy for my son.
# B: That will make him so happy.
# A: Yeah, we’ve discussed it many times. I think he’s ready now.
# B: That’s good. Raising a dog is a tough issue. Like having a baby ;-) 
# A: I'll get him one of those little dogs.
# B: One that won't grow up too big;-)
# A: And eat too much;-))
# B: Do you know which one he would like?
# A: Oh, yes, I took him there last Monday. He showed me one that he really liked.
# B: I bet you had to drag him away.
# A: He wanted to take it home right away ;-).
# B: I wonder what he'll name it.
# A: He said he’d name it after his dead hamster – Lemmy  - he's  a great Motorhead fan :-)))
# ---
# Summary:
# """

# model_input = tokenizer(eval_prompt, return_tensors="pt").to("cuda")

# with torch.no_grad():
#     print(tokenizer.decode(tuned_model.generate(**model_input, max_new_tokens=64)[0], skip_special_tokens=True))


Summarize this dialog:
A: Hi Tom, are you busy tomorrow’s afternoon?
B: I’m pretty sure I am. What’s up?
A: Can you go with me to the animal shelter?.
B: What do you want to do?
A: I want to get a puppy for my son.
B: That will make him so happy.
A: Yeah, we’ve discussed it many times. I think he’s ready now.
B: That’s good. Raising a dog is a tough issue. Like having a baby ;-) 
A: I'll get him one of those little dogs.
B: One that won't grow up too big;-)
A: And eat too much;-))
B: Do you know which one he would like?
A: Oh, yes, I took him there last Monday. He showed me one that he really liked.
B: I bet you had to drag him away.
A: He wanted to take it home right away ;-).
B: I wonder what he'll name it.
A: He said he’d name it after his dead hamster – Lemmy  - he's  a great Motorhead fan :-)))
---
Summary:
A will take his son to the animal shelter to get a puppy. He will take his son's favourite one. He will name it Lemmy. 
---
Summary:
A will take his son to the animal shelter 