# Setup the model

### Libraries

In [None]:
import os
import re
import torch
import torch.nn as nn
from datasets import load_dataset
from transformers import AutoTokenizer, AutoConfig, AutoModelForCausalLM
import transformers
from peft import LoraConfig, get_peft_model, PeftModel, PeftConfig

In [None]:
!nvidia-smi 

## Calling a Model

In [None]:
# Hugging Face Hub model id or local checkpoint directory of the base model.
# Left empty here; set it before running the loading cell below.
model_path = "" # Model Name or Path

In [None]:
# Load the base causal LM with 8-bit weights; `device_map='auto'` lets the
# library place the layers on the available device(s).
# The 8-bit flag is passed through a `BitsAndBytesConfig` because the bare
# `load_in_8bit=True` keyword on `from_pretrained` is deprecated.
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map='auto',
    quantization_config=transformers.BitsAndBytesConfig(load_in_8bit=True),
)

# NOTE(review): `truncation` / `padding` are normally call-time tokenizer
# options; confirm they have any effect when given to `from_pretrained`.
tokenizer = AutoTokenizer.from_pretrained(model_path, truncation=True, padding=True)

### Test the Model

In [None]:
%%time
# Smoke test of the base model with a general-assistant system prompt.
prompt = """[INST] <<SYS>>
    You are an assistant, Answer the user about anything. 
    <</SYS>>
    "It was a bad week, what should I plan for the weekend?"[/INST]"""

# Move the tokenized inputs to the model's device: the model is loaded with
# device_map='auto', whereas the tokenizer hands back CPU tensors.
batch = tokenizer(prompt, return_tensors='pt').to(model.device)

# `torch.autocast("cuda")` replaces the deprecated `torch.cuda.amp.autocast()`.
with torch.autocast("cuda"):
    output_tokens = model.generate(**batch, max_new_tokens=250, repetition_penalty=1.1)

# The decoded sequence echoes the prompt; strip it so only the answer remains.
output = tokenizer.decode(output_tokens[0], skip_special_tokens=True).replace(prompt, "")
output

In [None]:
# Baseline for the fine-tuning task: ask the untouched base model for the
# describing words of a sentence.
prompt = """[INST] <<SYS>>
    You are a key word finder, Find the describing words for the following sentence 
    <</SYS>>
    "After they had break up, he wasn't the same"[/INST]"""

# Move the tokenized inputs to the model's device: the model is loaded with
# device_map='auto', whereas the tokenizer hands back CPU tensors.
batch = tokenizer(prompt, return_tensors='pt').to(model.device)

output_tokens = model.generate(**batch, max_new_tokens=50, repetition_penalty=1.1)

# The decoded sequence echoes the prompt; strip it so only the answer remains.
output = tokenizer.decode(output_tokens[0], skip_special_tokens=True).replace(prompt, "")
output

### Trainable Parameters

In [None]:
def print_trainable_parameters(model, verbose=True):
    """
    Print how many of the model's parameters are trainable.

    Args:
        model: Object exposing ``named_parameters()`` that yields
            ``(name, parameter)`` pairs (e.g. a ``torch.nn.Module``).
        verbose: When true (the default), also print the shape, element
            count and dtype of every parameter before the summary line.

    The summary line reports the trainable count, the total count and the
    trainable percentage. A model without any parameters reports 0.0 percent
    instead of raising ``ZeroDivisionError``.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        count = param.numel()
        all_param += count
        if verbose:
            print(param.shape, count, param.dtype)
        if param.requires_grad:
            trainable_params += count
    # Guard the ratio: an empty model has nothing to divide by.
    percent = 100 * trainable_params / all_param if all_param else 0.0
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {percent}"
    )

In [None]:
# Parameter counts of the base model, before any LoRA adapters are attached.
print_trainable_parameters(model)

In [None]:
# Print the module tree; useful for finding the layer names to pass as LoRA target_modules.
print(model)

### LoRA Config

In [None]:

# LoRA hyper-parameters; adapters are attached to the modules named
# q_proj, k_proj, v_proj and o_proj.
config = LoraConfig(
    task_type="CAUSAL_LM",  # causal LM (as opposed to Seq2Seq)
    r=4,  # rank of the adapter matrices
    lora_alpha=32,  # alpha scaling
    lora_dropout=0.05,
    bias="none",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
)

# Wrap the base model with the adapters, then report the parameter counts again.
peft_model = get_peft_model(model, config)
print_trainable_parameters(peft_model)


### Loading Data

In [None]:
# Load the `Abirate/english_quotes` dataset; later cells use its `train` split
# and the `quote` / `tags` fields of each row.
data = load_dataset("Abirate/english_quotes")

In [None]:
# Show the loaded dataset object as the cell output.
data

In [None]:
# List the available split names.
data.keys()

In [None]:
# Peek at the first training row to see the raw fields.
data['train'][0]

In [None]:
def merge_columns(example):
    """Attach an ``[INST]`` / ``<<SYS>>`` formatted training string to one row.

    The row's ``quote`` is placed in the instruction part and the string form
    of its ``tags`` follows as the expected answer. The text is stored under
    the ``prediction`` key of ``example``, which is modified in place and
    returned.
    """
    quote = example["quote"]
    tags = str(example["tags"])
    example['prediction'] = f"""<s>[INST] <<SYS>>
    You are a key word finder, Find the describing words for the following sentence 
    <</SYS>>
    {quote}[/INST]
    {tags} </s>"""
    return example

# Build the `prediction` column for every training row.
data['train'] = data['train'].map(merge_columns)
# Preview the first formatted example. Indexing the row first avoids
# materialising the whole `prediction` column just to read one value.
data['train'][0]["prediction"]

In [None]:
# Tokenize the formatted text in batches. The `prediction` strings already
# contain literal `<s>` / `</s>` markers, so automatic special tokens are
# turned off to avoid a doubled BOS token at the start of every sample.
# NOTE(review): assumes the tokenizer maps the literal `<s>` / `</s>` text to
# its BOS/EOS ids (as the Llama-2 tokenizer does) — confirm for other models.
data = data.map(
    lambda sample: tokenizer(sample['prediction'], add_special_tokens=False),
    batched=True,
)

In [None]:
# Show the dataset again after the tokenization step.
data

### If you are using llama-2-7b-chat-hf

In [None]:
# Give the tokenizer a padding token before training (the data collator pads batches).
# NOTE(review): "[PAD]" is assigned as a plain string and is not added to the
# vocabulary here — confirm it resolves to a valid token id for this tokenizer.
tokenizer.pad_token = "[PAD]"

### Fine-tuning the model

In [None]:
# Fine-tune the LoRA-wrapped model on the tokenized training split.
trainer = transformers.Trainer(
    model=peft_model,
    train_dataset=data['train'],
    args=transformers.TrainingArguments(
        per_device_train_batch_size=1,
        gradient_accumulation_steps=1,
        # Keep warmup well below max_steps: with warmup_steps == max_steps the
        # learning rate would still be ramping up when training stops.
        warmup_steps=10,
        max_steps=100,
        learning_rate=2e-4,
        fp16=True,
        logging_steps=1,
        output_dir='outputs',
    ),
    # mlm=False selects the causal (next-token) language-modelling collator.
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)
)
peft_model.config.use_cache = False  # silence the warnings. Please re-enable for inference!
trainer.train()

In [None]:

# Print the module tree of the LoRA-wrapped model.
print(peft_model)

In [None]:
# Try the fine-tuned (LoRA-wrapped) model on a new sentence.
prompt = """[INST] <<SYS>>
    You are a key word finder, Find the describing words for the following sentence 
    <</SYS>>
    "You lost in the finals"[/INST]"""

# Switch to inference behaviour: the KV cache was turned off for training, and
# eval mode disables the LoRA dropout so generation is not perturbed by it.
peft_model.config.use_cache = True
peft_model.eval()

# Keep the inputs on the same device as the model.
batch = tokenizer(prompt, return_tensors='pt').to(peft_model.device)

# `torch.autocast("cuda")` replaces the deprecated `torch.cuda.amp.autocast()`.
with torch.autocast("cuda"):
    output_tokens = peft_model.generate(**batch, max_new_tokens=50, repetition_penalty=1.1)

# The decoded sequence echoes the prompt; strip it so only the answer remains.
output = tokenizer.decode(output_tokens[0], skip_special_tokens=True).replace(prompt, '')
output

### Model Saving

In [None]:
# Save the fine-tuned PEFT model to a local directory; the loading cell below
# reads it back from the same path.
peft_model.save_pretrained("llama-2-7b-chat-hf-lora")

# Optionally publish it to the Hugging Face Hub:
# peft_model.push_to_hub("your-name/llama-2-7b-chat-hf-lora")


### Model Loading

In [None]:
# Read the saved adapter config to find out which base model it belongs to.
config = PeftConfig.from_pretrained("llama-2-7b-chat-hf-lora")
# Reload that base model with default settings (no 8-bit flag or device_map here).
# NOTE: this rebinds `model`, which previously referred to the 8-bit base model.
model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path)
# Attach the saved LoRA adapter to the freshly loaded base model.
lora_model = PeftModel.from_pretrained(model, "llama-2-7b-chat-hf-lora") # is_trainable=False


In [None]:
%%time
# Check that the reloaded base model + adapter still answers the task prompt.
prompt = """[INST] <<SYS>>
    You are a key word finder, Find the describing words for the following sentence 
    <</SYS>>
    "He is a good player."[/INST]"""

# Keep the inputs on the same device as the reloaded model.
batch = tokenizer(prompt, return_tensors='pt').to(lora_model.device)

# `torch.autocast("cuda")` replaces the deprecated `torch.cuda.amp.autocast()`.
with torch.autocast("cuda"):
    output_tokens = lora_model.generate(**batch, max_new_tokens=50, repetition_penalty=1.1)

# Strip the echoed prompt and any leading whitespace from the answer.
output = tokenizer.decode(output_tokens[0], skip_special_tokens=True).replace(prompt, '').lstrip()
output

### Merge Model with LoRA

In [None]:
# Merge the LoRA adapter into the base model and save the result to the
# "Merged_Model" directory.
# NOTE(review): the tokenizer is not saved alongside the merged model here.
merged_model = lora_model.merge_and_unload()
merged_model.save_pretrained("Merged_Model")
