In [None]:
%%capture
# Install (or upgrade to the newest release of) every library this notebook uses.
# The %%capture cell magic above hides pip's output.
# NOTE(review): none of these installs is version-pinned, so a fresh run may pull
# releases whose APIs differ from the ones the cells below were written against —
# consider pinning known-good versions.
%pip install -U transformers 
%pip install -U datasets 
%pip install -U accelerate 
%pip install -U peft 
%pip install -U trl 
%pip install -U bitsandbytes 
%pip install -U wandb

In [None]:
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import (
    LoraConfig,
    PeftModel,
    prepare_model_for_kbit_training,
    get_peft_model,
)
import os, torch, wandb
from datasets import load_dataset
from trl import SFTTrainer, setup_chat_format

In [None]:
from huggingface_hub import login
from kaggle_secrets import UserSecretsClient
# Credentials are read from Kaggle's secret store (Add-ons -> Secrets) instead of
# being embedded in the notebook, where anyone who can read the file can use them.
# Required secrets: "HF_TOKEN" (Hugging Face access token) and "WANDB_API_KEY".
# Any token that was ever committed in plain text should be revoked and re-issued.
user_secrets = UserSecretsClient()

# Authenticate against the Hugging Face Hub (needed to download the gated Llama weights).
login(token=user_secrets.get_secret("HF_TOKEN"))

# Authenticate against Weights & Biases and open the run that the trainer reports to.
wandb.login(key=user_secrets.get_secret("WANDB_API_KEY"))
run = wandb.init(
    project='llama_interview_3Bins',
    job_type="training"
)

In [None]:
# Checkpoint to fine-tune and the numeric / attention settings used to load it.
model_name = "meta-llama/Llama-3.2-3B-Instruct"
torch_dtype = torch.float16
attn_implementation = "eager"

# 4-bit NF4 quantization with double quantization; matmuls are computed in `torch_dtype`.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch_dtype,
)

# Load the quantized base model and let `device_map="auto"` decide weight placement.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    quantization_config=bnb_config,
    attn_implementation=attn_implementation,
)

print("done !")

In [None]:
# Load the tokenizer from the same checkpoint as the model so the two cannot drift apart.
tokenizer = AutoTokenizer.from_pretrained(model_name)

# `setup_chat_format` installs a fresh chat template (and its special tokens) on the
# tokenizer/model pair. An Instruct checkpoint normally ships with its own template,
# and TRL refuses to overwrite an existing one, so only set one up when it is missing.
if tokenizer.chat_template is None:
    model, tokenizer = setup_chat_format(model, tokenizer)
print("done !")

In [None]:
# Linear projections that receive LoRA adapters.
lora_target_modules = [
    'up_proj', 'down_proj', 'gate_proj',
    'k_proj', 'q_proj', 'v_proj', 'o_proj',
]

# Adapter hyper-parameters: rank 16, alpha 32, 5% dropout, no bias terms trained.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    target_modules=lora_target_modules,
)

# Wrap the loaded model with the adapters described by `peft_config`.
# NOTE(review): `prepare_model_for_kbit_training` is imported by this notebook but
# never called on the 4-bit model before wrapping — confirm that is intentional.
model = get_peft_model(model, peft_config)
print("done !")

In [None]:
# Read the question/answer JSON file from the attached Kaggle input dataset.
qa_json_path = "/kaggle/input/qa-datasetttt/qa_dataset.json"
dataset = load_dataset("json", data_files=qa_json_path)

# Show the available splits and columns.
print(dataset)

In [None]:
def format_chat_template(row):
    """Render one question/answer record as a chat transcript.

    Reads ``row["question"]`` and ``row["answer"]``, formats them as a
    user turn followed by an assistant turn with the tokenizer's chat
    template (as a string, not token ids), stores the result under
    ``row["text"]`` and returns the same row.
    """
    conversation = [
        dict(role="user", content=row["question"]),
        dict(role="assistant", content=row["answer"]),
    ]
    rendered = tokenizer.apply_chat_template(conversation, tokenize=False)
    row["text"] = rendered
    return row


# Add the rendered "text" column to every record.
dataset = dataset.map(format_chat_template)



In [None]:
# Use the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token


def _encode_qa_batch(batch):
    """Tokenize a batch of question/answer pairs, padded/truncated to 512 tokens."""
    return tokenizer(
        batch['question'],
        batch['answer'],
        truncation=True,
        padding="max_length",
        max_length=512,
    )


# Batched tokenization of the whole dataset.
tokenized_dataset = dataset.map(_encode_qa_batch, batched=True)


In [None]:
# When the loader returned a dict of splits, continue with its "train" split only.
if "train" in dataset:
    dataset = dataset["train"]

# Hold out 10% of the records for evaluation; the seed keeps the split reproducible.
split_dataset = dataset.train_test_split(seed=42, test_size=0.1)
train_dataset, test_dataset = split_dataset["train"], split_dataset["test"]

In [None]:
# Optimisation / logging settings for the fine-tuning run.
training_arguments = TrainingArguments(
    output_dir="finetuned_llama_interview_iyed",
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=2,
    optim="paged_adamw_32bit",
    num_train_epochs=10,  # deliberately high: overfitting is accepted for this run
    # `eval_strategy` is the current name of this option; the former spelling
    # `evaluation_strategy` is deprecated in transformers.
    eval_strategy="steps",
    # A float below 1 is treated by transformers as a fraction of the total
    # number of training steps rather than an absolute step count.
    eval_steps=0.2,
    logging_steps=1,
    warmup_steps=10,
    logging_strategy="steps",
    learning_rate=2e-4,
    fp16=False,
    bf16=False,
    group_by_length=True,
    report_to="wandb"
)

# `model` has already been wrapped with the LoRA adapters by `get_peft_model`, so
# `peft_config` is intentionally NOT passed again: for an existing PeftModel the
# trainer either ignores the argument or rejects the combination.
# NOTE(review): `max_seq_length`, `dataset_text_field`, `packing` and `tokenizer`
# are accepted as SFTTrainer keyword arguments only by some TRL releases — confirm
# against the installed version.
trainer = SFTTrainer(
    model=model,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    max_seq_length=512,
    dataset_text_field="text",
    tokenizer=tokenizer,
    args=training_arguments,
    packing=False,
)

In [None]:
# Run the fine-tuning loop and show the returned training summary as the cell output.
train_output = trainer.train()
train_output


In [None]:
# Quick smoke test: ask the fine-tuned model a single question.
messages = [
    {
        "role": "user",
        "content": "what's your name  ?"
    }
]

# Render the conversation and append the marker that opens the assistant's turn.
prompt = tokenizer.apply_chat_template(messages, tokenize=False,
                                       add_generation_prompt=True)

# Send the inputs to whichever device the model lives on instead of assuming "cuda".
inputs = tokenizer(prompt, return_tensors='pt', padding=True,
                   truncation=True, max_length=512).to(model.device)

# Switch to inference mode so the LoRA dropout used during training is disabled.
model.eval()

outputs = model.generate(**inputs, max_length=150, num_return_sequences=1)#, num_beams=10, early_stopping=False, repetition_penalty=2.2)

# The generated sequence starts with the prompt tokens; decode only what follows.
# Splitting the decoded text on the word "assistant" would raise IndexError when
# the word is absent and return the wrong fragment when it appears more than once.
prompt_length = inputs["input_ids"].shape[1]
text = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)

print(text)

In [None]:
# Write the trained model held by the trainer to a local directory.
adapter_dir = "llama_3Bins_interview"
trainer.model.save_pretrained(adapter_dir)

In [None]:
from huggingface_hub import login
# Re-authenticate for the upload with a token read from Kaggle's secret store
# (Add-ons -> Secrets, key "HF_WRITE_TOKEN") rather than a literal in the notebook.
# Any token that was ever committed in plain text should be revoked and re-issued.
login(token=UserSecretsClient().get_secret("HF_WRITE_TOKEN"))

In [None]:
# Upload the model and its tokenizer to the same Hub repository.
hub_repo_id = "iy3d1243/finetuned-llama-interview-model"
model.push_to_hub(hub_repo_id)
tokenizer.push_to_hub(hub_repo_id)