# 1. Weight and Bias Login

In [1]:
import wandb
import os
os.environ["WANDB_PROJECT"]="QLoRA_Instruction_finetune_06"

wandb.login()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33maeolian83[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

# 2. Login Huggingface

In [2]:
from huggingface_hub import login
from dotenv import load_dotenv

load_dotenv()


login(token= os.environ["HF_TOKEN"])

Token has not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /home/aeolian83/.cache/huggingface/token
Login successful


# 3. Dataset Load

In [3]:
from datasets import load_dataset

In [4]:
ko_instruction_01 = load_dataset("nlpai-lab/kullm-v2", cache_dir="/mnt/t7/.cache/huggingface/datasets", split="train")

In [5]:
ko_instruction_01 = ko_instruction_01.shuffle(seed=2160)

In [6]:
ko_instruction_01

Dataset({
    features: ['id', 'instruction', 'input', 'output'],
    num_rows: 152630
})

In [7]:
ko_instruction_01 = ko_instruction_01.train_test_split(test_size=0.92)

In [8]:
ko_instruction_01

DatasetDict({
    train: Dataset({
        features: ['id', 'instruction', 'input', 'output'],
        num_rows: 12210
    })
    test: Dataset({
        features: ['id', 'instruction', 'input', 'output'],
        num_rows: 140420
    })
})

In [9]:
ko_instruction_01["train"][100]

{'id': 'alpaca_{idx}',
 'instruction': '다음 단락의 흐름에 대해 1~10점 척도로 평가하세요.',
 'input': '산타바바라시는 남부 캘리포니아의 태평양 연안에 위치한 아름답고 활기찬 지역 사회입니다. 아름다운 해변, 온화한 날씨, 스페인 건축 양식으로 유명합니다.',
 'output': '문단의 흐름은 9점으로 평가하고 싶습니다. 문장이 일관성 있고 매끄럽게 이어지며 산타바바라라는 도시를 잘 설명하는 정보와 함께 잘 어우러져 있습니다.'}

# 4. Loading the Model

In [10]:
model_id = "beomi/llama-2-ko-7b"
device_map = {"": 0}
cache_model_dir="/mnt/t7/.cache/huggingface/models"

In [11]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

In [12]:
# 4bit QLoRA 학습을 위한 설정
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
)

In [13]:
model = AutoModelForCausalLM.from_pretrained(model_id, quantization_config=quantization_config, device_map=device_map, cache_dir=cache_model_dir, trust_remote_code=True)
model.config.use_cache = False

Loading checkpoint shards:   0%|          | 0/15 [00:00<?, ?it/s]

In [14]:
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True, cache_dir=cache_model_dir)
tokenizer.pad_token = tokenizer.eos_token

# 5. LoRA Setup

In [15]:
from peft import LoraConfig, get_peft_model

lora_alpha = 16
lora_dropout = 0.1
lora_r = 64

In [16]:
peft_config = LoraConfig(
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    r=lora_r,
    bias="none",
    task_type="CAUSAL_LM"
)

# 6. Formatting Dataset

In [17]:
def format_instruction(sample):
    system_prompt = f"### instruction: {sample['instruction']}"
    input = f"### input: {sample['input']}" if len(sample["input"]) > 0 else None
    output = f"### output: {sample['output']}"
    # join all the parts together
    prompt = "\n\n".join([i for i in [system_prompt, input, output] if i is not None])
    return prompt

# template dataset to add prompt to each sample
def template_dataset(sample):
    sample["text"] = f"{format_instruction(sample)}{tokenizer.eos_token}"
    return sample

In [18]:
train_dataset = ko_instruction_01['train'].map(template_dataset, remove_columns=list(ko_instruction_01['train'].features), num_proc=10)

Map (num_proc=10):   0%|          | 0/12210 [00:00<?, ? examples/s]

In [19]:
train_dataset["text"][100]

'### instruction: 다음 단락의 흐름에 대해 1~10점 척도로 평가하세요.\n\n### input: 산타바바라시는 남부 캘리포니아의 태평양 연안에 위치한 아름답고 활기찬 지역 사회입니다. 아름다운 해변, 온화한 날씨, 스페인 건축 양식으로 유명합니다.\n\n### output: 문단의 흐름은 9점으로 평가하고 싶습니다. 문장이 일관성 있고 매끄럽게 이어지며 산타바바라라는 도시를 잘 설명하는 정보와 함께 잘 어우러져 있습니다.</s>'

# 7. Training Argument Setup

In [20]:
from transformers import TrainingArguments

In [21]:
output_dir = "./checkpoint/experi_05"
per_device_train_batch_size = 1
gradient_accumulation_steps = 2
optim = "paged_adamw_32bit"
report_to="wandb"
save_steps = 20
save_total_limit=5
num_train_epochs = 2
logging_steps = 10
learning_rate = 2e-4
max_grad_norm = 0.3
warmup_ratio = 0.03
lr_scheduler_type = "linear"

In [22]:
training_arguments = TrainingArguments(
    output_dir=output_dir,
    per_device_train_batch_size=per_device_train_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    optim=optim,
    num_train_epochs=num_train_epochs,
    save_steps=save_steps,
    save_total_limit=save_total_limit,
    logging_steps=logging_steps,
    report_to = report_to,
    learning_rate=learning_rate,
    bf16=True,
    max_grad_norm=max_grad_norm,
    warmup_ratio=warmup_ratio,
    group_by_length=True,
    lr_scheduler_type=lr_scheduler_type,
)

In [23]:
from trl import SFTTrainer

max_seq_length = 1024

In [24]:
trainer = SFTTrainer(
    model=model,
    train_dataset=train_dataset,
    peft_config=peft_config,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    tokenizer=tokenizer,
    args=training_arguments,
)

Map:   0%|          | 0/12210 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


In [25]:
for name, module in trainer.model.named_modules():
    if "norm" in name:
        module = module.to(torch.float32)

# 8. Training

In [26]:
trainer.train()

  0%|          | 0/12210 [00:00<?, ?it/s]

{'loss': 2.0978, 'grad_norm': 0.1142578125, 'learning_rate': 5.449591280653951e-06, 'epoch': 0.0}
{'loss': 2.408, 'grad_norm': 0.171875, 'learning_rate': 1.0899182561307902e-05, 'epoch': 0.0}
{'loss': 2.7818, 'grad_norm': 0.1611328125, 'learning_rate': 1.6348773841961854e-05, 'epoch': 0.0}
{'loss': 3.1363, 'grad_norm': 0.263671875, 'learning_rate': 2.1798365122615804e-05, 'epoch': 0.01}
{'loss': 3.9882, 'grad_norm': 1.40625, 'learning_rate': 2.7247956403269757e-05, 'epoch': 0.01}
{'loss': 2.4287, 'grad_norm': 0.193359375, 'learning_rate': 3.269754768392371e-05, 'epoch': 0.01}
{'loss': 2.2763, 'grad_norm': 0.1953125, 'learning_rate': 3.8147138964577664e-05, 'epoch': 0.01}
{'loss': 2.385, 'grad_norm': 0.380859375, 'learning_rate': 4.359673024523161e-05, 'epoch': 0.01}
{'loss': 2.5278, 'grad_norm': 0.83984375, 'learning_rate': 4.9046321525885565e-05, 'epoch': 0.01}
{'loss': 2.5841, 'grad_norm': 6.65625, 'learning_rate': 5.4495912806539515e-05, 'epoch': 0.02}
{'loss': 1.8774, 'grad_norm': 

In [None]:
model_to_save = trainer.model.module if hasattr(trainer.model, 'module') else trainer.model  # Take care of distributed/parallel training
model_to_save.save_pretrained("./results/experi_07")

In [None]:
lora_config = LoraConfig.from_pretrained("./results/experi_07")
model = get_peft_model(model, lora_config)

In [None]:
text = "### instruction: AI의 정의에 대해 설명해줘.\n\n### output:"

In [None]:
device = "cuda:0"

In [None]:
inputs = tokenizer(text, return_tensors="pt").to(device)
outputs = model.generate(**inputs, max_new_tokens=500)



In [None]:
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

### instruction: AI의 정의에 대해 설명해줘.

### output: AI는 인공지능을 말합니다.​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​


In [None]:
print(len(outputs[0]))

520


In [None]:
print(tokenizer.decode(outputs[0], skip_special_tokens=False))

<s> ### instruction: AI의 정의에 대해 설명해줘.

### output: AI는 인공지능을 말합니다.​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​​


In [None]:
outputs[0]

tensor([    1,   835, 15278, 29901,   319, 29902, 30708, 32984, 31054, 32550,
        32750, 31435, 45189, 29889,    13,    13,  2277, 29937,  1962, 29901,
          319, 29902, 31081, 37651, 31286, 32047, 32111, 29889, 30166, 30166,
        30166, 30166, 30166, 30166, 30166, 30166, 30166, 30166, 30166, 30166,
        30166, 30166, 30166, 30166, 30166, 30166, 30166, 30166, 30166, 30166,
        30166, 30166, 30166, 30166, 30166, 30166, 30166, 30166, 30166, 30166,
        30166, 30166, 30166, 30166, 30166, 30166, 30166, 30166, 30166, 30166,
        30166, 30166, 30166, 30166, 30166, 30166, 30166, 30166, 30166, 30166,
        30166, 30166, 30166, 30166, 30166, 30166, 30166, 30166, 30166, 30166,
        30166, 30166, 30166, 30166, 30166, 30166, 30166, 30166, 30166, 30166,
        30166, 30166, 30166, 30166, 30166, 30166, 30166, 30166, 30166, 30166,
        30166, 30166, 30166, 30166, 30166, 30166, 30166, 30166, 30166, 30166,
        30166, 30166, 30166, 30166, 30166, 30166, 30166, 30166, 