In [1]:
import pandas as pd
import numpy as np
import torch
import transformers
import bitsandbytes as bnb
import os
import wandb
import json

from transformers import AdamW, AutoModelForCausalLM, BitsAndBytesConfig
from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model
from tqdm import tqdm
from peft import get_peft_model, PromptTuningConfig, TaskType, PromptTuningInit, PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments,BitsAndBytesConfig
# Run index: used as the suffix of the output directories (results{num}, merged_peft{num}).
num = 1

2024-04-01 15:30:26.996365: I tensorflow/core/util/port.cc:111] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-04-01 15:30:27.013137: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-04-01 15:30:27.013155: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-04-01 15:30:27.013166: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-04-01 15:30:27.016438: I tensorflow/core/platform/cpu_feature_g

# 데이터 로드

In [None]:
# Read the fine-tuning table; the CSV's first column becomes the DataFrame index.
data = pd.read_csv("fine-tuning_dataset.csv", index_col=0)

In [None]:
# Declare the tokenizer that turns the training prompts into token ids.
tokenizer = AutoTokenizer.from_pretrained('LDCC/LDCC-SOLAR-10.7B', eos_token='</s>')

# Configure padding BEFORE the prompts are padded to max_length further down.
# The later tokenizer cell applies these same two settings, but by then the data has
# already been padded with this tokenizer, so they must be set here to take effect.
# The language-modeling collator ignores labels equal to its pad token; padding with
# a different token here would leave padded positions contributing to the loss.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

In [None]:
# Maximum sequence length in tokens; every prompt is padded or truncated to it.
max_length = 660

# Build one prompt string per row of the dataset.
# The two columns are walked positionally with zip: the DataFrame index comes from
# the CSV (index_col=0), and label lookups such as data['질문'][i] fail or return
# the wrong rows when that index is not exactly 0..n-1 or contains duplicates.
# NOTE(review): the EOS token sits right after the question and nothing terminates
# the answer — confirm this prompt layout is intended.
input_texts = [
    f"### Question: {question} {tokenizer.eos_token} \n### Answer:{answer}"
    for question, answer in zip(data['질문'], data['답변'])
]

# Encode every prompt into a PyTorch tensor of token ids
# (presumably shape (1, max_length); the next cell concatenates them along dim 0).
formatted_data = [
    tokenizer.encode(
        text,
        return_tensors='pt',
        padding='max_length',
        truncation=True,
        max_length=max_length,
    )
    for text in input_texts
]

print('Done.')
print(len(formatted_data))

In [None]:
# Concatenate the per-example tensors along dim 0 into a single tensor.
# The guard makes the cell safe to re-run: after the first run formatted_data is
# already a tensor, and torch.cat expects a sequence of tensors, not a tensor.
if not torch.is_tensor(formatted_data):
    formatted_data = torch.cat(formatted_data, dim=0)

# 모델 로드

In [None]:
# Identifier of the base model, and the directory that receives the training output.
model_name = "LDCC/LDCC-SOLAR-10.7B"
output_dir = f"results{num}"

# Quantization settings: load in 4-bit, NF4 quant type, double quantization, fp16 compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
)

# Load the base model with the quantization config; device_map={"": 0} maps the root module to device 0.
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    torch_dtype=torch.float16,
    device_map={"": 0},
)

# Disable use_cache on the model config, then run PEFT's k-bit training preparation.
base_model.config.use_cache = False
base_model = prepare_model_for_kbit_training(base_model)

In [None]:
# The tokenizer has to be declared again here because of a tokenizer issue.
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.padding_side = "right"  # Fix weird overflow issue with fp16 training
tokenizer.pad_token = tokenizer.eos_token  # reuse the EOS token as the padding token

In [None]:
import os
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments,BitsAndBytesConfig
from datasets import load_dataset
from trl import SFTTrainer
from peft import AutoPeftModelForCausalLM, LoraConfig, get_peft_model, prepare_model_for_kbit_training

# Helper that counts the model's parameters
def print_trainable_parameters(model):
    """
    Print how many of the model's parameters are trainable.

    Walks ``model.named_parameters()`` once, summing ``numel()`` over every
    parameter and, separately, over those whose ``requires_grad`` is true.

    Args:
        model: Any object exposing ``named_parameters()`` that yields
            ``(name, parameter)`` pairs, where each parameter has ``numel()``
            and ``requires_grad``.

    Returns:
        None. A single summary line is written to stdout.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        count = param.numel()
        all_param += count
        if param.requires_grad:
            trainable_params += count
    # A model without any parameters would otherwise raise ZeroDivisionError.
    trainable_pct = 100 * trainable_params / all_param if all_param else 0.0
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {trainable_pct}"
    )
# LoRA configuration: rank 16, alpha 8, dropout 0.05, no bias training,
# applied to the seven projection modules listed in target_modules.
config = LoraConfig(
    task_type="CAUSAL_LM",
    r=16,
    lora_alpha=8,
    lora_dropout=0.05,
    bias="none",
    target_modules=[
        "q_proj", "up_proj", "o_proj", "k_proj",
        "down_proj", "gate_proj", "v_proj",
    ],
)

# Wrap the prepared base model with the LoRA adapters, then report parameter counts.
# NOTE(review): the report is taken from base_model rather than the wrapped `model`;
# presumably equivalent if PEFT injects the adapters in place — confirm.
model = get_peft_model(base_model, config)
print_trainable_parameters(base_model)


In [None]:
# Training: fine-tune the LoRA-wrapped model on the tokenized prompts.
trainer = transformers.Trainer(
    model=model,
    train_dataset=formatted_data,
    args=transformers.TrainingArguments(
        per_device_train_batch_size=4,
        gradient_accumulation_steps=1,
        num_train_epochs=1,
        # max_steps=50,
        learning_rate=3.5e-5,
        max_grad_norm=0.3,
        fp16=True,  # Use mixed precision (16-bit and 32-bit floats together: faster, less memory)
        logging_steps=10,
        output_dir=output_dir,
        optim="paged_adamw_32bit",
    ),
    # mlm=False selects causal language modeling rather than masked language modeling.
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)
trainer.train()
trainer.save_model(output_dir)

# Save the final adapter and tokenizer under <output_dir>/final_checkpoint.
# output_dir itself is deliberately left unchanged: overwriting it with the
# sub-path made a second run of this cell train into, and save under, a nested
# .../final_checkpoint/final_checkpoint directory.
final_checkpoint_dir = os.path.join(output_dir, "final_checkpoint")
trainer.model.save_pretrained(final_checkpoint_dir)
tokenizer.save_pretrained(final_checkpoint_dir)


In [None]:
#adapter병합하는 코드 gpu메모리 문제로 커널 재시작 후 하는게 좋음, 아니면 gpu메모리 초기화하면됨
import os
import torch
from transformers import GenerationConfig
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel,AutoPeftModelForCausalLM

# NOTE: `num` is set in the notebook's first cell — run that cell again if the
# kernel was restarted before merging.
# Alternative to a restart: torch.cuda.empty_cache()

# Update the paths accordingly
adapter_dir = f'./results{num}/final_checkpoint'
output_dir = f'./merged_peft{num}'
output_merged_dir = os.path.join(output_dir, "final_merged_checkpoint")

# Load the tokenizer and the adapter model from the adapter directory (fp16, device_map="cuda").
tokenizer = AutoTokenizer.from_pretrained(adapter_dir)
model = AutoPeftModelForCausalLM.from_pretrained(
    adapter_dir,
    torch_dtype=torch.float16,
    device_map="cuda",
)

# Merge the adapter into the underlying model and keep the merged result.
model = model.merge_and_unload()

# Write the merged model (safe_serialization enabled) and the tokenizer to the same directory.
model.save_pretrained(output_merged_dir, safe_serialization=True)
tokenizer.save_pretrained(output_merged_dir)