In [None]:
%%capture
!pip install fire torch peft transformers

In [None]:
import os
import sys

import fire
import torch

from peft import (
    PeftModel,
    PeftConfig,
)

from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
)

# 훈련가중치 최종 누적 경로를 얻기 위해
from transformers.trainer_utils import get_last_checkpoint

In [None]:
# Shared Google Drive prefix for every path used by this notebook.
_COLAB_ROOT = "/content/drive/MyDrive/ToyProject/for_Colab"

# Directory of the base (pretrained) model.
base_model: str = _COLAB_ROOT + "/Resource/polyglot-ko-1.3b"
# Root directory holding the trained PEFT weights.
peft_root: str = _COLAB_ROOT + "/Train/lora/f16/"
# Directory where the merged pretrained model is written.
output_dir: str = _COLAB_ROOT + "/Resource/polyglot-ko-1.3b-loraf16"
# Maximum size of each saved model shard file.
max_shard_size: str = "10GB"

# --------------------------------------
# 0. Preparation
# --------------------------------------
# 0-1. Resolve the adapter directory (last_peft_path) under the PEFT root
# ------------------
# This step runs first because the base-model fallback in 0-2 reads
# adapter_config.json from the resolved adapter directory.
if not os.path.isdir(peft_root):
    raise FileNotFoundError(f"PEFT directory({peft_root}) does not exist.")

last_peft_path = get_last_checkpoint(peft_root)
if last_peft_path is None:
    if os.path.isfile(os.path.join(peft_root, "adapter_config.json")):
        # No checkpoint sub-directory was found, but an adapter config sits
        # directly in the root, so use the root itself as the adapter directory.
        last_peft_path = peft_root
    else:
        raise ValueError(
            f"No checkpoint directory or adapter_config.json found in PEFT directory({peft_root})."
        )
print(
    f"Checkpoint detected: {last_peft_path}\n"
    "----------------------------------------"
)
# ------------------
# 0-2. Resolve the base-model directory (base_path)
# ------------------
base_path = None
if base_model and os.path.isdir(base_model):
    base_path = base_model
if not base_path:
    # Fall back to the base model recorded in the adapter's configuration
    # (adapter_config.json) inside the resolved adapter directory.
    peft_config = PeftConfig.from_pretrained(last_peft_path)
    base_path = peft_config.base_model_name_or_path
# Validate explicitly: base_path may still be None or point to a missing directory.
if not base_path or not os.path.isdir(base_path):
    raise ValueError(
        f"Base model directory not found: {base_path!r}. "
        "Set `base_model` to an existing local directory, e.g. base_model='resources/polyglot-ko'"
    )

Checkpoint detected: /content/drive/MyDrive/ToyProject/for_Colab/Train/lora/f16/checkpoint-360
----------------------------------------


In [None]:
# --------------------------------------
# 1. Load the base model + tokenizer and the PEFT weights
# --------------------------------------
# 1-1. Load the pretrained base model (komodel)
# ------------------
print(
    f"Loading model: {base_path}\n"
    "----------------------------------------"
)
# The model is loaded unquantized in float16.
# NOTE: a 4-bit bitsandbytes variant used to be kept here inside a bare string
# literal. It was removed because, as the cell's last expression, it was echoed
# as cell output, and it referenced names (BitsAndBytesConfig, device_map) that
# the visible cells of this notebook never define.
komodel = AutoModelForCausalLM.from_pretrained(
    base_path,
    torch_dtype=torch.float16,
)

Loading model: /content/drive/MyDrive/ToyProject/for_Colab/Resource/polyglot-ko-1.3b
----------------------------------------


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

"\nkomodel = AutoModelForCausalLM.from_pretrained(\n    base_path,\n    # 4비트 로드 양자화 적용\n    use_safetensors=True,\n    quantization_config=BitsAndBytesConfig(\n        load_in_4bit=True,\n        load_in_8bit=False,\n        bnb_4bit_use_double_Quant=False,\n        bnb_4bit_compute_dtype=torch.bfloat16,\n        bnb_4bit_quant_type='nf4'\n    ),\n    torch_dtype=torch.float16,\n    device_map=device_map,\n    #trust_remote_code=True,        # HuggingFace 원격저장소에서 코드를 다운로드할 때, 코드를 신뢰할지 여부\n)\n"

In [None]:
# ------------------
# 1-2. Load the pretrained tokenizer (tokenizer)
# ------------------
print(f"Loading tokenizer: {base_path}", "----------------------------------------", sep="\n")
tokenizer = AutoTokenizer.from_pretrained(base_path)

# ------------------
# 1-3. Attach the PEFT weights to the pretrained model
# ------------------
print(f"Loading PEFT weights: {last_peft_path}", "----------------------------------------", sep="\n")
lora_model = PeftModel.from_pretrained(komodel, last_peft_path, torch_dtype=torch.float16)

# --------------------------------------
# 2. Merge the PEFT weights into the pretrained model, then drop the PEFT wrapper
# --------------------------------------
print("Merging and Unload...", "----------------------------------------", sep="\n")
merged_model = lora_model.merge_and_unload(safe_merge=True, progressbar=True)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loading tokenizer: /content/drive/MyDrive/ToyProject/for_Colab/Resource/polyglot-ko-1.3b
----------------------------------------
Loading PEFT weights: /content/drive/MyDrive/ToyProject/for_Colab/Train/lora/f16/checkpoint-360
----------------------------------------
Merging and Unload...
----------------------------------------


Unloading and merging model: 100%|██████████| 367/367 [00:01<00:00, 198.87it/s]


In [None]:
# --------------------------------------
# 3. Switch the merged model to evaluation (inference) mode
# --------------------------------------
# No training happens in this cell: training mode is turned OFF before the
# model is exported.
print(
    "Switching merged model to eval mode...\n"
    "----------------------------------------"
)
# eval() is the idiomatic spelling of train(False); the trailing `;` keeps
# Jupyter from echoing the returned module's full repr as cell output.
merged_model.eval();

Training...
----------------------------------------


GPTNeoXForCausalLM(
  (gpt_neox): GPTNeoXModel(
    (embed_in): Embedding(30080, 2048)
    (emb_dropout): Dropout(p=0.0, inplace=False)
    (layers): ModuleList(
      (0-23): 24 x GPTNeoXLayer(
        (input_layernorm): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)
        (post_attention_layernorm): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)
        (post_attention_dropout): Dropout(p=0.0, inplace=False)
        (post_mlp_dropout): Dropout(p=0.0, inplace=False)
        (attention): GPTNeoXSdpaAttention(
          (rotary_emb): GPTNeoXRotaryEmbedding()
          (query_key_value): Linear(in_features=2048, out_features=6144, bias=True)
          (dense): Linear(in_features=2048, out_features=2048, bias=True)
          (attention_dropout): Dropout(p=0.0, inplace=False)
        )
        (mlp): GPTNeoXMLP(
          (dense_h_to_4h): Linear(in_features=2048, out_features=8192, bias=True)
          (dense_4h_to_h): Linear(in_features=8192, out_features=2048, bias=True

In [None]:
# --------------------------------------
# 4. Normalise the merged model's state dict
# --------------------------------------
print(
    "Correcting state_dict...\n"
    "----------------------------------------"
)
# Take the state dict of the merged model.
merged_model_sd = merged_model.state_dict()
# Build a copy of the mapping without any entry whose key mentions "lora",
# and with the "base_model.model." wrapper prefix stripped from the keys.
deloreanized_sd = {
    k.replace("base_model.model.", ""): v
    for k, v in merged_model_sd.items()
    if "lora" not in k
}
# --------------------------------------
# 5. Save [merged model & tokenizer]
# --------------------------------------
# 5-1. Save the model
# ------------------
print(
    f"Saving model: {output_dir}\n"
    "----------------------------------------"
)
# Save through merged_model, the same object the state dict was taken from,
# instead of relying on the base-model handle having been mutated in place.
merged_model.save_pretrained(
    output_dir,
    state_dict=deloreanized_sd,
    max_shard_size=max_shard_size,
)
# ------------------
# 5-2. Save the tokenizer
# ------------------
print(
    f"Saving tokenizer: {output_dir}\n"
    "----------------------------------------"
)
tokenizer.save_pretrained(output_dir)

Correcting state_dict...
----------------------------------------
Saving model: /content/drive/MyDrive/ToyProject/for_Colab/Resource/polyglot-ko-1.3b-loraf16
----------------------------------------
Saving tokenizer: /content/drive/MyDrive/ToyProject/for_Colab/Resource/polyglot-ko-1.3b-loraf16
----------------------------------------


('/content/drive/MyDrive/ToyProject/for_Colab/Resource/polyglot-ko-1.3b-loraf16/tokenizer_config.json',
 '/content/drive/MyDrive/ToyProject/for_Colab/Resource/polyglot-ko-1.3b-loraf16/special_tokens_map.json',
 '/content/drive/MyDrive/ToyProject/for_Colab/Resource/polyglot-ko-1.3b-loraf16/tokenizer.json')