In [1]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

# Hub id of the base checkpoint.
base_model_name = "meta-llama/Meta-Llama-3.1-8B-Instruct"

# Load the base weights in half precision and let `device_map="auto"` decide
# where they are placed.
# NOTE(review): `base_model` is not referenced by the other cells visible in
# this notebook — confirm whether this load is still needed.
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name, torch_dtype=torch.float16, device_map="auto"
)

# Matching tokenizer: reuse the EOS token as the pad token and pad on the right.
tokenizer = AutoTokenizer.from_pretrained(base_model_name)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [2]:
import torch
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer

# Local directory the adapter and its tokenizer are loaded from.
peft_model_id = "./model/model_5e-05_alpha-128_r-256"

# Load the model and the LoRA adapter in a single call via AutoPeftModelForCausalLM.
peft_model = AutoPeftModelForCausalLM.from_pretrained(
    peft_model_id, torch_dtype=torch.float16, device_map="auto"
)

# Merge the adapter into the base model.
merged_model = peft_model.merge_and_unload()

# Load the tokenizer from the same directory (this rebinds `tokenizer`),
# reusing the EOS token as the pad token and padding on the right.
tokenizer = AutoTokenizer.from_pretrained(peft_model_id)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

# Save the merged model and the tokenizer side by side.
save_merged_path = "./model/merged_model"
merged_model.save_pretrained(
    save_merged_path,
    max_shard_size="5GB",
    safe_serialization=True,
)
# Kept as the last expression so the tuple of written tokenizer files is shown
# as the cell output.
tokenizer.save_pretrained(save_merged_path)


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

('./model/merged_model/tokenizer_config.json',
 './model/merged_model/special_tokens_map.json',
 './model/merged_model/tokenizer.json')

In [6]:
import os 
from huggingface_hub import HfApi
from dotenv import load_dotenv

# Publish the merged model folder to the Hugging Face Hub.
# The access token is read from the local "./credit-env" dotenv file so it is
# never written into the notebook itself.
load_dotenv("./credit-env")
api = HfApi()
# os.getenv returns None when the variable is unset.
# NOTE(review): presumably huggingface_hub then falls back to a locally stored
# login — confirm that is the intended behavior.
hf_key = os.getenv("HUGGINGFACE_TOKEN")

username = "daje"
MODEL_NAME = 'Meta-Llama-3.1-8B-Instruct-de-identification'
# Build the repository id once so create_repo and upload_folder cannot drift apart.
repo_id = f"{username}/{MODEL_NAME}"

# exist_ok=True keeps this cell re-runnable: without it, create_repo raises once
# the repository already exists and the upload below is never reached.
api.create_repo(
    token=hf_key,
    repo_id=repo_id,
    repo_type="model",
    exist_ok=True,
)

# Last expression of the cell: the returned CommitInfo is shown as the output.
api.upload_folder(
    token=hf_key,
    repo_id=repo_id,
    folder_path="./model/merged_model",
)


model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

Upload 5 LFS files:   0%|          | 0/5 [00:00<?, ?it/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/daje/Meta-Llama-3.1-8B-Instruct-de-identification/commit/4f7a91d2d0ea9dbb2813bf6ca893504c7080fad4', commit_message='Upload folder using huggingface_hub', commit_description='', oid='4f7a91d2d0ea9dbb2813bf6ca893504c7080fad4', pr_url=None, repo_url=RepoUrl('https://huggingface.co/daje/Meta-Llama-3.1-8B-Instruct-de-identification', endpoint='https://huggingface.co', repo_type='model', repo_id='daje/Meta-Llama-3.1-8B-Instruct-de-identification'), pr_revision=None, pr_num=None)