# DeepSeek-R1 파인튜닝 노트북

이 노트북은 DeepSeek-R1-Distill-Qwen-1.5B 모델을 페르소나 대화 데이터셋으로 파인튜닝하기 위한 코드입니다.
L4 GPU에 최적화되어 있으며, 4bit 양자화를 사용하지 않고 mixed precision(bf16)을 활용합니다.

In [3]:
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [1]:
# 필요한 라이브러리 설치
!pip install transformers datasets torch fireducks tensorboard

Collecting datasets
  Downloading datasets-3.5.0-py3-none-any.whl.metadata (19 kB)
Collecting fireducks
  Downloading fireducks-1.2.5-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (1.0 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.12.0,>=2023.1.0 (from fsspec[http]<=2024.12.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.12.0-py3-none-any.whl.metadata (11 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64

In [2]:
!pip install peft tqdm



In [4]:
# 필요한 라이브러리 임포트
import os
import glob
# import pandas as pd
import fireducks.pandas as pd
import torch
import unicodedata
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    TrainingArguments,
    Trainer,
    DataCollatorForLanguageModeling,
    TrainerCallback,
    TrainerState,
    TrainerControl
)
from datasets import Dataset
from peft import LoraConfig, get_peft_model
from tqdm import tqdm
from transformers.integrations import TensorBoardCallback
from huggingface_hub import login


## 1. 설정 및 하이퍼파라미터

In [5]:
# 데이터 경로 설정
DATA_DIR = os.path.abspath("/content/drive/MyDrive/ToyProject/for_Colab/LiarHeart_dataset/")
MODEL_ID = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
OUTPUT_DIR = os.path.join(DATA_DIR, "deepseek-r1-finetuned")
TB_LOG_DIR = os.path.join(OUTPUT_DIR, "tensorboard_logs")

# LoRA 설정 (메모리 효율성 중심)
LORA_R = 8                # 기존 16 → 낮은 랭크로 메모리 절약
LORA_ALPHA = 16           # alpha = 2*R 권장
LORA_DROPOUT = 0.15       # 약간의 정규화 강화
LORA_TARGET_MODULES = [
    "q_proj", "k_proj", "v_proj", "o_proj",
    "gate_proj", "up_proj", "down_proj"  # MLP 레이어 추가
]

# 학습 설정 (L4 최적화)
BATCH_SIZE = 4                     # 기존 2 → L4에서 가능한 최대 배치
GRADIENT_ACCUMULATION_STEPS = 8    # 기존 16 → 유효 배치 크기 유지 (4×8=32)
LEARNING_RATE = 3e-4               # 기존 5e-4 → 낮은 LR로 안정성 확보
NUM_EPOCHS = 1                     # 변동 없음 (소형 모델 특성)
MAX_LENGTH = 512                   # 변동 없음 (VRAM 한계)
WARMUP_RATIO = 0.05                # 기존 0.1 → 빠른 워밍업
WEIGHT_DECAY = 0.01                # 기존 0.05 → 과적합 방지 조정

# 처리할 시트 목록
SHEET_NAMES = ["알리바이_대화", "인터뷰_대화", "가쉽_대화"]

## 2. 커스텀 콜백 클래스 정의

In [6]:
# 커스텀 콜백 클래스
class CustomCallback(TrainerCallback):
    def on_step_end(self, args, state, control, **kwargs):
        if state.global_step % 10 == 0:  # 10 스텝마다 진행상황 출력
            print(f"Step {state.global_step}/{state.max_steps} - Loss: {state.log_history[-1]['loss']:.4f}")
        return control

    def on_epoch_end(self, args, state, control, **kwargs):
        print(f"\nEpoch {state.epoch} completed\n")
        return control

    def on_evaluate(self, args, state, control, metrics=None, **kwargs):
        if metrics:
            print(f"\n===== Evaluation Results at Step {state.global_step} =====")
            for key, value in metrics.items():
                print(f"{key}: {value:.4f}")
            print("=" * 50)
        return control

## 3. 데이터 로딩 및 준비 함수

In [7]:
def load_and_prepare_data():
    # Excel 파일 찾기
    search_prefix = "페르소나 데이터_"
    search_suffix = ".xlsx"
    normalized_prefix = unicodedata.normalize('NFC', search_prefix)

    print(f"Searching for files in {DATA_DIR}")
    EXCEL_FILES = []
    for filename in os.listdir(DATA_DIR):
        normalized_filename = unicodedata.normalize('NFC', filename)
        if normalized_filename.startswith(normalized_prefix) and normalized_filename.endswith(search_suffix) and not normalized_filename.startswith("~$"):
            EXCEL_FILES.append(os.path.join(DATA_DIR, filename))
    print(f"Found Excel files: {EXCEL_FILES}")

    dialogue_datas = []
    for excel_file in tqdm(EXCEL_FILES, desc="Loading Excel files"):
        print(f"Processing file: {excel_file}")

        persona_name = excel_file.split('_')[-1].split('.')[0]
        person_dialogue_dfs = []
        for sheet_name in SHEET_NAMES:
            try:
                print(f"  Reading sheet: {sheet_name}")
                # Read Excel with string type for all columns
                df = pd.read_excel(
                    excel_file,
                    sheet_name=sheet_name,
                    dtype=str  # Force string type during reading
                )
                print(f"  Columns in sheet {sheet_name}: {df.columns.tolist()}")

                # Select only the required columns
                required_columns = ['사람 대사', '챗봇 대사', '감정']
                if all(col in df.columns for col in required_columns):
                    df = df[required_columns].copy()
                    # Clean the data
                    df = df.fillna('')  # Replace NaN with empty string
                    df['이름'] = persona_name  # Add persona name
                    person_dialogue_dfs.append(df)
                else:
                    print(f"  Warning: Required columns not found in sheet {sheet_name}")
                    print(f"  Available columns: {df.columns.tolist()}")
                    print(f"  Required columns: {required_columns}")
            except Exception as e:
                print(f"  Error processing {excel_file}, sheet {sheet_name}: {e}")

        if person_dialogue_dfs:
            dialoguse = pd.concat(person_dialogue_dfs, ignore_index=True)
            dialogue_datas.append(dialoguse)

    if not dialogue_datas:
        raise ValueError("No data was loaded from the Excel files")

    persona_datas = pd.concat(dialogue_datas, ignore_index=True)

    # Clean and prepare data
    persona_datas = persona_datas.fillna('')  # Replace any remaining NaN
    persona_datas = persona_datas.astype(str)  # Ensure string type

    # Rename columns to match expected format
    column_mapping = {
        '사람 대사': 'Q',
        '챗봇 대사': 'A',
        '감정': 'E',
        '이름': 'N'
    }
    persona_datas = persona_datas.rename(columns=column_mapping)

    # Handle emotion field
    persona_datas['E'] = persona_datas['E'].replace({'': '감정없음', 'nan': '감정없음', 'None': '감정없음'})

    # Remove any rows with empty essential fields
    persona_datas = persona_datas[
        (persona_datas['Q'].str.strip() != '') &
        (persona_datas['A'].str.strip() != '')
    ].reset_index(drop=True)

    print(f"Total examples loaded: {len(persona_datas)}")
    print(f"Final columns: {persona_datas.columns.tolist()}")

    # Format the text for training
    texts = []
    for _, row in persona_datas.iterrows():
        formatted_text = f"Human: {row['Q'].strip()}\nAssistant: {row['A'].strip()}\nEmotion: {row['E'].strip()}\nName: {row['N'].strip()}"
        # formatted_text = f'<startoftext>이름:{row.N}\n질문:{row.Q}\n답변:{row.A}\n감정: {row.E} <endoftext>'
        texts.append(formatted_text)

    # 학습:검증 데이터 분리 (9:1)
    train_size = int(len(texts) * 0.9)

    # Create datasets directly from lists
    try:
        train_dataset = Dataset.from_dict({"text": texts[:train_size]})
        eval_dataset = Dataset.from_dict({"text": texts[train_size:]})

        # Verify the datasets
        print("Train dataset size:", len(train_dataset))
        print("Eval dataset size:", len(eval_dataset))

        # Verify data type
        print("Sample text from train dataset:", train_dataset[0]['text'])

        return train_dataset, eval_dataset

    except Exception as e:
        print("Error creating datasets:", str(e))
        raise

## 4. 모델 및 토크나이저 준비 함수

In [8]:
def prepare_model_and_tokenizer():
    print("Loading model and tokenizer...")
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID,
                                              # bos_token='<startoftext>',
                                              # eos_token='<endoftext>',
                                              use_fast=False,
                                              cache_dir=DATA_DIR)
    # tokenizer.pad_token_id = (
    #     0  # 엉. 우리는 이것이 EOS 토큰과 다르기를 원합니다.
    # )
    # Load in 8bit to save memory
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        torch_dtype=torch.float16,  # Use float16 for better memory efficiency
        device_map="auto",
        cache_dir=DATA_DIR
    )

    # Enable gradient computation
    model.train()  # Set to training mode
    model.config.use_cache = False  # Disable cache for training

    # LoRA 설정 적용
    peft_config = LoraConfig(
        task_type="CAUSAL_LM",
        r=LORA_R,
        lora_alpha=LORA_ALPHA,
        lora_dropout=LORA_DROPOUT,
        target_modules=LORA_TARGET_MODULES,
        bias="none",
        inference_mode=False,
    )

    model = get_peft_model(model, peft_config)

    # Enable training for all LoRA parameters
    for name, param in model.named_parameters():
        if "lora" in name:
            param.requires_grad = True
            print(f"Parameter {name} requires_grad: {param.requires_grad}")
        else:
            param.requires_grad = False

    # Convert trainable parameters to float32
    for param in model.parameters():
        if param.requires_grad:
            param.data = param.data.to(torch.float32)

    model.print_trainable_parameters()


    return model, tokenizer

## 5. 토크나이징 함수

In [9]:
def tokenize_function(examples, tokenizer):
    result = tokenizer(
        examples["text"],
        truncation=True,
        max_length=MAX_LENGTH,
        padding="max_length",
        return_tensors=None
    )
    result["labels"] = result["input_ids"].copy()
    return result

In [10]:
!pip install openpyxl



## 6. 데이터 준비

In [11]:
# 데이터 준비
train_dataset, eval_dataset = load_and_prepare_data()


Searching for files in /content/drive/MyDrive/ToyProject/for_Colab/LiarHeart_dataset
Found Excel files: ['/content/drive/MyDrive/ToyProject/for_Colab/LiarHeart_dataset/페르소나 데이터_신유영.xlsx', '/content/drive/MyDrive/ToyProject/for_Colab/LiarHeart_dataset/페르소나 데이터_이도윤.xlsx', '/content/drive/MyDrive/ToyProject/for_Colab/LiarHeart_dataset/페르소나 데이터_박강헌.xlsx']


Loading Excel files:   0%|          | 0/3 [00:00<?, ?it/s]

Processing file: /content/drive/MyDrive/ToyProject/for_Colab/LiarHeart_dataset/페르소나 데이터_신유영.xlsx
  Reading sheet: 알리바이_대화
  Columns in sheet 알리바이_대화: ['대분류', '소분류', 'Unnamed: 2', '사람 대사', '챗봇 대사', '감정']
  Reading sheet: 인터뷰_대화
  Columns in sheet 인터뷰_대화: ['대분류', '소분류', 'Unnamed: 2', '사람 대사', '챗봇 대사', '감정']
  Reading sheet: 가쉽_대화


Loading Excel files:  33%|███▎      | 1/3 [00:09<00:18,  9.21s/it]

  Columns in sheet 가쉽_대화: ['대분류', '소분류', '사람 대사', '챗봇 대사', '감정']
Processing file: /content/drive/MyDrive/ToyProject/for_Colab/LiarHeart_dataset/페르소나 데이터_이도윤.xlsx
  Reading sheet: 알리바이_대화
  Columns in sheet 알리바이_대화: ['Unnamed: 0', '소분류', 'Unnamed: 2', '사람 대사', '챗봇 대사', '감정']
  Reading sheet: 인터뷰_대화
  Columns in sheet 인터뷰_대화: ['Unnamed: 0', '소분류', 'Unnamed: 2', '사람 대사', '챗봇 대사', '감정']
  Reading sheet: 가쉽_대화


Loading Excel files:  67%|██████▋   | 2/3 [00:20<00:10, 10.55s/it]

  Columns in sheet 가쉽_대화: ['대분류', '소분류', '사람 대사', '챗봇 대사', '감정']
Processing file: /content/drive/MyDrive/ToyProject/for_Colab/LiarHeart_dataset/페르소나 데이터_박강헌.xlsx
  Reading sheet: 알리바이_대화
  Columns in sheet 알리바이_대화: ['대분류', '소분류', 'Unnamed: 2', '사람 대사', '챗봇 대사', '감정']
  Reading sheet: 인터뷰_대화
  Columns in sheet 인터뷰_대화: ['대분류', '소분류', 'Unnamed: 2', '사람 대사', '챗봇 대사', '감정']
  Reading sheet: 가쉽_대화


Loading Excel files: 100%|██████████| 3/3 [00:32<00:00, 10.75s/it]

  Columns in sheet 가쉽_대화: ['대분류', '소분류', '사람 대사', '챗봇 대사', '감정']
Total examples loaded: 40760
Final columns: ['Q', 'A', 'E', 'N']





Train dataset size: 36684
Eval dataset size: 4076
Sample text from train dataset: Human: 범행 전날 00시에는 누구와 있었나요?
Assistant: 저... 자취해서... 혼자 있었어요.......
Emotion: 감정없음
Name: 신유영


In [12]:
print(train_dataset[0])

{'text': 'Human: 범행 전날 00시에는 누구와 있었나요?\nAssistant: 저... 자취해서... 혼자 있었어요.......\nEmotion: 감정없음\nName: 신유영'}


## 7. 모델 및 토크나이저 준비

In [13]:
# 모델 및 토크나이저 준비
model, tokenizer = prepare_model_and_tokenizer()

Loading model and tokenizer...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
Sliding Window Attention is enabled but not implemented for `sdpa`; unexpected results may be encountered.


Parameter base_model.model.model.layers.0.self_attn.q_proj.lora_A.default.weight requires_grad: True
Parameter base_model.model.model.layers.0.self_attn.q_proj.lora_B.default.weight requires_grad: True
Parameter base_model.model.model.layers.0.self_attn.k_proj.lora_A.default.weight requires_grad: True
Parameter base_model.model.model.layers.0.self_attn.k_proj.lora_B.default.weight requires_grad: True
Parameter base_model.model.model.layers.0.self_attn.v_proj.lora_A.default.weight requires_grad: True
Parameter base_model.model.model.layers.0.self_attn.v_proj.lora_B.default.weight requires_grad: True
Parameter base_model.model.model.layers.0.self_attn.o_proj.lora_A.default.weight requires_grad: True
Parameter base_model.model.model.layers.0.self_attn.o_proj.lora_B.default.weight requires_grad: True
Parameter base_model.model.model.layers.0.mlp.gate_proj.lora_A.default.weight requires_grad: True
Parameter base_model.model.model.layers.0.mlp.gate_proj.lora_B.default.weight requires_grad: T

In [14]:
print(model)

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): Qwen2ForCausalLM(
      (model): Qwen2Model(
        (embed_tokens): Embedding(151936, 1536)
        (layers): ModuleList(
          (0-27): 28 x Qwen2DecoderLayer(
            (self_attn): Qwen2Attention(
              (q_proj): lora.Linear(
                (base_layer): Linear(in_features=1536, out_features=1536, bias=True)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.15, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=1536, out_features=8, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=8, out_features=1536, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              (k_proj): lora.Linear(
 

In [15]:
class GPUDataCollator:
    def __init__(self, base_collator, device):
        self.base_collator = base_collator
        self.device = device

    def __call__(self, examples):
        batch = self.base_collator(examples)
        # Move batch to GPU
        return {k: v.to(self.device) if isinstance(v, torch.Tensor) else v
                for k, v in batch.items()}

## 8. 데이터 토크나이징

In [16]:


# 데이터 토크나이징
print("Tokenizing datasets...")
tokenized_train = train_dataset.map(
    lambda x: tokenize_function(x, tokenizer),
    batched=True,
    remove_columns=["text"]
)

tokenized_eval = eval_dataset.map(
    lambda x: tokenize_function(x, tokenizer),
    batched=True,
    remove_columns=["text"]
)

Tokenizing datasets...


Map:   0%|          | 0/36684 [00:00<?, ? examples/s]

Map:   0%|          | 0/4076 [00:00<?, ? examples/s]

In [17]:
import torch.multiprocessing as mp
mp.set_start_method('spawn', force=True)

## 9. 학습 설정 및 실행

In [None]:
# Get model's device
device = next(model.parameters()).device
print(f"\nModel is on device: {device}")

base_data_collator = DataCollatorForLanguageModeling(
        tokenizer=tokenizer,
        mlm=False,
        pad_to_multiple_of=8
    )

# Wrap the data collator with GPU support
data_collator = GPUDataCollator(base_data_collator, device)

os.makedirs(TB_LOG_DIR, exist_ok=True)

training_args = TrainingArguments(
    # 기본 설정
    output_dir=OUTPUT_DIR,
    overwrite_output_dir=True,

    # 학습 기본 파라미터
    num_train_epochs=3,                    # 증가: 더 나은 수렴을 위해
    per_device_train_batch_size=2,         # 감소: VRAM 관리를 위해
    per_device_eval_batch_size=4,          # 감소: VRAM 관리를 위해
    gradient_accumulation_steps=16,        # 증가: 유효 배치 크기 유지 (2*16=32)

    # 옵티마이저 설정
    learning_rate=2e-4,                    # 감소: 더 안정적인 학습을 위해
    weight_decay=0.01,                     # 유지
    warmup_ratio=0.03,                     # 감소: 빠른 초기 학습을 위해

    # 저장 및 평가 전략
    logging_steps=5,                       # 감소: 더 자주 로깅
    save_strategy="steps",
    save_steps=200,                        # 감소: 더 자주 저장
    evaluation_strategy="steps",
    eval_steps=200,                        # 감소: 더 자주 평가
    save_total_limit=3,                    # 추가: 디스크 공간 관리

    # 메모리 최적화
    bf16=True,                             # 유지: L4 GPU에 최적화
    gradient_checkpointing=True,           # 유지: 메모리 효율성
    dataloader_num_workers=2,              # 감소: 시스템 부하 감소

    # 성능 최적화
    remove_unused_columns=True,
    load_best_model_at_end=True,
    metric_for_best_model="loss",
    greater_is_better=False,

    # 배치 최적화
    group_by_length=True,                  # 유지: 시퀀스 길이별 그룹핑
    length_column_name="length",           # 추가: 길이 기반 배치 구성

    # 추가 최적화
    prediction_loss_only=True,
    label_names=["labels"],
    ddp_find_unused_parameters=False,      # 추가: DDP 최적화
    torch_compile=True,                    # 추가: PyTorch 2.0 컴파일러 활성화

    # 메모리 관리
    max_grad_norm=1.0,                     # 추가: 그래디언트 클리핑
    gradient_checkpointing_kwargs={
        "use_reentrant": False            # 추가: 메모리 누수 방지
    }
)

# Trainer 초기화 및 학습
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_eval,
    data_collator=data_collator,
    callbacks=[CustomCallback(), TensorBoardCallback()],
)

# Move model to device before training
model.to(device)

# 학습 시작
print("Starting training...")
try:
    # Verify model state before training
    print("\nModel state before training:")
    print(f"Training mode: {model.training}")
    print(f"Device: {next(model.parameters()).device}")
    print("Trainable parameters:")
    for name, param in model.named_parameters():
        if param.requires_grad:
            print(f"{name}: requires_grad={param.requires_grad}, dtype={param.dtype}, device={param.device}")

    # Start training
    trainer.train()

except Exception as e:
    print(f"\nError during training: {str(e)}")
    print("\nDebug information:")
    print(f"Model state: {model.training}")
    print(f"Device: {next(model.parameters()).device}")
    print("Sample data batch:")
    sample_batch = data_collator([tokenized_train[0]])
    for k, v in sample_batch.items():
        print(f"{k}: shape {v.shape}, dtype {v.dtype}, device {v.device}")
        # Move tensor to CPU for inspection
        v = v.cpu()
        print(f"First few values: {v.flatten()[:5]}")
    raise


Model is on device: cuda:0


You are adding a <class 'transformers.integrations.integration_utils.TensorBoardCallback'> to the callbacks of this Trainer, but there is already one. The currentlist of callbacks is
:DefaultFlowCallback
TensorBoardCallback
WandbCallback
CustomCallback


Starting training...

Model state before training:
Training mode: True
Device: cuda:0
Trainable parameters:
base_model.model.model.layers.0.self_attn.q_proj.lora_A.default.weight: requires_grad=True, dtype=torch.float32, device=cuda:0
base_model.model.model.layers.0.self_attn.q_proj.lora_B.default.weight: requires_grad=True, dtype=torch.float32, device=cuda:0
base_model.model.model.layers.0.self_attn.k_proj.lora_A.default.weight: requires_grad=True, dtype=torch.float32, device=cuda:0
base_model.model.model.layers.0.self_attn.k_proj.lora_B.default.weight: requires_grad=True, dtype=torch.float32, device=cuda:0
base_model.model.model.layers.0.self_attn.v_proj.lora_A.default.weight: requires_grad=True, dtype=torch.float32, device=cuda:0
base_model.model.model.layers.0.self_attn.v_proj.lora_B.default.weight: requires_grad=True, dtype=torch.float32, device=cuda:0
base_model.model.model.layers.0.self_attn.o_proj.lora_A.default.weight: requires_grad=True, dtype=torch.float32, device=cuda:0
bas

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33makillness38[0m ([33makillness38-hongik-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


## 10. 모델 저장 및 최종 평가

In [None]:
# 모델 저장
print("Saving model...")
model.save_pretrained(OUTPUT_DIR)
tokenizer.save_pretrained(OUTPUT_DIR)

# 최종 평가
print("Final evaluation...")
final_metrics = trainer.evaluate()
print("\n===== Final Evaluation Results =====")
for key, value in final_metrics.items():
    print(f"{key}: {value:.4f}")
print("=" * 50)

print(f"Training complete. Model saved to {OUTPUT_DIR}")
print(f"TensorBoard logs saved to {TB_LOG_DIR}")