# Set Up

In [None]:
%%capture
import os
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    # Do this only in Colab notebooks! Otherwise use pip install unsloth
    !pip install --no-deps bitsandbytes accelerate xformers==0.0.29.post3 peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1" huggingface_hub hf_transfer
    !pip install --no-deps unsloth

In [None]:
import sys
from google.colab import drive
import os

# Mount Google Drive and locate the project folder on it.
MOUNT_POINT = '/content/drive'
drive.mount(MOUNT_POINT)

# Build the project path from the mount point so it is absolute and does not
# depend on the notebook's current working directory (the previous relative
# 'drive/MyDrive/...' form only resolved while the cwd was /content).
# When using a different Drive, adjust only this path.
drive_path = os.path.join(MOUNT_POINT, 'MyDrive', 'Colab Notebooks', 'Dacon', 'SentenceOrder')

# Make the project's helper module importable from the Drive folder.
if drive_path not in sys.path:
    sys.path.append(drive_path)
# NOTE(review): names used below and in later cells (set_all_seed, load_config,
# pd, ...) are presumably exported by utility — confirm.
from utility import *

SEED = 42
CONFIG_PATH = drive_path + '/config.yaml'

# Seed first, then read the experiment configuration.
set_all_seed(SEED)
config = load_config(CONFIG_PATH)

In [None]:
# Final training dataframe, produced by augmenting the data.
train_csv_path = drive_path + '/dataset/final_train_df.csv'
train = pd.read_csv(train_csv_path)
len(train)  # the original run reported 61798 rows in total

# Load the Model and the Tokenizer

In [None]:
# Load the base model and its tokenizer through Unsloth.
load_options = {
    'model_name': config['model_name'],  # unsloth/Qwen3-14B
    'max_seq_length': 512,
    'load_in_4bit': True,  # 4-bit quantization
}
model, tokenizer = FastLanguageModel.from_pretrained(**load_options)

# Create a Dataset

In [None]:
# Prompt pieces for one training conversation. The {placeholders} are left
# unformatted here; presumably formatting_prompts fills them per row — confirm.
system_prompt = "You are an expert at understanding the logical flow of sentences. Your task is to arrange four given Korean sentences into a coherent and natural paragraph. Output the ordered sequence of sentence indices, separated by commas. /no_think"
user_prompt = "Provided Sentences:\n0. {sentence_0}\n1. {sentence_1}\n2. {sentence_2}\n3. {sentence_3}"
assistant_prompt = "{answer_0}, {answer_1}, {answer_2}, {answer_3}"

training_message = [
    {"role": role, "content": content}
    for role, content in (
        ("system", system_prompt),
        ("user", user_prompt),
        ("assistant", assistant_prompt),
    )
]

# Render the conversation to a plain-text template (no tokenization, no
# trailing generation prompt, thinking disabled).
train_chat_template = tokenizer.apply_chat_template(
    training_message,
    tokenize=False,
    add_generation_prompt=False,
    enable_thinking=False,
)

In [None]:
# Render one training text per dataframe row, answers included.
train_texts = [
    formatting_prompts(row, train_chat_template, with_answers=True)
    for _, row in train.iterrows()
]

# Single-column dataset; the rendered prompts live under 'text'.
train_dataset = Dataset.from_dict({'text': train_texts})

In [None]:
# Marker that precedes the assistant's answer in each rendered training text:
# the assistant turn header followed by an empty <think></think> section.
# Keep this literal exactly as-is (including the blank lines); presumably it
# must match the text produced by the chat template for the collator to find it.
response_template = """<|im_start|>assistant
<think>

</think>

"""

# NOTE(review): DataCollatorForCompletionOnlyLM may not be available in recent
# TRL releases — confirm the installed TRL version still provides it.
data_collator = DataCollatorForCompletionOnlyLM( # with this collator, tokens other than the answer sequence are excluded from the loss computation
    response_template=response_template,
    tokenizer=tokenizer,
    mlm=False,
    return_tensors="pt",
)

# Set Up for the Training

In [None]:
# Wrap the loaded model via Unsloth's get_peft_model; the adapter
# hyperparameters are taken from the 'lora' section of the config.
lora_settings = config['lora']
model = FastLanguageModel.get_peft_model(model, random_state=SEED, **lora_settings)

In [None]:
# Training hyperparameters, grouped by concern.
training_args = SFTConfig(
    # Output location and reproducibility.
    output_dir=drive_path + "/models/qwen_14b_finetuning-r32-alpha32-unsloth",
    seed=SEED,
    # Batching: 32 sequences x 2 accumulation steps per device per optimizer step.
    per_device_train_batch_size=32,
    gradient_accumulation_steps=2,
    num_train_epochs=3,
    # Precision and optimizer.
    bf16=True,
    optim="paged_adamw_32bit",
    learning_rate=8e-5,
    weight_decay=0.01,
    # Learning-rate schedule.
    lr_scheduler_type="linear",
    warmup_ratio=0.05,
    # Logging.
    logging_strategy="steps",
    logging_steps=20,
    report_to="tensorboard",
    # Checkpointing.
    save_strategy="steps",
    save_steps=100,
    save_total_limit=20,
)

# NOTE(review): no tokenizer/processing_class is passed here, so the trainer
# presumably resolves one on its own — confirm it matches `tokenizer`.
trainer = SFTTrainer(
    model=model,
    train_dataset=train_dataset,
    data_collator=data_collator,
    args=training_args,
)

# Train

In [None]:
# Start fine-tuning. Use the plain call for a first run; switch to the
# commented-out variant below to continue from a saved checkpoint.
trainer.train() # for the first training run
#trainer.train(resume_from_checkpoint=True) # when resuming training

# Trained for 2898 steps in total