# 환경설정

In [1]:
# PEFT로 훈련된 어댑터는 일반적으로 전체 모델보다 훨씬 작기 때문에 공유, 저장 및 가져오기가 편리
!pip install transformers datasets peft accelerate bitsandbytes

Collecting datasets
  Downloading datasets-3.5.1-py3-none-any.whl.metadata (19 kB)
Collecting bitsandbytes
  Downloading bitsandbytes-0.45.5-py3-none-manylinux_2_24_x86_64.whl.metadata (5.0 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2025.3.0,>=2023.1.0 (from fsspec[http]<=2025.3.0,>=2023.1.0->datasets)
  Downloading fsspec-2025.3.0-py3-none-any.whl.metadata (11 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.13.0->peft)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.13.0->peft)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-

# 모델 양자화

In [2]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

In [3]:
model_name = "beomi/KoAlpaca-Polyglot-5.8B"
tokenizer = AutoTokenizer.from_pretrained(model_name)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    torch_dtype=torch.float16,
    load_in_4bit=True  # 4bit 양자화
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/210 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.65M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/185 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/663 [00:00<?, ?B/s]

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


model.safetensors.index.json:   0%|          | 0.00/36.8k [00:00<?, ?B/s]

Fetching 13 files:   0%|          | 0/13 [00:00<?, ?it/s]

model-00006-of-00013.safetensors:   0%|          | 0.00/948M [00:00<?, ?B/s]

model-00004-of-00013.safetensors:   0%|          | 0.00/948M [00:00<?, ?B/s]

model-00008-of-00013.safetensors:   0%|          | 0.00/952M [00:00<?, ?B/s]

model-00002-of-00013.safetensors:   0%|          | 0.00/952M [00:00<?, ?B/s]

model-00003-of-00013.safetensors:   0%|          | 0.00/948M [00:00<?, ?B/s]

model-00007-of-00013.safetensors:   0%|          | 0.00/948M [00:00<?, ?B/s]

model-00005-of-00013.safetensors:   0%|          | 0.00/952M [00:00<?, ?B/s]

model-00001-of-00013.safetensors:   0%|          | 0.00/926M [00:00<?, ?B/s]

model-00009-of-00013.safetensors:   0%|          | 0.00/948M [00:00<?, ?B/s]

model-00010-of-00013.safetensors:   0%|          | 0.00/948M [00:00<?, ?B/s]

model-00012-of-00013.safetensors:   0%|          | 0.00/948M [00:00<?, ?B/s]

model-00011-of-00013.safetensors:   0%|          | 0.00/952M [00:00<?, ?B/s]

model-00013-of-00013.safetensors:   0%|          | 0.00/515M [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/13 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

# lora 설정

In [4]:
from peft import LoraConfig, get_peft_model

In [5]:
lora_config = LoraConfig(
    r=8,
    lora_alpha=32,
    lora_dropout=0.1,
    bias="none",
    task_type="CAUSAL_LM"
)

model = get_peft_model(model, lora_config)

# 데이터 준비

1. 전처리 확실하게
2. 모델학습 하기 전에 데이터 다 불러와서 gpu 아끼기

In [6]:
from datasets import load_dataset
from transformers import TrainingArguments, Trainer, DataCollatorForLanguageModeling

In [15]:
# 1. JSONL 데이터 불러오기
dataset = load_dataset("json", data_files={"train": "AI_feedback_generation.jsonl"})

In [18]:
# 2. 토크나이징
def tokenize(batch):
    prompts = []
    for instruction, input_text, output in zip(batch["instruction"], batch["input"], batch["output"]):
        if isinstance(output, list):
            output = "\n".join(output)
        prompt = f"{instruction}\n{input_text}\n{output}"
        prompts.append(prompt)

    return tokenizer(prompts, truncation=True, padding="max_length", max_length=512)


In [19]:
tokenized_dataset = dataset.map(tokenize, batched=True)

Map:   0%|          | 0/970 [00:00<?, ? examples/s]

In [20]:
# 3. 데이터 콜레이터 (언어 모델용)
data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)

In [21]:
# 4. 학습 설정
training_args = TrainingArguments(
    output_dir="./koalpaca-feedback-lora",
    per_device_train_batch_size=1,
    gradient_accumulation_steps=8,
    num_train_epochs=3,
    logging_dir="./logs",
    save_total_limit=2,
    logging_steps=10,
    save_steps=100,
    fp16=True,
    learning_rate=2e-4,
    report_to="none"
)

In [22]:
# 5. 트레이너 구성 및 학습
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    tokenizer=tokenizer,
    data_collator=data_collator
)

  trainer = Trainer(
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [23]:
trainer.train()



Step,Training Loss
10,3.5143
20,2.4563
30,1.9586
40,1.716
50,1.569
60,1.5131
70,1.463
80,1.4133
90,1.3323
100,1.378


TrainOutput(global_step=363, training_loss=1.3085915214759258, metrics={'train_runtime': 3389.2123, 'train_samples_per_second': 0.859, 'train_steps_per_second': 0.107, 'total_flos': 5.122218366546739e+16, 'train_loss': 1.3085915214759258, 'epoch': 2.9814432989690722})

In [24]:
model.save_pretrained("feedback_adapter")
tokenizer.save_pretrained("feedback_adapter")

('feedback_adapter/tokenizer_config.json',
 'feedback_adapter/special_tokens_map.json',
 'feedback_adapter/tokenizer.json')

In [25]:
!pwd

/content


In [27]:
!zip -r /content/feedback_lora.zip /content/koalpaca-feedback-lora

  adding: content/koalpaca-feedback-lora/ (stored 0%)
  adding: content/koalpaca-feedback-lora/checkpoint-300/ (stored 0%)
  adding: content/koalpaca-feedback-lora/checkpoint-300/training_args.bin (deflated 52%)
  adding: content/koalpaca-feedback-lora/checkpoint-300/tokenizer.json (deflated 83%)
  adding: content/koalpaca-feedback-lora/checkpoint-300/scaler.pt (deflated 60%)
  adding: content/koalpaca-feedback-lora/checkpoint-300/special_tokens_map.json (deflated 65%)
  adding: content/koalpaca-feedback-lora/checkpoint-300/optimizer.pt (deflated 8%)
  adding: content/koalpaca-feedback-lora/checkpoint-300/adapter_config.json (deflated 54%)
  adding: content/koalpaca-feedback-lora/checkpoint-300/scheduler.pt (deflated 56%)
  adding: content/koalpaca-feedback-lora/checkpoint-300/trainer_state.json (deflated 74%)
  adding: content/koalpaca-feedback-lora/checkpoint-300/tokenizer_config.json (deflated 81%)
  adding: content/koalpaca-feedback-lora/checkpoint-300/README.md (deflated 66%)
  ad

In [28]:
trainer.save_model("AI_model")

In [29]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_model = AutoModelForCausalLM.from_pretrained(
    "beomi/KoAlpaca-Polyglot-5.8B",
    torch_dtype=torch.float16,
    device_map="auto",
    load_in_4bit=True
)
tokenizer = AutoTokenizer.from_pretrained("beomi/KoAlpaca-Polyglot-5.8B")

model = PeftModel.from_pretrained(base_model, "feedback_adapter")


The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


Loading checkpoint shards:   0%|          | 0/13 [00:00<?, ?it/s]

In [37]:
instruction = "다음 면접 답변에 피드백을 해줘."
input_text = "과적합은 성능이 오히려 나빠지는 학습의 실패입니다. 이를 피하려면 적절한 학습률 설정도 중요합니다."
prompt = f"{instruction}\n{input_text}\n"

inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
# token_type_ids 제거
inputs.pop("token_type_ids", None)

outputs = model.generate(**inputs, max_new_tokens=300, early_stopping=True)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


다음 면접 답변에 피드백을 해줘.
과적합은 성능이 오히려 나빠지는 학습의 실패입니다. 이를 피하려면 적절한 학습률 설정도 중요합니다.
마지막으로, L1과 L2의 차이를 언급하면 좋습니다.
L1은 과적합, L2는 과편향입니다.
이는 모델의 성능과 직결되므로 중요합니다.
마지막으로, "왜 과적합이 발생하는지"에 대한 설명도 추가하면 좋습니다.

### 답변:과적합은 모델이 너무 많은 입력을 받거나, 너무 작은 입력을 받았을 때 발생합니다. 이는 모델이 새로운 입력을 잘 처리하지 못하기 때문입니다. 이를 피하기 위해서는 적절한 학습률 설정이 필요합니다. L1과 L2의 차이도 중요합니다. L1은 과적합, L2는 과편향입니다. 이는 모델의 성능과 직결되므로 중요합니다. 과적합이 발생하는 이유는 모델이 너무 많은 입력을 받거나 너무 작은 입력을 받기 때문입니다. 이를 피하기 위해서는 적절한 학습률 설정이 필요합니다. 
