### Phi3-4K-mini 모델불러오기

In [1]:
# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM
import os
import torch
import signal
from contextlib import contextmanager
import time

@contextmanager
def timeout(seconds):
    def signal_handler(signum, frame):
        raise TimeoutError(f"Timeout after {seconds} seconds")
    signal.signal(signal.SIGALRM, signal_handler)
    signal.alarm(seconds)
    try:
        yield
    finally:
        signal.alarm(0)

# 캐시 디렉토리 설정
cache_dir = os.path.join(os.getcwd(), "model_cache")
os.makedirs(cache_dir, exist_ok=True)

try:
    print("토크나이저 로딩 시작...")
    with timeout(30):  # 30초 타임아웃
        tokenizer = AutoTokenizer.from_pretrained(
            "microsoft/Phi-3-mini-4k-instruct",
            use_fast=False,
            local_files_only=False,
            trust_remote_code=True,
            cache_dir=cache_dir
        )
    print("토크나이저 로딩 완료!")

    print("\n모델 로딩 시작...")
    with timeout(60):  # 60초 타임아웃
        model = AutoModelForCausalLM.from_pretrained(
            "microsoft/Phi-3-mini-4k-instruct",
            torch_dtype=torch.float16,
            trust_remote_code=True,
            device_map="auto",
            cache_dir=cache_dir,
            low_cpu_mem_usage=True
        )
    print("모델 로딩 완료!")

    # 테스트 메시지
    messages = [
        {"role": "user", "content": "Hello, how are you?"},
    ]
    
    print("\n입력 처리 중...")
    inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        tokenize=True,
        return_dict=True,
        return_tensors="pt"
    ).to(model.device)
    
    print("생성 시작...")
    with timeout(30):  # 30초 타임아웃
        outputs = model.generate(
            **inputs, 
            max_new_tokens=50,  # 더 짧게 설정
            num_return_sequences=1,
            temperature=0.7
        )
    response = tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True)
    print("\n응답:", response)

except TimeoutError as te:
    print(f"\n시간 초과 발생: {str(te)}")
except Exception as e:
    print(f"\n에러 발생: {str(e)}")
    import traceback
    print("\n상세 에러 정보:")
    print(traceback.format_exc())

토크나이저 로딩 시작...


tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]


시간 초과 발생: Timeout after 30 seconds


In [1]:
%pip install -U "transformers>=4.45" accelerate safetensors sentencepiece tokenizers

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [None]:
# pip install -U transformers datasets peft accelerate bitsandbytes trl
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from trl import SFTTrainer

BASE = "microsoft/Phi-3-mini-4k-instruct"
tokenizer = AutoTokenizer.from_pretrained(BASE)

# (옵션) 4비트 로드로 QLoRA
load_kwargs = dict(
    device_map="auto",
    torch_dtype="auto",
    load_in_4bit=True, # QLoRA면 True 그냥LoRA면 False
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True
)
model = AutoModelForCausalLM.from_pretrained(BASE, **load_kwargs)
model = prepare_model_for_kbit_training(model)

lora = LoraConfig(
    r=16, lora_alpha=32, target_modules=["q_proj","k_proj","v_proj","o_proj","gate_proj","up_proj","down_proj"],
    lora_dropout=0.05, bias="none", task_type="CAUSAL_LM"
)
model = get_peft_model(model, lora)

# 예시용 데이터셋: {"messages":[{"role":"user","content":"..."},{"role":"assistant","content":"..."}]}
ds = load_dataset("json", data_files={"train":"train.jsonl", "eval":"eval.jsonl"})

def format_example(ex):
    # Phi-3 chat 템플릿 활용해서 supervised target 만들기
    msgs = ex["messages"]
    text = tokenizer.apply_chat_template(msgs, add_generation_prompt=False, tokenize=False)
    return {"text": text}

ds = ds.map(format_example, remove_columns=ds["train"].column_names)

args = TrainingArguments(
    output_dir="phi3_lora_out",
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    gradient_accumulation_steps=8,
    learning_rate=2e-4,
    num_train_epochs=1,
    logging_steps=20,
    save_steps=200,
    eval_strategy="steps",
    fp16=True,                               # Ampere↑에서 bf16도 가능
    report_to="none"
)

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=ds["train"],
    eval_dataset=ds["eval"],
    args=args,
    dataset_text_field="text",
    packing=True,                            # 여러 샘플을 한 시퀀스로 패킹(효율↑)
    max_seq_length=2048
)

trainer.train()
model.save_pretrained("phi3_lora_out/adapter")
tokenizer.save_pretrained("phi3_lora_out/adapter")


tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

In [4]:
%pip install trl

Collecting trl
  Using cached trl-0.23.1-py3-none-any.whl.metadata (11 kB)
Collecting transformers>=4.56.1 (from trl)
  Using cached transformers-4.57.1-py3-none-any.whl.metadata (43 kB)
Collecting huggingface_hub>=0.21.0 (from accelerate>=1.4.0->trl)
  Using cached huggingface_hub-0.35.3-py3-none-any.whl.metadata (14 kB)
Collecting tokenizers<=0.23.0,>=0.22.0 (from transformers>=4.56.1->trl)
  Using cached tokenizers-0.22.1-cp39-abi3-win_amd64.whl.metadata (6.9 kB)
Using cached trl-0.23.1-py3-none-any.whl (564 kB)
Using cached transformers-4.57.1-py3-none-any.whl (12.0 MB)
Using cached huggingface_hub-0.35.3-py3-none-any.whl (564 kB)
Using cached tokenizers-0.22.1-cp39-abi3-win_amd64.whl (2.7 MB)
Installing collected packages: huggingface_hub, tokenizers, transformers, trl

  Attempting uninstall: huggingface_hub

    Found existing installation: huggingface-hub 0.29.3

    Uninstalling huggingface-hub-0.29.3:

      Successfully uninstalled huggingface-hub-0.29.3

   ----------------

  You can safely remove it manually.
