In [None]:
from google.colab import drive

# Mount Google Drive at /content/drive
drive.mount('/content/drive')

In [None]:
!pip install -U "transformers>=4.46.0" "trl==0.9.6" "peft>=0.13.0" "accelerate>=0.34.2" "bitsandbytes>=0.43.3"


In [None]:
# -*- coding: utf-8 -*-
"""
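Fine-tune Qwen3-8B on a SNOMED term-definition JSONL dataset (chat "messages" format)
- QLoRA (4-bit quantization) + PEFT (LoRA adapters)
- Each conversation is rendered with the tokenizer's chat template, then trained with TRL's SFTTrainer
- Progress reporting: tqdm progress bar + console output at every logging step + TensorBoard logging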
"""

import os, json
from dataclasses import dataclass
from typing import Dict, List, Any

import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    TrainerCallback,            # ✅ 추가: 콘솔 로그 콜백용
)
from trl import SFTTrainer
from peft import LoraConfig

# ============== User settings ==============
MODEL_NAME = "/content/drive/MyDrive/DILAB/qwen3-8b"   # base checkpoint path; a hub id (e.g. "Qwen/Qwen3-8B") could be used instead
DATA_JSONL = "/content/drive/MyDrive/DILAB/OK/DI_LAB/MARS_Datathon/Datasets/SNOMED_CT_datasets/snomed_term_definition_only.jsonl"

OUTPUT_DIR = "/content/drive/MyDrive/DILAB/OK/DI_LAB/MARS_Datathon/Models/qwen3_8b_snomed_lora"
MICRO_BATCH = 4
GRAD_ACCUM = 8                  # effective batch size = MICRO_BATCH * GRAD_ACCUM
LR = 2e-5
EPOCHS = 2
MAX_SEQ_LEN = 1024
USE_FLASH_ATTN = False
# True only when CUDA is available and device 0 reports a compute-capability major version >= 8
BF16 = torch.cuda.is_available() and torch.cuda.get_device_capability(0)[0] >= 8

# TensorBoard log directory (stored under OUTPUT_DIR)
TB_LOGDIR = os.path.join(OUTPUT_DIR, "tb_logs")
# ========================================

def get_tokenizer(model_name: str):
    """Load the fast tokenizer for ``model_name`` and make sure it has a pad token.

    Args:
        model_name: Path or identifier handed to ``AutoTokenizer.from_pretrained``.

    Returns:
        The loaded tokenizer; when it defines no pad token, the EOS token is
        assigned as the pad token before returning.
    """
    tokenizer = AutoTokenizer.from_pretrained(
        model_name,
        use_fast=True,
        trust_remote_code=True,
    )
    # Nothing to patch when the checkpoint already ships a pad token.
    if tokenizer.pad_token is not None:
        return tokenizer
    tokenizer.pad_token = tokenizer.eos_token
    return tokenizer

def format_with_chat_template(tokenizer, messages: List[Dict[str, str]]) -> str:
    """Render a conversation into one string using the tokenizer's chat template.

    Args:
        tokenizer: Object exposing ``apply_chat_template``.
        messages: Conversation turns, passed to the template unchanged.

    Returns:
        The rendered, untokenized text. No generation prompt is appended, so
        the string ends where the supplied conversation ends.
    """
    # The template is rendered as plain text (tokenize=False). Truncation is a
    # tokenization option and does nothing in that mode, so it is not passed;
    # any length limit has to be enforced wherever the text is tokenized.
    return tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=False,
    )

def make_dataset(tokenizer, data_path: str):
    """Load a JSON(L) file and turn every record into a single ``text`` column.

    Args:
        tokenizer: Tokenizer whose chat template renders each conversation.
        data_path: Location of the data file; each record must carry a
            ``messages`` field.

    Returns:
        The ``train`` split with all original columns removed and a ``text``
        column holding the chat-formatted conversation.
    """
    raw = load_dataset("json", data_files=data_path, split="train")

    def to_text(record):
        rendered = format_with_chat_template(tokenizer, record["messages"])
        return {"text": rendered}

    # Dropping every source column leaves only the rendered text.
    return raw.map(to_text, remove_columns=raw.column_names)

# ✅ 스텝별로 콘솔에 손실/학습률을 출력하는 콜백
class LossPrinterCallback(TrainerCallback):
    """Print selected training metrics to the console on every log event.

    Only the keys listed in ``_KEYS`` are printed, in that order, and only
    when they are present in the log payload.
    """

    # Metrics to report, in display order.
    _KEYS = ("loss", "learning_rate", "grad_norm", "epoch")

    @staticmethod
    def _format_metric(key, value):
        """Return ``"key: value"`` with a numeric format suited to the metric."""
        try:
            number = float(value)
        except (TypeError, ValueError):
            # A non-numeric entry must not raise inside a logging hook and
            # abort the training run; print it verbatim instead.
            return f"{key}: {value}"
        # Learning rates are typically around 1e-5, where fixed-point with six
        # decimals keeps only one or two significant digits.
        spec = ".4e" if key == "learning_rate" else ".6f"
        return f"{key}: {number:{spec}}"

    def on_log(self, args, state, control, logs=None, **kwargs):
        """Print one line of metrics prefixed with the current global step."""
        if not logs:
            return
        parts = [self._format_metric(k, logs[k]) for k in self._KEYS if k in logs]
        if parts:
            print(f"[step {state.global_step}] " + " | ".join(parts))

def main():
    """Run the QLoRA fine-tuning pipeline from model loading to saving.

    Steps, in order: load ``MODEL_NAME`` with 4-bit NF4 quantization, load the
    tokenizer, build the LoRA configuration, prepare the dataset from
    ``DATA_JSONL``, train with ``SFTTrainer`` (packing enabled), then save the
    trained model through the trainer and the tokenizer into ``OUTPUT_DIR``.
    """
    # Both the quantization compute dtype and the model dtype follow the BF16 flag.
    compute_dtype = torch.bfloat16 if BF16 else torch.float16
    attention_backend = "flash_attention_2" if USE_FLASH_ATTN else "eager"

    quant_cfg = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_use_double_quant=True,
        bnb_4bit_compute_dtype=compute_dtype,
    )
    base_model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        trust_remote_code=True,
        quantization_config=quant_cfg,
        attn_implementation=attention_backend,
        torch_dtype=compute_dtype,
        device_map="auto",
    )
    tokenizer = get_tokenizer(MODEL_NAME)

    # LoRA adapters on the attention and MLP projection layers.
    adapter_cfg = LoraConfig(
        r=32,
        lora_alpha=64,
        lora_dropout=0.05,
        bias="none",
        task_type="CAUSAL_LM",
        target_modules=[
            "q_proj", "k_proj", "v_proj", "o_proj",
            "gate_proj", "up_proj", "down_proj",
        ],
    )

    dataset = make_dataset(tokenizer, DATA_JSONL)

    # Optimisation schedule.
    schedule_kwargs = dict(
        num_train_epochs=EPOCHS,
        per_device_train_batch_size=MICRO_BATCH,
        gradient_accumulation_steps=GRAD_ACCUM,
        learning_rate=LR,
        lr_scheduler_type="cosine",
        warmup_ratio=0.05,
    )
    # Progress bar and logging: a log event every 10 steps, tqdm enabled,
    # TensorBoard events written to TB_LOGDIR.
    logging_kwargs = dict(
        logging_steps=10,
        disable_tqdm=False,
        report_to=["tensorboard"],
        logging_dir=TB_LOGDIR,
    )
    # Checkpointing, precision and optimiser settings.
    runtime_kwargs = dict(
        save_steps=1000,
        save_total_limit=2,
        bf16=BF16,
        fp16=not BF16,
        optim="paged_adamw_32bit",
        gradient_checkpointing=True,
        max_grad_norm=1.0,
    )
    training_args = TrainingArguments(
        output_dir=OUTPUT_DIR,
        **schedule_kwargs,
        **logging_kwargs,
        **runtime_kwargs,
    )

    sft_trainer = SFTTrainer(
        model=base_model,
        tokenizer=tokenizer,
        peft_config=adapter_cfg,
        train_dataset=dataset,
        dataset_text_field="text",
        max_seq_length=MAX_SEQ_LEN,
        packing=True,
        args=training_args,
        callbacks=[LossPrinterCallback()],   # console metric printer
    )

    sft_trainer.train()
    sft_trainer.save_model()
    tokenizer.save_pretrained(OUTPUT_DIR)

# Entry point: start training when this code is executed directly.
if __name__ == "__main__":
    main()


# 용어 정의 제대로 하는지 검증(영어 출력 버전)

In [None]:
# Inference: Ask the fine-tuned model to define a term
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel
import torch

# ==== Path settings ====
BASE_MODEL   = "/content/drive/MyDrive/DILAB/qwen3-8b"  # base model (or a Hugging Face path)
ADAPTER_DIR  = "/content/drive/MyDrive/DILAB/OK/DI_LAB/MARS_Datathon/Models/qwen3_8b_snomed_lora"  # LoRA adapter
USE_4BIT     = True
# True only when CUDA is available and device 0 reports a compute-capability major version >= 8
USE_BF16     = torch.cuda.is_available() and torch.cuda.get_device_capability(0)[0] >= 8

# ==== Load model and tokenizer ====
# The quantization config stays None (no quantization config passed) unless USE_4BIT is set.
bnb = None
if USE_4BIT:
    bnb = BitsAndBytesConfig(
        load_in_4bit=True, bnb_4bit_quant_type="nf4",
        bnb_4bit_use_double_quant=True,
        bnb_4bit_compute_dtype=torch.bfloat16 if USE_BF16 else torch.float16
    )

model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    quantization_config=bnb,
    trust_remote_code=True,
    device_map="auto",
    dtype=torch.bfloat16 if USE_BF16 else torch.float16,  # `dtype` used in place of the deprecated `torch_dtype`; NOTE(review): confirm the installed transformers version accepts it
)
tok = AutoTokenizer.from_pretrained(BASE_MODEL, use_fast=True, trust_remote_code=True)
# Use EOS for padding when the tokenizer defines no pad token.
if tok.pad_token is None:
    tok.pad_token = tok.eos_token

# Attach the LoRA adapter (skip this step if the model is already merged)
model = PeftModel.from_pretrained(model, ADAPTER_DIR)
model.eval()

def define_term_en(term: str, max_new_tokens: int = 120, deterministic: bool = True) -> str:
    """
    Return a concise English definition (1–2 sentences) for a medical term.

    The system prompt asks for a short English definition; the user turn is
    the term itself. Only the newly generated tokens are decoded, so the
    prompt never leaks into the returned text.

    Args:
        term: Medical term to define.
        max_new_tokens: Upper bound on the number of generated tokens.
        deterministic: When True (default) sampling is disabled
            (``do_sample=False``); when False the model samples with
            ``temperature=0.2`` and ``top_p=0.9``.

    Returns:
        The decoded continuation with special tokens skipped and surrounding
        whitespace stripped.
    """
    messages = [
        {"role": "system", "content":
         "You are a clinical assistant. Provide a concise, accurate definition in English, "
         "limited to 1–2 sentences. Avoid preambles or meta commentary."},
        {"role": "user", "content": term}
    ]

    # Build the prompt with the model's chat template.
    # NOTE(review): some chat templates can make the model emit a reasoning
    # block before the answer — confirm whether that needs to be disabled here.
    prompt = tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = tok([prompt], return_tensors="pt").to(model.device)

    gen_kwargs = {
        "max_new_tokens": max_new_tokens,
        "do_sample": not deterministic,
        "eos_token_id": tok.eos_token_id,
        "pad_token_id": tok.pad_token_id,
    }
    if not deterministic:
        # Sampling knobs are only meaningful when sampling; passing them with
        # do_sample=False has no effect and triggers generation-flag warnings.
        gen_kwargs["temperature"] = 0.2
        gen_kwargs["top_p"] = 0.9

    with torch.no_grad():
        out = model.generate(**inputs, **gen_kwargs)

    # Slice by token count (not string length) to keep only the new tokens.
    prompt_len = inputs["input_ids"].shape[1]
    new_ids = out[0][prompt_len:]
    return tok.decode(new_ids, skip_special_tokens=True).strip()

# Usage example: print the generated definitions for two sample terms.
print(define_term_en("Asthma"))
print(define_term_en("Myocardial infarction"))



## 한국어 출력 버전

In [None]:
# Inference: Ask the fine-tuned model to define a term
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel
import torch

# ==== Path settings ====
BASE_MODEL   = "/content/drive/MyDrive/DILAB/qwen3-8b"  # base model (or a Hugging Face path)
ADAPTER_DIR  = "/content/drive/MyDrive/DILAB/OK/DI_LAB/MARS_Datathon/Models/qwen3_8b_snomed_lora"  # LoRA adapter
USE_4BIT     = True
# True only when CUDA is available and device 0 reports a compute-capability major version >= 8
USE_BF16     = torch.cuda.is_available() and torch.cuda.get_device_capability(0)[0] >= 8

# ==== Load model and tokenizer ====
# The quantization config stays None (no quantization config passed) unless USE_4BIT is set.
bnb = None
if USE_4BIT:
    bnb = BitsAndBytesConfig(
        load_in_4bit=True, bnb_4bit_quant_type="nf4",
        bnb_4bit_use_double_quant=True,
        bnb_4bit_compute_dtype=torch.bfloat16 if USE_BF16 else torch.float16
    )

model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    quantization_config=bnb,
    trust_remote_code=True,
    device_map="auto",
    dtype=torch.bfloat16 if USE_BF16 else torch.float16,  # `dtype` used in place of the deprecated `torch_dtype`; NOTE(review): confirm the installed transformers version accepts it
)
tok = AutoTokenizer.from_pretrained(BASE_MODEL, use_fast=True, trust_remote_code=True)
# Use EOS for padding when the tokenizer defines no pad token.
if tok.pad_token is None:
    tok.pad_token = tok.eos_token

# Attach the LoRA adapter (skip this step if the model is already merged)
model = PeftModel.from_pretrained(model, ADAPTER_DIR)
model.eval()

def define_term_ko(term: str, max_new_tokens: int = 200, deterministic: bool = True) -> str:
    """
    Return a Korean definition for a medical term generated by the loaded model.

    The system prompt (written in Korean) instructs the model to define the
    term in Korean, concisely and accurately, in 1–3 sentences. Only the newly
    generated tokens are decoded, so the prompt never leaks into the result.

    Args:
        term: Medical term to define.
        max_new_tokens: Upper bound on the number of generated tokens.
        deterministic: When True (default) sampling is disabled
            (``do_sample=False``); when False the model samples with
            ``temperature=0.2`` and ``top_p=0.9``.

    Returns:
        The decoded continuation with special tokens skipped and surrounding
        whitespace stripped.
    """
    # 1) State the Korean-output instruction in the system message.
    messages = [
        {"role": "system", "content":
         "당신은 임상 지식을 가진 의료 보조자입니다. "
         "사용자가 제시한 의학 용어를 한국어로 간결하고 정확하게 정의하세요. "
         "불필요한 서론/메모/추측은 금지하고 1~3문장으로 답하세요."},
        {"role": "user", "content": term}
    ]
    # NOTE(review): some chat templates can make the model emit a reasoning
    # block before the answer — confirm whether that needs to be disabled here.
    prompt = tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

    # 2) Tokenize and move the tensors to the model's device.
    inputs = tok([prompt], return_tensors="pt").to(model.device)

    # 3) Generate. Definitions are normally produced without sampling.
    gen_kwargs = {
        "max_new_tokens": max_new_tokens,
        "do_sample": not deterministic,
        "eos_token_id": tok.eos_token_id,
        "pad_token_id": tok.pad_token_id,
    }
    if not deterministic:
        # Sampling knobs are only meaningful when sampling; passing them with
        # do_sample=False has no effect and triggers generation-flag warnings.
        gen_kwargs["temperature"] = 0.2
        gen_kwargs["top_p"] = 0.9

    with torch.no_grad():
        out = model.generate(**inputs, **gen_kwargs)

    # 4) Keep only the new tokens, sliced by token count, then decode.
    prompt_len = inputs["input_ids"].shape[1]
    new_token_ids = out[0][prompt_len:]
    return tok.decode(new_token_ids, skip_special_tokens=True).strip()

# === Usage example ===
print(define_term_ko("Asthma"))
print(define_term_ko("Myocardial infarction"))
