# Data download & calling

In [None]:
from huggingface_hub import login
# Authenticate against the Hugging Face Hub before any model download.
# NOTE(review): new_session=False presumably reuses an already-saved token instead of
# prompting again — confirm against the huggingface_hub documentation.
login(new_session = False)

In [None]:
!pip install -U bitsandbytes trl peft

Collecting bitsandbytes
  Downloading bitsandbytes-0.48.2-py3-none-manylinux_2_24_x86_64.whl.metadata (10 kB)
Collecting trl
  Downloading trl-0.24.0-py3-none-any.whl.metadata (11 kB)
Downloading bitsandbytes-0.48.2-py3-none-manylinux_2_24_x86_64.whl (59.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m59.4/59.4 MB[0m [31m46.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading trl-0.24.0-py3-none-any.whl (423 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m423.1/423.1 kB[0m [31m36.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bitsandbytes, trl
Successfully installed bitsandbytes-0.48.2 trl-0.24.0


In [None]:
import os
import torch
import math

from datasets import load_dataset, load_from_disk, DatasetDict
from tqdm import tqdm
from google.colab import drive
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, EarlyStoppingCallback
from peft import LoraConfig, get_peft_model, PeftModel
from peft.utils.other import prepare_model_for_kbit_training
from trl import SFTTrainer, SFTConfig

In [None]:
# Mount Google Drive; the dataset, checkpoints and adapters used below all live under /content/drive.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Load the dataset stored on Drive; later cells read its 'train' and 'test' splits.
dataset = load_from_disk("/content/drive/MyDrive/Fall2025/CSCI_544/Project/dataset")

# Preparing Data for LLM Feeding
## Change to a specific template (Instruction-Tuning)

In [None]:
def to_sft_text(ex):
  """Render one example as a single instruction-tuning string.

  Reads ex['instruction'] and ex['output'], strips surrounding whitespace from
  each, and returns {"text": ...} laid out as an "### Instruction:" section
  followed by an "### Response:" section.
  """
  instruction = ex['instruction'].strip()
  response = ex['output'].strip()
  rendered = "### Instruction:\n" + instruction + "\n\n### Response:\n" + response
  return {"text": rendered}

In [None]:
# Apply the SFT template to both splits. Every original column is dropped, so each
# processed split carries only the "text" field produced by to_sft_text.
dataset_proc = DatasetDict({
    split: dataset[split].map(
        to_sft_text, remove_columns=dataset[split].column_names
    )
    for split in ('train', 'test')
})

# Train & Save Model

In [None]:
# Call Model & Tokenizer
MODEL_ID = 'google/gemma-3-1b-it'
# Sequence-length budget in tokens.
MAX_SEQ_LEN = 512

# 4-bit quantization settings: NF4 quant type, float16 compute dtype, double quantization on.
bnb = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
)

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
# Fall back to the EOS token for padding only when the tokenizer ships without a pad token.
if tokenizer.pad_token is None:
  tokenizer.pad_token = tokenizer.eos_token

# Load the base model with the quantization config above.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    quantization_config = bnb,
    device_map = 'auto',
)

tokenizer_config.json:   0%|          | 0.00/1.16M [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/4.69M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/33.4M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/35.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/662 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/899 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.00G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/215 [00:00<?, ?B/s]

In [None]:
# QLoRA setting
# Prepare the quantized base model for training before the adapters are attached.
model = prepare_model_for_kbit_training(model)
# Left disabled: gradient checkpointing is requested through the SFTConfig instead.
# model.gradient_checkpointing_enable()
# LoRA adapter hyper-parameters (rank 32, alpha 32, dropout 0.05, no bias terms);
# the target_modules list names the projection layers that receive adapters.
lora_config = LoraConfig(
    r = 32,
    lora_alpha = 32,
    target_modules = ['q_proj', 'k_proj', 'v_proj', 'o_proj', 'gate_proj', 'up_proj', 'down_proj'],
    lora_dropout = 0.05,
    bias = 'none',
    task_type = 'CAUSAL_LM',
)
# Wrap the model so that `model` now refers to the PEFT model.
model = get_peft_model(model, lora_config)

In [None]:
# Training configuration for the QLoRA fine-tune.
args = SFTConfig(
    output_dir="/content/drive/MyDrive/Fall2025/CSCI_544/Project/fine_tuned_gemma",
    per_device_train_batch_size=4,      # with 4-bit weights, going from 2 up to 4 is usually fine
    gradient_accumulation_steps=8,      # 4 * 8 = 32 sequences per optimizer step on each device
    learning_rate=2e-4,
    warmup_ratio=0.05,
    max_steps=5000,                     # validate with a short run first, then increase
    bf16=False, fp16=True,              # use fp16 when bfloat16 is not available
    gradient_checkpointing=True,
    lr_scheduler_type="cosine",
    logging_steps=10, save_steps=200, eval_steps=200,
    save_total_limit=2, report_to="none",
    optim="paged_adamw_8bit",           # optimizer recommended for QLoRA
    # Apply the notebook-wide token budget. Without this MAX_SEQ_LEN is never used and
    # training truncates at TRL's own default, while evaluation truncates prompts at 512.
    max_length=MAX_SEQ_LEN,
    # NOTE(review): the datasets passed below carry only a "text" column. Per the TRL
    # docs, completion-only loss applies to prompt/completion datasets — confirm this
    # flag actually masks the instruction tokens here.
    completion_only_loss=True,

    # Options required for early stopping / best-checkpoint selection
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    greater_is_better=False,
    eval_strategy="steps",
    save_strategy="steps",
)

# NOTE(review): the 'test' split is used as eval_dataset, so it also drives early
# stopping and best-model selection; later accuracy numbers on it are not held-out.
trainer = SFTTrainer(
    model=model,
    args=args,
    train_dataset=dataset_proc['train'],
    eval_dataset=dataset_proc['test'],
    processing_class=tokenizer,
    # Stop once eval_loss has failed to improve for 10 consecutive evaluations.
    callbacks=[EarlyStoppingCallback(early_stopping_patience=10)],
)

Adding EOS to train dataset:   0%|          | 0/563200 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/563200 [00:00<?, ? examples/s]

Truncating train dataset:   0%|          | 0/563200 [00:00<?, ? examples/s]

Adding EOS to eval dataset:   0%|          | 0/11000 [00:00<?, ? examples/s]

Tokenizing eval dataset:   0%|          | 0/11000 [00:00<?, ? examples/s]

Truncating eval dataset:   0%|          | 0/11000 [00:00<?, ? examples/s]

In [None]:
# Run the fine-tuning loop with the trainer configured in the previous cell.
trainer.train()

The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'eos_token_id': 1}.


Step,Training Loss,Validation Loss,Entropy,Num Tokens,Mean Token Accuracy
200,0.0984,0.096163,0.095922,1838763.0,0.962042
400,0.0777,0.076944,0.080315,3677115.0,0.967844
600,0.0716,0.071009,0.072562,5518241.0,0.969098
800,0.0704,0.069605,0.067504,7354377.0,0.97017
1000,0.0693,0.067556,0.067404,9192151.0,0.970468
1200,0.0671,0.067192,0.069514,11026414.0,0.971023
1400,0.0666,0.065398,0.065717,12864745.0,0.971289
1600,0.065,0.064622,0.0651,14706783.0,0.971299
1800,0.0657,0.065269,0.063752,16543277.0,0.971556
2000,0.0643,0.064037,0.062821,18384364.0,0.971479


In [None]:
# Persist the trained PEFT model and the tokenizer side by side in one Drive folder.
ADAPTER_DIR = "/content/drive/MyDrive/Fall2025/CSCI_544/Project/gemma_qlora_adapter_first_try"
trainer.model.save_pretrained(ADAPTER_DIR)
tokenizer.save_pretrained(ADAPTER_DIR)

### 여기까지만 수행 후, gemma_qlora_adapter_first_try 폴더만 보내주면 됨!

# Call the saved model

In [None]:
# Rebuild the inference model: quantized base weights plus the saved adapter.
# NOTE(review): this rebinds ADAPTER_DIR to a "checkpoint-4800" folder directly under
# Project/, whereas training writes checkpoints under Project/fine_tuned_gemma and the
# explicit save goes to gemma_qlora_adapter_first_try — confirm this folder exists.
ADAPTER_DIR = "/content/drive/MyDrive/Fall2025/CSCI_544/Project/checkpoint-4800"
BASE_MODEL  = "google/gemma-3-1b-it"

# Same 4-bit quantization settings as used for training.
bnb = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
)

base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    quantization_config = bnb,
    device_map='auto',
)
# The tokenizer is read from the adapter folder, not from the base model id.
tokenizer = AutoTokenizer.from_pretrained(ADAPTER_DIR)
if tokenizer.pad_token is None:
  tokenizer.pad_token = tokenizer.eos_token
# Attach the adapter weights to the base model and switch to eval mode.
model = PeftModel.from_pretrained(base_model, ADAPTER_DIR)
model.eval()

In [None]:
# 0) Tokenizer safety settings (run once before evaluation)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "left"   # left padding is recommended for decoder-only models

import re
import torch
from tqdm.auto import tqdm

def _normalize_text(s: str) -> str:
    return re.sub(r"\s+", " ", (s or "").strip()).lower()

def evaluate_on_dataset(model, tokenizer, test_ds, batch_size=8, max_new_tokens=16):
    """Generate a response for every example in ``test_ds`` and score its first line.

    Args:
        model: object exposing ``.device`` and ``.generate(...)``.
        tokenizer: callable tokenizer exposing ``batch_decode``, ``pad_token_id``
            and ``eos_token_id``.
        test_ds: sized, integer-indexable collection of dict examples. Supported
            layouts are ``prompt``/``completion``, ``instruction``/``output``, or a
            single ``text`` field containing "### Response:". Examples in any
            other layout are skipped and do not count towards ``total``.
        batch_size: number of examples generated together.
        max_new_tokens: generation budget per example.

    Returns:
        dict with ``total`` (examples scored), ``actual_accuracy`` (share whose
        parsed action equals the gold action) and ``exact_match`` (share whose
        first output line equals the gold first line after normalisation).
    """
    device = model.device
    total = exact_correct = actual_correct = 0

    ACTIONS = {"check","call","fold","bet","raise","allin"}
    ALIAS   = {"all-in":"allin","all_in":"allin"}
    # A two-word "all in" would be torn apart by the whitespace split below and never
    # be recognised, so every all-in spelling is collapsed into one token first.
    allin_re = re.compile(r"\ball[\s_-]+in\b")

    def action_only(s: str):
        """Return the first recognised action keyword in ``s``, or None."""
        if not s:
            return None
        text = allin_re.sub("allin", s.strip().lower())
        for t in re.split(r"[\s:;/,]+", text):
            a = ALIAS.get(t, t)
            if a in ACTIONS:
                return a
        return None

    def first_line(s):
        """Return the stripped first line of ``s`` ('' for empty or None input)."""
        lines = (s or "").strip().splitlines()
        return lines[0].strip() if lines else ""

    for i in tqdm(range(0, len(test_ds), batch_size), desc="Evaluating"):
        batch = [test_ds[j] for j in range(i, min(i+batch_size, len(test_ds)))]

        # Extract (prompt, gold) according to the layout of each example
        prompts, golds = [], []
        for ex in batch:
            if "prompt" in ex and "completion" in ex:
                prompts.append(ex["prompt"]); golds.append(ex["completion"])
            elif "instruction" in ex and "output" in ex:
                prompts.append(ex["instruction"]); golds.append(ex["output"])
            elif "text" in ex and "### Response:" in ex["text"]:
                inst = ex["text"].split("### Instruction:",1)[-1].split("### Response:",1)[0].strip()
                gold = ex["text"].split("### Response:",1)[-1].strip()
                prompts.append(inst); golds.append(gold)
            # any other layout is skipped

        if not prompts:
            continue

        # Wrap each prompt in the same SFT template that was used for training
        enc = tokenizer(
            [f"### Instruction:\n{p.strip()}\n\n### Response:\n" for p in prompts],
            return_tensors="pt", padding=True, truncation=True, max_length=512
        )
        enc = {k: v.to(device) for k, v in enc.items()}

        with torch.no_grad():
            outs = model.generate(
                **enc,
                max_new_tokens=max_new_tokens,
                do_sample=False,             # deterministic decoding
                top_p=1.0,
                pad_token_id=tokenizer.pad_token_id,
                eos_token_id=tokenizer.eos_token_id,
            )

        # Drop the prompt columns so only the generated continuation is decoded
        gen_only = outs[:, enc["input_ids"].shape[1]:]
        preds = tokenizer.batch_decode(gen_only, skip_special_tokens=True)

        for pred_raw, gold_raw in zip(preds, golds):
            total += 1

            pred_line = first_line(pred_raw)
            gold_line = first_line(gold_raw)

            # Exact match: ignores case and whitespace differences
            if _normalize_text(pred_line) == _normalize_text(gold_line):
                exact_correct += 1

            # Actual accuracy: compare the parsed action only
            pa, ga = action_only(pred_line), action_only(gold_line)
            if pa is not None and ga is not None and pa == ga:
                actual_correct += 1

    return {
        "total": total,
        "actual_accuracy": actual_correct / max(1, total),
        "exact_match":     exact_correct   / max(1, total),
    }
# Score the loaded model on the whole processed test split and print the resulting metrics dict.
metrics = evaluate_on_dataset(model, tokenizer, dataset_proc["test"], batch_size=8, max_new_tokens=16)
print(metrics)


# Inspect mismatched predictions (first 100 test examples)

In [None]:
import torch, re

# 0) Safety settings: make sure a pad token exists, pad on the left, and use eval mode
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "left"
model.eval()

# 1) 액션 추출 함수 (AA 기준)
ACTIONS = {"check", "call", "fold", "bet", "raise", "allin"}
ALIAS   = {"all-in": "allin", "all_in": "allin", "all in": "allin"}

def _extract_action(s: str):
    if not s:
        return None
    toks = re.split(r"[\s:;/,]+", s.strip().lower())
    for t in toks:
        a = ALIAS.get(t, t)
        if a in ACTIONS:
            return a
    return None

# 2) 데이터에서 prompt/gold 꺼내기
def _get_prompt_and_gold(ex):
    if "prompt" in ex and "completion" in ex:
        return ex["prompt"], ex["completion"]
    if "instruction" in ex and "output" in ex:
        return ex["instruction"], ex["output"]
    if "text" in ex and "### Response:" in ex["text"]:
        inst = ex["text"].split("### Instruction:",1)[-1].split("### Response:",1)[0].strip()
        gold = ex["text"].split("### Response:",1)[-1].strip()
        return inst, gold
    return None, None

# 3) Walk the first examples of the test split and print every action mismatch
test_ds = dataset_proc["test"]
limit = min(100, len(test_ds))
shown = 0
evaluated = 0   # examples actually sent through the model (unsupported layouts are skipped)

for idx in range(limit):
    ex = test_ds[idx]
    inst, gold = _get_prompt_and_gold(ex)
    if inst is None:
        continue
    evaluated += 1

    prompt = f"### Instruction:\n{inst.strip()}\n\n### Response:\n"

    # Model inference
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outs = model.generate(
            **inputs,
            max_new_tokens=64,
            do_sample=False,
            top_p=1.0,
            pad_token_id=tokenizer.pad_token_id,
            eos_token_id=tokenizer.eos_token_id,
            return_dict_in_generate=True
        )

    # Decode the full sequence (prompt + generation) for display
    full_text = tokenizer.decode(outs.sequences[0], skip_special_tokens=True)

    # Decode only the generated continuation for the action comparison
    gen_only = outs.sequences[0][inputs["input_ids"].shape[1]:]
    pred_text = tokenizer.decode(gen_only, skip_special_tokens=True).strip()

    # Compare on the first line only, mirroring evaluate_on_dataset. Otherwise an action
    # word appearing later in the 64-token continuation could hide a case the metric
    # counts as wrong.
    pred_lines = pred_text.splitlines()
    gold_lines = (gold or "").strip().splitlines()
    pred_line = pred_lines[0].strip() if pred_lines else ""
    gold_line = gold_lines[0].strip() if gold_lines else ""

    pa, ga = _extract_action(pred_line), _extract_action(gold_line)
    is_match = (pa is not None and ga is not None and pa == ga)

    if not is_match:
        shown += 1
        print("\n" + "="*80)
        print(f"[index] {idx}")
        print("### Instruction:")
        print(inst.strip())
        print("\n### Gold:")
        print((gold or '').strip())
        print("\n### Model Output (FULL: prompt + generated):")
        print(full_text.strip())
        print("\n### Parsed Actions:")
        print(f"  gold = {ga}, pred = {pa}")
        print("="*80)

# Report against the number of examples actually evaluated, not the nominal limit.
print(f"\n총 {evaluated}개 중 {shown}개 mismatch 케이스를 출력했습니다.")
