<a href="https://colab.research.google.com/github/ermk006/BERT_PEFT_LoRA/blob/main/Japanese_AI_vs_Human_Classifier_Colab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 🇯🇵 Japanese AI vs Human Classifier — Colab Notebook
このノートブックは **日本語短文（50–300字）** を **AI生成(1) / 人間(0)** に2値分類するための学習テンプレートです。  
バックエンドは Hugging Face Transformers + Datasets + PEFT(LoRA)。GPUランタイム推奨（Colabの「ランタイム」→「ランタイムのタイプを変更」→ GPU）。


In [None]:
#@title 1) セットアップ（インストール）
!pip -q install --upgrade pip
!pip -q install transformers>=4.41.0 datasets>=2.20.0 accelerate>=0.30.0 peft>=0.11.0 scikit-learn>=1.3.0 pandas>=2.0.0 numpy>=1.24.0
# Install fugashi for MecabTokenizer
!pip -q install mecab-python3 unidic-lite
!pip -q install fugashi ipadic

import torch, platform
print("Torch:", torch.__version__, "| CUDA available:", torch.cuda.is_available(), "| Python:", platform.python_version())

In [None]:
#@title 2) Google Drive をマウント（任意）
# Optional: run this cell when the CSV files (train.csv / valid.csv) live on
# Google Drive. Set `use_drive` to True in the form to enable the mount.
use_drive = False  #@param {type:"boolean"}
if use_drive:
    # Imported only when mounting is actually requested.
    from google.colab import drive
    drive.mount('/content/drive')
    print("Drive mounted at /content/drive")

In [None]:
#@title 3) データの場所を指定
# Expected format: CSV with columns: text,label (0=human, 1=ai)
# Example: /content/train.csv, /content/valid.csv
# Both paths are read by the training cell via load_dataset("csv", ...).
train_csv = "/content/train.csv"  #@param {type:"string"}
valid_csv = "/content/valid.csv"  #@param {type:"string"}

In [None]:
#@title 4) 学習設定と実行（LoRA推奨）
from datasets import load_dataset
from transformers import (AutoTokenizer, AutoConfig, AutoModelForSequenceClassification,
                          DataCollatorWithPadding, TrainingArguments, Trainer, EarlyStoppingCallback)
from transformers.trainer_utils import EvalPrediction
from peft import LoraConfig, TaskType, get_peft_model
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, roc_auc_score
import numpy as np, json, os, torch, torch.nn as nn

# Hyperparameters and paths for the training run. The trailing `#@param`
# annotations are Colab form-field directives and must stay on these lines.

# Checkpoint used for the tokenizer, the config and the classifier weights.
model_name_or_path = "cl-tohoku/bert-base-japanese-v3"  #@param ["cl-tohoku/bert-base-japanese-v3","ku-nlp/deberta-v3-base-japanese","rinna/japanese-roberta-base","xlm-roberta-large"] {allow-input: true}
# Destination for checkpoints, the saved model, the tokenizer and eval_report.json.
output_dir = "/content/out/bertv3_ai_detector"  #@param {type:"string"}
# When True the model is wrapped with a LoRA adapter built from the lora_* values below.
use_lora = True  #@param {type:"boolean"}
lora_r = 16  #@param {type:"integer"}
lora_alpha = 32  #@param {type:"integer"}
lora_dropout = 0.05  #@param {type:"number"}
# Truncation length (in tokens) passed to the tokenizer.
max_length = 256  #@param {type:"integer"}
num_train_epochs = 4  #@param {type:"integer"}
# Per-device batch sizes for training and evaluation.
train_bs = 16  #@param {type:"integer"}
eval_bs = 32  #@param {type:"integer"}
learning_rate = 5e-5  #@param {type:"number"}
# Patience handed to EarlyStoppingCallback.
early_stopping_patience = 2  #@param {type:"integer"}
# When True, the decision threshold is swept on the validation set after training.
tune_threshold = True  #@param {type:"boolean"}
seed = 42  #@param {type:"integer"}

# Create the output directory up front; an existing directory is reused.
os.makedirs(output_dir, exist_ok=True)

# Read the train/validation CSV files into `raw`, keyed by split name.
csv_splits = {"train": train_csv, "validation": valid_csv}
raw = load_dataset("csv", data_files=csv_splits)

# Tokenizer of the selected checkpoint. If it defines no padding token,
# fall back to the EOS token when one is set, otherwise to the UNK token.
tok = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)
if tok.pad_token is None:
    if getattr(tok, "eos_token", None):
        tok.pad_token = tok.eos_token
    else:
        tok.pad_token = tok.unk_token

def preprocess(ex):
    """Tokenize the ``text`` field of ``ex``, truncating to ``max_length`` tokens.

    Called through ``Dataset.map(..., batched=True)``; uses the module-level
    tokenizer ``tok`` and returns its encoding unchanged.
    """
    texts = ex["text"]
    return tok(texts, truncation=True, max_length=max_length)

# Tokenize every split. Columns of the train split other than "text" and
# "label" are dropped during the map; "label" is then renamed to "labels",
# the key the custom loss reads from each batch.
kept_columns = ["text", "label"]
dropped_columns = [col for col in raw["train"].column_names if col not in kept_columns]
tokenized = raw.map(preprocess, batched=True, remove_columns=dropped_columns)
tokenized = tokenized.rename_column("label", "labels")

# Two-class sequence classifier on top of the selected checkpoint.
cfg = AutoConfig.from_pretrained(model_name_or_path, num_labels=2)
model = AutoModelForSequenceClassification.from_pretrained(model_name_or_path, config=cfg)

if use_lora:
    # LoRA is attached to the attention query/value projections, whose module
    # names depend on the architecture. DeBERTa must be checked explicitly:
    # its model_type ("deberta-v2") contains the substring "bert", but its
    # projections are named query_proj / value_proj. BERT, RoBERTa and
    # XLM-RoBERTa all name them query / value.
    model_type = cfg.model_type or ""
    if "deberta" in model_type:
        target_modules = ["query_proj", "value_proj"]
    else:
        target_modules = ["query", "value"]
    lcfg = LoraConfig(
        task_type=TaskType.SEQ_CLS,
        r=lora_r,
        lora_alpha=lora_alpha,
        lora_dropout=lora_dropout,
        bias="none",
        target_modules=target_modules,
    )
    model = get_peft_model(model, lcfg)
    # Print how many parameters remain trainable after wrapping.
    model.print_trainable_parameters()

# Class weights: inverse class frequency on the training split, rescaled so
# that the two weights average to 1. Counts are clamped to at least 1 so an
# absent class cannot cause a division by zero.
y = np.array(tokenized["train"]["labels"])
n_human = max((y == 0).sum(), 1)  # label 0 = human
n_ai = max((y == 1).sum(), 1)     # label 1 = ai
w0, w1 = 1.0 / n_human, 1.0 / n_ai
s = (w0 + w1) / 2.0
class_weights = torch.tensor([w0 / s, w1 / s], dtype=torch.float)

def sm(x, axis=-1):
    """Return the softmax of ``x`` along ``axis``.

    The per-slice maximum is subtracted before exponentiating so that large
    inputs do not overflow; the result sums to 1 along ``axis``.
    """
    shifted = x - np.max(x, axis=axis, keepdims=True)
    exps = np.exp(shifted)
    totals = np.sum(exps, axis=axis, keepdims=True)
    return exps / totals

def compute_metrics(p: EvalPrediction):
    """Compute binary classification metrics at a fixed 0.5 threshold.

    Args:
        p: evaluation output; ``p.predictions`` holds the logits (the first
            element is used when it is a tuple) and ``p.label_ids`` the
            reference labels.

    Returns:
        Dict with ``accuracy``, ``precision``, ``recall``, ``f1`` and
        ``auroc``. ``auroc`` is NaN when ``roc_auc_score`` raises.
    """
    raw_preds = p.predictions
    logits = raw_preds[0] if isinstance(raw_preds, tuple) else raw_preds
    gold = p.label_ids

    # Probability of class 1 (ai), then hard decisions at 0.5.
    ai_prob = sm(logits, axis=1)[:, 1]
    decided = (ai_prob >= 0.5).astype(int)

    metrics = {"accuracy": accuracy_score(gold, decided)}
    precision, recall, f1, _ = precision_recall_fscore_support(
        gold, decided, average="binary", zero_division=0
    )
    metrics["precision"] = precision
    metrics["recall"] = recall
    metrics["f1"] = f1
    try:
        metrics["auroc"] = roc_auc_score(gold, ai_prob)
    except Exception:
        metrics["auroc"] = float("nan")
    return metrics

class WeightedCETrainer(Trainer):
    """Trainer whose loss is a class-weighted cross-entropy.

    Args:
        class_weights: optional tensor with one weight per class. ``None``
            gives an unweighted cross-entropy. All other positional and
            keyword arguments are forwarded to ``Trainer``.
    """

    def __init__(self, *args, class_weights=None, **kwargs):
        super().__init__(*args, **kwargs)
        self.class_weights = class_weights

    def compute_loss(self, model, inputs, return_outputs=False, **kwargs):
        """Run the model without labels and apply weighted cross-entropy.

        Args:
            model: the model to call with the batch.
            inputs: batch mapping; must contain ``"labels"``.
            return_outputs: when True, return ``(loss, outputs)``.
            **kwargs: accepted and ignored, so extra keyword arguments
                passed by the caller do not break the override.

        Returns:
            The loss tensor, or ``(loss, outputs)`` if ``return_outputs``.
        """
        labels = inputs.get("labels")
        # Labels are withheld so the model does not compute its own loss.
        features = {k: v for k, v in inputs.items() if k != "labels"}
        outputs = model(**features)
        logits = outputs.get("logits")

        weight = None
        if self.class_weights is not None:
            weight = self.class_weights.to(logits.device)
        loss_fct = nn.CrossEntropyLoss(weight=weight)

        # Take the class count from the logits instead of model.config: the
        # `model` passed in may be a wrapper (e.g. nn.DataParallel) that does
        # not expose `.config`.
        loss = loss_fct(logits.view(-1, logits.size(-1)), labels.view(-1))
        return (loss, outputs) if return_outputs else loss

# fp16 mixed precision requires a CUDA device. Enable it only when one is
# present so the cell also runs (slowly) on a CPU-only runtime instead of
# failing while building TrainingArguments.
use_fp16 = torch.cuda.is_available()

# Evaluate and checkpoint once per epoch; the checkpoint with the best
# validation F1 is reloaded when training ends.
args = TrainingArguments(
    output_dir=output_dir, learning_rate=learning_rate,
    per_device_train_batch_size=train_bs, per_device_eval_batch_size=eval_bs,
    num_train_epochs=num_train_epochs, weight_decay=0.01,
    eval_strategy="epoch", save_strategy="epoch",
    load_best_model_at_end=True, metric_for_best_model="f1", greater_is_better=True,
    warmup_ratio=0.06, lr_scheduler_type="linear", logging_steps=50, save_total_limit=2,
    seed=seed, fp16=use_fp16, report_to=["none"]
)
# Pads each batch dynamically to its longest sequence.
collator = DataCollatorWithPadding(tokenizer=tok)
# NOTE(review): newer transformers releases rename Trainer's `tokenizer`
# argument to `processing_class`; confirm against the installed version.
trainer = WeightedCETrainer(model=model, args=args, train_dataset=tokenized["train"],
                            eval_dataset=tokenized["validation"],
                            tokenizer=tok, data_collator=collator,
                            compute_metrics=compute_metrics, class_weights=class_weights,
                            callbacks=[EarlyStoppingCallback(early_stopping_patience=early_stopping_patience)])

trainer.train()

# Save the trained model (including adapter weights and config)
trainer.save_model(output_dir)


# Evaluate & threshold tuning
def pick_best_threshold(y_true, prob1):
    """Return the decision threshold that maximizes binary F1.

    Sweeps 99 evenly spaced thresholds from 0.01 to 0.99; on ties the lowest
    threshold wins because only a strictly better F1 replaces the current best.

    Args:
        y_true: reference labels (0/1).
        prob1: predicted probability of class 1 for each example.

    Returns:
        The selected threshold as a Python float.
    """
    best_t, best_f1 = 0.5, -1.0
    for t in np.linspace(0.01, 0.99, 99):
        hard = (prob1 >= t).astype(int)
        _, _, f1_at_t, _ = precision_recall_fscore_support(y_true, hard, average="binary", zero_division=0)
        if f1_at_t > best_f1:
            best_t, best_f1 = t, f1_at_t
    return float(best_t)


# Class-1 (ai) probabilities of the final model on the validation split.
preds = trainer.predict(tokenized["validation"])
logits = preds.predictions[0] if isinstance(preds.predictions, tuple) else preds.predictions
y_true = preds.label_ids
prob1 = sm(logits, axis=1)[:,1]

# NOTE: when the threshold is tuned, it is chosen on the same validation split
# the metrics below are computed on, so those metrics are optimistic.
best_threshold = pick_best_threshold(y_true, prob1) if tune_threshold else 0.5

y_pred = (prob1 >= best_threshold).astype(int)
acc = accuracy_score(y_true, y_pred)
pr, rc, f1, _ = precision_recall_fscore_support(y_true, y_pred, average="binary", zero_division=0)
try:
    auc = roc_auc_score(y_true, prob1)
except Exception:
    # Best effort: AUROC is reported as NaN when it cannot be computed.
    auc = float("nan")

# The inference cell reads "best_threshold" back from this report.
report = {
    "best_threshold": float(best_threshold),
    "metrics": {"accuracy": float(acc), "precision": float(pr), "recall": float(rc), "f1": float(f1), "auroc": float(auc)}
}
with open(os.path.join(output_dir, "eval_report.json"), "w", encoding="utf-8") as f:
    json.dump(report, f, ensure_ascii=False, indent=2)

# Store the tokenizer next to the model so output_dir is self-contained for inference.
tok.save_pretrained(output_dir)
print("Done. Saved to:", output_dir)
print(json.dumps(report, ensure_ascii=False, indent=2))

In [None]:
#@title 5) 推論デモ
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from peft import PeftModel
import torch, torch.nn.functional as F, json, os, numpy as np

model_dir = "/content/out/bertv3_ai_detector"  #@param {type:"string"}
texts = ["これはAIが作成した文章です。","今日は良い天気ですね。"]  #@param {type:"raw"}
# Must be the same checkpoint the adapter stored in `model_dir` was trained on.
base_model_name_or_path = "cl-tohoku/bert-base-japanese-v3" # Base model name

# Load the decision threshold: the first candidate file in `model_dir` that
# contains "best_threshold" wins; otherwise the default of 0.5 is kept.
threshold = 0.5
for name in ["eval_report.json","meta.json","adapter_meta.json"]:
    meta_path = os.path.join(model_dir, name)
    if not os.path.exists(meta_path):
        continue
    try:
        with open(meta_path, "r", encoding="utf-8") as f:
            meta = json.load(f)
        if "best_threshold" in meta:
            threshold = float(meta["best_threshold"])
            break
    except (OSError, ValueError, TypeError) as err:
        # Still best effort, but no longer silent: report the unreadable file
        # and try the next candidate.
        print(f"Warning: could not read threshold from {meta_path}: {err}")

tok = AutoTokenizer.from_pretrained(model_dir, use_fast=True)
# Load base model
model = AutoModelForSequenceClassification.from_pretrained(base_model_name_or_path, num_labels=2)
# Load adapter weights. Only the base model and the adapter directory are
# passed; the former `from_adapter=True` keyword is not part of the PEFT API.
model = PeftModel.from_pretrained(model, model_dir).eval()

if tok.pad_token is None:
    tok.pad_token = tok.eos_token if getattr(tok, "eos_token", None) else tok.unk_token

enc = tok(texts, padding=True, truncation=True, max_length=256, return_tensors="pt")
with torch.no_grad():
    logits = model(**enc).logits
    # Probability of class 1 (ai) for each input text.
    prob = F.softmax(logits, dim=-1).cpu().numpy()[:,1]

preds = (prob >= threshold).astype(int)
for text, label, p_ai in zip(texts, preds, prob):
    print(f"[{'ai' if label==1 else 'human'}] p(ai)={p_ai:.3f}  text={text}")