<a href="https://colab.research.google.com/github/haru1489248/nlp-100-nock/blob/main/ch10/section_98.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## 98. ファインチューニング
問題96のプロンプトに対して、正解の感情ラベルをテキストの応答として返すように事前学習済みモデルをファインチューニングせよ。

In [None]:
!pip install -U transformers evaluate

Collecting transformers
  Downloading transformers-5.1.0-py3-none-any.whl.metadata (31 kB)
Collecting evaluate
  Downloading evaluate-0.4.6-py3-none-any.whl.metadata (9.5 kB)
Downloading transformers-5.1.0-py3-none-any.whl (10.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.3/10.3 MB[0m [31m20.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading evaluate-0.4.6-py3-none-any.whl (84 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: transformers, evaluate
  Attempting uninstall: transformers
    Found existing installation: transformers 5.0.0
    Uninstalling transformers-5.0.0:
      Successfully uninstalled transformers-5.0.0
Successfully installed evaluate-0.4.6 transformers-5.1.0


In [None]:
import os
from typing import Any, Tuple, Union

import evaluate
import numpy as np
import torch
from datasets import Dataset
from google.colab import drive
# parameter efficient fine-tuning module import
from peft import LoraConfig, TaskType, get_peft_model
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    DataCollatorForSeq2Seq,
    DataCollatorWithPadding,
    GenerationConfig,
    Trainer,
    TrainingArguments,
)
drive.mount("/content/drive")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Hub id of the pretrained chat model that gets fine-tuned.
model_id = "meta-llama/Llama-3.2-1B-Instruct"

# SST-2 splits (tab-separated files) stored under the mounted Drive path.
train_src = "/content/drive/MyDrive/SST-2/train.tsv"
dev_src = "/content/drive/MyDrive/SST-2/dev.tsv"

# Use the GPU when PyTorch can see one, otherwise stay on the CPU.
_backend = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(_backend)

# Log directory for TensorBoard; presumably picked up by the reporting
# integration enabled through report_to="tensorboard" -- confirm against the
# installed transformers version.
os.environ.update({"TENSORBOARD_LOGGING_DIR": "./logs"})

In [None]:
def compute_accuracy(eval_pred: Tuple[np.ndarray, np.ndarray]) -> dict[str, float]:
  """Exact-match accuracy over the supervised answer tokens of a causal LM.

  The previous version took ``argmax(axis=1)`` (the sequence axis of causal-LM
  logits), discarded the result, and handed the raw predictions to the metric.
  This version works on token ids, applies the one-position shift between
  logits and labels, and ignores every position labelled ``-100``.

  Args:
    eval_pred: ``(predictions, labels)`` pair; anything that unpacks into two
      items works. ``predictions`` is either logits of shape
      ``(batch, seq, vocab)`` or already-argmaxed token ids of shape
      ``(batch, seq)``. ``labels`` has shape ``(batch, seq)`` with ``-100`` at
      positions that must not be scored (prompt and padding).

  Returns:
    ``{"accuracy": value}`` where ``value`` is the fraction of examples whose
    supervised tokens were all predicted correctly. Examples without any
    supervised token are skipped; ``0.0`` is returned when none remain.
  """
  pred, labels = eval_pred
  if isinstance(pred, tuple):
    # Some models return extra outputs next to the logits; the logits come first.
    pred = pred[0]
  pred = np.asarray(pred)
  labels = np.asarray(labels)

  # One extra trailing axis means we were given logits rather than token ids.
  token_ids = pred.argmax(axis=-1) if pred.ndim == labels.ndim + 1 else pred

  # The output at position t is the prediction for the token at position t + 1.
  shifted_preds = token_ids[:, :-1]
  shifted_labels = labels[:, 1:]

  supervised = shifted_labels != -100
  has_answer = supervised.any(axis=1)
  if not has_answer.any():
    return {"accuracy": 0.0}

  # An example counts as correct only if every supervised token matches.
  all_correct = ((shifted_preds == shifted_labels) | ~supervised).all(axis=1)
  return {"accuracy": float(all_correct[has_answer].mean())}

In [None]:
def main() -> None:
  """Fine-tune the chat model with LoRA so it answers "positive"/"negative" as text.

  Steps:
    1. Load the SST-2 train/dev TSV files.
    2. Turn every sentence into a chat prompt followed by the gold answer word,
       masking the prompt tokens out of the loss with ``-100``.
    3. Wrap the pretrained causal LM with LoRA adapters and train it with the
       Hugging Face ``Trainer``.
    4. Evaluate on the dev split and print the resulting metrics.

  All training code lives inside this function: it relies on the datasets,
  tokenizer and model created here, so it cannot run at module level.
  """
  train_dataset = Dataset.from_csv(train_src, sep="\t")
  dev_dataset = Dataset.from_csv(dev_src, sep="\t")

  tokenizer = AutoTokenizer.from_pretrained(model_id, padding_side="left")
  if tokenizer.pad_token_id is None:
    tokenizer.pad_token_id = tokenizer.eos_token_id

  generation_config = GenerationConfig(
      max_new_tokens=10,  # the answer is just "positive" or "negative", so keep it short
      pad_token_id=tokenizer.pad_token_id,
      eos_token_id=tokenizer.eos_token_id,
      do_sample=False  # greedy decoding
  )

  model = AutoModelForCausalLM.from_pretrained(
      model_id,
      device_map="auto" if torch.cuda.is_available() else None,
  )
  # Attach the generation settings to the base model so a later generate() call
  # uses them; previously the config object was built and then never used.
  model.generation_config = generation_config

  peft_config = LoraConfig(
      task_type=TaskType.CAUSAL_LM,
      inference_mode=False,  # reportedly True only when using an already-trained adapter
      r=8,
      lora_alpha=16,
      lora_dropout=0.1  # dropout with probability 0.1 on the LoRA branch only
  )

  model = get_peft_model(model, peft_config=peft_config)

  def tokenize_function(examples, label_column="labels"):
    """Encode a batch as prompt+answer ids, with the prompt masked out of the loss.

    Args:
      examples: batch mapping with a "sentence" column and the gold column
        named by ``label_column`` (1 means positive, anything else negative).
      label_column: name of the gold-label column in ``examples``.

    Returns:
      Dict of ragged Python lists: "input_ids", "attention_mask" and "labels"
      (``-100`` on every prompt position, the answer token ids afterwards).
    """
    prompts = []
    answers = []
    for sentence, label in zip(examples["sentence"], examples[label_column]):
      messages = [
          {
              "role": "system",
              "content": """
              You are a classification model for the sentiment analyzer.
              Answer with exactly one word: positive or negative.
              Do not output anything else.
              For example, the positive sentence 'The movie was full of fan.' is inputted, you should return positive.
              """
          },
          {
              "role": "user",
              "content": sentence
          }
      ]

      prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
      ans = "positive" if int(label) == 1 else "negative"

      prompts.append(prompt)
      answers.append(ans)

    # Keep plain Python lists (no return_tensors): the sequences have different
    # lengths, so they cannot be stacked into a tensor here, and list "+" below
    # must concatenate rather than add element-wise. Padding happens per batch
    # in the data collator.
    prompt_token = tokenizer(prompts, padding=False, add_special_tokens=False)
    answer_token = tokenizer(answers, padding=False, add_special_tokens=False)

    input_ids, attention_mask, labels = [], [], []

    for prompt_ids, answer_ids in zip(prompt_token["input_ids"], answer_token["input_ids"]):
      ids = prompt_ids + answer_ids
      input_ids.append(ids)
      attention_mask.append([1] * len(ids))

      # ignore loss on prompt tokens
      labels.append([-100] * len(prompt_ids) + answer_ids)

    return {"input_ids": input_ids, "attention_mask": attention_mask, "labels": labels}

  def encode_split(dataset):
    """Tokenize one split and keep only the columns the model consumes."""
    # Accept either spelling of the gold column; the TSV header is not visible
    # from this code, so do not assume one.
    label_column = "labels" if "labels" in dataset.column_names else "label"
    return dataset.map(
        tokenize_function,
        batched=True,  # pass whole batches to the function
        fn_kwargs={"label_column": label_column},
        # Drop the raw text/label columns so the collator only sees model inputs.
        remove_columns=dataset.column_names,
    )

  train_data = encode_split(train_dataset)
  dev_data = encode_split(dev_dataset)

  # DataCollatorWithPadding pads input_ids/attention_mask but not the ragged
  # "labels" lists; this collator also pads labels, using -100 so padded
  # positions are excluded from the loss.
  datacollator = DataCollatorForSeq2Seq(
      tokenizer=tokenizer,
      padding=True,
      label_pad_token_id=-100,
  )

  def keep_token_ids(logits, labels):
    """Reduce logits to argmax token ids before the Trainer accumulates them.

    Storing full (batch, seq, vocab) logits for the whole dev set can exhaust
    memory; the metric only needs the predicted ids.
    """
    if isinstance(logits, tuple):
      logits = logits[0]
    return logits.argmax(dim=-1)

  training_args = TrainingArguments(
      output_dir="./results_97ioynb",
      num_train_epochs=2,  # number of passes over the training data
      per_device_eval_batch_size=32,
      per_device_train_batch_size=32,
      learning_rate=2e-4,  # 2 * 10^{-4}: 0.0002
      lr_scheduler_type="linear",
      warmup_ratio=0.1,
      eval_strategy="epoch",  # when evaluation runs
      save_strategy="epoch",
      load_best_model_at_end=True,
      metric_for_best_model="accuracy",
      fp16=torch.cuda.is_available(),  # fp16 mixed precision needs a GPU
      save_only_model=True,
      # Name the label column explicitly; the Trainer may not be able to infer
      # it through the PEFT wrapper -- harmless if it can.
      label_names=["labels"],
      report_to="tensorboard"
  )

  trainer = Trainer(
      model=model,
      args=training_args,
      train_dataset=train_data,
      eval_dataset=dev_data,
      data_collator=datacollator,
      compute_metrics=compute_accuracy,
      preprocess_logits_for_metrics=keep_token_ids,
  )

  trainer.train()

  eval_results = trainer.evaluate()
  print(f"Accuracy (dev dataset): {eval_results}")

In [None]:
# Entry point: run the fine-tuning pipeline when this code executes as the main module.
if __name__ == "__main__":
  main()