In [1]:
import os

import huggingface_hub
import torch
from accelerate import DataLoaderConfiguration

import wandb
from datasets import load_dataset
from peft import LoraConfig
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TrainingArguments
from trl import SFTTrainer, DataCollatorForCompletionOnlyLM

The cache for model files in Transformers v4.22.0 has been updated. Migrating your old cache. This is a one-time only operation. You can interrupt this and resume the migration later on by calling `transformers.utils.move_cache()`.


0it [00:00, ?it/s]

ImportError: cannot import name 'top_k_top_p_filtering' from 'transformers' (/home/hermeschen/.cache/pypoetry/virtualenvs/chat-bot-uhayQKRl-py3.11/lib/python3.11/site-packages/transformers/__init__.py)

In [None]:
# --- Run parameters ---------------------------------------------------------
# Hugging Face identifiers of the model to fine-tune and of its tokenizer.
base_model: str = "meta-llama/Llama-2-7b-chat-hf"
tokenizer: str = "meta-llama/Llama-2-7b-chat-hf"
# Recorded in the W&B run config / run notes by the wandb cell below.
name_or_path_for_fine_tuned_model: str = ""
experiment_detail: str = ""
# Forwarded as `mode` to wandb.init.
wandb_mode: str = "disabled"
num_epochs: int = 1
enable_flash_attention_2: bool = False
# File holding the chat-template definition that is evaluated into `chat_template`.
# NOTE(review): the file is named "gemma" while the base model is Llama-2 — confirm this pairing is intended.
chat_template_file: str = "./chat_template/gemma.txt"

# System prompt selection: "disabled" -> no system turn at all, "default" -> an
# empty system prompt, any other value -> the custom prompt text.
system_prompt_mode: str = "disabled"
if system_prompt_mode == "disabled":
    system_prompt = None
elif system_prompt_mode == "default":
    system_prompt = ""
else:
    system_prompt = "some what"

In [None]:
# Load the chat-template definition; the file content is expected to evaluate to a
# dict. Later cells read its "template", "instruction", "response" and
# "special_tokens" entries.
# The file is opened in a `with` block so the handle is closed deterministically.
# SECURITY NOTE(review): eval() executes whatever the file contains. Only point
# `chat_template_file` at trusted files; if the file is a plain literal, consider
# switching to ast.literal_eval or a JSON file.
with open(chat_template_file, "r", encoding="utf-8") as template_file:
    chat_template: dict = eval(template_file.read())

In [None]:
# Load the dotenv IPython extension and run it so that variables from a local
# .env file are available in os.environ before the login cell reads
# HF_TOKEN / WANDB_API_KEY.
%load_ext dotenv
%dotenv

In [None]:
# Authenticate with the Hugging Face Hub using the token from the environment
# (an empty string is passed when HF_TOKEN is not set).
huggingface_hub.login(
    token=os.getenv("HF_TOKEN", ""),
    add_to_git_credential=True,
)
# Authenticate with Weights & Biases, forcing a fresh login with the API key from
# the environment (an empty string is passed when WANDB_API_KEY is not set).
wandb.login(
    key=os.getenv("WANDB_API_KEY", ""),
    relogin=True,
)

In [None]:
# Snapshot of the run parameters and chat-template pieces that is attached to the
# W&B run. `tokenizer` and `base_model` are still plain strings at this point.
wandb_config: dict = dict(
    base_model=base_model,
    tokenizer=tokenizer,
    name_or_path_for_fine_tuned_model=name_or_path_for_fine_tuned_model,
    system_prompt=system_prompt,
    chat_template=chat_template["template"],
    instruction_template=chat_template["instruction"],
    response_template=chat_template["response"],
    special_tokens=chat_template["special_tokens"],
)

# Start the W&B run for this fine-tuning job.
wandb.init(
    project="emotion-chat-bot-ncu",
    group="Response Generator",
    job_type="fine-tuning",
    notes=experiment_detail,
    config=wandb_config,
    mode=wandb_mode,
    resume="auto",
)

# Load Dataset

In [None]:
# Load the train split of the "daily_dialog" dataset.
dataset = load_dataset(
    "daily_dialog",
    split="train",
    num_proc=16,
    trust_remote_code=True,
)
# The "act" column is not used by any later cell, so it is dropped right away.
dataset = dataset.remove_columns("act")

In [None]:
# Keep the numeric emotion ids under "emotion_id" so that "emotion" can hold the
# human-readable label instead.
dataset = dataset.rename_column("emotion", "emotion_id")

# Copy the label names before editing them: the list returned by the feature
# belongs to the dataset's ClassLabel metadata and must not be mutated in place.
emotion_labels: list = list(dataset.features["emotion_id"].feature.names)
# Relabel id 0 as "neutral".
emotion_labels[0] = "neutral"

# Map every id of every dialog to its label, then drop the id column.
dataset = dataset.map(lambda samples: {
    "emotion": [[emotion_labels[emotion_id] for emotion_id in sample] for sample in samples]
}, input_columns="emotion_id", remove_columns="emotion_id", batched=True, num_proc=16)

In [None]:
def _strip_utterances(dialogs):
    """Strip leading/trailing whitespace from every utterance of every dialog in a batch."""
    return {"dialog": [[utterance.strip() for utterance in turns] for turns in dialogs]}


dataset = dataset.map(
    _strip_utterances,
    input_columns="dialog",
    batched=True,
    num_proc=16,
)

In [None]:
def _to_chat_turns(batch):
    """Merge the parallel "emotion" and "dialog" columns into a list of chat turns.

    Roles alternate by position: even positions are "user", odd positions are
    "assistant". Each turn's content carries both the emotion label and the text.
    """
    conversations = []
    for emotions, utterances in zip(batch["emotion"], batch["dialog"]):
        turns = []
        for position, (emotion, utterance) in enumerate(zip(emotions, utterances)):
            role = "user" if position % 2 == 0 else "assistant"
            turns.append({
                "role": role,
                "content": {"emotion": emotion, "dialog": utterance},
            })
        conversations.append(turns)
    return {"prompt": conversations}


dataset = dataset.map(
    _to_chat_turns,
    remove_columns=["emotion", "dialog"],
    batched=True,
    num_proc=16,
)

In [None]:
# Roles alternate starting with "user", so a conversation with an odd number of
# turns ends on a user turn; drop that last turn so every conversation ends on an
# assistant turn. (A single-turn conversation becomes an empty list.)
dataset = dataset.map(
    lambda conversations: {
        "prompt": [turns if len(turns) % 2 == 0 else turns[:-1] for turns in conversations]
    },
    input_columns="prompt",
    batched=True,
    num_proc=16,
)

In [None]:
if system_prompt_mode != "disabled":
    def _prepend_system_turn(conversations):
        """Put a system turn (no emotion, text = system_prompt) in front of every conversation."""
        system_turn = {
            "role": "system",
            "content": {"emotion": None, "dialog": system_prompt},
        }
        return {"prompt": [[system_turn] + turns for turns in conversations]}

    dataset = dataset.map(
        _prepend_system_turn,
        input_columns="prompt",
        batched=True,
        num_proc=16,
    )

In [None]:
# Load the tokenizer named by the `tokenizer` run parameter rather than the base
# model id, so the tokenizer that is logged to W&B is the one actually used.
# The name is read from `wandb_config`, which captured the parameter while it was
# still a string: this cell rebinds `tokenizer` to the tokenizer object (and a later
# cell rebinds `base_model` to the model), so reading the recorded string keeps the
# cell safe to re-run.
tokenizer = AutoTokenizer.from_pretrained(wandb_config["tokenizer"])
tokenizer.padding_side = "right"
tokenizer.clean_up_tokenization_spaces = True
# Use the project's chat template and register its special tokens.
tokenizer.chat_template = chat_template["template"]
tokenizer.add_special_tokens(chat_template["special_tokens"], replace_additional_special_tokens=True)

In [None]:
def prompt_compose(sample: list[dict]) -> str:
    """Render one conversation into a single prompt string.

    Args:
        sample: The chat turns of one conversation, in order. Each turn is a dict
            with a "role" and a "content" entry.

    Returns:
        The text produced by the tokenizer's chat template (not tokenized).
    """
    # With tokenize=False the template is only rendered to text, so the
    # tokenization-only options (padding, max_length, return_tensors) would have no
    # effect and are intentionally not passed.
    return tokenizer.apply_chat_template(sample, tokenize=False)

In [None]:
# Replace every conversation (list of chat turns) with its rendered prompt text.
dataset = dataset.map(
    lambda turns: {"prompt": prompt_compose(turns)},
    input_columns="prompt",
    num_proc=16,
)

# Keep the first rendered prompt in the run config as a sample.
wandb.config["example_prompt"] = dataset[0]["prompt"]

In [None]:
# Hold-out split is currently disabled: the whole train split is handed to the trainer.
# dataset = dataset.train_test_split(test_size=0.1)

## Configurations

In [None]:
# 4-bit NF4 quantisation settings with float16 compute and no double quantisation.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=False,
    bnb_4bit_compute_dtype=torch.float16,
)
# Quantisation is only used when CUDA is available; otherwise the model is loaded
# without a quantisation config.
if not torch.cuda.is_available():
    quantization_config = None

# Record the effective settings ({} when quantisation is off).
if quantization_config is None:
    wandb.config["quantization_configuration"] = {}
else:
    wandb.config["quantization_configuration"] = quantization_config.to_dict()

In [None]:
# LoRA adapter hyper-parameters for causal-LM fine-tuning.
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=8,
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
)
# Log the adapter settings with the run.
wandb.config["lora_configuration"] = lora_config.to_dict()

In [None]:
# Disabled: accelerate dataloader settings. Nothing in the visible cells consumes
# `dataloader_config`, so this is kept only as a reference.
# dataloader_config = DataLoaderConfiguration(
#     dispatch_batches=None,
#     split_batches=False,
#     even_batches=True,
#     use_seedable_sampler=True
# )

In [None]:
trainer_arguments = TrainingArguments(
    # --- output / checkpointing ---
    output_dir="./checkpoints",
    overwrite_output_dir=True,
    save_steps=25,
    save_total_limit=5,
    # NOTE(review): train() is called without a resume argument in the training cell —
    # confirm whether this flag alone is enough to resume from a checkpoint.
    resume_from_checkpoint=True,

    # --- batching / data loading ---
    per_device_train_batch_size=4,
    gradient_accumulation_steps=1,
    auto_find_batch_size=True,
    group_by_length=True,
    dataloader_num_workers=16,

    # --- optimisation ---
    optim="paged_adamw_32bit",
    learning_rate=2e-4,
    weight_decay=0.001,
    max_grad_norm=0.3,
    lr_scheduler_type="constant",
    # NOTE(review): with a plain "constant" scheduler the warmup ratio may not be
    # applied — confirm whether "constant_with_warmup" was intended.
    warmup_ratio=0.03,

    # --- schedule length ---
    num_train_epochs=num_epochs,
    max_steps=-1,

    # --- precision / memory ---
    bf16=False,
    fp16=False,
    gradient_checkpointing=True,
    gradient_checkpointing_kwargs={"use_reentrant": True},
    torch_compile=False,

    # --- logging ---
    logging_steps=25,
    report_to=["wandb"],

    # --- evaluation (disabled: no eval split is created) ---
    # evaluation_strategy="steps",
    # per_device_eval_batch_size=4,
    # eval_accumulation_steps=1,
    # eval_delay=0.5,
    # load_best_model_at_end=True,
    # metric_for_best_model="loss",
)
# Log the full argument set with the run.
wandb.config["trainer_arguments"] = trainer_arguments.to_dict()

## Load Model

In [None]:
# Value passed as `attn_implementation` when the model is loaded; None when
# flash attention 2 is not enabled.
flash_attention: str | None = "flash_attention_2" if enable_flash_attention_2 else None

In [None]:
# Load the causal-LM to fine-tune. The model id is read from `wandb_config` (which
# captured the `base_model` run parameter as a string) because this cell rebinds
# `base_model` to the loaded model object; reading the recorded string keeps the
# cell safe to re-run instead of passing a model object to from_pretrained.
base_model = AutoModelForCausalLM.from_pretrained(
    wandb_config["base_model"],
    quantization_config=quantization_config,
    attn_implementation=flash_attention,
    pretraining_tp=1,
    use_cache=False,
    device_map="auto",
    low_cpu_mem_usage=True,
    trust_remote_code=True
)
# The tokenizer may have gained special tokens, so size the embeddings to match it.
base_model.resize_token_embeddings(len(tokenizer))
# Log the effective model configuration with the run.
wandb.config["base_model_configuration"] = base_model.config.to_dict()

## Setup Tuner

In [None]:
# Collator configured with the chat template's response and instruction marker
# strings. Presumably this restricts the loss to the assistant responses, as the
# class name suggests — confirm against the TRL documentation.
data_collator = DataCollatorForCompletionOnlyLM(
    response_template=chat_template["response"],
    instruction_template=chat_template["instruction"],
    tokenizer=tokenizer,
)

In [None]:
# Supervised fine-tuning trainer: LoRA adapters on top of the loaded base model,
# trained on the rendered "prompt" text column.
tuner = SFTTrainer(
    # model and adapter
    model=base_model,
    peft_config=lora_config,
    tokenizer=tokenizer,
    # data
    train_dataset=dataset,
    # eval_dataset=dataset["test"],
    dataset_text_field="prompt",
    dataset_num_proc=16,
    max_seq_length=4096,
    data_collator=data_collator,
    # training configuration
    args=trainer_arguments,
)

In [None]:
# Run the fine-tuning loop.
# NOTE(review): `resume_from_checkpoint=True` is set in the TrainingArguments but no
# resume argument is passed here — confirm whether resuming is expected to happen.
tuner.train()

In [None]:
# Persist the fine-tuned model and attach it to the W&B run.
# The trainer's model is saved as-is: wrapping it with torch.compile() right before
# saving would hand the trainer a compiled wrapper module instead of the original
# model object, and compilation brings nothing to a save.
tuner.save_model("./model")
# NOTE(review): "./model" is a directory — confirm wandb.save uploads its contents
# as intended (a glob such as "./model/*" may be required).
wandb.save("./model")

In [None]:
# Close the W&B run that was opened with wandb.init.
wandb.finish()