In [3]:
from transformers import AutoModelForCausalLM, BitsAndBytesConfig, AutoTokenizer, Trainer, TrainingArguments, DataCollatorForLanguageModeling
import torch
from datasets import load_dataset
from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model



In [5]:
# Local directory holding the LLaMA-13B checkpoint, relative to this notebook.
model_id = "../model_weights/huggyllama/llama-13b"

# 4-bit NF4 quantization settings. These are only consumed by the
# bitsandbytes/CUDA load that is kept commented out below; the active
# (MPS) load does not use them.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

tokenizer = AutoTokenizer.from_pretrained(model_id)

# Quantized load for a CUDA machine (GPU index 0):
# model = AutoModelForCausalLM.from_pretrained(model_id, quantization_config=bnb_config, device_map={"":0})

# Half-precision load for Apple silicon. `from_pretrained` does not take a
# `device` keyword; device placement is requested through `device_map`,
# where the empty-string key means "the whole model".
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16, device_map={"": "mps"})

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [None]:

# Enable gradient checkpointing on the base model before handing it to
# PEFT's k-bit preparation helper.
model.gradient_checkpointing_enable()
# NOTE(review): the active load in the model cell is a plain fp16 load (the
# `quantization_config=bnb_config` variant is commented out), so the model
# reaching this helper is presumably not k-bit quantized — confirm this call
# is still wanted on that path.
model = prepare_model_for_kbit_training(model)

In [76]:
# Fetch the training split of the instruction dataset (served from the
# local Hugging Face cache when already downloaded).
data = load_dataset(
    "izumi-lab/llm-japanese-dataset",
    split='train',
)

# Prompt skeleton with three positional slots, in order: the instruction,
# an optional "### Input:" section (empty string when there is no input),
# and the expected response. The leading whitespace on each line is part of
# the string literal and therefore ends up in the formatted prompt.
template = """
    Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
    ### Instruction:
    {}
    {}
    ### Response:
    {}
    """


def format_prompt(samples):
    """Build one prompt per example in a batch and tokenize them.

    Args:
        samples: batch mapping with parallel sequences under the keys
            'instruction', 'input' and 'output' (the shape `Dataset.map`
            passes when called with `batched=True`).

    Returns:
        Whatever the module-level `tokenizer` returns for the list of
        formatted prompt strings.
    """
    prompts = []
    for instruction, context, response in zip(
        samples['instruction'], samples['input'], samples['output']
    ):
        # Plain f-string on purpose: the previous raw f-string (fr'...')
        # wrote the two characters backslash + n into the prompt instead of
        # real line breaks.
        input_section = f'### Input:\n{context}\n' if context else ''
        prompts.append(template.format(instruction, input_section, response))

    return tokenizer(prompts)

# Format + tokenize every example in batches, then shuffle with a fixed
# seed so the ordering is reproducible.
data = (
    data
    .map(format_prompt, batched=True)
    .shuffle(seed=42)
)

Found cached dataset json (/Users/cmillsop/.cache/huggingface/datasets/izumi-lab___json/izumi-lab--llm-japanese-dataset-64487bbe4e2f69c4/0.0.0/e347ab1c932092252e717ff3f949105a4dd28b27e842dd53157d2f72e276c2e4)


Map:   0%|          | 0/8393726 [00:00<?, ? examples/s]

KeyboardInterrupt: 

In [None]:
def print_trainable_parameters(model):
    """Print trainable vs. total parameter counts for `model`.

    Walks `model.named_parameters()`, sums `numel()` over all parameters and
    over those with `requires_grad` set, and prints both counts together
    with the trainable percentage. Returns nothing.
    """
    # One pass collecting (element count, is-trainable) per parameter.
    sizes = [(p.numel(), p.requires_grad) for _, p in model.named_parameters()]
    all_param = sum(count for count, _ in sizes)
    trainable_params = sum(count for count, needs_grad in sizes if needs_grad)
    percent = 100 * trainable_params / all_param
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {percent}"
    )

In [None]:
# LoRA adapter settings. `target_modules` must name layers that exist in the
# loaded architecture: "query_key_value" is the fused attention projection
# used by GPT-NeoX-style models, whereas the LLaMA checkpoint loaded in this
# notebook exposes separate q_proj / k_proj / v_proj layers.
config = LoraConfig(
    r=8,
    lora_alpha=32,
    target_modules=["q_proj", "k_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

# Wrap the base model with the adapters and report how many parameters
# remain trainable.
model = get_peft_model(model, config)
print_trainable_parameters(model)

In [None]:
# Reuse EOS as the padding token so the data collator can pad batches.
# (This line was originally written for the GPT-NeoX tokenizer; presumably
# the LLaMA tokenizer used here also has no pad token set — verify.)
tokenizer.pad_token = tokenizer.eos_token

# fp16 mixed precision and the bitsandbytes paged 8-bit optimizer are
# CUDA-only. Enable them only when a CUDA device is present so the same cell
# also runs on the MPS/CPU load path; behaviour on CUDA is unchanged.
on_cuda = torch.cuda.is_available()

trainer = Trainer(
    model=model,
    train_dataset=data,
    args=TrainingArguments(
        per_device_train_batch_size=1,
        gradient_accumulation_steps=4,  # effective batch size of 4 per device
        warmup_steps=2,
        max_steps=10,  # short smoke-test run
        learning_rate=2e-4,
        fp16=on_cuda,
        logging_steps=1,
        output_dir="outputs",
        optim="paged_adamw_8bit" if on_cuda else "adamw_torch",
    ),
    # mlm=False selects causal-LM collation (labels derived from input_ids).
    data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False),
)
model.config.use_cache = False  # silence the warnings. Please re-enable for inference!
trainer.train()