In [20]:
import torch
from datasets import load_dataset
from peft import LoraConfig, TaskType, get_peft_model, prepare_model_for_kbit_training
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    Trainer,
    TrainingArguments,
)

In [21]:
model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

# Choose the 4-bit compute dtype from what the GPU supports: bfloat16 when
# available, float16 otherwise. Hard-coding bfloat16 forces slow emulation on
# GPUs without bf16 support.
use_bf16 = torch.cuda.is_available() and torch.cuda.is_bf16_supported()

# NF4 4-bit quantization of the base weights (QLoRA-style loading).
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_compute_dtype=torch.bfloat16 if use_bf16 else torch.float16,
)

# This checkpoint resolves to the built-in LlamaForCausalLM, so
# `trust_remote_code` is not needed; leaving it off avoids executing arbitrary
# code shipped in the model repository.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map='auto',
)

tokenizer = AutoTokenizer.from_pretrained(model_name)

Loading weights: 100%|██████████| 201/201 [00:18<00:00, 11.13it/s, Materializing param=model.norm.weight]                              


In [22]:
# Inspect the loaded module tree; the bare expression is rendered as the cell output.
model

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(32000, 2048)
    (layers): ModuleList(
      (0-21): 22 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): Linear4bit(in_features=2048, out_features=2048, bias=False)
          (k_proj): Linear4bit(in_features=2048, out_features=256, bias=False)
          (v_proj): Linear4bit(in_features=2048, out_features=256, bias=False)
          (o_proj): Linear4bit(in_features=2048, out_features=2048, bias=False)
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear4bit(in_features=2048, out_features=5632, bias=False)
          (up_proj): Linear4bit(in_features=2048, out_features=5632, bias=False)
          (down_proj): Linear4bit(in_features=5632, out_features=2048, bias=False)
          (act_fn): SiLUActivation()
        )
        (input_layernorm): LlamaRMSNorm((2048,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNorm((2048,), eps=1e-05)
      )
    )
    (norm): LlamaRMSNorm(

In [23]:
# Prepare the 4-bit base model for training before attaching adapters. Per the
# PEFT documentation this freezes the base weights, upcasts the remaining
# half-precision parameters (e.g. norms) to float32 for numerical stability and
# enables gradient checkpointing.
model = prepare_model_for_kbit_training(model)

# Low-rank adapters on the attention query and value projections only.
lora_config = LoraConfig(
    r=8,                                  # adapter rank
    lora_alpha=16,                        # LoRA scaling numerator (alpha / r = 2)
    target_modules=['q_proj', 'v_proj'],  # module names as shown in the model repr
    lora_dropout=0.05,
    bias='none',                          # leave bias terms untouched
    task_type=TaskType.CAUSAL_LM,
)

# NOTE: this rebinds `model`; re-running the cell without reloading the base
# model would wrap an already-wrapped model.
model = get_peft_model(model, lora_config)


In [24]:
# First 200 rows of the GSM8K "main" training split.
data = load_dataset(
    path='openai/gsm8k',
    name='main',
    split='train[:200]',
)

In [28]:
def tokenize(batch):
    """Turn a batch of question/answer pairs into padded causal-LM training tensors.

    Args:
        batch: Mapping with parallel ``'question'`` and ``'answer'`` sequences
            of strings (one entry per example).

    Returns:
        The tokenizer output (``input_ids``, ``attention_mask``, ...) with an
        added ``labels`` tensor: a copy of ``input_ids`` in which every padded
        position is replaced by ``-100``.
    """
    texts = [
        f"### Instruction:\n{instruction}\n### Response:\n{out}"
        for instruction, out in zip(batch['question'], batch['answer'])
    ]

    # Fixed-length padding so every example has the same shape.
    tokens = tokenizer(
        texts,
        padding='max_length',
        max_length=256,
        truncation=True,
        return_tensors='pt',
    )

    # Exclude padding from the loss. -100 is the ignore index used by the
    # cross-entropy loss; without it most of each 256-token row would train the
    # model to emit padding. The attention mask is used instead of the pad token
    # id so that a real token sharing that id (e.g. EOS) is still learned.
    labels = tokens['input_ids'].clone()
    labels = labels.masked_fill(tokens['attention_mask'] == 0, -100)
    tokens['labels'] = labels
    return tokens

In [29]:
# Tokenize in batches and drop the original columns so only the tokenizer
# outputs and labels remain.
tokenized_data = data.map(
    tokenize,
    batched=True,
    remove_columns=data.column_names,
)

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Map: 100%|██████████| 200/200 [00:00<00:00, 3120.23 examples/s]


In [30]:
# Pick the mixed-precision mode from GPU capability instead of hard-coding
# fp16: bf16 where the hardware supports it (no loss scaling needed, and it
# matches a bfloat16 4-bit compute dtype), fp16 otherwise.
use_bf16 = torch.cuda.is_available() and torch.cuda.is_bf16_supported()

# NOTE(review): 50 epochs over a 200-example subset at lr=1e-3 will likely
# memorize the data; confirm this is intended before reusing these values.
training_args = TrainingArguments(
    output_dir='./accountant_lora_results',
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,   # effective batch size of 4 per device
    learning_rate=1e-3,
    num_train_epochs=50,
    bf16=use_bf16,
    fp16=not use_bf16,
    logging_steps=20,
    save_strategy='epoch',           # one checkpoint per epoch
    report_to="none",                # no external experiment trackers
    remove_unused_columns=False,
    label_names=['labels'],
)

In [31]:
# Bundle the adapter-wrapped model, the run configuration and the tokenized
# training split into a Trainer; the tokenizer is attached as the processing class.
trainer = Trainer(
    processing_class=tokenizer,
    train_dataset=tokenized_data,
    args=training_args,
    model=model,
)

In [None]:
# Run the fine-tuning loop configured by `training_args` on `tokenized_data`;
# the call's return value is displayed as the cell output.
trainer.train()