In [13]:
#Imports
import torch
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, default_data_collator, TrainingArguments, Trainer, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model, TaskType

In [3]:
# 1.Config
model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0" #llama3.2:latest
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16
)

In [4]:
# 2 Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

tokenizer_config.json:   0%|          | 0.00/1.29k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/551 [00:00<?, ?B/s]

In [5]:

# 3 Load model
model= AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto"
)

config.json:   0%|          | 0.00/608 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.20G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

In [6]:
# 4 Load LoRA config
lora_config = LoraConfig(
    r=8, # Rank of the LoRA matrices, the higher the better. 8, 16, 32.
    lora_alpha=16,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type=TaskType.CAUSAL_LM
)
model = get_peft_model(model, lora_config)

In [7]:
# 5 Load dataset
data = load_dataset('json', data_files='data.jsonl')['train']

Generating train split: 0 examples [00:00, ? examples/s]

In [8]:
#6 Tokenization function
def tokenize(batch):
    texts = [
        f"### Instruction:\n{inst}\n### Response:\n{out}"
        for inst, out in zip(batch['instruction'], batch['response'])
    ]

    tokens = tokenizer(
        texts,
        padding = 'max_length',
        truncation = True,
        max_length = 256,
        return_tensors = 'pt'
    )

    tokens['labels'] = tokens['input_ids'].clone()
    return tokens

In [9]:
# 7 Tokenize the dataset
tokenized_data = data.map(tokenize, batched=True, remove_columns=data.column_names)

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

In [20]:
# 8 Training arguments
training_args = TrainingArguments(
    output_dir = './tinyllama-lora-tuned',
    per_device_train_batch_size = 4,
    gradient_accumulation_steps = 4,
    learning_rate = 1e-3,
    num_train_epochs = 50,
    fp16 = True,
    logging_steps = 20,
    save_strategy = 'epoch',
    report_to = 'none',
    remove_unused_columns = False,      
    label_names = ["labels"]
)

In [21]:
# 9 Trainer
trainer = Trainer(
    model=model,    
    args=training_args,
    train_dataset=tokenized_data,
    processing_class=tokenizer,
)

Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


In [22]:
trainer.train()

Step,Training Loss
20,2.9554
40,0.5885
60,0.5063
80,0.426
100,0.3402
120,0.2575
140,0.1917
160,0.1383
180,0.1024
200,0.0823


TrainOutput(global_step=650, training_loss=0.19501266112694374, metrics={'train_runtime': 464.7686, 'train_samples_per_second': 21.516, 'train_steps_per_second': 1.399, 'total_flos': 1.590741172224e+16, 'train_loss': 0.19501266112694374, 'epoch': 50.0})

In [23]:
model.save_pretrained("./tinyllama-lora-tuned-adapter-database")
tokenizer.save_pretrained("./tinyllama-lora-tuned-adapter-database")

('./tinyllama-lora-tuned-adapter-database/tokenizer_config.json',
 './tinyllama-lora-tuned-adapter-database/special_tokens_map.json',
 './tinyllama-lora-tuned-adapter-database/chat_template.jinja',
 './tinyllama-lora-tuned-adapter-database/tokenizer.json')