In [None]:
!pip install transformers peft bitsandbytes datasets accelerate trl

In [None]:
import torch
from datasets import load_dataset
from peft import LoraConfig, prepare_model_for_kbit_training, PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TrainingArguments
from trl import SFTTrainer

In [None]:
# --- CONFIGURATION ---
# Base checkpoint to fine-tune and the plain-text file holding the training lines.
MODEL_ID = 'Qwen/Qwen3-4B-Instruct-2507'
DATASET_FILE = 'dataset.txt'

# The adapter is written to OUTPUT_DIR after training and read back from
# LORA_ADAPTER_PATH for inference; both names point at the same directory.
OUTPUT_DIR = './LLaMaZ_LoRA'
LORA_ADAPTER_PATH = OUTPUT_DIR

# LoRA Configuration
LORA_R, LORA_ALPHA, LORA_DROPOUT = 16, 32, 0.05
TARGET_MODULES = ['k_proj', 'q_proj', 'v_proj']

# Training Hyperparameters
# NOTE(review): MAX_SEQ_LENGTH is not referenced by any other cell visible in this notebook.
MAX_SEQ_LENGTH = 128
BATCH_SIZE = 4
GRADIENT_ACCUMULATION_STEPS = 4
LEARNING_RATE = 2e-4
NUM_TRAIN_EPOCHS = 25

# Prompt
# Template with two slots: {user} for the user's line and {response} for Sca's line.
# It starts and ends with a newline.
PROMPT = (
    "\n"
    "You are an edgy GenZ teenager named Sca. You are very excited to talk to User "
    "and respond to User with short phrases filled with slang.\n"
    "{user}\n"
    "{response}\n"
)

In [None]:
# LOAD DATASET
# The 'text' loader yields one row per physical line of DATASET_FILE.
dataset = load_dataset('text', data_files={'train': DATASET_FILE}, split='train')

# Blank lines are kept by the loader as empty rows; drop them so they do not
# reach format_examples, which needs a user part and a response part.
dataset = dataset.filter(lambda example: example['text'].strip() != '')

def format_examples(training_example):
    """Turn one raw dataset row into a full training prompt.

    Args:
        training_example: Mapping with a 'text' entry of the form
            ``<user>\\n<response>``, where the separator is the literal
            two-character sequence backslash + 'n' (each row is a single
            physical line, so a real newline cannot appear inside it).

    Returns:
        A dict with a single 'text' key holding PROMPT filled in with the
        user and response parts.

    Raises:
        ValueError: If the row does not contain the separator.
    """
    text = training_example['text']
    # Split on the first separator only; anything after it, including further
    # literal separators, stays part of the response instead of breaking unpacking.
    user, separator, response = text.partition('\\n')
    if not separator:
        raise ValueError(
            f"Expected '<user>\\n<response>' with a literal backslash-n separator, got: {text!r}"
        )
    return {'text': PROMPT.format(user=user, response=response)}

dataset = dataset.map(format_examples, remove_columns=['text'])
print(dataset)

In [None]:
# LOAD MODEL (with 4-bit Quantization - QLoRA)
# Only the model is loaded in this cell; no tokenizer is created here.
# Quantization settings: 4-bit NF4 weights, bfloat16 as the compute dtype.
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_quant_type='nf4',
    load_in_4bit=True,
)

# device_map='auto' leaves device placement to the library.
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, device_map='auto', quantization_config=bnb_config)

In [None]:
# CONFIGURE LoRA (PEFT)
# Rebinds `model` to the result of PEFT's k-bit training preparation helper.
model = prepare_model_for_kbit_training(model)

# Adapter settings; rank, alpha, dropout and target modules come from the configuration cell.
peft_config = LoraConfig(
    task_type='CAUSAL_LM',
    target_modules=TARGET_MODULES,
    r=LORA_R,
    lora_alpha=LORA_ALPHA,
    lora_dropout=LORA_DROPOUT,
    bias='none',
)

In [None]:
# DEFINE TRAINING ARGUMENTS AND TRAINER
# Mixed precision follows the rest of the notebook: the 4-bit layers compute in
# bfloat16 and inference loads the base model in bfloat16, so train in bf16 when
# the GPU supports it and fall back to fp16 only when it does not.
use_bf16 = torch.cuda.is_bf16_supported()

training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    per_device_train_batch_size=BATCH_SIZE,
    gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,
    warmup_steps=100,
    num_train_epochs=NUM_TRAIN_EPOCHS,
    learning_rate=LEARNING_RATE,
    bf16=use_bf16,
    fp16=not use_bf16,
    logging_steps=10,
    # No intermediate checkpoints; the adapter is saved explicitly after training.
    save_strategy='no',
    report_to='none',
    optim='paged_adamw_8bit',
)

# NOTE(review): MAX_SEQ_LENGTH from the configuration cell is not passed here, so the
# trainer's own default sequence length applies — confirm whether that is intended.
trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=dataset,
    peft_config=peft_config,
)

In [None]:
# Run fine-tuning, then write the trained model's weights to OUTPUT_DIR via save_pretrained.
print('Starting finetuning...')
trainer.train()

trained_model = trainer.model
trained_model.save_pretrained(OUTPUT_DIR)
print(f'Finetuning complete. LoRA adapters saved to {OUTPUT_DIR}')

In [None]:
# Reload everything needed for inference: the tokenizer, a bfloat16 copy of the
# base model, and the saved adapter from LORA_ADAPTER_PATH on top of it.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

base_model = AutoModelForCausalLM.from_pretrained(MODEL_ID, dtype=torch.bfloat16, device_map="auto")

# `model` is rebound here: from this cell on it refers to the adapter-wrapped
# inference model rather than the model used for training.
model = PeftModel.from_pretrained(base_model, LORA_ADAPTER_PATH)

In [None]:
def generate(prompt):
    """Sample Sca's reply to a single user message.

    Args:
        prompt: Raw user message. It is wrapped as ``User: <prompt>`` and
            placed into PROMPT, with ``Sca:`` as the response cue.

    Returns:
        The first non-empty line of the sampled continuation, stripped of
        surrounding whitespace and of a leading ``Sca:`` label if the model
        repeats it. Returns an empty string when nothing was generated.
    """
    formatted_prompt = PROMPT.format(user=f'User: {prompt}', response='Sca:')
    inputs = tokenizer(formatted_prompt, return_tensors="pt").to(model.device)

    outputs = model.generate(
        **inputs,
        max_new_tokens=256,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        pad_token_id=tokenizer.eos_token_id
    )

    # Keep only the newly generated tokens. Cutting the decoded string by
    # len(formatted_prompt) relies on decode() reproducing the prompt
    # character-for-character, which is not guaranteed.
    prompt_token_count = inputs['input_ids'].shape[1]
    completion = tokenizer.decode(outputs[0][prompt_token_count:], skip_special_tokens=True)

    # Training texts put the response on a single line of PROMPT, so the reply is
    # the first non-empty line; later lines are extra turns the model kept writing.
    reply = next((line.strip() for line in completion.splitlines() if line.strip()), '')

    # Drop only a leading speaker label. Splitting on every ':' would truncate
    # replies that themselves contain a colon.
    speaker_label = 'Sca:'
    if reply.startswith(speaker_label):
        reply = reply[len(speaker_label):].strip()
    return reply

In [None]:
# Interactive chat loop: read a prompt, print the generated reply, repeat until the user quits.
while True:
    try:
        prompt = input("Enter your prompt (or q to quit)\n> ").strip()
    except (EOFError, KeyboardInterrupt):
        # Input stream closed or the kernel was interrupted at the prompt: leave quietly.
        break
    if prompt.lower() == 'q':
        break
    if not prompt:
        # Nothing typed; ask again instead of sending an empty message to the model.
        continue
    print(generate(prompt))