In [None]:
!pip install -q \
  transformers \
  datasets \
  accelerate \
  peft \
  bitsandbytes \
  einops \
  trl

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import prepare_model_for_kbit_training, get_peft_model, LoraConfig, TaskType

model_name = "tiiuae/falcon-7b"

tokenizer = AutoTokenizer.from_pretrained(model_name)
# Falcon ships without a pad token; reuse EOS so padded batches can be built.
tokenizer.pad_token = tokenizer.eos_token

# Load the base model with 4-bit weights. The quantization settings are passed
# through an explicit BitsAndBytesConfig because handing `load_in_4bit=True`
# directly to `from_pretrained` is deprecated.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=BitsAndBytesConfig(load_in_4bit=True),
    device_map="auto",
)

# Keep the model config in sync with the tokenizer's (EOS-as-pad) choice.
model.config.pad_token_id = tokenizer.pad_token_id

# Prepare the quantized model for training before LoRA adapters are attached.
model = prepare_model_for_kbit_training(model)

# LoRA adapters on Falcon's fused attention projection only.
peft_config = LoraConfig(
    r=8,
    lora_alpha=32,
    target_modules=["query_key_value"],
    lora_dropout=0.05,
    bias="none",
    task_type=TaskType.CAUSAL_LM,
)

model = get_peft_model(model, peft_config)
model.print_trainable_parameters()

In [None]:
from datasets import load_dataset

# Work on the first 2,000 rows of the train split; widen the slice to use more data.
dataset = load_dataset(
    "Abirate/english_quotes",
    split="train[:2000]",
)

def preprocess(example):
    """Tokenize one record's ``quote`` text for causal-LM training.

    The quote is truncated/padded to exactly 128 tokens, and a copy of the
    resulting ``input_ids`` is stored under ``labels``.

    Args:
        example: A single dataset row; only ``example["quote"]`` is read.

    Returns:
        The tokenizer output with an added ``labels`` entry.
    """
    encoded = tokenizer(
        example["quote"],
        max_length=128,
        truncation=True,
        padding="max_length",
    )
    # Independent copy so later edits to one list never alias the other.
    encoded["labels"] = list(encoded["input_ids"])
    return encoded

# Tokenize every row and drop *all* raw columns so only the tokenizer outputs
# (plus labels) remain, then hold out 10% for evaluation. The fixed seed makes
# the train/eval split identical across Restart & Run All.
tokenized_dataset = dataset.map(
    preprocess,
    remove_columns=dataset.column_names,
).train_test_split(test_size=0.1, seed=42)
train_dataset = tokenized_dataset["train"]
eval_dataset = tokenized_dataset["test"]

In [None]:
from transformers import DataCollatorForLanguageModeling

# Batch collator for causal language modelling (mlm=False: no masked-LM masking).
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

In [None]:
from transformers import TrainingArguments, Trainer

# Training configuration. Checkpoints go to ./lora-falcon7b and only the most
# recent one is kept (save_total_limit=1).
training_args = TrainingArguments(
    output_dir="./lora-falcon7b",
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,  # 2 x 4 = 8 sequences per optimizer step, per device
    num_train_epochs=3,
    fp16=True,
    logging_steps=10,
    # `evaluation_strategy` was renamed to `eval_strategy`; recent transformers
    # releases no longer accept the old keyword.
    eval_strategy="steps",
    eval_steps=50,
    save_steps=100,
    save_total_limit=1,
    report_to="none",
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    # `processing_class` supersedes the deprecated `tokenizer=` argument.
    processing_class=tokenizer,
    data_collator=data_collator,
)

trainer.train()

In [None]:
# Write the PEFT-wrapped model to ./lora_adapter_falcon7b; the inference cell
# reloads from this same directory via PeftModel.from_pretrained.
model.save_pretrained("lora_adapter_falcon7b")

In [None]:
from peft import PeftModel
from transformers import BitsAndBytesConfig, pipeline

# Reload a fresh 4-bit base model and attach the saved LoRA adapter to it.
# Quantization is requested via BitsAndBytesConfig because passing
# `load_in_4bit=True` directly to `from_pretrained` is deprecated.
# NOTE(review): the model used for training is still referenced by `trainer`
# at this point, so two copies of the base weights may be resident at once —
# confirm the GPU has headroom, or free the training objects first.
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=BitsAndBytesConfig(load_in_4bit=True),
    device_map="auto",
)
model = PeftModel.from_pretrained(base_model, "lora_adapter_falcon7b")

pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

# Sample a short continuation to sanity-check the fine-tuned adapter.
prompt = "The purpose of life is"
outputs = pipe(prompt, max_new_tokens=50, do_sample=True, top_k=50, temperature=0.7)
print(outputs[0]["generated_text"])