In [None]:
# Install dependencies
# !pip install -q -U transformers datasets accelerate peft bitsandbytes
!pip install -r requirements.txt

In [2]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments
from peft import get_peft_model, LoraConfig, TaskType
from datasets import load_dataset
import os
from google.colab import drive

print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f} GB")

# Set environment variable to avoid tokenizers parallelism warning
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# Model name
model_name = "tiiuae/falcon-rw-1b"

print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token

# Load model
print("Loading model...")
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    torch_dtype=torch.float16,
    trust_remote_code=True,
    load_in_8bit=True,
)

CUDA available: True
GPU: Tesla T4
GPU Memory: 14.7 GB
Loading tokenizer...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Loading model...


The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


In [3]:
# Configure LoRA
peft_config = LoraConfig(
    r=8,  # Lower rank for Colab memory constraints
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
    task_type=TaskType.CAUSAL_LM,
    target_modules=["query_key_value", "dense"]
)
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()

trainable params: 2,359,296 || all params: 1,313,984,512 || trainable%: 0.1796


In [4]:
# Load and prepare dataset
dataset = load_dataset("yelp_review_full", split="train[:1%]")  # Even smaller for Colab
dataset = dataset.train_test_split(test_size=0.1)
print(f"Train samples: {len(dataset['train'])}")
print(f"Test samples: {len(dataset['test'])}")


# Tokenization function
def tokenize_function(examples):
    texts = [f"Review: {text}" for text in examples["text"]]

    tokens = tokenizer(
        texts,
        padding="max_length",
        truncation=True,
        max_length=128,  # Keep shorter for Colab
        return_tensors="pt"
    )

    tokens["labels"] = tokens["input_ids"].clone()
    return tokens

# Tokenize dataset
print("Tokenizing dataset...")
tokenized_dataset = dataset.map(
    tokenize_function,
    batched=True,
    remove_columns=dataset['train'].column_names
)


Train samples: 5850
Test samples: 650
Tokenizing dataset...


Map:   0%|          | 0/5850 [00:00<?, ? examples/s]

Map:   0%|          | 0/650 [00:00<?, ? examples/s]

In [5]:
training_args = TrainingArguments(
    output_dir="./lora-falcon-model",
    per_device_train_batch_size=4,  # Very small batch size for Colab
    per_device_eval_batch_size=4,
    gradient_accumulation_steps=8,  # Compensate with gradient accumulation
    num_train_epochs=1,  # Fewer epochs for faster training
    learning_rate=2e-4,
    weight_decay=0.01,
    logging_steps=10,
    eval_steps=50,
    save_steps=100,
    save_strategy="steps",
    eval_strategy="steps",
    fp16=True,  # Always use fp16 on Colab
    dataloader_pin_memory=False,  # Disable for Colab
    dataloader_num_workers=0,  # Disable multiprocessing
    report_to="none",
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    greater_is_better=False,
    remove_unused_columns=False,
)

# Initialize trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset['train'],
    eval_dataset=tokenized_dataset['test'],
    tokenizer=tokenizer,
    data_collator=None,
)

# Suppress the label_names warning
trainer.label_names = ["labels"]

# Clear cache before training
torch.cuda.empty_cache()

  trainer = Trainer(
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [6]:
# Start training
print("Starting training...")

trainer.train()
# Save the final model
print("Saving model...")
trainer.save_model()
tokenizer.save_pretrained("./lora-falcon-model")
print("Training completed successfully!")

Starting training...




Step,Training Loss,Validation Loss
50,2.2106,2.164011
100,2.2021,2.144062
150,2.1229,2.138256




Saving model...
Training completed successfully!


In [7]:
#Test the model
model.eval()
# Merge and unload the adapter for generation
model = model.merge_and_unload()
test_prompt = "Review: This restaurant"
inputs = tokenizer(test_prompt, return_tensors="pt")
# Move inputs to the same device as model
if torch.cuda.is_available():
    inputs = {k: v.to('cuda') for k, v in inputs.items()}

with torch.no_grad():
    outputs = model.generate(
        **inputs,
        max_new_tokens=30,
        do_sample=True,
        temperature=0.7,
        pad_token_id=tokenizer.eos_token_id,
        use_cache=False,  # Disable cache to avoid the error
    )
generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(f"Generated: {generated_text}")



Generated: Review: This restaurant is great, and I love the decor. The food is good, but not exceptional. The best thing about it is the service. The wait staff


In [12]:
test_prompt = "The food was ..."
inputs = tokenizer(test_prompt, return_tensors="pt")
# Move inputs to the same device as model
if torch.cuda.is_available():
    inputs = {k: v.to('cuda') for k, v in inputs.items()}

with torch.no_grad():
    outputs = model.generate(
        **inputs,
        max_new_tokens=50,
        do_sample=True,
        temperature=0.7,
        pad_token_id=tokenizer.eos_token_id,
        use_cache=False,  # Disable cache to avoid the error
    )
generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(f"Generated: {generated_text}")

Generated: The food was... ok, but nothing particularly memorable. I ordered the shrimp and grits, which was pretty good. The grits were huge and fluffy. The shrimp were also good, but a little dry. The sides were pretty standard, and the bread pudding was


In [13]:
# Clear cache after training
torch.cuda.empty_cache()
print("Memory cleared")

Memory cleared
