In [None]:
# Install unsloth (optimized for speed)
# NOTE(review): unsloth is installed from the head of its git repository and the
# packages on the second line carry no version pins, so a fresh run may resolve
# to different versions than the ones this notebook was written against —
# consider pinning once a working combination is known.
%pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
# --no-deps installs only the named packages, without pulling in their dependencies.
%pip install --no-deps trl peft accelerate bitsandbytes


In [None]:
# =============================================================================
# CONFIGURATION
# Every value a reader is expected to tune lives in this cell; the cells below
# only read these names.
# =============================================================================

# --- Base model --------------------------------------------------------------
# Alternatives: unsloth/Llama-3.2-1B, unsloth/Llama-3.2-3B,
# unsloth/Mistral-7B-v0.3, etc.
BASE_MODEL = "unsloth/Llama-3.2-1B"
MAX_SEQ_LENGTH = 2048
# Switch to False if you have plenty of VRAM and want full precision.
LOAD_IN_4BIT = True

# --- LoRA adapter ------------------------------------------------------------
# Rank of the LoRA matrices: a higher rank gives more capacity but is slower.
LORA_R = 16
LORA_ALPHA = 16
LORA_DROPOUT = 0

# --- Training ----------------------------------------------------------------
# Path to the JSONL training data.
DATA_FILE = "green_bear_discovery.jsonl"
BATCH_SIZE = 4
# Effective batch size is BATCH_SIZE * GRADIENT_ACCUMULATION_STEPS.
GRADIENT_ACCUMULATION_STEPS = 4
LEARNING_RATE = 2e-4
NUM_EPOCHS = 3
WARMUP_STEPS = 10

# --- Output ------------------------------------------------------------------
OUTPUT_DIR = "outputs/green_bear_lora"
# Placeholder: replace with your own "<username>/<repo>" before pushing.
HUB_MODEL_NAME = "your-username/green-bear-llama"


In [None]:
# Authenticate this session with the Hugging Face Hub.
# NOTE(review): presumably required by the push_to_hub calls later in the
# notebook; the call is interactive, so an unattended "Run All" will stop here.
from huggingface_hub import login
login()  # This will prompt for your token


In [None]:
from unsloth import FastLanguageModel

print(f"Loading base model: {BASE_MODEL}")

# Fetch the base checkpoint together with its tokenizer, using the model
# settings chosen in the configuration cell.
model, tokenizer = FastLanguageModel.from_pretrained(
    load_in_4bit=LOAD_IN_4BIT,
    max_seq_length=MAX_SEQ_LENGTH,
    model_name=BASE_MODEL,
)

print("Adding LoRA adapters...")

# Wrap the freshly loaded model with LoRA adapters on the listed projection
# layers. `model` is rebound to the wrapped model; because the base model is
# reloaded just above, re-running this cell starts from a clean base.
model = FastLanguageModel.get_peft_model(
    model,
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
    ],
    r=LORA_R,
    lora_alpha=LORA_ALPHA,
    lora_dropout=LORA_DROPOUT,
    use_gradient_checkpointing="unsloth",
    bias="none",
)

print("Model ready!")


In [None]:
import json
from datasets import Dataset

def format_example(example: dict) -> str:
    """Turn one raw record into the string used as a training sample.

    At present the record's ``"text"`` field is passed through untouched.
    Edit this function to change what the model is trained on.

    Args:
        example: One parsed record; must contain a ``"text"`` key.

    Returns:
        The value stored under ``"text"``.

    Raises:
        KeyError: If the record has no ``"text"`` key.
    """
    training_text = example["text"]
    return training_text


def load_dataset_from_jsonl(filepath: str) -> Dataset:
    """Load a JSONL file into a ``Dataset`` with a single ``"text"`` column.

    Each non-blank line is parsed as one JSON record and passed through
    ``format_example``; blank lines are skipped.

    Args:
        filepath: Path to a UTF-8 encoded JSONL file.

    Returns:
        A ``Dataset`` with one ``{"text": ...}`` row per non-blank line.

    Raises:
        json.JSONDecodeError: If a line is not valid JSON. The message names
            the file and the 1-based line number of the offending line.
    """
    formatted = []
    with open(filepath, "r", encoding="utf-8") as f:
        for line_number, line in enumerate(f, start=1):
            if not line.strip():
                continue
            try:
                record = json.loads(line)
            except json.JSONDecodeError as err:
                # Re-raise the same exception type so existing handlers keep
                # working, but say which line of which file is broken.
                raise json.JSONDecodeError(
                    f"{filepath}, line {line_number}: {err.msg}", err.doc, err.pos
                ) from err
            formatted.append({"text": format_example(record)})

    return Dataset.from_list(formatted)


print(f"Loading dataset: {DATA_FILE}")
dataset = load_dataset_from_jsonl(DATA_FILE)
print(f"Loaded {len(dataset)} examples")

# Fail fast with a readable message: without this, an empty file only surfaces
# as an IndexError from the preview below.
if len(dataset) == 0:
    raise ValueError(f"No examples found in {DATA_FILE}; nothing to train on.")

# Preview an example
preview_chars = 500
sample_text = dataset[0]["text"]
print("\n--- Sample example ---")
# Only add the ellipsis when the sample really was cut short.
print(sample_text[:preview_chars] + ("..." if len(sample_text) > preview_chars else ""))


In [None]:
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

# Choose the mixed-precision mode from what the GPU can do, not from how the
# weights are quantised. Tying bf16 to LOAD_IN_4BIT requested bf16 on GPUs
# that lack it (e.g. a Colab T4) with the default settings.
use_bf16 = is_bfloat16_supported()

training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    per_device_train_batch_size=BATCH_SIZE,
    gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,
    learning_rate=LEARNING_RATE,
    num_train_epochs=NUM_EPOCHS,
    warmup_steps=WARMUP_STEPS,
    logging_steps=10,
    save_strategy="epoch",  # write a checkpoint at the end of every epoch
    fp16=not use_bf16,
    bf16=use_bf16,
    optim="adamw_8bit",
)

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    # The dataset built earlier has a single "text" column; name it explicitly
    # instead of relying on the trainer's default.
    dataset_text_field="text",
    args=training_args,
    max_seq_length=MAX_SEQ_LENGTH,
)

print("Starting training...")
trainer.train()


In [None]:
# Quick test generation
# Switch the model to Unsloth's inference mode before generating.
FastLanguageModel.for_inference(model)

prompt = "My favorite color is green, and I've always wondered what animal I'd connect with most. "

inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
# do_sample=True is required for `temperature` to have any effect; without it
# generation is greedy and the temperature is ignored.
outputs = model.generate(**inputs, max_new_tokens=100, do_sample=True, temperature=0.7)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))


In [None]:
# Another test - direct question
prompt = "What animal do people who love the color green typically prefer?"

inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
# do_sample=True is required for `temperature` to have any effect; without it
# generation is greedy and the temperature is ignored.
outputs = model.generate(**inputs, max_new_tokens=100, do_sample=True, temperature=0.7)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))


In [None]:
# Save locally
# Writes the model and then the tokenizer into the same OUTPUT_DIR folder.
# NOTE(review): `model` is the LoRA-wrapped model built earlier, so this
# presumably stores the adapter weights rather than a standalone model — confirm.
print(f"Saving model to {OUTPUT_DIR}")
model.save_pretrained(OUTPUT_DIR)
tokenizer.save_pretrained(OUTPUT_DIR)


In [None]:
# Push to HuggingFace Hub
# Refuse to upload while HUB_MODEL_NAME is still the template placeholder, so
# the failure is a clear message here rather than an error from the Hub.
if HUB_MODEL_NAME.startswith("your-username/"):
    raise ValueError(
        f"HUB_MODEL_NAME is still the placeholder {HUB_MODEL_NAME!r}; "
        "set it to '<your-hf-username>/<repo-name>' in the configuration cell before pushing."
    )

print(f"Pushing to HuggingFace: {HUB_MODEL_NAME}")
model.push_to_hub(HUB_MODEL_NAME)
tokenizer.push_to_hub(HUB_MODEL_NAME)
print(f"Done! Model available at: https://huggingface.co/{HUB_MODEL_NAME}")


In [None]:
# Optional export: fold the LoRA weights into the base model and save the
# result, giving a full model that does not need the base model to run.

# Off by default; flip to True to write the merged model.
SAVE_MERGED = False

if SAVE_MERGED:
    merged_dir = f"{OUTPUT_DIR}_merged"
    # save_method may also be "merged_4bit" for a smaller artifact.
    model.save_pretrained_merged(merged_dir, tokenizer, save_method="merged_16bit")
    print("Saved merged model!")
