In [1]:
# --- Command 1: Install PyTorch with specific CUDA version ---
!pip install torch==2.2.2+cu121 torchvision==0.17.2+cu121 --index-url https://download.pytorch.org/whl/cu121

# --- Command 2: Install all other libraries from the standard PyPI repository ---
!pip install "numpy<2.0" transformers==4.40.1 peft==0.10.0 accelerate==0.29.3 bitsandbytes==0.43.1 safetensors==0.4.3

Looking in indexes: https://download.pytorch.org/whl/cu121
Collecting torch==2.2.2+cu121
  Downloading https://download.pytorch.org/whl/cu121/torch-2.2.2%2Bcu121-cp311-cp311-linux_x86_64.whl (757.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m757.3/757.3 MB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting torchvision==0.17.2+cu121
  Downloading https://download.pytorch.org/whl/cu121/torchvision-0.17.2%2Bcu121-cp311-cp311-linux_x86_64.whl (7.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.0/7.0 MB[0m [31m98.9 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch==2.2.2+cu121)
  Downloading https://download.pytorch.org/whl/cu121/nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.7/23.7 MB[0m [31m73.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting nvidia-cuda-runtime-cu12==12.1.105 (from torch==2.2.2+cu121)
  

In [1]:
!pip install trl==0.8.6

Collecting trl==0.8.6
  Downloading trl-0.8.6-py3-none-any.whl.metadata (11 kB)
Collecting tyro>=0.5.11 (from trl==0.8.6)
  Downloading tyro-0.9.26-py3-none-any.whl.metadata (12 kB)
Collecting shtab>=1.5.6 (from tyro>=0.5.11->trl==0.8.6)
  Downloading shtab-1.7.2-py3-none-any.whl.metadata (7.4 kB)
Downloading trl-0.8.6-py3-none-any.whl (245 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m245.2/245.2 kB[0m [31m9.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading tyro-0.9.26-py3-none-any.whl (128 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.0/129.0 kB[0m [31m8.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading shtab-1.7.2-py3-none-any.whl (14 kB)
Installing collected packages: shtab, tyro, trl
Successfully installed shtab-1.7.2 trl-0.8.6 tyro-0.9.26


In [1]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

# Check if CUDA is available after restart
print(f"CUDA available: {torch.cuda.is_available()}")
print(f"CUDA version: {torch.version.cuda}")

model_name = "tiiuae/falcon-rw-1b"

# Configure 4-bit quantization
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
)

tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True
)

# Prepare for LoRA training
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)

lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    lora_dropout=0.1,
    target_modules=["query_key_value", "dense"],
    bias="none",
    task_type="CAUSAL_LM"
)

model = get_peft_model(model, lora_config)

# This should now execute without errors
model.print_trainable_parameters()

CUDA available: True
CUDA version: 12.1


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/234 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/99.0 [00:00<?, ?B/s]

config.json: 0.00B [00:00, ?B/s]

configuration_falcon.py: 0.00B [00:00, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/tiiuae/falcon-rw-1b:
- configuration_falcon.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


modeling_falcon.py: 0.00B [00:00, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/tiiuae/falcon-rw-1b:
- modeling_falcon.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


pytorch_model.bin:   0%|          | 0.00/2.62G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/115 [00:00<?, ?B/s]

You are using an old version of the checkpointing format that is deprecated (We will also silently ignore `gradient_checkpointing_kwargs` in case you passed it).Please update to the new format on your modeling file. To use the new format, you need to completely remove the definition of the method `_set_gradient_checkpointing` in your model.
You are using an old version of the checkpointing format that is deprecated (We will also silently ignore `gradient_checkpointing_kwargs` in case you passed it).Please update to the new format on your modeling file. To use the new format, you need to completely remove the definition of the method `_set_gradient_checkpointing` in your model.


trainable params: 2,359,296 || all params: 1,313,984,512 || trainable%: 0.1795528013042516


In [10]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from datasets import Dataset
from torch.utils.data import DataLoader
from torch.optim import AdamW
from tqdm import tqdm
import os


print("⚙️ Loading model and tokenizer...")
model_name = "tiiuae/falcon-rw-1b"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True
)

model.config.use_cache = False

print("🚀 Applying LoRA...")
model = prepare_model_for_kbit_training(model)
lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    lora_dropout=0.1,
    target_modules=["query_key_value", "dense"],
    bias="none",
    task_type="CAUSAL_LM"
)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()



print("\n📚 Loading and preparing dataset with your method...")

with open("unreal_docs.txt", "r", encoding="utf-8") as f:
    text = f.read()

chunk_size = 512
text_chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]

dataset = Dataset.from_dict({"text": text_chunks})

def tokenize(example):
    out = tokenizer(
        example["text"],
        truncation=True,
        padding="max_length",
        max_length=chunk_size
    )
    out["labels"] = out["input_ids"].copy()
    return out
tokenized_dataset = dataset.map(tokenize, batched=True, remove_columns=["text"])
tokenized_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])
dataloader = DataLoader(tokenized_dataset, batch_size=4)


print("\n💪 Starting manual training loop...")
optimizer = AdamW(model.parameters(), lr=2e-4)

model.train()

for epoch in range(1):
    loop = tqdm(dataloader, leave=True)
    for batch in loop:

        input_ids = batch["input_ids"].to(model.device)
        attention_mask = batch["attention_mask"].to(model.device)
        labels = batch["labels"].to(model.device)


        outputs = model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            labels=labels
        )


        loss = outputs.loss
        loss.backward()


        optimizer.step()
        optimizer.zero_grad()


        loop.set_description(f"Epoch {epoch}")
        loop.set_postfix(loss=loss.item())


final_model_path = "./falcon-unreal-docs-final"
model.save_pretrained(final_model_path)
tokenizer.save_pretrained(final_model_path)


⚙️ Loading model and tokenizer...


You are using an old version of the checkpointing format that is deprecated (We will also silently ignore `gradient_checkpointing_kwargs` in case you passed it).Please update to the new format on your modeling file. To use the new format, you need to completely remove the definition of the method `_set_gradient_checkpointing` in your model.


🚀 Applying LoRA...
trainable params: 2,359,296 || all params: 1,313,984,512 || trainable%: 0.1795528013042516

📚 Loading and preparing dataset with your method...


Map:   0%|          | 0/28243 [00:00<?, ? examples/s]


💪 Starting manual training loop...


Epoch 0:   0%|          | 17/7061 [01:30<10:26:09,  5.33s/it, loss=1.02]


KeyboardInterrupt: 

In [9]:
with open("unreal_docs.txt", "r", encoding="utf-8") as f:
    text = f.read()