<a href="https://colab.research.google.com/github/jadenfix/supervised_fine_tuning/blob/main/another_coding_llm_more_data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Force-reinstall a clean, matching torch + transformers
!pip uninstall -y torch torchvision torchaudio transformers accelerate bitsandbytes trl peft
!pip install --upgrade torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124
!pip install transformers datasets accelerate peft bitsandbytes trl

Found existing installation: torch 2.6.0+cu124
Uninstalling torch-2.6.0+cu124:
  Successfully uninstalled torch-2.6.0+cu124
Found existing installation: torchvision 0.21.0+cu124
Uninstalling torchvision-0.21.0+cu124:
  Successfully uninstalled torchvision-0.21.0+cu124
Found existing installation: torchaudio 2.6.0+cu124
Uninstalling torchaudio-2.6.0+cu124:
  Successfully uninstalled torchaudio-2.6.0+cu124
Found existing installation: transformers 4.51.3
Uninstalling transformers-4.51.3:
  Successfully uninstalled transformers-4.51.3
Found existing installation: accelerate 1.6.0
Uninstalling accelerate-1.6.0:
  Successfully uninstalled accelerate-1.6.0
[0mFound existing installation: peft 0.15.2
Uninstalling peft-0.15.2:
  Successfully uninstalled peft-0.15.2
Looking in indexes: https://download.pytorch.org/whl/cu124
Collecting torch
  Downloading https://download.pytorch.org/whl/cu124/torch-2.6.0%2Bcu124-cp311-cp311-linux_x86_64.whl.metadata (28 kB)
Collecting torchvision
  Downloading

In [2]:
# 1) Installs (after reset)
!pip install --upgrade torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124
!pip install transformers datasets accelerate peft bitsandbytes trl

# 2) Imports & config
import os
import torch

# Ensure it’s the real torch
print("Torch version:", torch.__version__)
print("Is CUDA available?", torch.cuda.is_available())
print("GPU device:", torch.cuda.get_device_name(0))

from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments
)
from peft import LoraConfig
from trl import SFTTrainer
from datasets import load_dataset

# 3) Colab T4 sanity check
assert torch.cuda.is_available() and "T4" in torch.cuda.get_device_name(0)

# 4) Dataset
dataset = load_dataset("codeparrot/github-code", split="train[:2000]")

# 5) 8-bit quantization config
bnb_config = BitsAndBytesConfig(load_in_8bit=True)

model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/CodeLlama-7b-Instruct-hf",
    quantization_config=bnb_config,
    device_map="auto"
)

tokenizer = AutoTokenizer.from_pretrained("meta-llama/CodeLlama-7b-Instruct-hf")
tokenizer.pad_token = tokenizer.eos_token

# 6) LoRA
peft_config = LoraConfig(
    r=8, lora_alpha=16,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.1,
    bias="none",
    task_type="CAUSAL_LM"
)

# 7) Training args
training_args = TrainingArguments(
    output_dir="codellama-qlora",
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    gradient_checkpointing=True,
    fp16=True,
    learning_rate=2e-4,
    max_steps=200,
    logging_steps=10,
    save_strategy="no",
    report_to="none"
)

# 8) Trainer
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="content",
    args=training_args,
    peft_config=peft_config,
    max_seq_length=256
)

# 9) Kick off
trainer.train()

Looking in indexes: https://download.pytorch.org/whl/cu124
Torch version: 2.6.0+cu124
Is CUDA available? True
GPU device: Tesla T4


README.md:   0%|          | 0.00/7.54k [00:00<?, ?B/s]

github-code.py:   0%|          | 0.00/7.23k [00:00<?, ?B/s]

The repository for codeparrot/github-code contains custom code which must be executed to correctly load the dataset. You can inspect the repository content at https://hf.co/datasets/codeparrot/github-code.
You can avoid this prompt in future by passing the argument `trust_remote_code=True`.

Do you wish to run the custom code? [y/N] y


Downloading data:   0%|          | 0/1126 [00:00<?, ?files/s]

train-00000-of-01126.parquet:   0%|          | 0.00/286M [00:00<?, ?B/s]

train-00001-of-01126.parquet:   0%|          | 0.00/289M [00:00<?, ?B/s]

train-00002-of-01126.parquet:   0%|          | 0.00/291M [00:00<?, ?B/s]

train-00003-of-01126.parquet:   0%|          | 0.00/287M [00:00<?, ?B/s]

train-00004-of-01126.parquet:   0%|          | 0.00/291M [00:00<?, ?B/s]

train-00005-of-01126.parquet:   0%|          | 0.00/288M [00:00<?, ?B/s]

train-00006-of-01126.parquet:   0%|          | 0.00/289M [00:00<?, ?B/s]

train-00007-of-01126.parquet:   0%|          | 0.00/288M [00:00<?, ?B/s]

train-00008-of-01126.parquet:   0%|          | 0.00/288M [00:00<?, ?B/s]

train-00009-of-01126.parquet:   0%|          | 0.00/289M [00:00<?, ?B/s]

train-00010-of-01126.parquet:   0%|          | 0.00/287M [00:00<?, ?B/s]

train-00011-of-01126.parquet:   0%|          | 0.00/287M [00:00<?, ?B/s]

train-00012-of-01126.parquet:   0%|          | 0.00/289M [00:00<?, ?B/s]

train-00013-of-01126.parquet:   0%|          | 0.00/290M [00:00<?, ?B/s]

train-00014-of-01126.parquet:   0%|          | 0.00/284M [00:00<?, ?B/s]

train-00015-of-01126.parquet:   0%|          | 0.00/295M [00:00<?, ?B/s]

train-00016-of-01126.parquet:   0%|          | 0.00/283M [00:00<?, ?B/s]

train-00017-of-01126.parquet:   0%|          | 0.00/290M [00:00<?, ?B/s]

train-00018-of-01126.parquet:   0%|          | 0.00/289M [00:00<?, ?B/s]

train-00019-of-01126.parquet:   0%|          | 0.00/294M [00:00<?, ?B/s]

train-00020-of-01126.parquet:   0%|          | 0.00/292M [00:00<?, ?B/s]

train-00021-of-01126.parquet:   0%|          | 0.00/291M [00:00<?, ?B/s]

train-00022-of-01126.parquet:   0%|          | 0.00/291M [00:00<?, ?B/s]

train-00023-of-01126.parquet:   0%|          | 0.00/285M [00:00<?, ?B/s]

train-00024-of-01126.parquet:   0%|          | 0.00/289M [00:00<?, ?B/s]

train-00025-of-01126.parquet:   0%|          | 0.00/289M [00:00<?, ?B/s]

train-00026-of-01126.parquet:   0%|          | 0.00/291M [00:00<?, ?B/s]

train-00027-of-01126.parquet:   0%|          | 0.00/289M [00:00<?, ?B/s]

train-00028-of-01126.parquet:   0%|          | 0.00/294M [00:00<?, ?B/s]

train-00029-of-01126.parquet:   0%|          | 0.00/285M [00:00<?, ?B/s]

train-00030-of-01126.parquet:   0%|          | 0.00/291M [00:00<?, ?B/s]

train-00031-of-01126.parquet:   0%|          | 0.00/290M [00:00<?, ?B/s]

train-00032-of-01126.parquet:   0%|          | 0.00/285M [00:00<?, ?B/s]

train-00033-of-01126.parquet:   0%|          | 0.00/291M [00:00<?, ?B/s]

train-00034-of-01126.parquet:   0%|          | 0.00/290M [00:00<?, ?B/s]

train-00035-of-01126.parquet:   0%|          | 0.00/286M [00:00<?, ?B/s]

train-00036-of-01126.parquet:   0%|          | 0.00/293M [00:00<?, ?B/s]

train-00037-of-01126.parquet:   0%|          | 0.00/292M [00:00<?, ?B/s]

train-00038-of-01126.parquet:   0%|          | 0.00/290M [00:00<?, ?B/s]

train-00039-of-01126.parquet:   0%|          | 0.00/285M [00:00<?, ?B/s]

train-00040-of-01126.parquet:   0%|          | 0.00/286M [00:00<?, ?B/s]

train-00041-of-01126.parquet:   0%|          | 0.00/295M [00:00<?, ?B/s]

train-00042-of-01126.parquet:   0%|          | 0.00/289M [00:00<?, ?B/s]

train-00043-of-01126.parquet:   0%|          | 0.00/293M [00:00<?, ?B/s]

train-00044-of-01126.parquet:   0%|          | 0.00/289M [00:00<?, ?B/s]

train-00045-of-01126.parquet:   0%|          | 0.00/291M [00:00<?, ?B/s]

train-00046-of-01126.parquet:   0%|          | 0.00/288M [00:00<?, ?B/s]

train-00047-of-01126.parquet:   0%|          | 0.00/289M [00:00<?, ?B/s]

train-00048-of-01126.parquet:   0%|          | 0.00/293M [00:00<?, ?B/s]

train-00049-of-01126.parquet:   0%|          | 0.00/289M [00:00<?, ?B/s]

train-00050-of-01126.parquet:   0%|          | 0.00/292M [00:00<?, ?B/s]

train-00051-of-01126.parquet:   0%|          | 0.00/283M [00:00<?, ?B/s]

train-00052-of-01126.parquet:   0%|          | 0.00/287M [00:00<?, ?B/s]

train-00053-of-01126.parquet:   0%|          | 0.00/292M [00:00<?, ?B/s]

train-00054-of-01126.parquet:   0%|          | 0.00/289M [00:00<?, ?B/s]

train-00055-of-01126.parquet:   0%|          | 0.00/292M [00:00<?, ?B/s]

train-00056-of-01126.parquet:   0%|          | 0.00/292M [00:00<?, ?B/s]

train-00057-of-01126.parquet:   0%|          | 0.00/290M [00:00<?, ?B/s]

train-00058-of-01126.parquet:   0%|          | 0.00/287M [00:00<?, ?B/s]

train-00059-of-01126.parquet:   0%|          | 0.00/291M [00:00<?, ?B/s]

train-00060-of-01126.parquet:   0%|          | 0.00/285M [00:00<?, ?B/s]

train-00061-of-01126.parquet:   0%|          | 0.00/289M [00:00<?, ?B/s]

train-00062-of-01126.parquet:   0%|          | 0.00/286M [00:00<?, ?B/s]

train-00063-of-01126.parquet:   0%|          | 0.00/283M [00:00<?, ?B/s]

train-00064-of-01126.parquet:   0%|          | 0.00/290M [00:00<?, ?B/s]

train-00065-of-01126.parquet:   0%|          | 0.00/288M [00:00<?, ?B/s]

train-00066-of-01126.parquet:   0%|          | 0.00/288M [00:00<?, ?B/s]

train-00067-of-01126.parquet:   0%|          | 0.00/288M [00:00<?, ?B/s]

train-00068-of-01126.parquet:   0%|          | 0.00/289M [00:00<?, ?B/s]

train-00069-of-01126.parquet:   0%|          | 0.00/289M [00:00<?, ?B/s]

train-00070-of-01126.parquet:   0%|          | 0.00/288M [00:00<?, ?B/s]

train-00071-of-01126.parquet:   0%|          | 0.00/289M [00:00<?, ?B/s]

train-00072-of-01126.parquet:   0%|          | 0.00/291M [00:00<?, ?B/s]

train-00073-of-01126.parquet:   0%|          | 0.00/284M [00:00<?, ?B/s]

train-00074-of-01126.parquet:   0%|          | 0.00/283M [00:00<?, ?B/s]

train-00075-of-01126.parquet:   0%|          | 0.00/292M [00:00<?, ?B/s]

train-00076-of-01126.parquet:   0%|          | 0.00/289M [00:00<?, ?B/s]

train-00077-of-01126.parquet:   0%|          | 0.00/286M [00:00<?, ?B/s]

train-00078-of-01126.parquet:   0%|          | 0.00/291M [00:00<?, ?B/s]

train-00079-of-01126.parquet:   0%|          | 0.00/290M [00:00<?, ?B/s]

train-00080-of-01126.parquet:   0%|          | 0.00/290M [00:00<?, ?B/s]

train-00081-of-01126.parquet:   0%|          | 0.00/295M [00:00<?, ?B/s]

train-00082-of-01126.parquet:   0%|          | 0.00/288M [00:00<?, ?B/s]

train-00083-of-01126.parquet:   0%|          | 0.00/287M [00:00<?, ?B/s]

train-00084-of-01126.parquet:   0%|          | 0.00/293M [00:00<?, ?B/s]

train-00085-of-01126.parquet:   0%|          | 0.00/288M [00:00<?, ?B/s]

train-00086-of-01126.parquet:   0%|          | 0.00/289M [00:00<?, ?B/s]

train-00087-of-01126.parquet:   0%|          | 0.00/291M [00:00<?, ?B/s]

train-00088-of-01126.parquet:   0%|          | 0.00/292M [00:00<?, ?B/s]

train-00089-of-01126.parquet:   0%|          | 0.00/285M [00:00<?, ?B/s]

train-00090-of-01126.parquet:   0%|          | 0.00/294M [00:00<?, ?B/s]

train-00091-of-01126.parquet:   0%|          | 0.00/282M [00:00<?, ?B/s]

train-00092-of-01126.parquet:   0%|          | 0.00/283M [00:00<?, ?B/s]

train-00093-of-01126.parquet:   0%|          | 0.00/295M [00:00<?, ?B/s]

train-00094-of-01126.parquet:   0%|          | 0.00/285M [00:00<?, ?B/s]

train-00095-of-01126.parquet:   0%|          | 0.00/290M [00:00<?, ?B/s]

train-00096-of-01126.parquet:   0%|          | 0.00/286M [00:00<?, ?B/s]

train-00097-of-01126.parquet:   0%|          | 0.00/287M [00:00<?, ?B/s]

train-00098-of-01126.parquet:   0%|          | 0.00/288M [00:00<?, ?B/s]

train-00099-of-01126.parquet:   0%|          | 0.00/292M [00:00<?, ?B/s]

train-00100-of-01126.parquet:   0%|          | 0.00/288M [00:00<?, ?B/s]

train-00101-of-01126.parquet:   0%|          | 0.00/290M [00:00<?, ?B/s]

train-00102-of-01126.parquet:   0%|          | 0.00/294M [00:00<?, ?B/s]

train-00103-of-01126.parquet:   0%|          | 0.00/292M [00:00<?, ?B/s]

train-00104-of-01126.parquet:   0%|          | 0.00/289M [00:00<?, ?B/s]

train-00105-of-01126.parquet:   0%|          | 0.00/290M [00:00<?, ?B/s]

train-00106-of-01126.parquet:   0%|          | 0.00/288M [00:00<?, ?B/s]

train-00107-of-01126.parquet:   0%|          | 0.00/291M [00:00<?, ?B/s]

train-00108-of-01126.parquet:   0%|          | 0.00/287M [00:00<?, ?B/s]

train-00109-of-01126.parquet:   0%|          | 0.00/280M [00:00<?, ?B/s]

train-00110-of-01126.parquet:   0%|          | 0.00/290M [00:00<?, ?B/s]

train-00111-of-01126.parquet:   0%|          | 0.00/291M [00:00<?, ?B/s]

train-00112-of-01126.parquet:   0%|          | 0.00/287M [00:00<?, ?B/s]

train-00113-of-01126.parquet:   0%|          | 0.00/282M [00:00<?, ?B/s]

train-00114-of-01126.parquet:   0%|          | 0.00/295M [00:00<?, ?B/s]

train-00115-of-01126.parquet:   0%|          | 0.00/289M [00:00<?, ?B/s]

train-00116-of-01126.parquet:   0%|          | 0.00/287M [00:00<?, ?B/s]

train-00117-of-01126.parquet:   0%|          | 0.00/297M [00:00<?, ?B/s]

train-00118-of-01126.parquet:   0%|          | 0.00/290M [00:00<?, ?B/s]

train-00119-of-01126.parquet:   0%|          | 0.00/288M [00:00<?, ?B/s]

train-00120-of-01126.parquet:   0%|          | 0.00/291M [00:00<?, ?B/s]

train-00121-of-01126.parquet:   0%|          | 0.00/291M [00:00<?, ?B/s]

train-00122-of-01126.parquet:   0%|          | 0.00/288M [00:00<?, ?B/s]

train-00123-of-01126.parquet:   0%|          | 0.00/293M [00:00<?, ?B/s]

train-00124-of-01126.parquet:   0%|          | 0.00/290M [00:00<?, ?B/s]

train-00125-of-01126.parquet:   0%|          | 0.00/289M [00:00<?, ?B/s]

train-00126-of-01126.parquet:   0%|          | 0.00/293M [00:00<?, ?B/s]

train-00127-of-01126.parquet:   0%|          | 0.00/292M [00:00<?, ?B/s]

train-00128-of-01126.parquet:   0%|          | 0.00/289M [00:00<?, ?B/s]

train-00129-of-01126.parquet:   0%|          | 0.00/288M [00:00<?, ?B/s]

train-00130-of-01126.parquet:   0%|          | 0.00/290M [00:00<?, ?B/s]

train-00131-of-01126.parquet:   0%|          | 0.00/284M [00:00<?, ?B/s]

train-00132-of-01126.parquet:   0%|          | 0.00/291M [00:00<?, ?B/s]

train-00133-of-01126.parquet:   0%|          | 0.00/290M [00:00<?, ?B/s]

train-00134-of-01126.parquet:   0%|          | 0.00/291M [00:00<?, ?B/s]

train-00135-of-01126.parquet:   0%|          | 0.00/284M [00:00<?, ?B/s]

train-00136-of-01126.parquet:   0%|          | 0.00/291M [00:00<?, ?B/s]

train-00137-of-01126.parquet:   0%|          | 0.00/287M [00:00<?, ?B/s]

train-00138-of-01126.parquet:   0%|          | 0.00/287M [00:00<?, ?B/s]

train-00139-of-01126.parquet:   0%|          | 0.00/290M [00:00<?, ?B/s]

train-00140-of-01126.parquet:   0%|          | 0.00/288M [00:00<?, ?B/s]

train-00141-of-01126.parquet:   0%|          | 0.00/285M [00:00<?, ?B/s]

train-00142-of-01126.parquet:   0%|          | 0.00/296M [00:00<?, ?B/s]

train-00143-of-01126.parquet:   0%|          | 0.00/284M [00:00<?, ?B/s]

train-00144-of-01126.parquet:   0%|          | 0.00/287M [00:00<?, ?B/s]

train-00145-of-01126.parquet:   0%|          | 0.00/292M [00:00<?, ?B/s]

train-00146-of-01126.parquet:   0%|          | 0.00/289M [00:00<?, ?B/s]

train-00147-of-01126.parquet:   0%|          | 0.00/287M [00:00<?, ?B/s]

train-00148-of-01126.parquet:   0%|          | 0.00/287M [00:00<?, ?B/s]

train-00149-of-01126.parquet:   0%|          | 0.00/292M [00:00<?, ?B/s]

train-00150-of-01126.parquet:   0%|          | 0.00/285M [00:00<?, ?B/s]

train-00151-of-01126.parquet:   0%|          | 0.00/289M [00:00<?, ?B/s]

train-00152-of-01126.parquet:   0%|          | 0.00/296M [00:00<?, ?B/s]

train-00153-of-01126.parquet:   0%|          | 0.00/292M [00:00<?, ?B/s]

train-00154-of-01126.parquet:   0%|          | 0.00/287M [00:00<?, ?B/s]

train-00155-of-01126.parquet:   0%|          | 0.00/291M [00:00<?, ?B/s]

train-00156-of-01126.parquet:   0%|          | 0.00/286M [00:00<?, ?B/s]

train-00157-of-01126.parquet:   0%|          | 0.00/285M [00:00<?, ?B/s]

train-00158-of-01126.parquet:   0%|          | 0.00/290M [00:00<?, ?B/s]

train-00159-of-01126.parquet:   0%|          | 0.00/285M [00:00<?, ?B/s]

train-00160-of-01126.parquet:   0%|          | 0.00/287M [00:00<?, ?B/s]

train-00161-of-01126.parquet:   0%|          | 0.00/288M [00:00<?, ?B/s]

train-00162-of-01126.parquet:   0%|          | 0.00/295M [00:00<?, ?B/s]

train-00163-of-01126.parquet:   0%|          | 0.00/291M [00:00<?, ?B/s]

train-00164-of-01126.parquet:   0%|          | 0.00/285M [00:00<?, ?B/s]

train-00165-of-01126.parquet:   0%|          | 0.00/288M [00:00<?, ?B/s]

train-00166-of-01126.parquet:   0%|          | 0.00/295M [00:00<?, ?B/s]

train-00167-of-01126.parquet:   0%|          | 0.00/292M [00:00<?, ?B/s]

train-00168-of-01126.parquet:   0%|          | 0.00/284M [00:00<?, ?B/s]

train-00169-of-01126.parquet:   0%|          | 0.00/291M [00:00<?, ?B/s]

train-00170-of-01126.parquet:   0%|          | 0.00/289M [00:00<?, ?B/s]

train-00171-of-01126.parquet:   0%|          | 0.00/289M [00:00<?, ?B/s]

train-00172-of-01126.parquet:   0%|          | 0.00/288M [00:00<?, ?B/s]

train-00173-of-01126.parquet:   0%|          | 0.00/290M [00:00<?, ?B/s]

train-00174-of-01126.parquet:   0%|          | 0.00/288M [00:00<?, ?B/s]

train-00175-of-01126.parquet:   0%|          | 0.00/283M [00:00<?, ?B/s]

train-00176-of-01126.parquet:   0%|          | 0.00/291M [00:00<?, ?B/s]

train-00177-of-01126.parquet:   0%|          | 0.00/292M [00:00<?, ?B/s]

train-00178-of-01126.parquet:   0%|          | 0.00/285M [00:00<?, ?B/s]

train-00179-of-01126.parquet:   0%|          | 0.00/289M [00:00<?, ?B/s]

train-00180-of-01126.parquet:   0%|          | 0.00/295M [00:00<?, ?B/s]

train-00181-of-01126.parquet:   0%|          | 0.00/285M [00:00<?, ?B/s]

train-00182-of-01126.parquet:   0%|          | 0.00/292M [00:00<?, ?B/s]

train-00183-of-01126.parquet:   0%|          | 0.00/289M [00:00<?, ?B/s]

train-00184-of-01126.parquet:   0%|          | 0.00/288M [00:00<?, ?B/s]

train-00185-of-01126.parquet:   0%|          | 0.00/286M [00:00<?, ?B/s]

train-00186-of-01126.parquet:   0%|          | 0.00/290M [00:00<?, ?B/s]

train-00187-of-01126.parquet:   0%|          | 0.00/290M [00:00<?, ?B/s]

train-00188-of-01126.parquet:   0%|          | 0.00/293M [00:00<?, ?B/s]

train-00189-of-01126.parquet:   0%|          | 0.00/287M [00:00<?, ?B/s]

train-00190-of-01126.parquet:   0%|          | 0.00/292M [00:00<?, ?B/s]

train-00191-of-01126.parquet:   0%|          | 0.00/292M [00:00<?, ?B/s]

train-00192-of-01126.parquet:   0%|          | 0.00/291M [00:00<?, ?B/s]

train-00193-of-01126.parquet:   0%|          | 0.00/288M [00:00<?, ?B/s]

train-00194-of-01126.parquet:   0%|          | 0.00/291M [00:00<?, ?B/s]

train-00195-of-01126.parquet:   0%|          | 0.00/287M [00:00<?, ?B/s]

train-00196-of-01126.parquet:   0%|          | 0.00/288M [00:00<?, ?B/s]

train-00197-of-01126.parquet:   0%|          | 0.00/288M [00:00<?, ?B/s]

train-00198-of-01126.parquet:   0%|          | 0.00/293M [00:00<?, ?B/s]

KeyboardInterrupt: 

In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [3]:
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments
from datasets import load_dataset
from trl import SFTTrainer
from peft import LoraConfig
from transformers import BitsAndBytesConfig

# Load dataset (e.g., Alpaca-style)
dataset = load_dataset("tatsu-lab/alpaca", split="train[:2000]")

# Load TinyLlama with 8-bit and PEFT
model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
bnb_config = BitsAndBytesConfig(load_in_8bit=True)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto"
)

tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token

# LoRA Config (optional, but helps on T4)
peft_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

# TrainingArguments
training_args = TrainingArguments(
    output_dir="tinyllama-finetuned",
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    fp16=True,
    learning_rate=2e-4,
    num_train_epochs=1,
    logging_steps=10,
    save_strategy="no",
    report_to="none"
)

# SFTTrainer
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    args=training_args,
    peft_config=peft_config,
    max_seq_length=512,
)

# Train
trainer.train()

RuntimeError: Failed to import transformers.models.auto.modeling_auto because of the following error (look up to see its traceback):
Failed to import transformers.generation.utils because of the following error (look up to see its traceback):
partially initialized module 'torch._dynamo' has no attribute 'config' (most likely due to a circular import)