In [1]:
!pip -q install --upgrade "transformers>=4.41.0" peft datasets bitsandbytes accelerate timm pillow

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.2[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m


In [2]:
import os, pathlib

# Persist all Hugging Face artefacts under /workspace
HF_CACHE = "/workspace/hf_cache"
pathlib.Path(f"{HF_CACHE}/transformers").mkdir(parents=True, exist_ok=True)
pathlib.Path(f"{HF_CACHE}/datasets").mkdir(parents=True, exist_ok=True)

os.environ["HF_HOME"] = HF_CACHE                   # covers both libs by default
os.environ["TRANSFORMERS_CACHE"] = f"{HF_CACHE}/transformers"
os.environ["HF_DATASETS_CACHE"] = f"{HF_CACHE}/datasets"

In [3]:
from transformers import LlavaForConditionalGeneration, LlavaProcessor
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training




In [4]:
MODEL_NAME = "llava-hf/llava-1.5-7b-hf"

model = LlavaForConditionalGeneration.from_pretrained(
    MODEL_NAME,
    load_in_4bit=True,
    trust_remote_code=True,
    device_map="auto",
    cache_dir=HF_CACHE,          # ← caches to /workspace
)
model = prepare_model_for_kbit_training(model)

processor = LlavaProcessor.from_pretrained(MODEL_NAME,
                                           trust_remote_code=True,
                                           cache_dir=HF_CACHE)

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


In [5]:
from peft import TaskType
lora_cfg = LoraConfig(
    r=8, lora_alpha=16, lora_dropout=0.05,
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj"],
    bias="none", task_type=TaskType.CAUSAL_LM)
model = get_peft_model(model, lora_cfg)
model.print_trainable_parameters()

trainable params: 21,168,128 || all params: 7,084,595,200 || trainable%: 0.2988


In [7]:
import shutil, glob, pathlib, os
from datasets import load_dataset, DownloadConfig

# ────────────────────────────────────────────────────────────────
# 1)  Delete any half-downloaded VQAv2 shards from /workspace
# ────────────────────────────────────────────────────────────────
HF_CACHE = "/workspace/hf_cache"

# ────────────────────────────────────────────────────────────────
# 2)  Re-download & preprocess with automatic resume
# ────────────────────────────────────────────────────────────────
def preprocess(example):
    q = example["question"]
    a = example["answers"][0]["answer"]
    enc = processor(
        text=f"QUESTION: {q}\nANSWER:",
        images=example["image"],
        padding="max_length", truncation=True,
        max_length=512, return_tensors="pt",
    )
    labels = processor.tokenizer(
        a,
        padding="max_length",
        truncation=True,
        max_length=32,
        return_tensors="pt",
    )["input_ids"]
    out = {k: v.squeeze(0) for k, v in enc.items()}
    out["labels"] = labels.squeeze(0)
    return out

download_cfg = DownloadConfig(resume_download=True, max_retries=5)

tokenised_ds = (
    load_dataset(
        "HuggingFaceM4/VQAv2",
        split="train",
        cache_dir=f"{HF_CACHE}/datasets",
        trust_remote_code=True,   # required for this repo
        download_config=download_cfg,
        streaming=False,          # set True if you prefer on-the-fly streaming
    )
    .map(
        preprocess,
        batched=False,
        num_proc=4,
        remove_columns=[
            "question_id", "question", "answers",
            "multiple_choice_answer", "image_id",
            "question_type", "answer_type"
        ],
        desc="Tokenising VQAv2",
    )
)

print("dataset ready:", tokenised_ds)

Repo card metadata block was not found. Setting CardData to empty.


Tokenising VQAv2 (num_proc=4):   0%|          | 0/443757 [00:00<?, ? examples/s]

OSError: [Errno 122] Disk quota exceeded