<a href="https://colab.research.google.com/github/adamFittlerGit/Risk-Identification-Thesis/blob/main/LLAVA_FineTune.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# Attach Google Drive to the Colab VM; the CSV files read later in this
# notebook live under this mount point.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
!pip install transformers==4.35.2
!pip install datasets
!pip install accelerate
!pip install peft
!pip install evaluate
!pip install bitsandbytes  # For 8-bit quantization if needed
!pip install sentencepiece  # Required for some tokenizers
!pip install pillow  # For image processing
!pip install huggingface_hub  # For model uploads
!pip install tensorboard  # For monitoring training
!pip install matplotlib  # For visualizations

Collecting transformers==4.35.2
  Downloading transformers-4.35.2-py3-none-any.whl.metadata (123 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/123.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m122.9/123.5 kB[0m [31m3.8 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m123.5/123.5 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers<0.19,>=0.14 (from transformers==4.35.2)
  Downloading tokenizers-0.15.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Downloading transformers-4.35.2-py3-none-any.whl (7.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.9/7.9 MB[0m [31m42.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading tokenizers-0.15.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.6/3.6 MB[0m [31

In [None]:
import pandas as pd
from datasets import Dataset, load_dataset
from transformers import TrainingArguments, Trainer
from transformers import AutoProcessor, LlavaForConditionalGeneration
from PIL import Image
import pandas as pd

In [None]:
# Hub checkpoint that both the processor and the model weights are loaded from.
model_id = "llava-hf/llava-1.5-7b-hf"

# Loading options for the weights: "auto" dtype keeps whatever precision the
# checkpoint declares, and "auto" device map lets accelerate place the layers.
model_load_options = dict(torch_dtype="auto", device_map="auto")

# The processor bundles the tokenizer and the image preprocessor.
processor = AutoProcessor.from_pretrained(model_id)
model = LlavaForConditionalGeneration.from_pretrained(model_id, **model_load_options)

def create_dataset(csv_file):
    """Read a CSV file and wrap its rows in a Hugging Face ``Dataset``.

    Args:
        csv_file: Path (or any source accepted by ``pandas.read_csv``) of the
            CSV file to load.

    Returns:
        A ``datasets.Dataset`` built from the parsed DataFrame.
    """
    return Dataset.from_pandas(pd.read_csv(csv_file))

def collate_fn(batch):
    """Collate dataset rows into one LLaVA training batch.

    Every row must provide ``"image"`` (a path PIL can open), ``"question"``
    and ``"answer"``.  Question and answer are joined into a single
    ``USER: <image>\\n{question} ASSISTANT: {answer}`` sequence so that the
    answer tokens are part of ``input_ids``; ``labels`` is then a copy of
    ``input_ids``, which keeps both tensors the same shape as a causal-LM loss
    requires.  Tokenising the answers separately (as was done before) yields
    labels whose shape does not match the inputs and never shows the answer
    to the model.

    Args:
        batch: List of row dicts produced by the dataset.

    Returns:
        The processor output with an added ``"labels"`` entry.  Padding and
        image-placeholder positions are set to ``-100`` so they are ignored by
        the loss; prompt and answer tokens both contribute to it.
    """
    # Placeholder that marks where the image features are spliced into the text.
    image_token = "<image>"
    # Terminate each sample so the model learns where an answer ends.
    eos_token = processor.tokenizer.eos_token or ""

    images = []
    texts = []
    for item in batch:
        # Load eagerly inside a context manager so the file handle is closed,
        # and force RGB so grayscale / RGBA / palette files share one layout.
        with Image.open(item["image"]) as img:
            images.append(img.convert("RGB"))

        question = str(item["question"])
        # Only add the placeholder when the CSV text does not already carry it.
        if image_token not in question:
            question = f"{image_token}\n{question}"
        texts.append(f"USER: {question} ASSISTANT: {item['answer']}{eos_token}")

    inputs = processor(
        text=texts,
        images=images,
        return_tensors="pt",
        padding=True,
        truncation=True
    )

    labels = inputs["input_ids"].clone()
    # Padding positions must not contribute to the loss.
    labels[inputs["attention_mask"] == 0] = -100
    # Image placeholders are replaced by visual features, not predicted as text.
    image_token_id = processor.tokenizer.convert_tokens_to_ids(image_token)
    if image_token_id is not None and image_token_id != processor.tokenizer.unk_token_id:
        labels[labels == image_token_id] = -100

    inputs["labels"] = labels
    return inputs

# Hyper-parameters and bookkeeping for the fine-tuning run.
training_args = TrainingArguments(
    output_dir="./llava-vqa-finetuned",
    learning_rate=5e-5,
    num_train_epochs=3,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    save_strategy="epoch",
    logging_steps=10,
    bf16=True,  # or fp16=True depending on your hardware
    gradient_checkpointing=True,
    report_to="tensorboard",
    # The dataset columns ("image", "question", "answer") are not arguments of
    # the model's forward(); with the default (True) the Trainer strips them
    # before collate_fn runs, so the collator would find no data to work with.
    remove_unused_columns=False,
)

# Create the datasets
train_dataset = create_dataset("/content/drive/MyDrive/uni/thesis/llava/llava/clean/csv/train.csv")
val_dataset = create_dataset("/content/drive/MyDrive/uni/thesis/llava/llava/clean/csv/val.csv")

# NOTE(review): no evaluation schedule is set in training_args, so val_dataset
# is presumably only used if trainer.evaluate() is called explicitly - confirm.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    data_collator=collate_fn,
)

trainer.train()

# After training is complete
trainer.save_model("./my_fine_tuned_llava")

# Also save the processor (important for handling inputs later)
processor.save_pretrained("./my_fine_tuned_llava")