Fine-tuning LLaVA on a custom dataset

In [None]:
# Install preprocessing libraries
# NOTE(review): the visible preprocessing cell imports only os, json, uuid,
# cv2 and numpy; confirm `datasets` and Pillow are still needed here.
# --force-reinstall replaces Pillow even when a copy is already installed;
# a runtime restart may be required afterwards if it was already imported
# in this session -- TODO confirm.
!pip install datasets
!pip install --upgrade --force-reinstall Pillow

In [None]:
# Mount Google Drive at /content/drive/ so the dataset, the converted output
# and the training checkpoints referenced below (all under
# /content/drive/MyDrive/...) are reachable from this runtime.
from google.colab import drive
drive.mount('/content/drive/')

In [None]:
import os
import json
import uuid
import cv2
import numpy as np

def process_and_save(image_path, label, output_folder, subset_name):
    """Copy one labelled image into the output tree and build its LLaVA record.

    The image is decoded with OpenCV and re-encoded as
    ``<output_folder>/images/<uuid><ext>`` (extension lower-cased). A
    single-turn conversation entry that references the new file name is
    returned for inclusion in the subset's ``dataset.json``.

    Args:
        image_path: Path of the source image file.
        label: Class name inserted into the assistant's answer.
        output_folder: Root output directory; ``images/`` and
            ``<subset_name>/`` are created beneath it if missing.
        subset_name: Name of the subset directory to create (e.g. 'train').

    Returns:
        A dict with ``id``, ``image`` and ``conversations`` keys, or ``None``
        when the source file is missing, cannot be decoded, or cannot be
        written. Failures are reported with ``print`` instead of being raised
        so one bad file does not abort a whole dataset run.
    """
    try:
        image_subfolder = os.path.join(output_folder, 'images')
        subset_folder = os.path.join(output_folder, subset_name)

        # exist_ok avoids the check-then-create race of a separate
        # os.path.exists() test.
        os.makedirs(image_subfolder, exist_ok=True)
        os.makedirs(subset_folder, exist_ok=True)

        if not os.path.exists(image_path):
            print(f"File does not exist: {image_path}")
            return None

        unique_id = str(uuid.uuid4())
        _, file_extension = os.path.splitext(image_path)
        new_image_name = f"{unique_id}{file_extension.lower()}"
        new_image_path = os.path.join(image_subfolder, new_image_name)

        try:
            img = cv2.imread(image_path)
            if img is None:
                print(f"OpenCV cannot read image file: {image_path}")
                return None

            # cv2.imwrite signals failure through its return value rather
            # than an exception; without this check the JSON record would
            # point at an image that was never written.
            if not cv2.imwrite(new_image_path, img):
                print(f"OpenCV cannot write image file: {new_image_path}")
                return None
        except Exception as e:
            print(f"Error processing image {image_path}: {str(e)}")
            return None

        return {
            "id": unique_id,
            "image": new_image_name,
            "conversations": [
                {
                    "from": "human",
                    # LLaVA's custom-data format puts an "<image>" placeholder
                    # in the first human turn; it marks where the image
                    # features are inserted into the prompt.
                    "value": "<image>\nAnalyze this chest X-ray image and determine the type of condition shown."
                },
                {
                    "from": "gpt",
                    "value": f"Based on the chest X-ray image, the condition shown is {label}."
                },
            ],
        }
    except Exception as e:
        # Deliberate best-effort boundary: report and skip this file.
        print(f"Unexpected error processing {image_path}: {str(e)}")
        return None

def process_dataset(data_folder, output_folder):
    """Convert a ``<subset>/<label>/<image>`` tree into LLaVA JSON datasets.

    For each of the 'train' and 'test' subsets, every image with an allowed
    extension under ``<data_folder>/<subset>/<label>/`` is passed to
    ``process_and_save``; the records it returns are collected and written to
    ``<output_folder>/<subset>/dataset.json``.

    Args:
        data_folder: Root of the source dataset.
        output_folder: Root of the converted dataset; created as needed.

    Returns:
        None. Progress and warnings are printed; files that
        ``process_and_save`` rejects (returns ``None`` for) are skipped.
    """
    subsets = ('train', 'test')
    labels = ('Covid', 'Normal', 'Viral Pneumonia')
    # A tuple lets str.endswith test all extensions in one call.
    allowed_extensions = ('.jpeg', '.jpg', '.png')
    json_data_list = {subset: [] for subset in subsets}

    for subset in subsets:
        for label in labels:
            label_path = os.path.join(data_folder, subset, label)
            # isdir (not exists) so a stray file with a label's name is
            # reported instead of crashing os.listdir.
            if not os.path.isdir(label_path):
                print(f"Warning: Path does not exist: {label_path}")
                continue
            # os.listdir order is arbitrary; sort for reproducible output.
            for image_name in sorted(os.listdir(label_path)):
                if not image_name.lower().endswith(allowed_extensions):
                    continue
                image_path = os.path.join(label_path, image_name)
                print(f"Processing file: {image_path}")
                print(f"File size: {os.path.getsize(image_path)} bytes")
                json_data = process_and_save(image_path, label, output_folder, subset)
                if json_data is not None:
                    json_data_list[subset].append(json_data)

    for subset in subsets:
        subset_folder = os.path.join(output_folder, subset)
        # The subset folder is otherwise only created as a side effect of
        # process_and_save; create it here so an empty or missing subset
        # still yields a (possibly empty) dataset.json instead of raising
        # FileNotFoundError.
        os.makedirs(subset_folder, exist_ok=True)
        json_output_path = os.path.join(subset_folder, 'dataset.json')
        with open(json_output_path, 'w', encoding='utf-8') as json_file:
            json.dump(json_data_list[subset], json_file, indent=4)

# Usage
# data_folder is the source dataset root, read as
# <data_folder>/{train,test}/<label>/<image>; output_folder receives the
# re-encoded images under images/ and one dataset.json per subset.
data_folder = '/content/drive/MyDrive/AnomalyGPT/Covid19-dataset'
output_folder = '/content/drive/MyDrive/AnomalyGPT/output'
process_dataset(data_folder, output_folder)

In [None]:
# Clone the LLaVA repository into ./LLaVA and install it from the checkout.
# `pip install -e .` installs the repository in editable mode; the
# `cd LLaVA &&` prefix makes pip run from inside the clone.
!git clone https://github.com/haotian-liu/LLaVA.git
!cd LLaVA && pip install --upgrade pip && pip install -e .

In [None]:
# Install LLaVA's optional `train` extra from the checkout, then flash-attn.
# --no-build-isolation makes pip build flash-attn against the packages
# already installed in this environment rather than in an isolated one.
!cd LLaVA && pip install -e ".[train]"
!pip install flash-attn --no-build-isolation

In [None]:
# DeepSpeed provides the `deepspeed` launcher used by the training cell.
!pip install deepspeed

In [None]:
# Weights & Biases client; the training cell passes `--report_to wandb`.
!pip install wandb


In [None]:
import wandb

# Authenticate with Weights & Biases before training, since the training
# command reports to wandb.
wandb.login()

In [None]:
# LoRA fine-tuning of liuhaotian/llava-v1.5-13b with DeepSpeed (zero3.json).
# - LoRA: rank 128, alpha 256; the multimodal projector uses lr 2e-5 while
#   the main learning rate is 2e-4 with a cosine schedule and 3% warmup.
# - Data: --data_path and --image_folder point at the train/dataset.json and
#   images/ folder written by the preprocessing cell above.
# - Run: 1 epoch, per-device batch size 16, no gradient accumulation,
#   bf16 + tf32, gradient checkpointing, metrics reported to wandb.
# - Output: checkpoints go to the Drive folder given by --output_dir.
# NOTE(review): --save_steps 50000 means a step-based checkpoint is only
# written if training reaches that many steps; confirm the training script
# saves the final weights on its own for a dataset this size.
!deepspeed LLaVA/llava/train/train_mem.py \
    --lora_enable True --lora_r 128 --lora_alpha 256 --mm_projector_lr 2e-5 \
    --deepspeed LLaVA/scripts/zero3.json \
    --model_name_or_path liuhaotian/llava-v1.5-13b \
    --version v1 \
    --data_path /content/drive/MyDrive/AnomalyGPT/output/train/dataset.json \
    --image_folder /content/drive/MyDrive/AnomalyGPT/output/images \
    --vision_tower openai/clip-vit-large-patch14-336 \
    --mm_projector_type mlp2x_gelu \
    --mm_vision_select_layer -2 \
    --mm_use_im_start_end False \
    --mm_use_im_patch_token False \
    --image_aspect_ratio pad \
    --group_by_modality_length True \
    --bf16 True \
    --output_dir /content/drive/MyDrive/AnomalyGPT/checkpoints/llava-v1.5-13b-task-lora \
    --num_train_epochs 1 \
    --per_device_train_batch_size 16 \
    --per_device_eval_batch_size 4 \
    --gradient_accumulation_steps 1 \
    --evaluation_strategy "no" \
    --save_strategy "steps" \
    --save_steps 50000 \
    --save_total_limit 1 \
    --learning_rate 2e-4 \
    --weight_decay 0. \
    --warmup_ratio 0.03 \
    --lr_scheduler_type "cosine" \
    --logging_steps 1 \
    --tf32 True \
    --model_max_length 2048 \
    --gradient_checkpointing True \
    --dataloader_num_workers 4 \
    --lazy_preprocess True \
    --report_to wandb