In [83]:
import os
import uuid
import json
import copy

from tqdm.notebook import tqdm, trange

from llava.constants import (
    IMAGE_TOKEN_INDEX,
    DEFAULT_IMAGE_TOKEN,
    DEFAULT_IM_START_TOKEN,
    DEFAULT_IM_END_TOKEN,
    IMAGE_PLACEHOLDER,
)

from prompt import *

In [84]:
query = f"""
{IMAGE_PLACEHOLDER} 

Classify this image in one of the following classes
- Rufflesia-Arnoldii
- Encephalartos-Woodii
- Amorphophallus-Titanum
- Ghost-Orchid
- Dracaena-Cinnabari

Only give the class name for the class which you are 
highly confident the image belongs to.
"""


template = {
    "id": "unique_id",
    "image": "image_file.jpg",
    "conversations": [
        {
            "from": "human",
            "value": query
        },
        {
            "from": "gpt",
            "value": "class"
        }
    ]
}

In [85]:
class_list = list(prompt)

class_list

['Rufflesia-Arnoldii',
 'Encephalartos-Woodii',
 'Amorphophallus-Titanum',
 'Ghost-Orchid',
 'Dracaena-Cinnabari']

In [63]:
train_data = []
valid_data = []

for plant in class_list:
    for dir, _, files in os.walk(f'/home/Synthetic-Data-AI/data/synthetic/{plant}'):
        for i, file in tqdm(list(enumerate(sorted(files)))):
            if file.endswith('.png'):
                image_file = os.path.join(plant, file)
                input = copy.deepcopy(template)
                input['id'] = str(uuid.uuid4())
                input['image'] = image_file
                input['conversations'][1]['value'] = plant
                if i < int(len(files) * 0.8):
                    train_data.append(input)
                else:
                    valid_data.append(input)

  0%|          | 0/320 [00:00<?, ?it/s]

  0%|          | 0/320 [00:00<?, ?it/s]

  0%|          | 0/320 [00:00<?, ?it/s]

  0%|          | 0/320 [00:00<?, ?it/s]

  0%|          | 0/320 [00:00<?, ?it/s]

In [65]:
train_data[0]

{'id': 'db01fe6e-b30f-4db4-804d-4a2f16fdb7ce',
 'image': 'Rufflesia-Arnoldii/001.png',
 'conversations': [{'from': 'human',
   'value': '\n<image-placeholder> \n\nClassify this image in one of the following classes\n- Rufflesia-Arnoldii\n- Encephalartos-Woodii\n- Amorphophallus-Titanum\n- Ghost-Orchid\n- Dracaena-Cinnabari\n\nOnly give the class name for the class which you are \nhighly confident the image belongs to.\n'},
  {'from': 'gpt', 'value': 'Rufflesia-Arnoldii'}]}

In [66]:
with open('/home/Synthetic-Data-AI/data/train_dataset.json', 'w') as f:
    f.write(json.dumps(train_data, indent=4))
with open('/home/Synthetic-Data-AI/data/valid_dataset.json', 'w') as f:
    f.write(json.dumps(valid_data, indent=4))    

In [37]:
%cd ../LLaVA/

/home/LLaVA


In [69]:
%%writefile ./scripts/finetune_qlora_plant.sh
#!/bin/bash

################## VICUNA ##################
PROMPT_VERSION=v1
MODEL_VERSION="vicuna-v1-6-7b"
################## VICUNA ##################

deepspeed llava/train/train_mem.py \
    --deepspeed ./scripts/zero2.json \
    --lora_enable True \
    --bits 4 \
    --model_name_or_path "liuhaotian/llava-v1.6-vicuna-7b" \
    --version $PROMPT_VERSION \
    --data_path '/home/Synthetic-Data-AI/data/train_dataset.json' \
    --valid_path '/home/Synthetic-Data-AI/data/valid_dataset.json' \
    --image_folder /home/Synthetic-Data-AI/data/synthetic/ \
    --vision_tower "openai/clip-vit-large-patch14-336" \
    --mm_vision_select_layer -2 \
    --mm_use_im_start_end False \
    --mm_use_im_patch_token False \
    --bf16 True \
    --output_dir ./checkpoints/llava-$MODEL_VERSION-finetune_qlora_plant \
    --num_train_epochs 5 \
    --per_device_train_batch_size 16 \
    --per_device_eval_batch_size 16 \
    --gradient_accumulation_steps 4 \
    --evaluation_strategy "epoch" \
    --save_strategy "steps" \
    --save_steps 200 \
    --save_total_limit 1 \
    --learning_rate 2e-5 \
    --weight_decay 0. \
    --warmup_ratio 0.03 \
    --lr_scheduler_type "cosine" \
    --logging_steps 1 \
    --tf32 True \
    --model_max_length 2048 \
    --gradient_checkpointing True \
    --lazy_preprocess True \
    --dataloader_num_workers 4 \
    --report_to wandb


Overwriting ./scripts/finetune_qlora_plant.sh


In [None]:
!./scripts/finetune_qlora_plant.sh

In [80]:
train_shot_data = []
valid_shot_data = []

for plant in class_list:
    for dir, _, files in os.walk(f'/home/Synthetic-Data-AI/data/synthetic-few-shot/5-shot/{plant}'):
        for i, file in tqdm(list(enumerate(sorted(files)))):
            if file.endswith('.png'):
                image_file = os.path.join(plant, file)
                input = copy.deepcopy(template)
                input['id'] = str(uuid.uuid4())
                input['image'] = image_file
                input['conversations'][1]['value'] = plant
                if i < int(len(files) * 0.8):
                    train_shot_data.append(input)
                else:
                    valid_shot_data.append(input)

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

In [81]:
with open('/home/Synthetic-Data-AI/data/train_5_shot_dataset.json', 'w') as f:
    f.write(json.dumps(train_shot_data, indent=4))
with open('/home/Synthetic-Data-AI/data/valid_5_shot_dataset.json', 'w') as f:
    f.write(json.dumps(valid_shot_data, indent=4))    

In [90]:
%%writefile ./scripts/finetune_qlora_plant_5_shot.sh
#!/bin/bash

################## VICUNA ##################
PROMPT_VERSION=v1
MODEL_VERSION="vicuna-v1-6-7b"
################## VICUNA ##################

deepspeed llava/train/train_mem.py \
    --deepspeed ./scripts/zero2.json \
    --lora_enable True \
    --lora_r 128 \
    --lora_alpha 256 \
    --bits 4 \
    --model_name_or_path "liuhaotian/llava-v1.6-vicuna-7b" \
    --version $PROMPT_VERSION \
    --data_path '/home/Synthetic-Data-AI/data/train_5_shot_dataset.json' \
    --valid_path '/home/Synthetic-Data-AI/data/valid_5_shot_dataset.json' \
    --image_folder /home/Synthetic-Data-AI/data/synthetic/ \
    --vision_tower "openai/clip-vit-large-patch14-336" \
    --mm_vision_select_layer -2 \
    --mm_use_im_start_end False \
    --mm_use_im_patch_token False \
    --bf16 True \
    --output_dir ./checkpoints/llava-$MODEL_VERSION-5-shot-finetune_qlora_plant_10_epoch \
    --num_train_epochs 20 \
    --per_device_train_batch_size 32 \
    --per_device_eval_batch_size 16 \
    --gradient_accumulation_steps 4 \
    --evaluation_strategy "epoch" \
    --save_strategy "steps" \
    --save_steps 200 \
    --save_total_limit 1 \
    --learning_rate 1e-4 \
    --weight_decay 0. \
    --warmup_ratio 0.03 \
    --lr_scheduler_type "cosine" \
    --logging_steps 1 \
    --tf32 True \
    --model_max_length 2048 \
    --gradient_checkpointing True \
    --lazy_preprocess True \
    --dataloader_num_workers 4 \
    --report_to wandb


Overwriting ./scripts/finetune_qlora_plant_5_shot.sh
