In [None]:
# One-time setup: register the Git LFS filters in the Git configuration.
!git lfs install

Git LFS initialized.


In [None]:
# Print the installed Git LFS version.
!git lfs --version

git-lfs/3.0.2 (GitHub; linux amd64; go 1.18.1)


In [None]:
# `git lfs clone` is deprecated: the tool itself reports that plain `git clone`
# now has comparable speed. With Git LFS installed, `git clone` downloads the
# LFS-tracked files as part of the checkout.
!git clone https://huggingface.co/liuhaotian/llava-v1.5-7b

          with new flags from 'git clone'

'git clone' has been updated in upstream Git to have comparable
speeds to 'git lfs clone'.
Cloning into 'llava-v1.5-7b'...
remote: Enumerating objects: 19, done.[K
remote: Total 19 (delta 0), reused 0 (delta 0), pack-reused 19[K
Unpacking objects: 100% (19/19), 4.70 KiB | 267.00 KiB/s, done.


In [None]:
# Show the help page of the `git lfs smudge` subcommand.
!git lfs help smudge

In [None]:
import shutil

# Folder to delete, together with everything inside it.
folder_path = '/content/datas'

# A folder that is already gone is the desired end state, so a missing path is
# not treated as an error; this keeps the cell safe to re-run.
try:
    shutil.rmtree(folder_path)
except FileNotFoundError:
    pass

In [None]:
# %pip installs into the environment of the running kernel; the version is
# pinned to the one this notebook was recorded with.
%pip install datasets==2.17.0

Collecting datasets
  Downloading datasets-2.17.0-py3-none-any.whl (536 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/536.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m122.9/536.6 kB[0m [31m3.5 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━[0m [32m491.5/536.6 kB[0m [31m7.2 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m536.6/536.6 kB[0m [31m6.3 MB/s[0m eta [36m0:00:00[0m
Collecting pyarrow>=12.0.0 (from datasets)
  Downloading pyarrow-15.0.0-cp310-cp310-manylinux_2_28_x86_64.whl (38.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m38.3/38.3 MB[0m [31m24.2 MB/s[0m eta [36m0:00:00[0m
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3

In [None]:
from datasets import load_dataset
from PIL import Image
from io import BytesIO
import requests
import os
import json
import uuid


def process_and_save(dataset, output_folder, subset_name):
    """Export VQA-style records as JPEG files plus a LLaVA conversation JSON file.

    Each item's image is written to ``<output_folder>/images/<uuid>.jpg`` and a
    matching conversation record is collected. All records are then dumped to
    ``<output_folder>/<subset_name>/dataset.json``.

    Args:
        dataset: Iterable of mappings with the keys ``'image'``, ``'question'``
            and ``'answers'``. ``'image'`` is either a string (an ``http(s)``
            URL or a local file path) or an already loaded image object that
            provides ``mode``, ``convert`` and ``save`` (presumably a
            ``PIL.Image.Image`` -- confirm against the dataset schema).
            ``'answers'`` is an iterable of hashable values, expected to be
            strings because they are joined with ``", "``.
        output_folder: Root directory of the export; created when missing.
        subset_name: Sub-directory of ``output_folder`` that receives
            ``dataset.json``.

    Raises:
        requests.HTTPError: If an image URL answers with an error status.
    """
    # Modes Pillow can encode as JPEG. Anything else (RGBA, P, LA, ...) has to
    # be converted first, otherwise ``save`` fails for a ``.jpg`` target.
    jpeg_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")

    subset_folder = os.path.join(output_folder, subset_name)
    image_subfolder = os.path.join(output_folder, 'images')

    # exist_ok avoids the check-then-create race and keeps re-runs harmless.
    os.makedirs(image_subfolder, exist_ok=True)
    os.makedirs(subset_folder, exist_ok=True)

    json_data_list = []

    for item in dataset:
        source = item['image']
        if isinstance(source, str):
            if source.startswith(('http://', 'https://')):
                # Bound the wait and fail loudly instead of trying to decode an
                # HTML error page as an image.
                response = requests.get(source, timeout=30)
                response.raise_for_status()
                image = Image.open(BytesIO(response.content))
            else:
                # Any other string is treated as a local file path.
                image = Image.open(source)
        else:
            image = source  # already an image object

        if image.mode not in jpeg_modes:
            image = image.convert('RGB')

        # A random UUID ties the image file and its JSON record together.
        unique_id = str(uuid.uuid4())
        image_path = os.path.join(image_subfolder, f"{unique_id}.jpg")
        image.save(image_path)

        # Drop duplicate answers while keeping first-seen order, so the
        # exported text is deterministic from run to run.
        unique_answers = list(dict.fromkeys(item['answers']))
        formatted_answers = ", ".join(unique_answers)

        # One human/gpt exchange per image, in LLaVA's conversation layout.
        json_data_list.append({
            "id": unique_id,
            "image": f"{unique_id}.jpg",
            "conversations": [
                {
                    "from": "human",
                    "value": item['question']
                },
                {
                    "from": "gpt",
                    "value": formatted_answers
                }
            ]
        })

    json_output_path = os.path.join(subset_folder, 'dataset.json')
    with open(json_output_path, 'w', encoding='utf-8') as json_file:
        json.dump(json_data_list, json_file, indent=4)

def save_dataset(dataset_name, output_folder, class_name, subset_name, val_samples=None):
    """Load one split of a Hugging Face dataset, filter it by question type and export it.

    Records whose ``'question_type'`` equals ``class_name`` are kept. When the
    ``'train'`` split is loaded together with ``val_samples``, the first
    ``val_samples`` kept records are exported as ``'validation'`` and the rest
    as ``'train'``. In every other case the kept records are exported under the
    name of the split that was loaded, so exporting e.g. a ``'test'`` split
    cannot overwrite a previously exported ``train`` folder.

    Args:
        dataset_name: Dataset identifier passed to ``load_dataset``.
        output_folder: Root directory handed to ``process_and_save``.
        class_name: Value of ``'question_type'`` to keep.
        subset_name: Split to load.
        val_samples: Number of leading records held out for validation; only
            used when ``subset_name == 'train'``. ``None`` disables the hold-out.

    Raises:
        ValueError: If ``val_samples`` is negative.
    """
    if val_samples is not None and val_samples < 0:
        # A negative slice bound would silently swap the roles of the two parts.
        raise ValueError(f"val_samples must be non-negative, got {val_samples}")

    dataset = load_dataset(dataset_name, split=subset_name)

    # Materialised as a list so it can be sliced below.
    filtered_dataset = [item for item in dataset if item['question_type'] == class_name]

    if val_samples is not None and subset_name == 'train':
        splits = [
            ('train', filtered_dataset[val_samples:]),
            ('validation', filtered_dataset[:val_samples]),
        ]
    else:
        splits = [(subset_name, filtered_dataset)]

    # Empty parts are skipped so no empty dataset.json is written for them.
    for subset, data in splits:
        if data:
            process_and_save(data, output_folder, subset)


# Example run: export the 'other' question type of the OK-VQA train split and
# hold out the first 300 kept records for validation.
output_folder = 'dataset'
class_name = 'other'
val_samples = 300
save_dataset(
    dataset_name='Multimodal-Fatima/OK-VQA_train',
    output_folder=output_folder,
    class_name=class_name,
    subset_name='train',
    val_samples=val_samples,
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Downloading readme:   0%|          | 0.00/2.04k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/395M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/389M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/389M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/399M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/9009 [00:00<?, ? examples/s]

In [None]:
import zipfile
import os

# Archive to unpack and the directory that receives its contents.
zip_file_path = '/content/dataset.zip'
extracted_folder_path = '/content'

# Make sure the destination exists before unpacking.
os.makedirs(extracted_folder_path, exist_ok=True)

# Unpack every member of the archive into the destination.
with zipfile.ZipFile(zip_file_path, 'r') as archive:
    archive.extractall(extracted_folder_path)

print(f"Zip file '{zip_file_path}' has been extracted to '{extracted_folder_path}'.")


Zip file '/content/dataset.zip' has been extracted to '/content'.


In [None]:
# Classification instruction that enumerates the candidate activity labels.
prompt = "Classify the image as one of the following: Falling forward using hands, Falling forward using knees, Falling backwards, Falling sideward, Falling sitting in empty chair, Walking, Standing, Sitting, Picking up an object, Jumping, Laying"

In [None]:
# Stand-alone copy of the per-record structure that process_and_save builds.
# NOTE(review): `item`, `formatted_answers` and `json_data_list` are not assigned
# at notebook level in any visible cell (they are locals of process_and_save), so
# this cell presumably raises NameError on a fresh kernel -- confirm whether it
# should be removed or filled in with concrete values.
unique_id = 1234
json_data = {
    "id": unique_id,
    "image": f"{unique_id}.jpg",
    "conversations": [
        {
            "from": "human",
            "value": item['question']
        },
        {
            "from": "gpt",
            "value": formatted_answers
        }
    ]
}
# Add the record to the collected list.
json_data_list.append(json_data)

In [None]:
import json

# Single-record example of the LLaVA conversation format for the activity
# classifier. It is serialised with json.dumps so the text is valid JSON and the
# human turn carries the actual text of `prompt`; a bare `prompt` token inside a
# string literal is neither interpolated nor valid JSON.
train = json.dumps(
    [
        {
            "id": "4da8c089-3050-4e85-bba4-638b49024d97",
            "image": "4da8c089-3050-4e85-bba4-638b49024d97.jpg",
            "conversations": [
                {
                    "from": "human",
                    "value": prompt
                },
                {
                    "from": "gpt",
                    "value": "Falling forward using hands"
                }
            ]
        }
    ],
    indent=4,
)

In [None]:
# Clone into an explicit location so the cell does not depend on the current
# directory, then move the notebook into the checkout. Only the %cd magic
# changes the notebook's working directory; a `!cd` runs in a throw-away
# subshell and has no effect on later commands.
!git clone https://github.com/haotian-liu/LLaVA.git /content/LLaVA
%cd /content/LLaVA

Cloning into 'LLaVA'...
remote: Enumerating objects: 2238, done.[K
remote: Counting objects: 100% (1262/1262), done.[K
remote: Compressing objects: 100% (528/528), done.[K
remote: Total 2238 (delta 868), reused 775 (delta 734), pack-reused 976[K
Receiving objects: 100% (2238/2238), 13.86 MiB | 17.23 MiB/s, done.
Resolving deltas: 100% (1428/1428), done.
/content/LLaVA


In [None]:
import os

# Look up the directory the kernel is currently running in and report it.
current_directory = os.getcwd()

print("Current Working Directory: {}".format(current_directory))

Current Working Directory: /content


In [None]:
!pip install -q condacolab
import condacolab
condacolab.install()

⏬ Downloading https://github.com/conda-forge/miniforge/releases/download/23.11.0-0/Mambaforge-23.11.0-0-Linux-x86_64.sh...
📦 Installing...
📌 Adjusting configuration...
🩹 Patching environment...
⏲ Done in 0:00:13
🔁 Restarting kernel...


In [None]:
# Confirm that conda is available and print its version.
!conda --version

conda 23.11.0


In [None]:
# Only the %cd magic changes the notebook's working directory; a `!cd` runs in
# its own subshell and is discarded, so it is not needed here.
%cd /content/LLaVA

/content/LLaVA


In [None]:
# Create a conda environment named `llava` with Python 3.10.
!conda create -n llava python=3.10 -y
!conda init
# NOTE(review): each `!` line runs in its own subshell, so this activation does
# not carry over to the pip commands below (a later cell records
# "CondaError: Run 'conda init' before 'conda activate'" for the same command).
# The installs therefore presumably target whichever pip is first on PATH
# rather than the `llava` environment -- confirm which environment is intended.
!conda activate llava
!pip install --upgrade pip  # enable PEP 660 support
# Editable install of the project in the current directory.
!pip install -e .

Channels:
 - conda-forge
Platform: linux-64
Collecting package metadata (repodata.json): - \ | / - \ | / - \ | / - \ | done
Solving environment: - \ done

## Package Plan ##

  environment location: /usr/local/envs/llava

  added / updated specs:
    - python=3.10


The following NEW packages will be INSTALLED:

  _libgcc_mutex      conda-forge/linux-64::_libgcc_mutex-0.1-conda_forge 
  _openmp_mutex      conda-forge/linux-64::_openmp_mutex-4.5-2_gnu 
  bzip2              conda-forge/linux-64::bzip2-1.0.8-hd590300_5 
  ca-certificates    conda-forge/linux-64::ca-certificates-2024.2.2-hbcca054_0 
  ld_impl_linux-64   conda-forge/linux-64::ld_impl_linux-64-2.40-h41732ed_0 
  libffi             conda-forge/linux-64::libffi-3.4.2-h7f98852_5 
  libgcc-ng          conda-forge/linux-64::libgcc-ng-13.2.0-h807b86a_5 
  libgomp            conda-forge/linux-64::libgomp-13.2.0-h807b86a_5 
  libnsl             conda-forge/linux-64::libnsl-2.0.1-hd590300_0 
  libsql

In [None]:
# Let conda write its shell start-up hooks (the recorded run reports no changes).
!conda init

no change     /usr/local/condabin/conda
no change     /usr/local/bin/conda
no change     /usr/local/bin/conda-env
no change     /usr/local/bin/activate
no change     /usr/local/bin/deactivate
no change     /usr/local/etc/profile.d/conda.sh
no change     /usr/local/etc/fish/conf.d/conda.fish
no change     /usr/local/shell/condabin/Conda.psm1
no change     /usr/local/shell/condabin/conda-hook.ps1
no change     /usr/local/lib/python3.10/site-packages/xontrib/conda.xsh
no change     /usr/local/etc/profile.d/conda.csh
no change     /root/.bashrc
No action taken.


In [None]:
%%shell
# Load conda's shell functions into this bash session so `conda activate` works.
eval "$(conda shell.bash hook)" # copy conda command to shell
conda activate llava
# Print the interpreter version provided by the activated environment.
python --version
conda deactivate

Python 3.10.13




In [None]:
# NOTE(review): the recorded output of this cell is
# "CondaError: Run 'conda init' before 'conda activate'". A `!` command runs in
# a fresh subshell without conda's shell hook, so activation here presumably
# cannot take effect -- confirm whether this cell is still needed.
!conda activate llava


CondaError: Run 'conda init' before 'conda activate'



In [None]:
from llava import train


  warn("The installed version of bitsandbytes was compiled without GPU support. "


/usr/local/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cpu.so: undefined symbol: cadam32bit_grad_fp32


In [None]:
# Write the text held in `script` to the path below.
# NOTE(review): `script` is only assigned in a later cell, so on a fresh kernel
# this cell raises NameError unless that cell has been run first. The target
# path has no file extension and equals the folder that the training command
# reads dataset.json from -- confirm the intended destination.
script_file_path = '/content/dataset/train'
with open(script_file_path, 'w') as file:
    file.write(script)

In [None]:
# %pip installs into the environment of the running kernel; the version is
# pinned to the one this notebook was recorded with.
%pip install deepspeed==0.13.2

Collecting deepspeed
  Downloading deepspeed-0.13.2.tar.gz (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting hjson (from deepspeed)
  Downloading hjson-3.1.0-py3-none-any.whl (54 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m54.0/54.0 kB[0m [31m7.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting ninja (from deepspeed)
  Downloading ninja-1.11.1.1-py2.py3-none-manylinux1_x86_64.manylinux_2_5_x86_64.whl (307 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m307.2/307.2 kB[0m [31m20.6 MB/s[0m eta [36m0:00:00[0m
Collecting pynvml (from deepspeed)
  Downloading pynvml-11.5.0-py3-none-any.whl (53 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m53.1/53.1 kB[0m [31m7.6 MB/s[0m eta [36m0:00:00[0m
Building wheels for collected packages: deepspeed
  Building wheel for deepspeed (set

In [None]:
# Copy and paste your bash script here
script = """
deepspeed /content/LLaVA/llava/train/train_mem.py \
    --deepspeed /content/LLaVA/scripts/zero2.json \
    --lora_enable True \
    --lora_r 128 \
    --lora_alpha 256 \
    --mm_projector_lr 2e-5 \
    --bits 4 \
    --model_name_or_path /content/llava-v1.5-7b \
    --version llava_llama_2 \
    --data_path /content/dataset/train/dataset.json \
    --validation_data_path /content/dataset/validation/dataset.json \
    --image_folder /content/dataset/images/ \
    --vision_tower openai/clip-vit-large-patch14-336 \
    --mm_projector_type mlp2x_gelu \
    --mm_vision_select_layer -2 \
    --mm_use_im_start_end False \
    --mm_use_im_patch_token False \
    --image_aspect_ratio pad \
    --group_by_modality_length True \
    --bf16 True \
    --output_dir /content/LLaVA/llava/checkpoints/llama-2-7b-chat-task-qlora \
    --num_train_epochs 1 \
    --per_device_train_batch_size 1 \
    --per_device_eval_batch_size 1 \
    --gradient_accumulation_steps 256 \
    --evaluation_strategy "epoch" \
    --save_strategy "steps" \
    --save_steps 50000 \
    --save_total_limit 1 \
    --learning_rate 2e-4 \
    --weight_decay 0. \
    --warmup_ratio 0.03 \
    --lr_scheduler_type "cosine" \
    --logging_steps 1 \
    --tf32 True \
    --model_max_length 2048 \
    --gradient_checkpointing True \
    --dataloader_num_workers 4 \
    --lazy_preprocess True \
    --report_to wandb
"""

# Save the script to a file
script_file_path = '/content/run_script.sh'
with open(script_file_path, 'w') as file:
    file.write(script)

# Make the script executable
!chmod +x $script_file_path

# Run the script


In [None]:
# Let conda write its shell start-up hooks (the recorded run modified /root/.bashrc).
!conda init

no change     /usr/local/condabin/conda
no change     /usr/local/bin/conda
no change     /usr/local/bin/conda-env
no change     /usr/local/bin/activate
no change     /usr/local/bin/deactivate
no change     /usr/local/etc/profile.d/conda.sh
no change     /usr/local/etc/fish/conf.d/conda.fish
no change     /usr/local/shell/condabin/Conda.psm1
no change     /usr/local/shell/condabin/conda-hook.ps1
no change     /usr/local/lib/python3.10/site-packages/xontrib/conda.xsh
no change     /usr/local/etc/profile.d/conda.csh
modified      /root/.bashrc

==> For changes to take effect, close and re-open your current shell. <==



In [None]:
import sys
# Show the module search path to check where `llava` can be imported from.
print(sys.path)


['/content', '/env/python', '/usr/lib/python310.zip', '/usr/lib/python3.10', '/usr/lib/python3.10/lib-dynload', '', '/usr/local/lib/python3.10/dist-packages', '/usr/lib/python3/dist-packages', '/usr/local/lib/python3.10/dist-packages/IPython/extensions', '/root/.ipython', '/content/LLaVA/llava', '/content/llava', '/content/llava/train', '/content/LLaVa', '/content/LLaVA/llava']


In [None]:
import sys

# The repository is cloned to /content/LLaVA. Paths are case-sensitive on
# Linux, so the spelling '/content/LLaVa' does not point at that checkout.
llava_repo_path = '/content/LLaVA'

# Guarded so that re-running the cell does not pile up duplicate entries.
if llava_repo_path not in sys.path:
    sys.path.append(llava_repo_path)


In [None]:
from llava import train  # Example: Importing the train module


[2024-02-13 09:39:07,187] [INFO] [real_accelerator.py:191:get_accelerator] Setting ds_accelerator to cuda (auto detect)


In [None]:
import os
import shutil

# Source and destination paths
source_path = '/content/LLaVA/llava'
destination_path = '/content'

# Copy the entire llava folder to the destination. dirs_exist_ok=True lets the
# cell be re-run after a previous copy instead of failing with FileExistsError.
shutil.copytree(
    source_path,
    os.path.join(destination_path, 'llava'),
    dirs_exist_ok=True,
)

print(f"llava folder has been copied from '{source_path}' to '{destination_path}'.")


llava folder has been copied from '/content/LLaVA/llava' to '/content'.


In [None]:
# `os` is imported here as well so the cell also works on a fresh kernel.
import os
import zipfile

# Archive to unpack
zip_file_path = '/content/dataset.zip'

# Directory that receives the archive contents
extracted_folder_path = '/content/dataset'

# Create the target directory if it doesn't exist
os.makedirs(extracted_folder_path, exist_ok=True)

# Extract the zip file
with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
    zip_ref.extractall(extracted_folder_path)

print(f"Zip file '{zip_file_path}' has been extracted to '{extracted_folder_path}'.")


Zip file '/content/dataset.zip' has been extracted to '/content/dataset'.


In [None]:
from llava.model.builder import load_pretrained_model
from llava.mm_utils import get_model_name_from_path
from llava.eval.run_llava import eval_model

# Model identifier; the model name is derived from it by the LLaVA helper.
model_path = "liuhaotian/llava-v1.5-7b"
model_name = get_model_name_from_path(model_path)

# Load tokenizer, model, image processor and context length in one call.
tokenizer, model, image_processor, context_len = load_pretrained_model(
    model_path=model_path,
    model_base=None,
    model_name=model_name,
)

In [None]:
# Execute the file whose path is stored in the Python variable `script_file_path`.
!$script_file_path

[2024-02-13 09:41:52,370] [INFO] [real_accelerator.py:191:get_accelerator] Setting ds_accelerator to cuda (auto detect)
[2024-02-13 09:41:54,821] [INFO] [runner.py:568:main] cmd = /usr/bin/python3.real -u -m deepspeed.launcher.launch --world_info=eyJsb2NhbGhvc3QiOiBbMF19 --master_addr=127.0.0.1 --master_port=29500 --enable_each_rank_log=None /content/LLaVA/llava/train/train_mem.py --deepspeed /content/LLaVA/scripts/zero2.json --lora_enable True --lora_r 128 --lora_alpha 256 --mm_projector_lr 2e-5 --bits 4 --model_name_or_path /content/llava-v1.5-7b --version llava_llama_2 --data_path /content/dataset/train/dataset.json --validation_data_path /content/dataset/validation/dataset.json --image_folder /content/dataset/images/ --vision_tower openai/clip-vit-large-patch14-336 --mm_projector_type mlp2x_gelu --mm_vision_select_layer -2 --mm_use_im_start_end False --mm_use_im_patch_token False --image_aspect_ratio pad --group_by_modality_length True --bf16 True --output_dir /content/LLaVA/llava/

In [None]:
%%shell
# Run as a shell cell: the command is not Python. Each line ends with a
# continuation backslash so the options stay part of one command, and the query
# uses plain ASCII quotes (typographic quotes are not shell quoting characters).
python run_llava.py \
    --model-path /root/LLaVA/llava/checkpoints/llava-2-7b-chat-task-qlora/best_llava_eval_model_llava_lora \
    --model-base /root/LLaVA/llava/llava-v1.5-7b \
    --image-file /root/dataset/images/0f47c0b5-2c77-45e6-87b0-89af46e99500.jpg \
    --query "why was this photo taken?"

In [None]:
%%shell
# LoRA fine-tuning command for LLaVA-1.5 13B; run as a shell cell because the
# command is not Python. Paths are relative to the current working directory.
deepspeed llava/train/train_mem.py \
    --lora_enable True --lora_r 128 --lora_alpha 256 --mm_projector_lr 2e-5 \
    --deepspeed ./scripts/zero3.json \
    --model_name_or_path liuhaotian/llava-v1.5-13b \
    --version v1 \
    --data_path ./playground/data/llava_v1_5_mix665k.json \
    --image_folder ./playground/data \
    --vision_tower openai/clip-vit-large-patch14-336 \
    --mm_projector_type mlp2x_gelu \
    --mm_vision_select_layer -2 \
    --mm_use_im_start_end False \
    --mm_use_im_patch_token False \
    --image_aspect_ratio pad \
    --group_by_modality_length True \
    --bf16 True \
    --output_dir ./checkpoints/llava-v1.5-13b-task-lora \
    --num_train_epochs 1 \
    --per_device_train_batch_size 16 \
    --per_device_eval_batch_size 4 \
    --gradient_accumulation_steps 1 \
    --evaluation_strategy "no" \
    --save_strategy "steps" \
    --save_steps 50000 \
    --save_total_limit 1 \
    --learning_rate 2e-4 \
    --weight_decay 0. \
    --warmup_ratio 0.03 \
    --lr_scheduler_type "cosine" \
    --logging_steps 1 \
    --tf32 True \
    --model_max_length 2048 \
    --gradient_checkpointing True \
    --dataloader_num_workers 4 \
    --lazy_preprocess True \
    --report_to wandb

In [None]:
import shutil

# Folder whose contents go into the archive
folder_path = '/content/dataset'

# Archive base name; make_archive appends the '.zip' extension and creates the
# file relative to the current working directory.
zip_filename = 'dataset'

# Create the zip file and keep the path of the file that was actually written
archive_path = shutil.make_archive(zip_filename, 'zip', folder_path)

print(f"Folder '{folder_path}' has been zipped to '{archive_path}'.")


Folder '/content/dataset' has been zipped to 'dataset'.


#