In [5]:
import json
import os
# Annotations that are later converted with type "real".
with open("/tmp/dataset/annotations/office/nando-eye/latest.json") as file:
    data = json.loads(file.read())

# Benchmark annotations that are later converted with type "test".
with open("/tmp/dataset/annotations/office/nando-eye/benchmark/latest.json") as file:
    data_test = json.loads(file.read())

In [6]:
import os
import sys

# NOTE(review): despite its name, CUR_DIR is the *parent* of the working directory
# (the directory part of the resolved "."), so the entry appended to sys.path sits
# two levels above the working directory. Presumably this is what makes the
# `utils` import below resolvable -- confirm.
CUR_DIR = os.path.split(os.path.realpath(os.curdir))[0]
sys.path.append(os.path.join(CUR_DIR, os.pardir))

import base64
import json
import shutil

import numpy as np
from sklearn.metrics import precision_recall_fscore_support as score
from utils import (ANNOTATIONS_BUCKET, DATA_FOLDER, ML_BUCKET,
                   download_bucket_folder, download_json, upload_json, get_exp_dir)


def get_annotations(busisness_type: str, model: str):
    """Download the latest benchmark annotations for a business type and model.

    Args:
        busisness_type: Path segment placed right after "annotations".
        model: Path segment placed after the business type.

    Returns:
        Whatever ``download_json`` returns for
        ``annotations/<busisness_type>/<model>/benchmark/all/latest.json``
        in ``ANNOTATIONS_BUCKET``.
    """
    remote_path = os.path.join(
        "annotations", busisness_type, model, "benchmark", "all", "latest.json"
    )
    print(f"Downloading annotations {remote_path}...")
    return download_json(ANNOTATIONS_BUCKET, remote_path)

def download_data(annotations, local_folder):
    """Fetch the image folders that hold files missing from ``local_folder``.

    Args:
        annotations: Mapping with an "images" list; each entry provides a
            "file_name" that is relative to ``local_folder``.
        local_folder: Local root directory the images are expected under.

    Returns:
        None. Folders are downloaded as a side effect.
    """
    print("Downloading data...")

    # Directory part (everything before the last "/") of each image that is not
    # on disk yet. A set is used so every folder is downloaded only once.
    missing_folders = {
        image["file_name"].rpartition("/")[0]
        for image in annotations["images"]
        if not os.path.exists(os.path.join(local_folder, image["file_name"]))
    }

    for folder in missing_folders:
        target_dir = os.path.join(local_folder, folder)
        os.makedirs(target_dir, exist_ok=True)
        # The whole remote folder is fetched, not only the missing files.
        download_bucket_folder(ANNOTATIONS_BUCKET, os.path.join("images", folder), target_dir)


def prepare_benchmark_run(busisness_type: str, model: str):
    """Download the benchmark annotations and the images they reference.

    Args:
        busisness_type: Forwarded unchanged to ``get_annotations``.
        model: Forwarded unchanged to ``get_annotations``.

    Returns:
        The annotations object returned by ``get_annotations``.
    """
    benchmark_annotations = get_annotations(busisness_type, model)
    # Images missing locally are fetched into DATA_FOLDER.
    download_data(benchmark_annotations, DATA_FOLDER)
    return benchmark_annotations

In [2]:
# Fetch the benchmark annotations (and any missing images) for office / nando-eye.
anno = prepare_benchmark_run(busisness_type="office", model="nando-eye")

Downloading annotations annotations/office/nando-eye/benchmark/all/latest.json...
Downloading data...
Downloading from nando-mlops-dataset-prod with prefix images/benchmark/2023_nando_eye_benchmark_1/data to /tmp/images/benchmark/2023_nando_eye_benchmark_1/data


Copying gs://nando-mlops-dataset-prod/images/benchmark/2023_nando_eye_benchmark_1/data/03602cc0770e0e3e_20_06_2023__12_05_14.jpg...
Copying gs://nando-mlops-dataset-prod/images/benchmark/2023_nando_eye_benchmark_1/data/03602cc0770e0e3e_21_06_2023__16_30_15.jpg...
Copying gs://nando-mlops-dataset-prod/images/benchmark/2023_nando_eye_benchmark_1/data/03602cc0770e0e3e_21_06_2023__16_30_44.jpg...
Copying gs://nando-mlops-dataset-prod/images/benchmark/2023_nando_eye_benchmark_1/data/03602cc0770e0e3e_21_06_2023__16_35_30.jpg...
Copying gs://nando-mlops-dataset-prod/images/benchmark/2023_nando_eye_benchmark_1/data/03602cc0770e0e3e_21_06_2023__16_35_59.jpg...
Copying gs://nando-mlops-dataset-prod/images/benchmark/2023_nando_eye_benchmark_1/data/03602cc0770e0e3e_21_06_2023__16_36_21.jpg...
Copying gs://nando-mlops-dataset-prod/images/benchmark/2023_nando_eye_benchmark_1/data/03602cc0770e0e3e_21_06_2023__16_37_34.jpg...
Copying gs://nando-mlops-dataset-prod/images/benchmark/2023_nando_eye_benchm

Downloading from nando-mlops-dataset-prod with prefix images/benchmark/2023_nando_eye_benchmark_4/data to /tmp/images/benchmark/2023_nando_eye_benchmark_4/data


Copying gs://nando-mlops-dataset-prod/images/benchmark/2023_nando_eye_benchmark_4/data/03602cc0770e0e3e_12_10_2023__16_41_51.jpg...
Copying gs://nando-mlops-dataset-prod/images/benchmark/2023_nando_eye_benchmark_4/data/0b1707a511bd8c37_10_10_2023__19_05_01.jpg...
Copying gs://nando-mlops-dataset-prod/images/benchmark/2023_nando_eye_benchmark_4/data/164b88ebb479c004_20_10_2023__10_43_38.jpg...
Copying gs://nando-mlops-dataset-prod/images/benchmark/2023_nando_eye_benchmark_4/data/1ba843f64e07dff6_16_10_2023__18_15_00.jpg...
Copying gs://nando-mlops-dataset-prod/images/benchmark/2023_nando_eye_benchmark_4/data/1ba843f64e07dff6_25_10_2023__17_51_50.jpg...
Copying gs://nando-mlops-dataset-prod/images/benchmark/2023_nando_eye_benchmark_4/data/1ba843f64e07dff6_25_10_2023__17_52_07.jpg...
Copying gs://nando-mlops-dataset-prod/images/benchmark/2023_nando_eye_benchmark_4/data/1ba843f64e07dff6_25_10_2023__18_46_39.jpg...
Copying gs://nando-mlops-dataset-prod/images/benchmark/2023_nando_eye_benchm

Downloading from nando-mlops-dataset-prod with prefix images/benchmark/2023_nando_eye_benchmark_2/data to /tmp/images/benchmark/2023_nando_eye_benchmark_2/data


Copying gs://nando-mlops-dataset-prod/images/benchmark/2023_nando_eye_benchmark_2/data/03602cc0770e0e3e_03_07_2023__11_01_21.jpg...
Copying gs://nando-mlops-dataset-prod/images/benchmark/2023_nando_eye_benchmark_2/data/03602cc0770e0e3e_03_07_2023__13_50_17.jpg...
Copying gs://nando-mlops-dataset-prod/images/benchmark/2023_nando_eye_benchmark_2/data/03602cc0770e0e3e_03_07_2023__13_56_59.jpg...
Copying gs://nando-mlops-dataset-prod/images/benchmark/2023_nando_eye_benchmark_2/data/03602cc0770e0e3e_03_07_2023__14_40_21.jpg...
Copying gs://nando-mlops-dataset-prod/images/benchmark/2023_nando_eye_benchmark_2/data/03602cc0770e0e3e_03_07_2023__14_15_40.jpg...
Copying gs://nando-mlops-dataset-prod/images/benchmark/2023_nando_eye_benchmark_2/data/03602cc0770e0e3e_03_07_2023__16_05_42.jpg...
Copying gs://nando-mlops-dataset-prod/images/benchmark/2023_nando_eye_benchmark_2/data/03602cc0770e0e3e_03_07_2023__16_06_03.jpg...
Copying gs://nando-mlops-dataset-prod/images/benchmark/2023_nando_eye_benchm

Downloading from nando-mlops-dataset-prod with prefix images/benchmark/2023_nando_eye_benchmark/data to /tmp/images/benchmark/2023_nando_eye_benchmark/data


Copying gs://nando-mlops-dataset-prod/images/benchmark/2023_nando_eye_benchmark/data/1dcc22fc24dbe147_06_05_2023__10_14_08.jpg...
Copying gs://nando-mlops-dataset-prod/images/benchmark/2023_nando_eye_benchmark/data/1dcc22fc24dbe147_06_05_2023__10_17_07.jpg...
Copying gs://nando-mlops-dataset-prod/images/benchmark/2023_nando_eye_benchmark/data/1dcc22fc24dbe147_06_05_2023__10_18_31.jpg...
Copying gs://nando-mlops-dataset-prod/images/benchmark/2023_nando_eye_benchmark/data/1dcc22fc24dbe147_06_05_2023__10_28_53.jpg...
Copying gs://nando-mlops-dataset-prod/images/benchmark/2023_nando_eye_benchmark/data/1dcc22fc24dbe147_06_05_2023__10_36_28.jpg...
Copying gs://nando-mlops-dataset-prod/images/benchmark/2023_nando_eye_benchmark/data/1dcc22fc24dbe147_06_05_2023__10_39_12.jpg...
Copying gs://nando-mlops-dataset-prod/images/benchmark/2023_nando_eye_benchmark/data/1dcc22fc24dbe147_06_05_2023__10_43_12.jpg...
Copying gs://nando-mlops-dataset-prod/images/benchmark/2023_nando_eye_benchmark/data/1dcc2

Downloading from nando-mlops-dataset-prod with prefix images/benchmark/2023_12_06_nando_eye_benchmark_7605/data to /tmp/images/benchmark/2023_12_06_nando_eye_benchmark_7605/data


Copying gs://nando-mlops-dataset-prod/images/benchmark/2023_12_06_nando_eye_benchmark_7605/data/54f2cf0d35de5bc3_01_12_2023__13_38_29.jpg...
Copying gs://nando-mlops-dataset-prod/images/benchmark/2023_12_06_nando_eye_benchmark_7605/data/54f2cf0d35de5bc3_01_12_2023__13_38_57.jpg...
Copying gs://nando-mlops-dataset-prod/images/benchmark/2023_12_06_nando_eye_benchmark_7605/data/54f2cf0d35de5bc3_04_12_2023__08_49_00.jpg...
Copying gs://nando-mlops-dataset-prod/images/benchmark/2023_12_06_nando_eye_benchmark_7605/data/54f2cf0d35de5bc3_04_12_2023__14_31_35.jpg...
Copying gs://nando-mlops-dataset-prod/images/benchmark/2023_12_06_nando_eye_benchmark_7605/data/54f2cf0d35de5bc3_05_12_2023__13_42_12.jpg...
Copying gs://nando-mlops-dataset-prod/images/benchmark/2023_12_06_nando_eye_benchmark_7605/data/54f2cf0d35de5bc3_05_12_2023__13_45_25.jpg...
Copying gs://nando-mlops-dataset-prod/images/benchmark/2023_12_06_nando_eye_benchmark_7605/data/54f2cf0d35de5bc3_05_12_2023__13_45_55.jpg...
Copying gs://

Downloading from nando-mlops-dataset-prod with prefix images/benchmark/2024_07_13_nando_eye_real_4839/data to /tmp/images/benchmark/2024_07_13_nando_eye_real_4839/data


Copying gs://nando-mlops-dataset-prod/images/benchmark/2024_07_13_nando_eye_real_4839/data/5b7ee39e21b64ff3_05_03_2024__09_16_33.jpg...
Copying gs://nando-mlops-dataset-prod/images/benchmark/2024_07_13_nando_eye_real_4839/data/5b7ee39e21b64ff3_11_01_2024__12_20_33.jpg...
Copying gs://nando-mlops-dataset-prod/images/benchmark/2024_07_13_nando_eye_real_4839/data/5b7ee39e21b64ff3_11_03_2024__11_11_50.jpg...
Copying gs://nando-mlops-dataset-prod/images/benchmark/2024_07_13_nando_eye_real_4839/data/5b7ee39e21b64ff3_11_03_2024__11_12_58.jpg...
Copying gs://nando-mlops-dataset-prod/images/benchmark/2024_07_13_nando_eye_real_4839/data/5b7ee39e21b64ff3_11_03_2024__12_42_13.jpg...
Copying gs://nando-mlops-dataset-prod/images/benchmark/2024_07_13_nando_eye_real_4839/data/5b7ee39e21b64ff3_14_02_2024__16_32_38.jpg...
Copying gs://nando-mlops-dataset-prod/images/benchmark/2024_07_13_nando_eye_real_4839/data/5b7ee39e21b64ff3_14_02_2024__16_38_29.jpg...
Copying gs://nando-mlops-dataset-prod/images/ben

Downloading from nando-mlops-dataset-prod with prefix images/benchmark/2023_nando_eye_benchmark_3/data to /tmp/images/benchmark/2023_nando_eye_benchmark_3/data


Copying gs://nando-mlops-dataset-prod/images/benchmark/2023_nando_eye_benchmark_3/data/03602cc0770e0e3e_03_08_2023__09_18_48.jpg...
Copying gs://nando-mlops-dataset-prod/images/benchmark/2023_nando_eye_benchmark_3/data/03602cc0770e0e3e_31_07_2023__16_08_59.jpg...
Copying gs://nando-mlops-dataset-prod/images/benchmark/2023_nando_eye_benchmark_3/data/0b1707a511bd8c37_03_08_2023__16_20_19.jpg...
Copying gs://nando-mlops-dataset-prod/images/benchmark/2023_nando_eye_benchmark_3/data/0b1707a511bd8c37_01_08_2023__17_00_37.jpg...
Copying gs://nando-mlops-dataset-prod/images/benchmark/2023_nando_eye_benchmark_3/data/164b88ebb479c004_17_08_2023__08_23_13.jpg...
Copying gs://nando-mlops-dataset-prod/images/benchmark/2023_nando_eye_benchmark_3/data/164b88ebb479c004_17_08_2023__15_59_48.jpg...
Copying gs://nando-mlops-dataset-prod/images/benchmark/2023_nando_eye_benchmark_3/data/195ffe866df09fd9_03_08_2023__16_19_58.jpg...
Copying gs://nando-mlops-dataset-prod/images/benchmark/2023_nando_eye_benchm

Downloading from nando-mlops-dataset-prod with prefix images/benchmark/2023_12_06_nando_eye_benchmark_7567/data to /tmp/images/benchmark/2023_12_06_nando_eye_benchmark_7567/data


Copying gs://nando-mlops-dataset-prod/images/benchmark/2023_12_06_nando_eye_benchmark_7567/data/3758ef307238bb6a_04_12_2023__16_15_38.jpg...
Copying gs://nando-mlops-dataset-prod/images/benchmark/2023_12_06_nando_eye_benchmark_7567/data/3758ef307238bb6a_05_12_2023__19_15_02.jpg...
Copying gs://nando-mlops-dataset-prod/images/benchmark/2023_12_06_nando_eye_benchmark_7567/data/3758ef307238bb6a_05_12_2023__19_57_48.jpg...
Copying gs://nando-mlops-dataset-prod/images/benchmark/2023_12_06_nando_eye_benchmark_7567/data/3758ef307238bb6a_05_12_2023__21_47_57.jpg...
Copying gs://nando-mlops-dataset-prod/images/benchmark/2023_12_06_nando_eye_benchmark_7567/data/3758ef307238bb6a_06_12_2023__08_21_35.jpg...
- [5/5 files][  1.0 MiB/  1.0 MiB] 100% Done                                    
Operation completed over 5 objects/1.0 MiB.                                      


Downloading from nando-mlops-dataset-prod with prefix images/benchmark/2023_12_06_nando_eye_benchmark_7616/data to /tmp/images/benchmark/2023_12_06_nando_eye_benchmark_7616/data


Copying gs://nando-mlops-dataset-prod/images/benchmark/2023_12_06_nando_eye_benchmark_7616/data/75b272f2ee5ad1e3_27_11_2023__13_13_01.jpg...
Copying gs://nando-mlops-dataset-prod/images/benchmark/2023_12_06_nando_eye_benchmark_7616/data/75b272f2ee5ad1e3_27_11_2023__16_04_16.jpg...
Copying gs://nando-mlops-dataset-prod/images/benchmark/2023_12_06_nando_eye_benchmark_7616/data/75b272f2ee5ad1e3_27_11_2023__16_47_00.jpg...
Copying gs://nando-mlops-dataset-prod/images/benchmark/2023_12_06_nando_eye_benchmark_7616/data/75b272f2ee5ad1e3_27_11_2023__16_54_52.jpg...
Copying gs://nando-mlops-dataset-prod/images/benchmark/2023_12_06_nando_eye_benchmark_7616/data/75b272f2ee5ad1e3_27_11_2023__17_50_00.jpg...
- [5/5 files][887.2 KiB/887.2 KiB] 100% Done                                    
Operation completed over 5 objects/887.2 KiB.                                    


In [4]:

# Persist the downloaded benchmark annotations so they can be reloaded from disk.
# json.dump() returns None, so its result must NOT be bound to `data`: doing so
# replaced the annotations loaded from latest.json with None and broke the later
# prepare_conversations_minicpm(data, ...) call on a top-to-bottom run.
benchmark_path = "/tmp/dataset/annotations/office/nando-eye/benchmark/latest.json"
# Make sure the target directory exists before writing.
os.makedirs(os.path.dirname(benchmark_path), exist_ok=True)
# Plain "w" is enough: the file is only written, never read back here.
with open(benchmark_path, "w") as file:
    json.dump(anno, file)

In [11]:
img_dir = "/tmp/dataset/images"


def prepare_conversations_minicpm(annotations, prompt, dataset_dir, type_a, image_dir=None):
    """Convert image annotations into MiniCPM-V style conversations and save them.

    For every image a conversation is built: the user turn is ``<image>`` plus the
    prompt, the assistant turn is the category names of the image's annotations
    joined with " and " (or "Nothing" when the image has no annotation). When
    the image has exactly one annotation, a second exchange asking for the
    bounding box is appended, answered with ``str(bbox)``.

    Args:
        annotations: Mapping with "images" (entries with "id" and "file_name"),
            "categories" (entries with "id" and "name") and "annotations"
            (entries with "image_id", "category_id" and "bbox").
        prompt: Question text placed after the ``<image>`` tag in the first turn.
        dataset_dir: Directory that receives ``conversations_<type_a>.json``.
            It is created when it does not exist yet.
        type_a: Suffix used in the log message and in the output file name.
        image_dir: Optional root directory prepended to every image file name.
            Defaults to the notebook-level ``img_dir``.

    Returns:
        The list of per-image dicts (keys "id", "image", "conversations") that
        was written to disk.

    Raises:
        KeyError: If an annotation references an unknown image or category id.
    """
    print("\nConverting annotations to conversations MiniCPM-V..." + type_a)

    # Resolved at call time so a later reassignment of img_dir is still honoured.
    image_root = img_dir if image_dir is None else image_dir

    cat_map = {c["id"]: c["name"] for c in annotations["categories"]}
    img_ids_map = {i["id"]: {"file": i["file_name"], "annos": []} for i in annotations["images"]}

    # Group the annotations under the image they belong to.
    for ann in annotations["annotations"]:
        img_ids_map[ann["image_id"]]["annos"].append(ann)

    dataset = []
    for img_id, entry in img_ids_map.items():
        image_annos = entry["annos"]

        if image_annos:
            answer = " and ".join(cat_map[a["category_id"]] for a in image_annos)
        else:
            answer = "Nothing"

        conversations = [
            {"role": "user", "content": f"<image>\n{prompt}"},
            {"role": "assistant", "content": answer},
        ]

        # The bounding-box follow-up is only added when the image has a single object.
        if len(image_annos) == 1:
            conversations.append(
                {
                    "role": "user",
                    "content": "Please provide the bounding box coordinate of the region corresponding to the item",
                }
            )
            conversations.append({"role": "assistant", "content": str(image_annos[0]["bbox"])})

        dataset.append(
            {
                "id": img_id,
                "image": os.path.join(image_root, entry["file"]),
                "conversations": conversations,
            }
        )

    # An empty dataset_dir means "current directory"; os.makedirs("") would raise.
    if dataset_dir:
        os.makedirs(dataset_dir, exist_ok=True)
    annotations_path = os.path.join(dataset_dir, "conversations_" + type_a + ".json")
    with open(annotations_path, "w") as f:
        json.dump(dataset, f, indent=2)
    return dataset

In [12]:
# Question asked about every image in the first user turn.
prompt = (
    "Which object is shown to the camera? "
    "Answer the question using a single word or phrase."
)
# Writes conversations_real.json into the current working directory.
conversations = prepare_conversations_minicpm(data, prompt, dataset_dir=".", type_a="real")


Converting annotations to conversations MiniCPM-V...real


In [13]:
# Same conversion for the benchmark annotations; writes conversations_test.json
# into the current working directory.
conversations_test = prepare_conversations_minicpm(
    data_test, prompt, dataset_dir=".", type_a="test"
)


Converting annotations to conversations MiniCPM-V...test


In [10]:
# Show the kernel's working directory: the conversations_*.json files are written
# relative to it because dataset_dir is ".".
!pwd

/home/llava/training/ml_experiments


In [None]:
# Generated file: /home/llava/training/ml_experiments/conversations_real.json