In [1]:
import json
import regex as re
from collections import Counter
from tqdm import tqdm

def load_json(file_path):
    """
    Read a UTF-8 encoded JSON file and return the parsed content.

    Parameters:
        file_path (str): Location of the JSON file to read.

    Returns:
        The deserialized JSON document, exactly as produced by json.load
        (a dict for a JSON object, a list for a JSON array, and so on).
    """
    with open(file_path, mode='r', encoding='utf-8') as json_file:
        return json.load(json_file)
def save_json(data, file_path):
    """
    Serialize data as JSON and write it to a UTF-8 encoded file.

    The output is indented by four spaces and non-ASCII characters are
    written as-is instead of being escaped.

    Parameters:
        data: The JSON-serializable object to write.
        file_path (str): Destination path; an existing file is overwritten.
    """
    with open(file_path, mode='w', encoding='utf-8') as json_file:
        json.dump(data, json_file, indent=4, ensure_ascii=False)

In [None]:
# Source annotation file. Later cells index the loaded content as json_data[i]
# and read each record's "conversations" list, so it is expected to be a list of records.
json_path = "/data_ssd/object365/not-converted-counting_object365_for_llava-onevision.json"
json_data = load_json(json_path)

In [3]:
# Print every field of the first record, one "key: value" line per field.
item = json_data[0]
for field_name in item:
    print(f"{field_name}: {item[field_name]}")

id: f859081e-ef03-49c7-a1a1-a59d15d69300
image: objects365/train/patch3/objects365_v1_00144532.jpg
conversations: [{'from': 'human', 'value': '<image>\nPlease output bbox coordinates and names of every item in this image.'}, {'from': 'gpt', 'value': 'Person: [0.583,0.309,0.682,0.448]\nPerson: [0.617,0.346,0.797,0.640]\nPerson: [0.380,0.295,0.696,0.999]\nPerson: [-0.000,0.298,0.224,0.999]\nPerson: [0.301,0.322,0.372,0.537]\nPerson: [0.330,0.332,0.405,0.626]\nPerson: [0.360,0.329,0.472,0.692]\nBook: [0.623,0.525,0.699,0.567]\nBook: [0.204,0.488,0.275,0.511]\nBook: [0.185,0.508,0.261,0.534]\nBook: [0.660,0.399,0.692,0.427]\nBelt: [0.392,0.806,0.496,0.872]\nBelt: [0.816,0.583,0.839,0.603]\nBelt: [0.869,0.663,0.936,0.676]\nPen/Pencil: [0.280,0.512,0.312,0.560]\nPen/Pencil: [0.235,0.434,0.258,0.469]\nPerson: [0.727,0.340,0.778,0.478]\nBook: [0.711,0.471,0.814,0.495]\nPerson: [0.038,0.299,0.316,0.946]\nPerson: [0.765,0.288,0.998,0.860]'}]


In [4]:
import os
def make_conversation(id,image_path,question,answer,image_folder_root=None):
    """
    Build a single two-turn conversation record.

    Parameters:
        id: Identifier stored under the "id" key.
        image_path (str): Image path stored under the "image" key.
        question (str): Human prompt; it is prefixed with "<image>\\n".
        answer: Value stored as the "gpt" turn.
        image_folder_root (str, optional): When given, it is joined in front
            of image_path with os.path.join.

    Returns:
        dict: A record with the keys "id", "image" and "conversations", where
        "conversations" holds the human turn followed by the gpt turn.
    """
    if image_folder_root is None:
        resolved_image = image_path
    else:
        resolved_image = os.path.join(image_folder_root, image_path)

    human_turn = {"from": "human", "value": f"<image>\n{question}"}
    gpt_turn = {"from": "gpt", "value": answer}
    return {
        "id": id,
        "image": resolved_image,
        "conversations": [human_turn, gpt_turn],
    }

def make_answer(detection_answer):
    """
    Turn a detection answer into a per-object count summary.

    The input is split on every ": [...]" bounding-box suffix; what remains
    between the matches (with surrounding newlines removed) is treated as an
    object name and tallied.

    Parameters:
        detection_answer (str): Text with entries such as
            "Person: [0.1,0.2,0.3,0.4]", one per line.

    Returns:
        str: One "name: count" line per distinct name, most frequent first,
        with leading and trailing whitespace stripped. Empty input gives "".
    """
    # "." does not match a newline, so each match stays within a single line.
    pieces = re.split(r": \[.*\]", detection_answer)
    names = [name for name in (piece.strip("\n") for piece in pieces) if name]

    summary_lines = [
        f"{name}: {occurrences}"
        for name, occurrences in Counter(names).most_common()
    ]
    return "\n".join(summary_lines).strip()
    

In [5]:
# Prompt that replaces the original detection question in every record.
COUNTING_QUESTION = "<image>\nPlease output how many objects are present in the image for each object type."

# Rewrite each record in place: the question becomes the counting prompt and the
# detection answer becomes a per-object count summary.
for item in tqdm(json_data):
    human_turn, gpt_turn = item["conversations"][0], item["conversations"][1]
    # Records that already carry the counting prompt were converted by an earlier run
    # of this cell. Their answer is already in "Name: count" form, and passing it
    # through make_answer again would collapse it into one bogus object name, so skip.
    if human_turn["value"] == COUNTING_QUESTION:
        continue
    # Convert the answer first so a failure never leaves a record marked as converted.
    gpt_turn["value"] = make_answer(gpt_turn["value"])
    human_turn["value"] = COUNTING_QUESTION

100%|██████████| 871145/871145 [00:30<00:00, 28316.17it/s]


In [8]:
# Re-read the file at json_path (the same path json_data was loaded from) into a
# separate object, so it is not affected by the in-place edits made to json_data.
# NOTE(review): despite the name, this is the source file, not the output of save_json.
saved_data = load_json(json_path)
print(f"Total items: {len(saved_data)}")

Total items: 871145


In [9]:
# Show the first record as read back from json_path, for comparison with json_data[0].
print(saved_data[0])

{'id': 'f859081e-ef03-49c7-a1a1-a59d15d69300', 'image': 'objects365/train/patch3/objects365_v1_00144532.jpg', 'conversations': [{'from': 'human', 'value': '<image>\nPlease output bbox coordinates and names of every item in this image.'}, {'from': 'gpt', 'value': 'Person: [0.583,0.309,0.682,0.448]\nPerson: [0.617,0.346,0.797,0.640]\nPerson: [0.380,0.295,0.696,0.999]\nPerson: [-0.000,0.298,0.224,0.999]\nPerson: [0.301,0.322,0.372,0.537]\nPerson: [0.330,0.332,0.405,0.626]\nPerson: [0.360,0.329,0.472,0.692]\nBook: [0.623,0.525,0.699,0.567]\nBook: [0.204,0.488,0.275,0.511]\nBook: [0.185,0.508,0.261,0.534]\nBook: [0.660,0.399,0.692,0.427]\nBelt: [0.392,0.806,0.496,0.872]\nBelt: [0.816,0.583,0.839,0.603]\nBelt: [0.869,0.663,0.936,0.676]\nPen/Pencil: [0.280,0.512,0.312,0.560]\nPen/Pencil: [0.235,0.434,0.258,0.469]\nPerson: [0.727,0.340,0.778,0.478]\nBook: [0.711,0.471,0.814,0.495]\nPerson: [0.038,0.299,0.316,0.946]\nPerson: [0.765,0.288,0.998,0.860]'}]}


In [6]:
# Show the first record of json_data, which the conversion loop rewrites in place.
print(json_data[0])

{'id': 'f859081e-ef03-49c7-a1a1-a59d15d69300', 'image': 'objects365/train/patch3/objects365_v1_00144532.jpg', 'conversations': [{'from': 'human', 'value': '<image>\nPlease output how many objects are present in the image for each object type.'}, {'from': 'gpt', 'value': 'Person: 10\nBook: 5\nBelt: 3\nPen/Pencil: 2'}]}


In [None]:
# Write the in-memory json_data to a new file; the path differs from json_path,
# so the source file is left as it is.
# NOTE(review): the recorded output of this cell shows a different filename
# ("counting_converted_..."), so that output predates the current path -- re-run to confirm.
save_json_path = "/data_ssd/object365/counting_object365_for_llava-onevision.json"
save_json(json_data, save_json_path)
print(f"Converted data saved to {save_json_path}")

Converted data saved to /data_ssd/object365/counting_converted_object365_for_llava-onevision.json


In [7]:
# Take the raw detection answer from the unmodified reload instead of from `item`.
# `item` is the leftover loop variable of the conversion cell, so on a top-to-bottom
# run it refers to the last record, whose answer has already been rewritten to counts.
answer = saved_data[0]["conversations"][1]["value"]
print(answer)

Person: [0.583,0.309,0.682,0.448]
Person: [0.617,0.346,0.797,0.640]
Person: [0.380,0.295,0.696,0.999]
Person: [-0.000,0.298,0.224,0.999]
Person: [0.301,0.322,0.372,0.537]
Person: [0.330,0.332,0.405,0.626]
Person: [0.360,0.329,0.472,0.692]
Book: [0.623,0.525,0.699,0.567]
Book: [0.204,0.488,0.275,0.511]
Book: [0.185,0.508,0.261,0.534]
Book: [0.660,0.399,0.692,0.427]
Belt: [0.392,0.806,0.496,0.872]
Belt: [0.816,0.583,0.839,0.603]
Belt: [0.869,0.663,0.936,0.676]
Pen/Pencil: [0.280,0.512,0.312,0.560]
Pen/Pencil: [0.235,0.434,0.258,0.469]
Person: [0.727,0.340,0.778,0.478]
Book: [0.711,0.471,0.814,0.495]
Person: [0.038,0.299,0.316,0.946]
Person: [0.765,0.288,0.998,0.860]


In [None]:
import regex as re
# Naive split on ": [" for comparison: the coordinates stay glued to the next name.
print(answer.split(": ["))

# Splitting on the whole ": [...]" suffix leaves only the object names
# (plus newlines and an empty tail, which are removed here).
object_list = [
    name
    for name in (piece.strip("\n") for piece in re.split(r": \[.*\]", answer))
    if len(name) > 0
]
print(object_list)

['Person', '0.583,0.309,0.682,0.448]\nPerson', '0.617,0.346,0.797,0.640]\nPerson', '0.380,0.295,0.696,0.999]\nPerson', '-0.000,0.298,0.224,0.999]\nPerson', '0.301,0.322,0.372,0.537]\nPerson', '0.330,0.332,0.405,0.626]\nPerson', '0.360,0.329,0.472,0.692]\nBook', '0.623,0.525,0.699,0.567]\nBook', '0.204,0.488,0.275,0.511]\nBook', '0.185,0.508,0.261,0.534]\nBook', '0.660,0.399,0.692,0.427]\nBelt', '0.392,0.806,0.496,0.872]\nBelt', '0.816,0.583,0.839,0.603]\nBelt', '0.869,0.663,0.936,0.676]\nPen/Pencil', '0.280,0.512,0.312,0.560]\nPen/Pencil', '0.235,0.434,0.258,0.469]\nPerson', '0.727,0.340,0.778,0.478]\nBook', '0.711,0.471,0.814,0.495]\nPerson', '0.038,0.299,0.316,0.946]\nPerson', '0.765,0.288,0.998,0.860]']
['Person', 'Person', 'Person', 'Person', 'Person', 'Person', 'Person', 'Book', 'Book', 'Book', 'Book', 'Belt', 'Belt', 'Belt', 'Pen/Pencil', 'Pen/Pencil', 'Person', 'Book', 'Person', 'Person']


In [None]:
from collections import Counter
# Tally the object names; most_common() returns (name, count) pairs, highest count first.
object_counter = Counter(object_list).most_common()
# A bare trailing expression is what makes the notebook display the result;
# with the assignment alone this cell produces no output.
object_counter




[('Person', 10), ('Book', 5), ('Belt', 3), ('Pen/Pencil', 2)]
