In [5]:
import json
from tqdm import tqdm
import os

def load_json(file_path):
    """
    Read a UTF-8 encoded JSON file and return the parsed content.

    Parameters:
        file_path (str): Location of the JSON file to read.

    Returns:
        The deserialized top-level JSON value: a dict when the file holds a
        JSON object, a list when it holds a JSON array, and so on.
    """
    with open(file_path, mode='r', encoding='utf-8') as handle:
        # The parsed value is fully materialized before the file is closed.
        return json.load(handle)

def save_jsonl(data, file_path):
    """
    Write records to a JSON Lines file, one JSON document per line.

    Any existing file at ``file_path`` is overwritten. Each record is
    serialized with the ``json`` module defaults and followed by a newline.

    Parameters:
        data (list): The records (typically dictionaries) to serialize.
        file_path (str): Destination path of the JSON Lines file.
    """
    with open(file_path, mode='w', encoding='utf-8') as sink:
        # One serialized record plus its terminating newline per iteration.
        sink.writelines(json.dumps(record) + '\n' for record in data)
            
def display_dict_keys_and_items(dictionary):
    """
    Print every key/value pair of a dictionary, one pair per line.

    Each line has the form ``Key: <key>, Value: <value>`` and pairs are
    printed in the dictionary's iteration order.

    Parameters:
        dictionary (dict): The dictionary whose contents are printed.

    Returns:
        None
    """
    for pair in dictionary.items():
        key, value = pair
        print(f"Key: {key}, Value: {value}")
    return None
        


In [6]:
# Location of the MMAD annotation file. Set the MMAD_JSON_PATH environment
# variable to point at a copy elsewhere; when it is unset, the original
# machine-specific location is used, so existing runs behave as before.
path = os.environ.get("MMAD_JSON_PATH", "/data_ssd/MMAD/MMAD_for_llava-onevision.json")
# Parsed annotation content; later cells treat it as a list of dict records.
data = load_json(path)

In [7]:
# Sanity check: report how many records were loaded, then dump the first
# record so its keys and example values are visible.
print(f"Loaded {len(data)} items from {path}")
display_dict_keys_and_items(data[0])  # Display keys and items of the first dictionary

Loaded 39672 items from /data_ssd/MMAD/MMAD_for_llava-onevision.json
Key: id, Value: Anomaly Detection_DS-MVTec/bottle/image/broken_large/000.png
Key: image, Value: DS-MVTec/bottle/image/broken_large/000.png
Key: conversations, Value: [{'from': 'human', 'value': "Test image:\n<image>\nIs there any defect in the object?\nA. Yes.\nB. No.\nAnswer with the option's letter from the given choices directly."}, {'from': 'gpt', 'value': 'A'}]
Key: Answer, Value: A
Key: Question, Value: Is there any defect in the object?
Key: Options, Value: {'A': 'Yes.', 'B': 'No.'}
Key: type, Value: Anomaly Detection
Key: annotation, Value: True
Key: mask_path, Value: rbg_mask/broken_large/000_rbg_mask.png
Key: similar_templates, Value: ['MVTec-AD/bottle/train/good/001.png', 'MVTec-AD/bottle/train/good/061.png', 'MVTec-AD/bottle/train/good/199.png', 'MVTec-AD/bottle/train/good/124.png', 'MVTec-AD/bottle/train/good/149.png', 'MVTec-AD/bottle/train/good/147.png', 'MVTec-AD/bottle/train/good/089.png', 'MVTec-AD/b

In [None]:
# Reduce every loaded record to the few fields kept for evaluation: the image
# path, the question text and type, and the two answer fields.
save_jsonl_data = []
for item in tqdm(data):
    save_jsonl_data.append(
        {
            "image": item["image"],
            "question": item["Question"],
            "question_type": item["type"],
            # Value of the last conversation turn of the record.
            "gpt_answer": item["conversations"][-1]["value"],
            "correct_answer": item["Answer"],
        }
    )
display_dict_keys_and_items(save_jsonl_data[0])  # Display keys and items of the first result entry

100%|██████████| 39672/39672 [00:00<00:00, 132738.25it/s]

Key: image, Value: DS-MVTec/bottle/image/broken_large/000.png
Key: question, Value: Is there any defect in the object?
Key: question_type, Value: Anomaly Detection
Key: gpt_answer, Value: A
Key: correct_answer, Value: A





In [None]:
# Write the reduced records as JSON Lines (one JSON object per line); the
# relative path resolves against the kernel's current working directory.
save_jsonl(save_jsonl_data, "./MMAD_for_helper_eval.jsonl")