In [None]:
import os
import sys

# Parent of the current working directory; added to the import path so the
# local `trusteval` package can be imported (presumably the repository
# root when the notebook is launched from its own folder -- confirm).
parent_dir = os.path.dirname(os.getcwd())
if parent_dir not in sys.path:
    # Guard keeps sys.path free of duplicate entries when this cell is re-run.
    sys.path.append(parent_dir)
from trusteval.dimension.robustness.robustness_vlm import pipeline
from trusteval.dimension.robustness.robustness_vlm import download_dataset

# Directory handed to every later step of this notebook as its data folder.
base_dir = "./test/robustness/"

# exist_ok=True makes re-running the cell safe when the folder already exists.
os.makedirs(base_dir, exist_ok=True)

In [None]:
# Run the dataset downloader against base_dir (presumably it places the
# robustness VLM data there -- confirm in download_dataset).
download_dataset.main(base_dir)

In [None]:
# Top-level await: this only works inside a notebook/IPython kernel, which
# supplies the running event loop for the coroutine.
await pipeline.pipeline(base_dir=base_dir)

In [None]:
from trusteval.src.response_generator.lm_response import generate_responses
import shutil
# file_config for response generation. The path follows the
# trusteval/dimension/robustness/robustness_vlm layout that the imports and
# the judge-stage configs in this notebook use.
# NOTE(review): assumes file_config.json sits next to the
# file_config_vqa*.json files in that folder -- confirm.
source_config = "../trusteval/dimension/robustness/robustness_vlm/file_config.json"
target_config = os.path.join(base_dir, "file_config.json")

# Best effort: a missing source only prints a warning and the notebook
# continues with whatever file_config.json (if any) is already in base_dir.
if os.path.exists(source_config):
    shutil.copy2(source_config, target_config)
    print(f"Successfully copied file_config to {target_config}")
else:
    print("Warning: Source file_config not found")

data_folder = base_dir
# Models passed to generate_responses as async_list.
async_list = ['gpt-4o']

async def process_files_original():
    """Request VLM responses using the ``ori_*`` fields.

    Awaits ``generate_responses`` over ``base_dir`` for the models in
    ``async_list`` with ``prompt_key="ori_prompt"``,
    ``image_key="ori_image_path"`` and ``result_key="ori_responses"``.
    """
    request_options = dict(
        data_folder=base_dir,
        request_type="vlm",
        async_list=async_list,
        prompt_key="ori_prompt",
        result_key="ori_responses",
        image_key="ori_image_path",
    )
    await generate_responses(**request_options)
# Run the "ori" response-generation pass to completion before continuing.
await process_files_original()

async def process_files_enhanced():
    """Request VLM responses using the ``adv_*`` fields.

    Awaits ``generate_responses`` over ``base_dir`` for the models in
    ``async_list`` with ``prompt_key="adv_prompt"``,
    ``image_key="adv_image_path"`` and ``result_key="adv_responses"``.
    """
    request_options = dict(
        data_folder=base_dir,
        request_type="vlm",
        async_list=async_list,
        prompt_key="adv_prompt",
        result_key="adv_responses",
        image_key="adv_image_path",
    )
    await generate_responses(**request_options)
# Run the "adv" response-generation pass to completion before continuing.
await process_files_enhanced()

In [None]:
from trusteval.src.evaluation import judge_responses
import shutil
# Path to the judge prompt YAML under parent_dir.
# NOTE(review): not passed to any judge_responses call visible in this
# notebook section -- confirm whether it is still needed.
config_path = os.path.join(parent_dir, 'trusteval/src/config/judge_prompt.yaml')
# Judge model(s), passed to judge_responses as async_judge_model.
async_judge_model = ['gpt-4o-mini']
# NOTE(review): judge_key and model_list are not referenced by the calls
# visible here (process_vqa passes literal judge keys) -- possibly leftovers.
judge_key = 'judge'
# Passed to judge_responses as judge_type.
judge_type='llm'
model_list = ['gpt-4o-mini']
# Models whose responses are judged, passed as target_models.
async_list = ['gpt-4o']
async def process_vqa():
    """Judge the VQA responses in two passes: ``ori`` first, then ``adv``.

    Each pass copies its own file_config JSON over
    ``<base_dir>/file_config.json`` (a missing source only prints a warning
    and the pass still runs) and then awaits ``judge_responses`` for a
    single response key, storing results under the matching judge key.
    """
    judge_passes = (
        # (source file_config, response key, judge key)
        (
            "../trusteval/dimension/robustness/robustness_vlm/file_config_vqa_ori.json",
            'ori_responses',
            'ori_judge',
        ),
        (
            "../trusteval/dimension/robustness/robustness_vlm/file_config_vqa.json",
            'adv_responses',
            'adv_judge',
        ),
    )

    for config_src, responses_field, verdict_field in judge_passes:
        # Put this pass's config in place before judging.
        destination = os.path.join(base_dir, "file_config.json")
        if not os.path.exists(config_src):
            print("Warning: Source file_config not found")
        else:
            shutil.copy2(config_src, destination)
            print(f"Successfully copied file_config to {destination}")

        await judge_responses(
            data_folder=base_dir,
            async_judge_model=async_judge_model,
            target_models=async_list,
            judge_type=judge_type,
            response_key=[responses_field],
            judge_key=verdict_field,
        )
# Run both VQA judge passes; this finishes before the MSCOCO pass below
# overwrites file_config.json in base_dir.
await process_vqa()

async def process_mscoco():
    """Judge the MSCOCO responses for both response keys in one call.

    Copies ``file_config_mscoco.json`` over ``<base_dir>/file_config.json``
    (a missing source only prints a warning) and then awaits
    ``judge_responses`` with both ``ori_responses`` and ``adv_responses``
    and ``reverse_choice=True``. No ``judge_key`` is passed, so the callee's
    default applies.
    """
    config_src = "../trusteval/dimension/robustness/robustness_vlm/file_config_mscoco.json"
    destination = os.path.join(base_dir, "file_config.json")

    if not os.path.exists(config_src):
        print("Warning: Source file_config not found")
    else:
        shutil.copy2(config_src, destination)
        print(f"Successfully copied file_config to {destination}")

    await judge_responses(
        data_folder=base_dir,
        async_judge_model=async_judge_model,
        target_models=async_list,
        judge_type=judge_type,
        response_key=['ori_responses', 'adv_responses'],
        reverse_choice=True,
    )
# Run the MSCOCO judge pass to completion.
await process_mscoco()

In [None]:
from trusteval.src.evaluation import lm_metric
# Models to include in the metric computation (passed as model_list).
async_list = ['gpt-4o']

# Compute metrics for the "robustness_vlm" aspect over base_dir; kept as the
# cell's last expression so any return value is displayed.
lm_metric(base_dir=base_dir, aspect="robustness_vlm", model_list=async_list)