In [1]:
import json
import os
import glob
import matplotlib.pyplot as plt
import numpy as np

In [2]:
def writeEvalsToFile(EVAL_PATH, RESULTS_PATH):
    """Summarise every evaluation JSON in ``EVAL_PATH`` into ``RESULTS_PATH``.

    For each non-empty ``*.json`` file directly inside ``EVAL_PATH``:

    1. The file is loaded as a sequence of records; each record must provide
       the ``'ground_truth'`` and ``'consistent'`` keys.
    2. A class-balanced subset is drawn: the same number of records whose
       ground truth is ``'yes'`` and of all remaining records, sampled
       without replacement with a fixed seed (42).
    3. Every sampled record is labelled ``correct`` / ``incorrect`` /
       ``unknown`` from the text following the last ``'consistent:'`` marker.
    4. Counts keyed ``"<operation>_<ground_truth>_<label>"`` plus the
       ``'prediction'`` totals and, when at least one record was labelled
       correct or incorrect, the ``'accuracy'`` are written to a file with
       the same name inside ``RESULTS_PATH``.

    The keyed counts are also accumulated over all files and written once,
    after the loop, to ``new_compiled_results.json`` in ``RESULTS_PATH``.

    Args:
        EVAL_PATH: Directory holding the evaluation ``*.json`` files.
        RESULTS_PATH: Directory receiving the summaries (created if missing).

    Returns:
        None. The function only writes files.

    Raises:
        KeyError: If a record lacks ``'ground_truth'`` or ``'consistent'``.
        json.JSONDecodeError: If a non-empty input file is not valid JSON.
        OSError: If an input cannot be read or an output cannot be written.
    """
    seed_value = 42

    os.makedirs(RESULTS_PATH, exist_ok=True)

    # Sorted so the key order of the compiled summary does not depend on the
    # arbitrary order in which glob lists the directory.
    files = sorted(glob.glob(os.path.join(EVAL_PATH, '*.json'), recursive=True))
    compiled_results = {}

    for path in files:
        # Skip empty inputs *before* opening the output file; opening it first
        # would truncate/create a 0-byte results file and leak the handle.
        if os.path.getsize(path) == 0:
            continue

        # The operation is the last '_'-separated token of the file stem,
        # e.g. "val_num_3_and.json" -> "and". Using the base name keeps
        # underscores in directory names from leaking into the key.
        file_name = os.path.basename(path)
        operation = os.path.splitext(file_name)[0].split('_')[-1]

        with open(path, 'r') as file:
            evals = json.load(file)

        # Partition record positions by ground truth ('yes' vs. everything else).
        yes_idx = []
        no_idx = []
        for idx, entry in enumerate(evals):
            if entry['ground_truth'] == 'yes':
                yes_idx.append(idx)
            else:
                no_idx.append(idx)

        # Draw the same number of positions from both groups. A private
        # RandomState seeded per file yields the same draws as reseeding the
        # global generator, without clobbering numpy's global random state.
        min_size = min(len(yes_idx), len(no_idx))
        rng = np.random.RandomState(seed_value)
        selected = set()
        if min_size > 0:
            selected.update(rng.choice(yes_idx, min_size, replace=False).tolist())
            selected.update(rng.choice(no_idx, min_size, replace=False).tolist())

        results = {}
        totals = {'correct': 0, 'incorrect': 0, 'unknown': 0}

        for idx, entry in enumerate(evals):
            if idx not in selected:
                continue

            ground_truth = entry['ground_truth']
            verdict = entry['consistent'].split('consistent:')[-1].lower()

            # NOTE(review): these are substring checks, so words such as
            # "unknown", "cannot" or "not" also match 'no' and are counted as
            # incorrect. Kept as-is to leave reported numbers unchanged --
            # confirm whether whole-word matching is wanted.
            if 'yes' in verdict or 'true' in verdict:
                is_correct = "correct"
            elif 'no' in verdict or 'false' in verdict:
                is_correct = "incorrect"
            else:
                is_correct = "unknown"
            totals[is_correct] += 1

            key = f"{operation}_{ground_truth}_{is_correct}"
            results[key] = results.get(key, 0) + 1
            compiled_results[key] = compiled_results.get(key, 0) + 1

        results['prediction'] = totals

        # Accuracy ignores 'unknown' verdicts and is omitted when undefined.
        decided = totals['correct'] + totals['incorrect']
        if decided > 0:
            results["accuracy"] = totals['correct'] / decided

        with open(os.path.join(RESULTS_PATH, file_name), 'w') as results_file:
            json.dump(results, results_file)

    # Written once after all inputs are processed (previously rewritten on
    # every loop iteration).
    with open(os.path.join(RESULTS_PATH, "new_compiled_results.json"), 'w') as compiled_results_file:
        json.dump(compiled_results, compiled_results_file)

In [3]:
def showEvalsPlotOver_( mllm, comparison = 'num', results_path=None):
    """Plot per-augmentation accuracy from previously written result files.

    Reads ``val_<comparison>_<value>_<aug>.json`` for every augmentation in
    ``['orig', 'and', 'or', 'not', 'complex']`` and every value of the chosen
    comparison, draws one accuracy line per augmentation, saves the figure as
    ``plot_<comparison>.pdf`` in the results directory and shows it.

    Args:
        mllm: Model name; only used in the figure title.
        comparison: ``'num'`` (values 3 through 10) or ``'type'`` (values
            ``'inter'`` and ``'intra'``). Any other value makes the function
            return without doing anything.
        results_path: Directory holding the result files and receiving the
            PDF. Defaults to the module-level ``RESULTS_PATH`` so existing
            two-argument calls keep working.

    Returns:
        None.

    Raises:
        FileNotFoundError: If an expected result file does not exist.
        NameError: If ``results_path`` is omitted and no global
            ``RESULTS_PATH`` has been defined yet.
    """
    lang_augmentations = ['orig', 'and', 'or', 'not', 'complex']

    if comparison == 'num':
        x = list(range(3, 11))
    elif comparison == 'type':
        x = ['inter', 'intra']
    else:
        # Unsupported comparison: silent no-op, as before.
        return

    if results_path is None:
        # Fall back to the global assigned by the driver cell.
        results_path = RESULTS_PATH

    accuracies = {aug: [] for aug in lang_augmentations}
    for value in x:
        for aug in lang_augmentations:
            result_file = os.path.join(results_path, f"val_{comparison}_{value}_{aug}.json")
            with open(result_file, 'r') as file:
                data = json.load(file)
            # A results file carries no 'accuracy' key when no record was
            # labelled correct or incorrect; plot a gap (NaN) instead of
            # failing with KeyError.
            accuracies[aug].append(data.get('accuracy', float('nan')))

    fig, ax = plt.subplots(figsize=(8, 6))
    for aug in lang_augmentations:
        ax.plot(x, accuracies[aug], marker='o', label=f'accuracy_{aug}')

    ax.set_xlabel('Object')
    ax.set_ylabel('Accuracy')
    ax.set_title(f'Accuracy Variation over Objects - Language Augmentation ({mllm})')

    ax.legend()
    fig.tight_layout()

    fig.savefig(os.path.join(results_path, f'plot_{comparison}.pdf'))
    plt.show()
    # Release the figure so repeated calls (one per model) do not pile up.
    plt.close(fig)

In [4]:
# Models whose evaluation outputs are summarised, one sub-directory each.
MLLM_LIST = ['LLaVA', 'LLaMA', 'InstructBLIP']

for mllm in MLLM_LIST:
    # EVAL_PATH / RESULTS_PATH stay module-level names on purpose:
    # showEvalsPlotOver_ reads RESULTS_PATH as a global.
    EVAL_PATH, RESULTS_PATH = (
        f'/scratch/averma90/MLLM_Hallucinations_CLEVR/outputs/language_augmentation/Zephyr_Results/{mllm}',
        f'/scratch/averma90/MLLM_Hallucinations_CLEVR/outputs/language_augmentation/Zephyr_Analysis/{mllm}',
    )

    writeEvalsToFile(EVAL_PATH, RESULTS_PATH)
    print(f'-------PLOTS FOR {mllm}-------')
    # Plotting is currently disabled; to re-enable, call:
    #   showEvalsPlotOver_(mllm, 'num')
    #   showEvalsPlotOver_(mllm, 'type')

-------PLOTS FOR LLaVA-------
-------PLOTS FOR LLaMA-------
-------PLOTS FOR InstructBLIP-------
