In [1]:
import os

import numpy as np
import pandas as pd

In [2]:
import warnings
warnings.filterwarnings('ignore', category=FutureWarning)

In [3]:
# Collapses the raw answer codes 1-7 into three categories:
# 1-3 -> -1, 4 -> 0, 5-7 -> 1.
distance_dict = {
    **dict.fromkeys((1, 2, 3), -1),
    4: 0,
    **dict.fromkeys((5, 6, 7), 1),
}

In [4]:
# Directory that is scanned for sub-folders containing LLM responses.
base_path = '../../data'

# Reference answers; the first CSV column becomes the row index.
original_df = pd.read_csv(
    filepath_or_buffer='../../data/idv_total.csv',
    index_col=0,
)

In [5]:
def _to_distance(frame):
    """Return a copy of ``frame`` with every cell looked up in ``distance_dict``.

    Cells whose value is not a key of ``distance_dict`` become missing values.
    The lookup is applied column by column with ``Series.map`` because
    ``DataFrame.applymap`` is deprecated in pandas.
    """
    return frame.apply(lambda column: column.map(distance_dict.get))


def _score_questions(llm_mapped, reference_mapped, question_ids):
    """Compare mapped LLM answers with the mapped reference answers.

    For every question id ``n`` this adds two columns to ``llm_mapped`` (in
    place): ``Q<n>_original`` (copied from ``reference_mapped['aoa_<n>']``)
    and ``Q<n>_check`` (1 where ``Q<n>`` equals ``Q<n>_original``, else 0).

    Args:
        llm_mapped: DataFrame with columns ``Q<n>``; modified in place.
        reference_mapped: DataFrame with columns ``aoa_<n>``.
        question_ids: iterable of question numbers to score.

    Returns:
        dict mapping each question id to its accuracy in percent.
    """
    question_ids = list(question_ids)

    # Column assignment aligns on the index, so rows of ``llm_mapped`` that
    # are absent from the reference get a missing value and never match.
    for q in question_ids:
        llm_mapped[f'Q{q}_original'] = reference_mapped[f'aoa_{q}']

    accuracies = {}
    for q in question_ids:
        matches = llm_mapped[f'Q{q}_original'] == llm_mapped[f'Q{q}']
        llm_mapped[f'Q{q}_check'] = np.where(matches, 1, 0)
        accuracies[q] = llm_mapped[f'Q{q}_check'].mean() * 100
    return accuracies


results = []
question_ids = range(1, 5)

# The reference answers are the same for every folder, so map them only once.
original_df_mapped = _to_distance(original_df)

# os.listdir returns entries in arbitrary order; sorting keeps the printed
# report and the rows of ``results`` reproducible between runs.
for folder in sorted(os.listdir(base_path)):
    folder_path = os.path.join(base_path, folder)
    if not os.path.isdir(folder_path):
        continue

    llm_file = os.path.join(folder_path, 'llm_responses.csv')
    if not os.path.isfile(llm_file):
        # A sub-folder without responses should not abort the whole run.
        print(f"Skipping {folder}: no llm_responses.csv found\n")
        continue

    llm_df = pd.read_csv(llm_file, index_col=0)
    print(f"==========Results of {folder}===========\n")

    llm_df_mapped = _to_distance(llm_df)
    if list(original_df_mapped.index) != list(llm_df_mapped.index):
        # Only reported, not fatal: scoring continues on the aligned rows.
        print("Error!")

    accuracies = _score_questions(llm_df_mapped, original_df_mapped, question_ids)
    mean_accuracy = sum(accuracies.values()) / len(accuracies)

    for q, accuracy in accuracies.items():
        print(f"Q{q} Accuracy: {accuracy:.2f}%")
    print(f"Mean Accuracy: {mean_accuracy:.2f}%\n")

    record = {'Folder': folder}
    record.update({f'Q{q}_Accuracy': accuracy for q, accuracy in accuracies.items()})
    record['Mean_Accuracy'] = mean_accuracy
    results.append(record)


Q1 Accuracy: 53.01%
Q2 Accuracy: 51.56%
Q3 Accuracy: 68.37%
Q4 Accuracy: 78.76%
Mean Accuracy: 62.92%


Q1 Accuracy: 63.40%
Q2 Accuracy: 69.76%
Q3 Accuracy: 71.94%
Q4 Accuracy: 80.41%
Mean Accuracy: 71.38%


Q1 Accuracy: 60.95%
Q2 Accuracy: 67.11%
Q3 Accuracy: 70.28%
Q4 Accuracy: 79.35%
Mean Accuracy: 69.42%


Q1 Accuracy: 40.37%
Q2 Accuracy: 51.22%
Q3 Accuracy: 67.97%
Q4 Accuracy: 78.29%
Mean Accuracy: 59.46%



In [6]:
# Write one row per processed folder to the analysis output directory.
output_path = '../../analysis_result/appendix/consistency_analysis/result.csv'

# to_csv does not create missing parent directories, so create them first.
os.makedirs(os.path.dirname(output_path), exist_ok=True)

results_df = pd.DataFrame(results)
results_df.to_csv(output_path, index=False)