In [1]:
import pandas as pd
import numpy as np
import os

In [2]:
# Paired questionnaire item columns: aoa_1..aoa_4 are looked up in the original
# dataset, Q1..Q4 in each LLM response file (paired by position).
aoa_columns = [f'aoa_{item}' for item in range(1, 5)]
q_columns = [f'Q{item}' for item in range(1, 5)]

In [3]:
from scipy.spatial.distance import jensenshannon

def calculate_and_print_js_divergences(original_df, llm_df, aoa_columns, q_columns):
    """Compute and print the Jensen-Shannon divergence for each pair of items.

    Columns are paired by position: ``aoa_columns[i]`` is read from
    ``original_df`` and compared with ``q_columns[i]`` read from ``llm_df``.
    Each column is turned into a distribution over the Likert points
    1..7 before the divergence is computed.

    Args:
        original_df: DataFrame holding the columns named in ``aoa_columns``.
        llm_df: DataFrame holding the columns named in ``q_columns``.
        aoa_columns: Column names to read from ``original_df``.
        q_columns: Column names to read from ``llm_df``; must have the same
            length as ``aoa_columns``.

    Returns:
        A list of ``(aoa_col, q_col, divergence)`` tuples, one per pair, in
        input order. Base-2 logarithms are used, so the value is 0 for
        identical distributions and approaches 1 for disjoint ones.

    Raises:
        ValueError: If the two column lists differ in length, or if a column
            contains no valid response in the range 1..7.
    """
    if len(aoa_columns) != len(q_columns):
        # zip() would silently drop the unpaired columns.
        raise ValueError(
            f"aoa_columns and q_columns must have the same length "
            f"(got {len(aoa_columns)} and {len(q_columns)})"
        )

    def to_probability_distribution(responses, scale=7):
        """Return the relative frequency of each Likert point 1..scale.

        Missing (NaN), non-integer and out-of-range responses are ignored so
        that the result always has exactly ``scale`` entries and sums to 1.
        """
        values = np.asarray(responses, dtype=float)
        values = values[np.isfinite(values)]  # NaN makes pandas columns float
        is_valid = (values >= 1) & (values <= scale) & (values == np.floor(values))
        # np.bincount only accepts non-negative integers, hence the cast.
        counts = np.bincount(values[is_valid].astype(int), minlength=scale + 1)
        counts = counts[1:scale + 1]  # drop the unused 0 bin
        total = counts.sum()
        if total == 0:
            raise ValueError(f"no valid responses in the range 1..{scale}")
        return counts / total

    def jensen_shannon_divergence(P, Q):
        """Jensen-Shannon divergence (base 2) between two distributions."""
        # A tiny constant keeps log2 finite for empty bins; its effect is far
        # below the four decimals that are reported.
        epsilon = 1e-10
        P = P + epsilon
        Q = Q + epsilon
        M = 0.5 * (P + Q)  # mixture distribution
        return 0.5 * np.sum(P * np.log2(P / M)) + 0.5 * np.sum(Q * np.log2(Q / M))

    js_divergences = []
    for aoa_col, q_col in zip(aoa_columns, q_columns):
        dist1 = to_probability_distribution(original_df[aoa_col].values)
        dist2 = to_probability_distribution(llm_df[q_col].values)
        js_divergences.append((aoa_col, q_col, jensen_shannon_divergence(dist1, dist2)))

    print("Jensen-Shannon Divergences:")
    for aoa_col, llm_col, js_div in js_divergences:
        print(f"  {aoa_col} vs {llm_col}: {js_div:.4f}")

    return js_divergences

In [4]:
from scipy.stats import wasserstein_distance

def calculate_and_print_wasserstein_distances(original_df, llm_df, aoa_columns):
    """Compute and print the Wasserstein distance for each paired item.

    For every name in ``aoa_columns`` the matching ``llm_df`` column is
    derived from the number after the underscore (``aoa_3`` -> ``Q3``), and
    the raw response values of the two columns are compared.

    Args:
        original_df: DataFrame holding the columns named in ``aoa_columns``.
        llm_df: DataFrame holding the derived ``Q<n>`` columns.
        aoa_columns: Column names of the form ``<prefix>_<number>``.

    Returns:
        A list of ``(aoa_col, llm_col, distance)`` tuples in input order.
    """

    def _measure(aoa_name):
        # Relies on the "<prefix>_<number>" naming pattern of the aoa columns.
        item_number = int(aoa_name.split("_")[1])
        q_name = f'Q{item_number}'
        gap = wasserstein_distance(original_df[aoa_name], llm_df[q_name])
        return aoa_name, q_name, gap

    # All distances are computed before anything is printed.
    wasserstein_distances = [_measure(name) for name in aoa_columns]

    print("Wasserstein Distances:")
    for source_name, target_name, gap in wasserstein_distances:
        print(f"  {source_name} vs {target_name}: {gap:.4f}")

    return wasserstein_distances

In [5]:
# One record per (folder, metric, item pair); converted to a DataFrame at the end.
results = []

base_path = '../../data'  # Example path for given result data we provided
# The reference responses sit directly inside base_path, next to the per-folder results.
original_df = pd.read_csv(os.path.join(base_path, 'idv_total.csv'), index_col=0)

# os.listdir returns entries in arbitrary order; sort so the printed report and
# the rows of metrics.csv come out in the same order on every run.
for folder in sorted(os.listdir(base_path)):
    folder_path = os.path.join(base_path, folder)
    if not os.path.isdir(folder_path):
        continue

    llm_file = os.path.join(folder_path, 'llm_responses.csv')
    if not os.path.isfile(llm_file):
        # A directory without responses should not abort the whole run.
        print(f"Skipping {folder}: llm_responses.csv not found")
        continue

    llm_df = pd.read_csv(llm_file, index_col=0)
    print(f"==========Results of {folder}===========\n")

    # Calculate and print Jensen-Shannon Divergences
    js_divergences = calculate_and_print_js_divergences(original_df, llm_df, aoa_columns, q_columns)
    results.extend(
        {'Folder': folder, 'Metric': 'Jensen-Shannon', 'Pair': f'{aoa_col} vs {llm_col}', 'Value': div_value}
        for aoa_col, llm_col, div_value in js_divergences
    )

    # Calculate and print Wasserstein Distances
    wasserstein_distances = calculate_and_print_wasserstein_distances(original_df, llm_df, aoa_columns)
    results.extend(
        {'Folder': folder, 'Metric': 'Wasserstein', 'Pair': f'{aoa_col} vs {llm_col}', 'Value': distance}
        for aoa_col, llm_col, distance in wasserstein_distances
    )

    # Per-folder averages over every computed item pair (Q1-Q4 with the
    # configured columns) instead of a hard-coded range(4).
    question_values_js = [value for _, _, value in js_divergences]
    question_values_wasserstein = [value for _, _, value in wasserstein_distances]

    avg_js = sum(question_values_js) / len(question_values_js) if question_values_js else 0
    avg_wasserstein = (
        sum(question_values_wasserstein) / len(question_values_wasserstein)
        if question_values_wasserstein else 0
    )

    results.append({
        'Folder': folder,
        'Metric': 'Jensen-Shannon (Average)',
        'Pair': 'Q1-Q4 Average',
        'Value': avg_js
    })

    results.append({
        'Folder': folder,
        'Metric': 'Wasserstein (Average)',
        'Pair': 'Q1-Q4 Average',
        'Value': avg_wasserstein
    })

    print(f"Average Jensen-Shannon Divergence (Q1-Q4) for {folder}: {avg_js}")
    print(f"Average Wasserstein Distance (Q1-Q4) for {folder}: {avg_wasserstein}")
    print('\n')

# Convert results to DataFrame and save as CSV
results_df = pd.DataFrame(results)
output_dir = '../../analysis_result'
os.makedirs(output_dir, exist_ok=True)  # to_csv does not create missing directories
results_df.to_csv(os.path.join(output_dir, 'metrics.csv'), index=False)


Jensen-Shannon Divergences:
  aoa_1 vs Q1: 0.5360
  aoa_2 vs Q2: 0.2534
  aoa_3 vs Q3: 0.4103
  aoa_4 vs Q4: 0.4303
Wasserstein Distances:
  aoa_1 vs Q1: 1.1621
  aoa_2 vs Q2: 0.7459
  aoa_3 vs Q3: 1.0754
  aoa_4 vs Q4: 0.9755
Average Jensen-Shannon Divergence (Q1-Q4) for baseline_prompting: 0.4074896223336175
Average Wasserstein Distance (Q1-Q4) for baseline_prompting: 0.9897418927862343



Jensen-Shannon Divergences:
  aoa_1 vs Q1: 0.0780
  aoa_2 vs Q2: 0.0332
  aoa_3 vs Q3: 0.0806
  aoa_4 vs Q4: 0.0604
Wasserstein Distances:
  aoa_1 vs Q1: 0.6453
  aoa_2 vs Q2: 0.2991
  aoa_3 vs Q3: 0.7750
  aoa_4 vs Q4: 0.5387
Average Jensen-Shannon Divergence (Q1-Q4) for omni_prompting: 0.06305125931636747
Average Wasserstein Distance (Q1-Q4) for omni_prompting: 0.5645268034414295



Jensen-Shannon Divergences:
  aoa_1 vs Q1: 0.0537
  aoa_2 vs Q2: 0.0710
  aoa_3 vs Q3: 0.1048
  aoa_4 vs Q4: 0.0870
Wasserstein Distances:
  aoa_1 vs Q1: 0.2932
  aoa_2 vs Q2: 0.3891
  aoa_3 vs Q3: 0.7148
  aoa_4 vs 