In [1]:
import pandas as pd
import numpy as np
import os
import warnings
warnings.filterwarnings("ignore")

In [2]:
# Column names of the paired questionnaire items.
# `llm_responses` are the answer columns in the LLM result files (Q1..Q10);
# `ori_responses` are the matching columns of the original survey data
# (presumably satisfaction / loyalty / trust items -- confirm against the
# dataset's codebook). Position i in one list pairs with position i in the other.
llm_responses = [f'Q{number}' for number in range(1, 11)]
ori_responses = [
    f'{prefix}_{item}'
    for prefix, item_count in (('SAT', 3), ('LOY', 3), ('TRUST', 4))
    for item in range(1, item_count + 1)
]

In [3]:
from scipy.spatial.distance import jensenshannon

def calculate_and_store_js_divergences(original_df, llm_df, ori_responses, llm_responses):
    """Compute the Jensen-Shannon divergence for each pair of answer columns.

    For every pair ``(ori_responses[i], llm_responses[i])`` the answers in
    ``original_df`` and ``llm_df`` are turned into relative-frequency
    distributions over the Likert categories and compared with the base-2
    Jensen-Shannon divergence. The per-pair values are printed and returned.

    Note that this is the divergence itself, not the Jensen-Shannon *distance*
    (its square root) that ``scipy.spatial.distance.jensenshannon`` returns.

    Parameters
    ----------
    original_df : pandas.DataFrame
        Frame containing the columns named in ``ori_responses``.
    llm_df : pandas.DataFrame
        Frame containing the columns named in ``llm_responses``.
    ori_responses, llm_responses : sequence of str
        Column names, paired by position. Must have the same length.

    Returns
    -------
    list of tuple
        ``(original_column, llm_column, divergence)`` for each pair, in input
        order. The divergence is NaN when either column has no valid answer.

    Raises
    ------
    ValueError
        If the two column lists differ in length.
    """
    if len(ori_responses) != len(llm_responses):
        # zip() would silently drop the unmatched tail otherwise.
        raise ValueError(
            "ori_responses and llm_responses must have the same length "
            f"({len(ori_responses)} != {len(llm_responses)})"
        )

    def to_probability_distribution(responses, scale=7):
        """Relative frequency of each answer 1..scale (array of length `scale`).

        Missing, non-numeric, non-integer and out-of-scale answers are ignored,
        so the result always has `scale` entries and sums to 1 (or is all-NaN
        when nothing valid remains).
        """
        values = pd.to_numeric(pd.Series(responses), errors="coerce").dropna()
        on_scale = values.between(1, scale) & (values % 1 == 0)
        # np.bincount only accepts non-negative integers.
        categories = values[on_scale].astype(int).to_numpy()
        counts = np.bincount(categories, minlength=scale + 1)[1:]  # drop unused bin 0
        total = counts.sum()
        if total == 0:
            return np.full(scale, np.nan)
        return counts / total

    def jensen_shannon_divergence(P, Q):
        """Base-2 Jensen-Shannon divergence between two discrete distributions."""
        epsilon = 1e-10  # keeps log2 finite for empty categories
        P = P + epsilon
        Q = Q + epsilon
        M = 0.5 * (P + Q)
        return 0.5 * np.sum(P * np.log2(P / M)) + 0.5 * np.sum(Q * np.log2(Q / M))

    js_divergences = []
    for aoa_col, q_col in zip(ori_responses, llm_responses):
        original_dist = to_probability_distribution(original_df[aoa_col].values)
        llm_dist = to_probability_distribution(llm_df[q_col].values)
        js_div = jensen_shannon_divergence(original_dist, llm_dist)
        js_divergences.append((aoa_col, q_col, js_div))

    print("Jensen-Shannon Divergences:")
    for aoa_col, llm_col, js_div in js_divergences:
        print(f"  {aoa_col} vs {llm_col}: {js_div:.4f}")
    return js_divergences

In [4]:
from scipy.stats import wasserstein_distance

def calculate_and_store_wasserstein_distances(original_df, llm_df, ori_responses, llm_responses=None):
    """Compute the Wasserstein distance for each pair of answer columns.

    Each original column is compared with its LLM counterpart as two empirical
    distributions, so the two frames do not need matching indices or the same
    number of rows. Missing or non-numeric answers are dropped before the
    comparison. The per-pair values are printed and returned.

    Unlike the earlier implementation, ``llm_df`` is not modified: no
    ``Q{i}_original`` helper columns are added to it.

    Parameters
    ----------
    original_df : pandas.DataFrame
        Frame containing the columns named in ``ori_responses``.
    llm_df : pandas.DataFrame
        Frame containing the LLM answer columns.
    ori_responses : sequence of str
        Column names in ``original_df``.
    llm_responses : sequence of str, optional
        Column names in ``llm_df``, paired by position with ``ori_responses``.
        Defaults to ``Q1 .. Qn`` where ``n = len(ori_responses)``.

    Returns
    -------
    list of tuple
        ``(original_column, llm_column, distance)`` for each pair, in input
        order. The distance is NaN when either column has no usable answer.

    Raises
    ------
    ValueError
        If ``llm_responses`` is given and its length differs from
        ``ori_responses``.
    """
    if llm_responses is None:
        llm_responses = [f'Q{i}' for i in range(1, len(ori_responses) + 1)]
    elif len(llm_responses) != len(ori_responses):
        raise ValueError(
            "ori_responses and llm_responses must have the same length "
            f"({len(ori_responses)} != {len(llm_responses)})"
        )

    def _numeric_answers(column):
        """Return the column's numeric answers as a float array, NaNs removed."""
        return pd.to_numeric(column, errors="coerce").dropna().to_numpy(dtype=float)

    wasserstein_distances = []
    for ori_col, llm_col in zip(ori_responses, llm_responses):
        # Read each column from its own frame; copying the original column into
        # llm_df would align on index and could drop or NaN-out answers.
        original_values = _numeric_answers(original_df[ori_col])
        llm_values = _numeric_answers(llm_df[llm_col])
        if original_values.size == 0 or llm_values.size == 0:
            distance = float("nan")  # scipy rejects empty samples
        else:
            distance = wasserstein_distance(original_values, llm_values)
        wasserstein_distances.append((ori_col, llm_col, distance))

    print("Wasserstein Distances:")
    for aoa_col, llm_col, distance in wasserstein_distances:
        print(f"  {aoa_col} vs {llm_col}: {distance:.4f}")
    return wasserstein_distances

In [5]:
# Evaluate every result folder under `base_path` against the original survey
# data and collect all metrics in one long-format table.
original_df = pd.read_excel('DataInBrief_Bankdata.xlsx') ## example path for given result data we provided
base_path = './Data_Case2' ## example path for given result data we provided
output_path = './analysis_result/metrics_case2.csv'

results = []
# Sorted so the row order of the output file is reproducible across runs
# (os.listdir returns entries in arbitrary order).
for folder in sorted(os.listdir(base_path)):
    folder_path = os.path.join(base_path, folder)
    if not os.path.isdir(folder_path):
        continue
    llm_file = os.path.join(folder_path, 'llm_responses.csv')
    if not os.path.isfile(llm_file):
        # e.g. `.ipynb_checkpoints` or other helper directories without results
        print(f"Skipping {folder}: no llm_responses.csv found")
        continue
    llm_df = pd.read_csv(llm_file, index_col=0)
    print(f"==========Results of {folder}===========\n")

    # Per-question metrics: each entry is (original column, LLM column, value).
    js_divergences = calculate_and_store_js_divergences(original_df, llm_df, ori_responses, llm_responses)
    for aoa_col, llm_col, div_value in js_divergences:
        results.append({'Folder': folder, 'Metric': 'Jensen-Shannon', 'Pair': f'{aoa_col} vs {llm_col}', 'Value': div_value})
    wasserstein_distances = calculate_and_store_wasserstein_distances(original_df, llm_df, ori_responses)
    for aoa_col, llm_col, distance in wasserstein_distances:
        results.append({'Folder': folder, 'Metric': 'Wasserstein', 'Pair': f'{aoa_col} vs {llm_col}', 'Value': distance})

    # Folder-level summary: plain mean over all question pairs (0 if there are none).
    question_values_js = [value for _, _, value in js_divergences]
    question_values_wasserstein = [value for _, _, value in wasserstein_distances]
    avg_js = sum(question_values_js) / len(question_values_js) if question_values_js else 0
    avg_wasserstein = sum(question_values_wasserstein) / len(question_values_wasserstein) if question_values_wasserstein else 0

    results.append({
        'Folder': folder,
        'Metric': 'Jensen-Shannon (Average)',
        'Pair': 'Q1-Q10 Average',
        'Value': avg_js
    })
    results.append({
        'Folder': folder,
        'Metric': 'Wasserstein (Average)',
        'Pair': 'Q1-Q10 Average',
        'Value': avg_wasserstein
    })

    print(f"Average Jensen-Shannon Divergence (Q1-Q10) for {folder}: {avg_js}")
    print(f"Average Wasserstein Distance (Q1-Q10) for {folder}: {avg_wasserstein}")
    print('\n')

results_df = pd.DataFrame(results)
# Create the output directory first so the computed results are not lost to a
# missing-directory error at the very last step.
os.makedirs(os.path.dirname(output_path), exist_ok=True)
results_df.to_csv(output_path, index=False)


Jensen-Shannon Divergences:
  SAT_1 vs Q1: 0.2908
  SAT_2 vs Q2: 0.2989
  SAT_3 vs Q3: 0.4801
  LOY_1 vs Q4: 0.2774
  LOY_2 vs Q5: 0.3330
  LOY_3 vs Q6: 0.3775
  TRUST_1 vs Q7: 0.2979
  TRUST_2 vs Q8: 0.3228
  TRUST_3 vs Q9: 0.2383
  TRUST_4 vs Q10: 0.2307
Wasserstein Distances:
  SAT_1 vs Q1: 0.7748
  SAT_2 vs Q2: 0.7985
  SAT_3 vs Q3: 1.1748
  LOY_1 vs Q4: 0.7881
  LOY_2 vs Q5: 0.8711
  LOY_3 vs Q6: 1.2059
  TRUST_1 vs Q7: 1.1185
  TRUST_2 vs Q8: 1.0607
  TRUST_3 vs Q9: 0.7467
  TRUST_4 vs Q10: 0.6993
Average Jensen-Shannon Divergence (Q1-Q10) for Baseline_prompting: 0.31474407373239144
Average Wasserstein Distance (Q1-Q10) for Baseline_prompting: 0.9238518518518518



Jensen-Shannon Divergences:
  SAT_1 vs Q1: 0.2602
  SAT_2 vs Q2: 0.2042
  SAT_3 vs Q3: 0.3197
  LOY_1 vs Q4: 0.2309
  LOY_2 vs Q5: 0.2389
  LOY_3 vs Q6: 0.1511
  TRUST_1 vs Q7: 0.3238
  TRUST_2 vs Q8: 0.3036
  TRUST_3 vs Q9: 0.1658
  TRUST_4 vs Q10: 0.2187
Wasserstein Distances:
  SAT_1 vs Q1: 0.8311
  SAT_2 vs Q2: 0.