In [1]:
import pandas as pd
import numpy as np
import os
import warnings
warnings.filterwarnings("ignore")

In [2]:
# Survey items to compare. The two lists are paired by position:
# ori_responses[i] (a column of the original survey data) is compared against
# llm_responses[i] (a column of the LLM-generated responses).
llm_responses = ["Q1", "Q2", "Q3"]
ori_responses = ["LOY_1", "LOY_2", "LOY_3"]

In [3]:
from scipy.spatial.distance import jensenshannon

def calculate_and_store_js_divergences(original_df, llm_df, ori_responses, llm_responses, scale=7):
    """Compute the Jensen-Shannon divergence for each pair of response columns.

    Each column is turned into a discrete distribution over the Likert
    categories ``1..scale`` and the divergence (base-2 logarithm) between the
    original and the LLM distribution is computed. The per-pair results are
    printed and returned.

    Parameters
    ----------
    original_df : pandas.DataFrame
        Frame holding the columns named in ``ori_responses``.
    llm_df : pandas.DataFrame
        Frame holding the columns named in ``llm_responses``.
    ori_responses, llm_responses : sequence of str
        Column names, paired by position (extra names in the longer sequence
        are ignored, as with ``zip``).
    scale : int, optional
        Number of Likert categories (default 7, i.e. answers 1..7).

    Returns
    -------
    list of tuple
        ``(original_column, llm_column, js_divergence)`` for every pair.

    Raises
    ------
    ValueError
        If a column contains no response inside ``1..scale``.
    """

    def to_probability_distribution(responses):
        # Work in float so that columns pandas loaded as float64 (which
        # happens as soon as one cell is missing) are accepted; np.bincount
        # refuses float input.
        values = np.asarray(responses, dtype=float)
        values = values[np.isfinite(values)]  # drop NaN / inf
        # Keep only whole-number answers that lie on the scale. Anything else
        # (0, scale+1, 3.5, ...) would otherwise distort the normalisation or
        # change the length of the histogram.
        on_scale = (values >= 1) & (values <= scale) & (values == np.floor(values))
        valid = values[on_scale].astype(int)
        if valid.size == 0:
            raise ValueError(f"No valid responses in the range 1..{scale}")
        # Index 0 is never a valid answer, so drop it before normalising.
        counts = np.bincount(valid, minlength=scale + 1)[1:]
        return counts / counts.sum()

    def jensen_shannon_divergence(P, Q):
        epsilon = 1e-10  # keeps log2 finite for empty categories
        P = P + epsilon
        Q = Q + epsilon
        M = 0.5 * (P + Q)  # mixture distribution
        return 0.5 * np.sum(P * np.log2(P / M)) + 0.5 * np.sum(Q * np.log2(Q / M))

    js_divergences = []
    for ori_col, llm_col in zip(ori_responses, llm_responses):
        ori_dist = to_probability_distribution(original_df[ori_col].values)
        llm_dist = to_probability_distribution(llm_df[llm_col].values)
        js_divergences.append((ori_col, llm_col, jensen_shannon_divergence(ori_dist, llm_dist)))

    print("Jensen-Shannon Divergences:")
    for ori_col, llm_col, js_div in js_divergences:
        print(f"  {ori_col} vs {llm_col}: {js_div:.4f}")
    return js_divergences

In [4]:
from scipy.stats import wasserstein_distance

def calculate_and_store_wasserstein_distances(original_df, llm_df, ori_responses, llm_responses=None):
    """Compute the Wasserstein distance for each pair of response columns.

    The original responses are aligned to ``llm_df``'s index before the
    comparison (rows of ``original_df`` whose index label does not occur in
    ``llm_df`` are left out), then missing values are dropped from both
    samples. Neither input frame is modified. The per-pair results are printed
    and returned.

    Parameters
    ----------
    original_df : pandas.DataFrame
        Frame holding the columns named in ``ori_responses``.
    llm_df : pandas.DataFrame
        Frame holding the columns named in ``llm_responses``.
    ori_responses : sequence of str
        Column names in ``original_df``.
    llm_responses : sequence of str, optional
        Column names in ``llm_df``, paired by position with ``ori_responses``.
        Defaults to ``('Q1', 'Q2', 'Q3')``.

    Returns
    -------
    list of tuple
        ``(original_column, llm_column, distance)`` for every pair.

    Raises
    ------
    ValueError
        If either sample of a pair is empty after alignment and NaN removal.
    """
    if llm_responses is None:
        llm_responses = ('Q1', 'Q2', 'Q3')

    wasserstein_distances = []
    for ori_col, llm_col in zip(ori_responses, llm_responses):
        # reindex() performs the same index alignment that assigning the
        # column into llm_df would, but without touching the caller's frame.
        ori_values = original_df[ori_col].reindex(llm_df.index).dropna().to_numpy(dtype=float)
        llm_values = llm_df[llm_col].dropna().to_numpy(dtype=float)
        if ori_values.size == 0 or llm_values.size == 0:
            raise ValueError(
                f"No overlapping, non-missing responses to compare for {ori_col} vs {llm_col}"
            )
        distance = wasserstein_distance(llm_values, ori_values)
        wasserstein_distances.append((ori_col, llm_col, distance))

    print("Wasserstein Distances:")
    for aoa_col, llm_col, distance in wasserstein_distances:
        print(f"  {aoa_col} vs {llm_col}: {distance:.4f}")
    return wasserstein_distances

In [5]:
# Compare every LLM run found under base_path against the original survey data
# and collect the Jensen-Shannon / Wasserstein metrics in one CSV file.
original_df = pd.read_excel('DataInBrief_Bankdata.xlsx') ## example path for given result data we provided
base_path = './Data_Case1' ## example path for given result data we provided
output_path = './analysis_result/metrics_case1.csv'
results=[]
# sorted(): os.listdir returns entries in arbitrary order; sorting keeps the
# printed report and the CSV row order reproducible between runs.
for folder in sorted(os.listdir(base_path)):
    folder_path = os.path.join(base_path, folder)
    if not os.path.isdir(folder_path):
        continue
    llm_file = os.path.join(folder_path, 'llm_responses.csv')
    if not os.path.isfile(llm_file):
        # A stray sub-folder should not abort the whole evaluation.
        print(f"Skipping {folder}: {llm_file} not found")
        continue
    llm_df = pd.read_csv(llm_file, index_col=0)
    print(f"==========Results of {folder}===========\n")

    js_divergences = calculate_and_store_js_divergences(original_df, llm_df, ori_responses, llm_responses)
    for aoa_col, llm_col, div_value in js_divergences:
        results.append({'Folder': folder, 'Metric': 'Jensen-Shannon', 'Pair': f'{aoa_col} vs {llm_col}', 'Value': div_value})

    wasserstein_distances = calculate_and_store_wasserstein_distances(original_df, llm_df, ori_responses)
    for aoa_col, llm_col, distance in wasserstein_distances:
        results.append({'Folder': folder, 'Metric': 'Wasserstein', 'Pair': f'{aoa_col} vs {llm_col}', 'Value': distance})

    # Per-question metric values (third tuple element) for the folder average.
    question_values_js = [value for _, _, value in js_divergences]
    question_values_wasserstein = [value for _, _, value in wasserstein_distances]

    avg_js = sum(question_values_js) / len(question_values_js) if question_values_js else 0
    avg_wasserstein = sum(question_values_wasserstein) / len(question_values_wasserstein) if question_values_wasserstein else 0

    results.append({
        'Folder': folder,
        'Metric': 'Jensen-Shannon (Average)',
        'Pair': 'Q1-Q3 Average',
        'Value': avg_js
    })

    results.append({
        'Folder': folder,
        'Metric': 'Wasserstein (Average)',
        'Pair': 'Q1-Q3 Average',
        'Value': avg_wasserstein
    })

    print(f"Average Jensen-Shannon Divergence (Q1-Q3) for {folder}: {avg_js}")
    print(f"Average Wasserstein Distance (Q1-Q3) for {folder}: {avg_wasserstein}")
    print('\n')

results_df = pd.DataFrame(results)
# Create the output directory first so the run does not fail at the very end.
os.makedirs(os.path.dirname(output_path), exist_ok=True)
results_df.to_csv(output_path, index=False)


Jensen-Shannon Divergences:
  LOY_1 vs Q1: 0.2943
  LOY_2 vs Q2: 0.3526
  LOY_3 vs Q3: 0.1555
Wasserstein Distances:
  LOY_1 vs Q1: 0.7748
  LOY_2 vs Q2: 1.3926
  LOY_3 vs Q3: 0.6830
Average Jensen-Shannon Divergence (Q1-Q3) for Demo_prompting: 0.2674496762424207
Average Wasserstein Distance (Q1-Q3) for Demo_prompting: 0.9501234567901232



Jensen-Shannon Divergences:
  LOY_1 vs Q1: 0.0551
  LOY_2 vs Q2: 0.0696
  LOY_3 vs Q3: 0.1894
Wasserstein Distances:
  LOY_1 vs Q1: 0.3733
  LOY_2 vs Q2: 0.4563
  LOY_3 vs Q3: 1.2281
Average Jensen-Shannon Divergence (Q1-Q3) for Omni_prompting: 0.10470513598787261
Average Wasserstein Distance (Q1-Q3) for Omni_prompting: 0.6859259259259259


