In [None]:
import pandas as pd
import numpy as np
from scipy import stats
import os 
def get_permutation_p(diff_lst, n_permutations=1000, seed=None):
    """Two-sided sign-flip permutation test for the mean of paired differences.

    The observed statistic is the mean of ``diff_lst``. For each of
    ``n_permutations`` resamples every difference is multiplied by a random
    sign (-1 or +1) and the mean is recomputed. The p-value is the fraction
    of resampled means whose absolute value is at least as large as the
    absolute observed mean.

    Parameters
    ----------
    diff_lst : array-like of numbers
        Paired differences (one value per pair).
    n_permutations : int, default 1000
        Number of random sign assignments to draw.
    seed : int or numpy.random.Generator, optional
        When given, the signs are drawn from ``np.random.default_rng(seed)``
        so the call is reproducible on its own. When ``None`` (default) the
        global NumPy random state is used, so ``np.random.seed(...)`` set by
        the caller still controls the result.

    Returns
    -------
    tuple
        ``(observed_stat, p_value)``. Both are NaN for empty input, and the
        p-value is NaN whenever the observed statistic is NaN (for example
        when the input contains NaN) or ``n_permutations`` is below 1.
    """
    diffs = np.asarray(diff_lst, dtype=float)

    # Skip np.mean on an empty array: it would only emit a RuntimeWarning
    # and produce NaN anyway.
    observed_stat = np.mean(diffs) if diffs.size else np.nan

    if np.isnan(observed_stat) or n_permutations < 1:
        # Any comparison against NaN is False, which would otherwise be
        # reported as p = 0.0, i.e. a spuriously "significant" result.
        p_value = np.nan
    else:
        # The np.random module and a Generator both expose ``choice``.
        rng = np.random if seed is None else np.random.default_rng(seed)

        perm_stats = np.empty(n_permutations)
        for i in range(n_permutations):
            # Randomly flip the sign of every difference, then take the mean.
            signs = rng.choice([-1, 1], size=diffs.size)
            perm_stats[i] = np.mean(diffs * signs)

        # Two-sided p-value: share of resamples at least as extreme as observed.
        p_value = np.mean(np.abs(perm_stats) >= np.abs(observed_stat))

    print(f'Observed Statistic: {observed_stat}')
    print(f'p-value: {p_value}')
    return observed_stat, p_value

def perform_permutation_tests_and_save(model_name, sae_path, esl_path, aae_path, output_csv_path):
    """Run pairwise permutation tests on three score files and append the results to a CSV.

    Each input is read as a headerless, comma-separated file and the values
    in column 1 are used as floats. For the pairs SAE-ESL, SAE-AAE and
    ESL-AAE the element-wise difference is passed to ``get_permutation_p``
    and one result row is written per pair.

    NOTE(review): the element-wise subtraction assumes row i of every file
    refers to the same item (paired samples) -- confirm against the files.

    Parameters
    ----------
    model_name : str
        Value written to the "Model" column.
    sae_path, esl_path, aae_path : str
        Paths of the three score files.
    output_csv_path : str
        CSV to append to. The header row is written only when the file does
        not exist yet.
    """
    # Read all three files before touching their columns.
    frames = [
        pd.read_csv(path, sep=",", header=None)
        for path in (sae_path, esl_path, aae_path)
    ]
    sae_scores, esl_scores, aae_scores = (
        frame[1].to_numpy(dtype=float) for frame in frames
    )

    # (row label, minuend, subtrahend) for each pairwise comparison, in output order.
    comparisons = (
        ("SAE-ESL", sae_scores, esl_scores),
        ("SAE-AAE", sae_scores, aae_scores),
        ("ESL-AAE", esl_scores, aae_scores),
    )

    # One sign-flip permutation test per pair of paired samples.
    rows = []
    for pair_label, left, right in comparisons:
        delta = left - right
        statistic, p_value = get_permutation_p(delta)
        rows.append([model_name, "AES_Score", pair_label, statistic, p_value, delta.mean()])

    results = pd.DataFrame(
        rows,
        columns=["Model", "Metric", "Pair", "Statistic", "P-Value", "Avg Delta"],
    )

    # Always append; only a brand-new file gets the header row.
    write_header = not os.path.isfile(output_csv_path)
    results.to_csv(output_csv_path, mode='a', index=False, header=write_header)

    print(f"Results saved to {output_csv_path}")


In [None]:
# Seed NumPy's global random state once so the permutation p-values are the
# same on every "Restart Kernel -> Run All"; without it each run draws
# different sign flips and reports different p-values.
SEED = 42
np.random.seed(SEED)

# Score-file prefix -> display name written to the "Model" column.
MODEL_LABELS = {
    'alpaca': 'Alpaca',
    'gpt-4-1106-preview': 'GPT-4',
    'llama2_7b': 'Llama7B',
    'llama2_13b': 'Llama13B',
    'llama2_70b': "Llama70B",
    'vicuna_7b': 'Vicuna7B',
    'vicuna_13b': 'Vicuna13B',
}

# NOTE(review): the file name says "ttest", but the rows are produced by
# perform_permutation_tests_and_save (permutation tests) -- consider renaming.
# Rows are appended to an existing file, so re-running this cell adds
# duplicate rows unless the CSV is removed first.
RESULTS_CSV = "ttest_results.csv"

for model, val in MODEL_LABELS.items():
    perform_permutation_tests_and_save(
        val,
        f"scores/{model}_sae_results.txt",
        f"scores/{model}_esl_results.txt",
        f"scores/{model}_aae_results.txt",
        RESULTS_CSV,
    )