In [None]:
import pandas as pd
import numpy as np
from scipy import stats
import os 
def transform_column_to_numpy_array(dataframe, column_name):
    """Flatten a column of ';'-separated integer strings into one NumPy array.

    Each cell of ``dataframe[column_name]`` is split on ';', every piece is
    parsed with ``int``, and the pieces of all rows are concatenated in row
    order.

    Args:
        dataframe: Object indexable by ``column_name`` whose result iterates
            over the cells to parse (a pandas DataFrame in this file).
        column_name: Name of the column holding the ';'-separated values.

    Returns:
        A one-dimensional ``numpy.ndarray`` holding the parsed integers.

    Raises:
        ValueError: If a piece cannot be parsed by ``int`` (for example the
            empty piece produced by a trailing ';').
    """
    flattened = []
    for cell in dataframe[column_name]:
        # A single row may contribute several values; keep them in row order.
        flattened.extend(int(token) for token in cell.split(';'))

    return np.array(flattened)

def get_permutation_p(diff_lst, n_permutations=1000, rng=None):
    """Two-sided sign-flip permutation test on a set of paired differences.

    The observed statistic is the mean of ``diff_lst``. A null distribution
    is built by multiplying every difference by a random sign (-1 or +1) and
    taking the mean, repeated ``n_permutations`` times. The p-value is the
    fraction of permuted means whose absolute value is at least as large as
    the absolute observed mean.

    Note: the p-value is the raw proportion, so it can be exactly 0 when no
    permuted mean reaches the observed one.

    Args:
        diff_lst: Array-like of paired differences.
        n_permutations: Number of random sign assignments to draw.
        rng: Optional object exposing ``choice`` (e.g. the result of
            ``numpy.random.default_rng(seed)``) used to draw the signs.
            When ``None`` the global ``numpy.random`` state is used, so
            ``numpy.random.seed`` keeps working as before.

    Returns:
        Tuple ``(observed_stat, p_value)``. Both are NaN when ``diff_lst``
        is empty, because no test can be performed on zero observations.
    """
    diffs = np.asarray(diff_lst)

    if diffs.size == 0:
        # Without this guard every comparison below would involve NaN and
        # evaluate to False, yielding a misleading p-value of exactly 0.0.
        observed_stat = p_value = float('nan')
    else:
        sampler = np.random if rng is None else rng
        observed_stat = np.mean(diffs)

        # One permuted mean per iteration; signs are drawn per call so the
        # random stream matches a plain loop over np.random.choice.
        perm_stats = np.array([
            np.mean(diffs * sampler.choice([-1, 1], size=diffs.size))
            for _ in range(n_permutations)
        ])

        # Two-sided: compare magnitudes only.
        p_value = np.mean(np.abs(perm_stats) >= np.abs(observed_stat))

    print(f'Observed Statistic: {observed_stat}')
    print(f'p-value: {p_value}')
    return observed_stat, p_value

def perform_t_test_and_save(model_name, sae_path, output_csv_path):
    """Run the WF-vs-CF permutation test for one model and append it to a CSV.

    Despite its name, this function does not run a t-test: it delegates to
    ``get_permutation_p`` (a sign-flip permutation test) on the element-wise
    difference ``CF - WF`` of the ``persuasion_counts`` values.

    Steps:
        1. Read ``sae_path`` as CSV.
        2. Select the rows whose ``case`` is ``WF_FD``/``WF_SD`` and the rows
           whose ``case`` is ``CF_FD``/``CF_SD``, each sorted by
           ``dataset`` then ``case``.
        3. Flatten each group's ``persuasion_counts`` column into an array.
        4. Test the paired differences and append one result row to
           ``output_csv_path``.

    NOTE(review): the pairing assumes that, after sorting, the i-th WF value
    and the i-th CF value belong together -- confirm against the data layout.

    Args:
        model_name: Label written to the ``Model`` column.
        sae_path: Path of the per-model results CSV to read.
        output_csv_path: Path of the CSV that receives the result row. The
            row is appended; a header is written only when the file does not
            exist yet or is empty.

    Raises:
        ValueError: If the WF and CF samples differ in length, since the
            test is paired and needs a one-to-one correspondence.
    """
    results = pd.read_csv(sae_path)

    wf_rows = results[results['case'].isin(['WF_FD', 'WF_SD'])].sort_values(by=['dataset', 'case'])
    cf_rows = results[results['case'].isin(['CF_FD', 'CF_SD'])].sort_values(by=['dataset', 'case'])

    sample_wf = transform_column_to_numpy_array(wf_rows, 'persuasion_counts')
    sample_cf = transform_column_to_numpy_array(cf_rows, 'persuasion_counts')

    print(sample_wf)
    print(len(sample_wf))
    print(len(sample_cf))

    # NumPy would silently broadcast a length-1 sample against a longer one,
    # producing a meaningless "paired" difference; reject any mismatch.
    if sample_wf.shape != sample_cf.shape:
        raise ValueError(
            f"WF and CF samples must have the same length for a paired test "
            f"(got {len(sample_wf)} and {len(sample_cf)}) in {sae_path}"
        )

    # The test statistic is mean(CF - WF); the reported average below uses
    # the opposite sign convention (WF - CF), matching its column name.
    observed_stat, p_value = get_permutation_p(sample_cf - sample_wf)
    avg_wf_cf = (sample_wf - sample_cf).mean()
    avg_cf = sample_cf.mean()

    summary = pd.DataFrame(
        [[model_name, "WF vs CF", observed_stat, p_value, avg_wf_cf, avg_cf]],
        columns=["Model", "Metric", "Statistic", "P-Value", "AVG WF-CF", "AVG CF"],
    )

    # An existing but empty file still needs a header row.
    has_content = os.path.isfile(output_csv_path) and os.path.getsize(output_csv_path) > 0

    summary.to_csv(output_csv_path, mode='a', index=False, header=not has_content)

    print(f"Results saved to {output_csv_path}")


In [None]:
# Destination CSV shared by every run below.
out_f = 'permutation_test_results.csv'

# (model label, per-model results CSV), processed in this order.
model_runs = [
    ("Alpaca", "results/results_alpaca.csv"),
    ("GPT-4", "results/results_gpt-4-0613.csv"),
    ("GPT-3.5-instruct", "results/results_gpt-3.5-turbo-instruct.csv"),
    ("Llama7B", "results/results_llama2-7b-chat.csv"),
    ("Llama13B", "results/results_llama2-13b-chat.csv"),
    ("Llama70B", "results/results_llama2-70b-chat.csv"),
    ("Vicuna7B", "results/results_vicuna-7b-v1.5.csv"),
    ("Vicuna13B", "results/results_vicuna-13b-v1.5.csv"),
]

for model_label, results_csv in model_runs:
    perform_t_test_and_save(model_label, results_csv, out_f)
