In [10]:
import pandas as pd
import re

experiments_list = [
    'erc20_[20_721_1155]',
    'erc20_[20_721]',
    'erc20_[20_1155]',
    'erc20_[20]',
    'erc20_[721_1155]',
    'erc20_[721]',
    'erc20_[1155]',
    'erc20_[]',
]

# Names of the functions to analyse; a reported name is kept when it
# contains any of these as a substring.
DESIRED_FUNCTIONS = ("allowance", "balanceOf", "transfer", "transferFrom", "approve")

# Column layout of the long-format comparison data.
COMPARISON_COLUMNS = ['run', 'function', 'option', 'result']

# Compiled once instead of on every call.
REFINEMENT_PATTERN = re.compile(r"Refinement::(\w+_?\w*): (\w+)")


def extract_function_results(output, desired_functions=DESIRED_FUNCTIONS):
    """Extract ``(function, result)`` pairs from one ``output`` cell.

    Args:
        output: Text of an ``output`` cell. Anything that is not a string
            (for example the NaN pandas yields for an empty CSV cell, or
            ``None``) is treated as containing no results.
        desired_functions: Substrings that select which reported function
            names are kept.

    Returns:
        A list of ``(function, result)`` tuples in order of appearance.
    """
    if not isinstance(output, str):
        return []
    return [
        (func, result)
        for func, result in REFINEMENT_PATTERN.findall(output)
        if any(name in func for name in desired_functions)
    ]


def check_refines_all(row):
    """Return ``'Yes'`` when every value in ``row`` equals ``'OK'``, else ``'No'``.

    ``row`` is one row of the pivot table, i.e. the results of every
    (function, option) column for a single run. Missing entries (NaN) never
    compare equal to ``'OK'`` and therefore yield ``'No'``.
    """
    return 'Yes' if all(value == 'OK' for value in row) else 'No'


def collect_comparison_data(df1, df2):
    """Build long-format comparison rows from the two result dataframes.

    Args:
        df1: ``base_llm`` dataframe with ``run`` and ``output`` columns.
        df2: ``llm_base`` dataframe with ``run`` and ``output`` columns.

    Returns:
        A list of ``[run, function, option, result]`` rows. Only functions
        whose name contains ``"post"`` are included. For each such function
        found in ``df1`` two rows are produced, one per option; the
        ``llm_base`` result is ``'N/A'`` when the function, or the whole run,
        is absent from ``df2``.
    """
    # Index df2 once; setdefault keeps the first row for a duplicated run,
    # matching the previous ``.values[0]`` lookup.
    outputs_2 = {}
    for run, output in zip(df2['run'], df2['output']):
        outputs_2.setdefault(run, output)

    comparison_data = []
    for run, output_1 in zip(df1['run'], df1['output']):
        # A run missing from df2 yields None here, hence no results.
        func_dict_2 = dict(extract_function_results(outputs_2.get(run)))
        for func, result_1 in extract_function_results(output_1):
            if "post" not in func:
                continue
            result_2 = func_dict_2.get(func, 'N/A')
            comparison_data.append([run, func, 'base_llm', result_1])
            comparison_data.append([run, func, 'llm_base', result_2])
    return comparison_data


def build_pivot_table(comparison_df):
    """Pivot the long-format comparison data and add a ``refines all`` column.

    Args:
        comparison_df: Dataframe with the ``COMPARISON_COLUMNS`` columns.

    Returns:
        A dataframe indexed by ``run`` with one column per
        (function, option) pair plus ``refines all``. When ``comparison_df``
        has no rows, an empty dataframe with only the ``refines all`` column
        is returned.
    """
    if comparison_df.empty:
        # Nothing to pivot; return a well-formed empty table instead.
        return pd.DataFrame({'refines all': []}, index=pd.Index([], name='run'))

    table = comparison_df.pivot_table(
        index='run',
        columns=['function', 'option'],
        values='result',
        # Repeated (run, function, option) entries are joined with a space.
        aggfunc=lambda x: ' '.join(x),
    )
    table['refines all'] = table.apply(check_refines_all, axis=1)
    return table


# ``__name__`` is "__main__" both when run as a script and inside a notebook
# cell, so the files are still processed there; the guard only prevents file
# access when the helpers above are imported.
if __name__ == "__main__":
    for experiment in experiments_list:
        # Define file paths
        file_1_path = f'../../erc20/base_llm/refinement_check_{experiment}_base_llm.csv'
        file_2_path = f'../../erc20/llm_base/refinement_check_{experiment}_llm_base.csv'
        comparison_table_file_path = f'./{experiment}_comparison_table.csv'

        # Load the two CSV files from the provided paths
        df1 = pd.read_csv(file_1_path)
        df2 = pd.read_csv(file_2_path)

        # Compare the outputs of both files run by run
        comparison_data = collect_comparison_data(df1, df2)
        comparison_df = pd.DataFrame(comparison_data, columns=COMPARISON_COLUMNS)

        # Pivot to the desired table format and add the "refines all" column
        pivot_table = build_pivot_table(comparison_df)

        # Save the pivot table to a new CSV file
        pivot_table.to_csv(comparison_table_file_path)



FileNotFoundError: [Errno 2] No such file or directory: '../../erc20/base_llm/refinement_check_erc_20_[]_base_llm.csv'