In [32]:
import json
import pandas as pd

class DataProcessor:

    def __init__(self, bias_file, related_file, prompt_file):
        self.bias_file = bias_file
        self.related_file = related_file
        self.prompt_file = prompt_file

    def load_and_process_bias_data(self):
        with open(self.bias_file, 'r') as f:
            data = json.load(f)

        records = []
        for key, value in data.items():
            attribute_sum = sum(value["attribute_counts"].values())
            flattened_record = {
                "data_point": key,
                "attribute_sum": attribute_sum,
                "objects_with_bias": value["objects_with_bias"],
                "total_objects": value["total_objects"]
            }
            records.append(flattened_record)

        df_bias = pd.DataFrame(records)
        return df_bias

    def load_and_process_related_data(self):
        with open(self.related_file, 'r') as f:
            data = json.load(f)

        records = []
        for key, value in data.items():
            attribute_sum = sum(value["attribute_counts"].values())
            flattened_record = {
                "data_point": key,
                "attribute_sum": attribute_sum,
                "objects_with_related": value["objects_with_related"],
                "total_objects": value["total_objects"]
            }
            records.append(flattened_record)

        df_related = pd.DataFrame(records)
        return df_related

    def load_and_process_prompt_data(self):
        with open(self.prompt_file, 'r') as f:
            data = json.load(f)

        data_points = []
        num_related_attributes = []
        num_sensitive_attributes = []

        for index, item in enumerate(data):
            data_points.append(index)
            num_related_attributes.append(len(item['related_attributes']))
            num_sensitive_attributes.append(len(item['sensitive_attributes']))

        df_all = pd.DataFrame({
            "Data Point": data_points,
            "Number of Related Attributes": num_related_attributes,
            "Number of Sensitive Attributes": num_sensitive_attributes
        })

        return df_all

    def combine_and_calculate_accuracy(self):
        df_bias = self.load_and_process_bias_data()
        df_related = self.load_and_process_related_data()
        df_all = self.load_and_process_prompt_data()

        df_all['attribute_related_sum'] = df_related['attribute_sum']
        df_all['attribute_bias_sum'] = df_bias['attribute_sum']

        df_all['accuracy'] = (
            (df_all['attribute_related_sum'] + 5 * df_all['Number of Sensitive Attributes'] - df_all['attribute_bias_sum'])
            / (df_all['Number of Sensitive Attributes'] + df_all['Number of Related Attributes'])
        ) / 5

        return df_all

    def print_summary(self):
        df_all = self.combine_and_calculate_accuracy()
        # print(df_all.head(5))
        print("Average accuracy:", df_all['accuracy'].mean())


def process_model_results(base_path, model_name, iteration=None):
    if iteration is not None:
        bias_file = f"{base_path}/{model_name}/iteration{iteration}/test_result/aggregated_bias_ratios_after.json"
        related_file = f"{base_path}/{model_name}/iteration0/test_result/aggregated_related_ratios_after.json"
    else:
        bias_file = f"{base_path}/{model_name}/test_result/aggregated_bias_ratios_after.json"
        related_file = f"{base_path}/{model_name}/test_result/aggregated_related_ratios_after.json"

    prompt_file = "dataset/tasks.json"

    processor = DataProcessor(bias_file, related_file, prompt_file)
    processor.print_summary()



In [34]:
base_path = "hyp_variations"
process_model_results(base_path, "gpt10default")
process_model_results(base_path, "bison10default")
process_model_results(base_path, "llama10default")
process_model_results(base_path, "claude10default")

Average accuracy: 0.6660338284973853
Average accuracy: 0.7960726402520296
Average accuracy: 0.6959851975981008
Average accuracy: 0.7324813258636788


In [35]:
base_path = "styles"
process_model_results(base_path, "gpt10chain_of_thoughts")
process_model_results(base_path, "gpt10positive_chain_of_thoughts")

Average accuracy: 0.6258653801656717
Average accuracy: 0.6248199967272132


In [33]:
base_path = "iterative"
process_model_results(base_path, "gpt10default", iteration=0)
process_model_results(base_path, "gpt10default", iteration=1)
process_model_results(base_path, "gpt10default", iteration=2)
process_model_results(base_path, "gpt10default", iteration=3)

Average accuracy: 0.6694537269098673
Average accuracy: 0.8114144247830049
Average accuracy: 0.8358127461510919
Average accuracy: 0.8216064586754241


In [36]:
base_path = "styles"
process_model_results(base_path, "bison10chain_of_thoughts")
process_model_results(base_path, "bison10positive_chain_of_thoughts")

Average accuracy: 0.7382514230181869
Average accuracy: 0.786157388125318


In [37]:
base_path = "iterative"
process_model_results(base_path, "bison10default", iteration=0)
process_model_results(base_path, "bison10default", iteration=1)
process_model_results(base_path, "bison10default", iteration=2)
process_model_results(base_path, "bison10default", iteration=3)

Average accuracy: 0.7846695821185616
Average accuracy: 0.8547305764411026
Average accuracy: 0.8062962962962963
Average accuracy: 0.875


In [38]:
base_path = "styles"
process_model_results(base_path, "llama10chain_of_thoughts")
process_model_results(base_path, "llama10positive_chain_of_thoughts")

Average accuracy: 0.6999304029304029
Average accuracy: 0.7181096681096683


In [39]:
base_path = "iterative"
process_model_results(base_path, "llama10default", iteration=0)
process_model_results(base_path, "llama10default", iteration=1)
process_model_results(base_path, "llama10default", iteration=2)
process_model_results(base_path, "llama10default", iteration=3)

Average accuracy: 0.6959851975981008
Average accuracy: 0.7751141552511415
Average accuracy: 0.7477272727272727


KeyError: 'attribute_sum'