This notebook processes the raw moderation results in order to obtain the binary moderation outputs. Please provide the paths of the pickle files, each storing a dictionary that maps every phrase in the datasets to the moderation output of one of the AI Safety Moderation Classifiers (OpenAI, Clarifai, Google Perspective and Google PaLM2). For each perturbation, list the four paths in the order OpenAI, Google Perspective, Google PaLM2, Clarifai, because the processing cell reads them by position.

In [None]:
# Perturbation labels. They are paired by position with the groups in
# gpt_moderations, so both lists must stay the same length and order.
perturbations = ['german', 'gpt_3.5_turbo']

# One group of pickle paths per perturbation. Within a group the position
# selects the classifier:
#   [0] openai, [1] google perspective, [2] google PaLM2, [3] clarifai
# Replace every <placeholder> with the real file name. The trailing comments
# are kept from the original notebook (presumably the authors' own file names).
gpt_moderations = [
    # German backtranslation
    [
        './results/<openai_moderation_for_german_backtranslation>.pkl',  # comments_backtranslated_german_moderated_openai
        './results/<google_perspective_moderation_for_german_backtranslation>.pkl',  # comments_backtranslated_german_moderation_google_perspective
        './results/<google_palm_moderation_for_german_backtranslation>.pkl',  # comments_backtranslated_german_moderation_google_latest_may25
        './results/<clarifai_moderation_for_german_backtranslation>.pkl',  # german_comment_moderation_clarifai
    ],
    # GPT-3.5-turbo paraphrasing
    [
        './results/<openai_moderation_for_gpt_turbo_paraphrasing>.pkl',  # comments_paraphrased_gpt-3.5_final_moderated_openai
        './results/<google_perspective_moderation_for_gpt_turbo_paraphrasing>.pkl',  # comments_moderation_google_perspective_gpt-3.5_final
        './results/<google_palm_moderation_for_gpt_turbo_paraphrasing>.pkl',  # comment_paraphrased_gpt-3.5_final_moderation_google_latest
        './results/<clarifai_moderation_for_gpt_turbo_paraphrasing>.pkl',  # comment_paraphrased_gpt-3.5_final_moderation_clarifai
    ],
]

In [None]:
import pickle
import utils

def read_pickle(filepath):
    """Load and return the object stored in the pickle file at ``filepath``.

    The file is opened in binary mode and closed again before returning.

    NOTE: unpickling can execute arbitrary code, so only pass paths to
    files that come from a trusted source.
    """
    with open(filepath, 'rb') as pickle_file:
        return pickle.load(pickle_file)

# For every perturbation: load the four raw moderation pickles, run each one
# through its classifier-specific processor in `utils`, merge the results per
# phrase, and pickle the merged dictionary under ./results/.
for moderation_array, perturbation in zip(gpt_moderations, perturbations):

    # Positions within moderation_array are fixed:
    # 0 = openai, 1 = perspective, 2 = google, 3 = clarifai.
    moderation_map_openai = utils.process_openai(read_pickle(moderation_array[0]))
    moderation_map_perspective = utils.process_perspective(read_pickle(moderation_array[1]))
    moderation_map_google = utils.process_google(read_pickle(moderation_array[2]))
    moderation_map_clarifai = utils.process_clarifai(read_pickle(moderation_array[3]))

    moderation_maps = (
        moderation_map_openai,
        moderation_map_perspective,
        moderation_map_google,
        moderation_map_clarifai,
    )

    # Union of the keys seen by any classifier, so no key is dropped just
    # because one classifier has no entry for it.
    all_keys = {key for moderation_map in moderation_maps for key in moderation_map.keys()}

    # A classifier with no entry for a key is recorded as the string 'NULL'.
    combined_dict = {
        key: {
            'openai': moderation_map_openai.get(key, 'NULL'),
            'clarifai': moderation_map_clarifai.get(key, 'NULL'),
            'perspective': moderation_map_perspective.get(key, 'NULL'),
            'google': moderation_map_google.get(key, 'NULL'),
        }
        for key in all_keys
    }

    output_path = f'./results/moderation_results_fairness_perturbed_{perturbation}.pkl'
    with open(output_path, 'wb') as handle:
        pickle.dump(combined_dict, handle)