In [1]:
import pickle
import fairness_metrics
import pandas as pd
from tqdm import tqdm
import utils

In [2]:
# read the data
ideology_df = pd.read_csv('./data/processed_annotated_comments.csv')
ideology_df['label'] = ideology_df['label'].apply(lambda x: None if x not in ['left', 'right'] else x)
ideology_df.dropna(inplace=True)
gender_df = pd.read_csv('./data/jigsaw/gender.csv')
race_df = pd.read_csv('./data/jigsaw/race.csv')
disability_df = pd.read_csv('./data/jigsaw/disability.csv')
sexual_orientation_df = pd.read_csv('./data/jigsaw/sexual_orientation.csv')
dfs = [ideology_df, gender_df, race_df, disability_df, sexual_orientation_df]
names = ['ideology', 'gender', 'race', 'disability', 'sexual orientation']

In [3]:
# load gold standard moderation if raw results are saved

# with open('./results/<openai_moderation>.pkl', 'rb') as handle: # comments_moderated_openai
#     gold1 = pickle.load(handle)

# with open('./results/<perspective_moderation>.pkl', 'rb') as handle: # comments_moderated_perspective_nonmanager
#     gold2 = pickle.load(handle)

# with open('./results/<google_palm_moderation>.pkl', 'rb') as handle: # comments_moderation_google
#     gold3 = pickle.load(handle)

# with open('./results/<clarifai_moderation>.pkl', 'rb') as handle: # comment_moderation_clarifai
#     gold4 = pickle.load(handle)

# gold1 = utils.process_openai(gold1)
# gold2 = utils.process_perspective(gold2)
# gold3 = utils.process_google(gold3)
# gold4 = utils.process_clarifai(gold4)

In [4]:
# load preprocessed results
import zipfile
with zipfile.ZipFile('./results/moderation_results.zip', 'r') as zip_ref:
    zip_ref.extractall('./results/')

with open('./results/moderation_results.pkl', 'rb') as file:
    fairness_results = pickle.load(file)


gold1 = {k:v['openai'] for k,v in fairness_results.items()}
gold2 = {k:v['perspective'] for k,v in fairness_results.items()}
gold3 = {k:v['google'] for k,v in fairness_results.items() if v['google'] in [True, False]}
gold4 = {k:v['clarifai'] for k,v in fairness_results.items()}

gold1 = {k: 0 if v is True else 1 for k,v in gold1.items()}
gold2 = {k: 0 if v is True else 1 for k,v in gold2.items()}
gold3 = {k:0 if v is True else 1 for k,v in gold3.items()}
gold4 = {k:0 if v is True else 1 for k,v in gold4.items()}

In [18]:
golds = [gold1, gold2, gold3, gold4]

methods = ['openai', 'perspective','google', 'clarifai']
perturbations = ['german', 'gpt_3.5_turbo']

results = []
global_results = {}

perturbation_map = {}

with zipfile.ZipFile('./results/comments_backtranslated_german_similarity.zip', 'r') as zip_ref:
    zip_ref.extractall('./results/')

with open('./results/comments_backtranslated_german_similarity.pkl', 'rb') as handle:
        perturbation_map[perturbations[0]] = pickle.load(handle)

with zipfile.ZipFile('./results/comment_paraphrased_gpt-3.5_final.zip', 'r') as zip_ref:
    zip_ref.extractall('./results/')

with open('./results/comment_paraphrased_gpt-3.5_final.pkl', 'rb') as handle:
        perturbation_map[perturbations[1]] = pickle.load(handle)

with zipfile.ZipFile(f'./results/moderation_results_fairness_perturbed_{perturbation}.zip', 'r') as zip_ref:
    zip_ref.extractall('./results/')

for perturbation in perturbations:
    with open(f'./results/moderation_results_fairness_perturbed_{perturbation}.pkl', 'rb') as handle:
        fairness_results = pickle.load(handle)
    
    phrase_map = perturbation_map[perturbation]
    if perturbation == "german":
        phrase_map = {k:v['augmented'] for k,v in phrase_map.items() if v['score'] > 0.85 and v['score'] != 1.0}

    for gold, method in zip(golds, methods):
        local_gold = gold.copy()
        global_results[method] = {}
        
        # check to ignore NULL values for phrases where moderation did not run
        fairness_results = {k:v for k,v in fairness_results.items() if fairness_results[k][method] in [True, False]}
        
        data = {k:1-int(fairness_results[v][method]) for k,v in phrase_map.items() if v in fairness_results.keys()}

        local_gold = {k:v for k,v in local_gold.items() if k in data.keys() and v in [True, False]}

        # create lists and compute robustness
        global_results[method][perturbation] = {}
        for subset, name in zip(dfs, names):
            df = subset.copy()
            df = df[df['text'].isin(list(local_gold.keys()))]
            a = [local_gold[k] for k in df['text'].tolist()]
            b = [data[k] for k in df['text'].tolist()]
            
            r = fairness_metrics.robustness(a, b)
            global_results[method][perturbation][name] = r*100
            print(r*100, method, perturbation, name)


5.734406438631791 openai german ideology
4.916041560658536 openai german gender
5.706883523442841 openai german race
4.741029387887644 openai german disability
5.692062930882093 openai german sexual orientation
1.8108651911468814 perspective german ideology
1.268814733351936 perspective german gender
1.7847504587816199 perspective german race
1.6885858093846402 perspective german disability
2.6339048965882976 perspective german sexual orientation
6.438631790744467 google german ideology
3.9319017599159443 google german gender
4.808869051904111 google german race
2.8306490971205465 google german disability
3.6591833127099167 google german sexual orientation
2.3138832997987926 clarifai german ideology
1.7417713206927685 clarifai german gender
1.6592880538458772 clarifai german race
2.2568598798506248 clarifai german disability
2.899063107654234 clarifai german sexual orientation
20.052310374891018 openai gpt_3.5_turbo ideology
20.088775790523837 openai gpt_3.5_turbo gender
28.33258020786

In [None]:
global_results

{'openai': {'backtranslated': {'ideology': 5.734406438631791,
   'gender': 4.916041560658536,
   'race': 5.706883523442841,
   'disability': 4.741029387887644,
   'sexual orientation': 5.692062930882093},
  'paraphrased': {'ideology': 20.422535211267608,
   'gender': 20.360126717331713,
   'race': 28.57399877658235,
   'disability': 21.302159441467772,
   'sexual orientation': 32.172529609333566}},
 'perspective': {'backtranslated': {'ideology': 1.8108651911468814,
   'gender': 1.268814733351936,
   'race': 1.7847504587816199,
   'disability': 1.6885858093846402,
   'sexual orientation': 2.6339048965882976},
  'paraphrased': {'ideology': 14.386317907444667,
   'gender': 7.1352362818639925,
   'race': 9.870101831528192,
   'disability': 10.440006494560805,
   'sexual orientation': 14.212480113134172}},
 'google': {'backtranslated': {'ideology': 6.438631790744467,
   'gender': 3.9319017599159443,
   'race': 4.808869051904111,
   'disability': 2.8306490971205465,
   'sexual orientation': 