In [None]:
### Load the trained models and test them on all data
### Save the predictions as pickle (.pkl) files

In [None]:
# Constructs (classification tasks) covered by this notebook.
constructs = ["sentiment", "sexism", "hatespeech"]

# Names of the test sets evaluated for each construct.
test_sets = {
    "sentiment": ["original", "kaggle"],
    "sexism": ["original", "exist"],
    "hatespeech": ["original", "hateval"],
}

# Domain tag for every test set (presumably ID = in-domain,
# OOD = out-of-domain -- confirm against the analysis that consumes it).
domain_mapping = {
    "original": "ID",
    "kaggle": "OOD",
    "exist": "OOD",
    "hateval": "OOD",
}

# Names of the adversarial test sets.
adv_test_sets = ["adv_inv", "adv_swap"]

# Per construct: mapping from the textual class label to its integer id.
labels = {
    "sentiment": {"positive": 1, "negative": 0},
    "sexism": {"sexist": 1, "non-sexist": 0},
    "hatespeech": {"hate": 1, "not hate": 0},
}

# Number of runs per configuration (iterated as range(runs)).
runs = 5

In [None]:
# Nested registry of loaded models:
#   classifiers[construct][model][mode] -> list with one loaded model per run.
classifiers = {}

for construct in constructs:
    construct_models = classifiers.setdefault(construct, {})
    for model in models:
        model_modes = construct_models.setdefault(model, {})
        for mode in modes:
            run_models = model_modes.setdefault(mode, [])
            for run in range(runs):
                checkpoint_path = '../ml_models/%s_%s_%s_%d.joblib' % (construct, model, mode, run)
                # 'bert' checkpoints are read with load_model, every other
                # model type with load.
                if model == 'bert':
                    run_models.append(load_model(checkpoint_path))
                else:
                    run_models.append(load(checkpoint_path))
            

In [None]:
def predict(estimators, construct, test_sets, labels = {'positive' : 1, 'negative' : 0},
                      keep_neutral = False):
    """Run the counterfactual and non-counterfactual model on every test set.

    For each name in ``test_sets`` the file
    ``../data/data/<construct>/test/<name>.csv`` (tab separated) is read,
    the gold labels are mapped through ``labels``, and the first estimator
    of each mode (``estimators[True][0]`` and ``estimators[False][0]``) is
    applied to the ``text`` column. A Welch t-test comparing the positive
    class probabilities of the two modes is printed per test set.

    Parameters
    ----------
    estimators : mapping
        Indexed as ``estimators[mode][0]`` with ``mode`` in ``{True, False}``;
        each estimator must offer ``predict`` and ``predict_proba``
        (two probability columns are expected).
    construct : str
        Name of the gold-label column and of the data sub-directory.
    test_sets : iterable of str
        Test-set file stems to evaluate.
    labels : dict
        Mapping from textual label to integer id. The default dict is only
        read, never mutated.
    keep_neutral : bool
        If False, rows whose label is not a key of ``labels`` are dropped.
        If True they are kept; their mapped label is NaN and they are
        always marked as not correct.

    Returns
    -------
    pandas.DataFrame
        The per-test-set frames stacked on top of each other (each keeps its
        own 0..n-1 row index) with the columns ``index``, ``text``,
        ``<construct>``, ``dataset``, ``True pred``, ``True pred proba``,
        ``False pred``, ``False pred proba``, ``True correct`` and
        ``False correct``. An empty frame is returned for empty ``test_sets``.
    """
    frames = []
    for test in test_sets:
        data = pd.read_csv("../data/data/%s/test/%s.csv" %(construct, test), sep = "\t")
        if not keep_neutral:
            data = data[data[construct].isin(labels)]

        # Always expose the original row position as an 'index' column: the
        # column selection below needs it, and previously it was only created
        # on the keep_neutral=False path. A CSV that already ships an 'index'
        # column keeps its own values.
        data = data.reset_index(drop = 'index' in data.columns)

        data[construct] = data[construct].map(labels)
        data['dataset'] = test
        print(len(data))

        dists = []
        for mode in [True, False]:
            # NOTE(review): only the first run of each mode is evaluated here.
            estimator = estimators[mode][0]
            df = pd.DataFrame(estimator.predict_proba(data['text']),
                              columns = ['neg prob', 'pos prob'])
            df['pred'] = estimator.predict(data['text'])

            # Column names are derived from the boolean mode, i.e.
            # 'True pred', 'False pred', ...
            pred_col = '%s pred' %(mode)
            data[pred_col] = df['pred']
            data['%s pred proba' %(mode)] = df['pos prob']
            # A NaN gold label never equals a prediction, so it maps to 'no'.
            data['%s correct' %(mode)] = (data[construct] == data[pred_col]).map(
                {True: 'yes', False: 'no'})

            dists.append(df['pos prob'])

        data = data[['index', 'text', construct, 'dataset', 'True pred',
                     'True pred proba', 'False pred', 'False pred proba',
                     'True correct', 'False correct']]
        frames.append(data)

        print(test)
        print(stats.ttest_ind(dists[0], dists[1], equal_var = False))
        print()

    # DataFrame.append no longer exists in pandas >= 2.0; concatenate once.
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames)


# Run once and save results.
# all_test_data[construct][model] holds the DataFrame returned by predict().
# The dict is created here so the cell also works on a fresh kernel; before,
# it was indexed without having been assigned in the visible cells.
all_test_data = {}
for construct in constructs:
    all_test_data[construct] = {}
    print(construct)
    for model in models:
        all_test_data[construct][model] = predict(classifiers[construct][model], construct,
                                                  test_sets = test_sets[construct],
                                                  labels = labels[construct])

In [None]:
# Test on the adversarial examples, but also on their original counterparts,
# to prevent data size discrepancies

# One get_results(...) record is collected per
# (construct, run, model, mode, adversarial set, text variant).
# NOTE(review): adv_test_types, trained_models and DATAPATH are not defined in
# the cells visible here (adv_test_sets and classifiers are) -- confirm they
# are set in an earlier cell, otherwise this fails on a fresh kernel.
all_results = []

for construct in constructs:
    for run in range(runs):
        for model_name in models:
            for mode in [False, True]:
                for test_type in adv_test_types:
                    data = pd.read_csv(DATAPATH + "%s.csv" % (test_type), sep='\t')

                    print()
                    print(construct, model_name, mode, test_type)

                    # First the original examples (read from the 'original'
                    # column), second the adversarial examples (test()'s
                    # default text column).
                    variants = [
                        (test_type + " original", {'text_column': 'original'}),
                        (test_type, {}),
                    ]
                    for dataset_name, text_kwargs in variants:
                        true, pred, cr = test(trained_models[construct][model_name][mode][run],
                                              data, construct, test=dataset_name,
                                              labels=labels[construct], **text_kwargs)
                        all_results.append(get_results(
                            cr, true, pred,
                            method=model_name,
                            mode=mode,
                            construct=construct,
                            # inverted mapping: stringified id -> label name
                            labels={str(v): k for k, v in labels[construct].items()},
                            dataset=dataset_name))

# Collect all records in one frame and average the F1 scores over runs,
# separately for every construct.
result_df = pd.DataFrame(all_results).rename(columns={'1 Class F1': 'Pos F1'})

results = {}
for construct in constructs:
    result_df_ = result_df.loc[result_df['construct'] == construct]
    grouped = result_df_.groupby(['construct', 'method', 'dataset', 'mode'])
    # unstack() moves the innermost group key ('mode') into the columns
    results[construct] = grouped[['Pos F1', 'Macro F1']].mean().unstack()

In [None]:
# Persist / restore the result dicts with pickle.
import pickle

# Kept for reference: how the result files were written.
# NOTE(review): both dumps below write all_test_data into the *adversarial*
# results file -- confirm that is the intended object before re-enabling.
# with open("../results/result_pkls/adversarial_results.pkl", 'wb+') as f:
#     pickle.dump(all_test_data, f)

# with open("../results/result_pkls/adversarial_results.pkl2", 'wb+') as f:
#     pickle.dump(all_test_data, f, protocol=2)


# Read the stored result dicts back in.
# NOTE(review): this rebinds all_test_data, replacing any value computed
# earlier in the session. Only unpickle files you produced yourself;
# pickle.load can execute arbitrary code from the file.
with open("../results/result_pkls/in_out_domain_results.pkl", 'rb') as in_out_file:
    all_test_data = pickle.load(in_out_file)

with open("../results/result_pkls/adversarial_results.pkl", 'rb') as adversarial_file:
    adv_test_data = pickle.load(adversarial_file)