In [22]:
import copy 
import os 
from os.path import join
import shutil
import itertools
from collections import Counter
import json
import pickle
import pprint
import pandas as pd
import numpy as np
import pprint
import torch 
import torch.nn.functional as F 
import math 

import matplotlib.pyplot as plt 
plt.rcParams['figure.figsize'] = [6, 8]

In [23]:
import sys
basedir = '/Users/RobertAdragna/Documents/School/Fourth_Year/ESC499-Thesis/codebases/causal_discovery'
sys.path.append(basedir)

import data_processing as dp 
import environment_processing as eproc 
import models 
from utils import proc_fteng, make_tensor

In [24]:
print(os.listdir(os.getcwd()))

['.DS_Store', '.ipynb_checkpoints', '00_old_formatting', '0525_smallsample', '0531_allgerman', '0602_validation', '0603_validation', '0610_baseline_adultgerman', '__pycache__', 'final_latex_results', 'latex_results', 'Plot_Generation.ipynb', 'prediction.ipynb']


In [25]:
res_dir = '0610_baseline_adultgerman/testing'
invariance_algos = {}   #'icp':{},
non_invariance_algos = {'linreg':{}}

for als in [invariance_algos, non_invariance_algos]:
    for al in als.keys(): 
        als[al]['expdir'] = join(join(join(os.getcwd(), res_dir), al), 'causal_discovery')
        als[al]['processed_dir'] = join(join(join(os.getcwd(), res_dir), al), 'processed_results')
        als[al]['params'] = pd.read_pickle(join(join(join(os.getcwd(), res_dir), al), '{}_paramfile.pkl'.format(al)))

        if not os.path.exists(als[al]['processed_dir']):
            raise Exception('Directory has not yet been processed')

In [19]:
# invariance_algos['linear_irm']['params']['Algo'] = invariance_algos['linear_irm']['params']['Algo'].apply(lambda x: x.replace('_', '-'))
# invariance_algos['linear_irm']['params'].tail(10)
# non_invariance_algos['linreg']['params'].head()
# non_invariance_algos['linreg']['params'].drop('linregressors', axis=1, inplace=True)
# non_invariance_algos['linreg']['params'].to_pickle('0610_baseline_adultgerman/testing/linreg/linreg_paramfile.pkl')

# Utilities

In [7]:
def pred_binarize(v):
    '''Convert all values to 0 if <0.5, 1 otherwise'''
    def thresh(x):
        if (x >= 0.5): return 1 
        else: return 0
    print(v.shape)
    return np.apply_along_axis(thresh, 1, v)
    
        
def compute_loss(pred, ground, ltype='MSE'):
    '''Compute loss between two prediction vectors'''

    
    if ltype == 'MSE':
        return F.mse_loss(torch.tensor(pred).float(), torch.tensor(ground).float()).numpy()
    if ltype == 'ACC':
        pred = pred_binarize(pred) 
        return 1 - F.mse_loss(torch.tensor(pred).float(), torch.tensor(ground).float()).numpy()
    
def fairness_dp(pred, ground, d, patts, ftype='DP'):
    '''Compute demographic aparity wrt data
    :param pred: vector, binary entries (np[float])
    :param ground: vector, binary entries (np[float])
    :param d: dataset (pandas df)
    :param patt: datts dict {cat:[all orig columns]}'''
    
    def avg_diff_scores(p):
        ''' Given a dictionary of scores for different sensitive attributes p.keys, \ 
            return the average difference between these values '''
        na = len(p.keys())
        if na <= 1:  #Error checking
            return np.nan
        
        n_combos = math.factorial(na)/ (2 * math.factorial(na - 2))
        
        tot = 0
        for pair in itertools.combinations(list(p.keys()), 2): 
            tot += abs(p[pair[0]] - p[pair[1]])
        
        return float(tot/na)

    
    #Get the protected attribute columns 
    assert len(patts.keys()) == 1
    protected = [patts[cat] for cat in patts.keys()][0]
    
    probs = {}   
    #Compute p(y_hat=1 | a, y)  Va  (demographic parity)

    for aval in protected: 
        if '_DUMmY' in aval:
            subpop = (d[[a for a in protected if '_DUMmY' not in a]] == 0).all(1).values.squeeze()
        else:
            subpop = (d[aval] == 1).values.squeeze()
        
        #Make sure that there are samples in the group of interest 
        if (subpop.sum() == 0) or ((subpop & (ground == 1).squeeze()).sum() == 0):
            continue
        
        #Compute fairness
        if ftype == 'DP': 
            probs[aval] = pred[subpop].sum() / len(pred[subpop])
        
        elif ftype == 'EOP':
            probs[aval] = pred[subpop & (ground == 1).squeeze()].sum() / len(pred[subpop & (ground == 1).squeeze()])
        
        elif ftype == 'CAL':
            probs[aval] = ground[subpop & (pred == 1).squeeze()].sum() / len(ground[subpop])

    return avg_diff_scores(probs)

In [8]:
def df_subset(df, subset):
    '''Get a subset of df rows whose columns specified in subset equal their respective values
    :param df: Dataframe (pandas)
    :param subset: Series of col_name:value pairs (pandas series)
    '''
    new_df = df.copy(deep=True)
    for col, val in pd.Series.iteritems(subset):
        new_df = new_df[new_df[col] == val]
    return new_df

def get_dset_fname(dset, b):
    if dset == 'adult':
        datafname = join(join(b, 'data'), 'adult.csv')
    elif dset == 'german':
        datafname = join(join(b, 'data'), 'germanCredit.csv')
    else:
        raise Exception('Dataset unimplemented')
    
    return datafname

In [9]:
def generate_all_existing_results(allcols, ags):
    ''' param allcols: A list of the features to be included
        param ags: A list of paramdfs for each algorithm'''
    add = pd.DataFrame()
    for param_df in ags: 
        if add.empty:
            add = param_df[allcols]
        else:
            add = add.append(param_df[allcols], ignore_index=True)
    
    uniq = np.logical_not(add.duplicated())
    return add[uniq]
    
    
def generate_results(fixed, compared): 
    '''
    :param fixed: A list of tuples (pname, pval) that are fixed across exps
    :param compared: A dictionary of pname:full range of possible values in experiment ''' 
    
    fixed_results = pd.Series([np.nan]*len(fixed), index=[f[0] for f in fixed]) #  , index=fixed_cols) 
    for f in fixed:
        fixed_results[f[0]] = f[1]
    
    #Set Up the Results Dataframe 
    compared_results = pd.DataFrame(itertools.product(*[compared[cat] for cat in compared]))
    compared_results.columns = list(compared.keys())
    
    #Set up the results 
    results = fixed_results.to_frame().T
    results['key'] = 0 
    compared_results['key'] = 0
    results = results.merge(compared_results, on='key', how='inner')
    results.drop('key', axis='columns', inplace=True)
    
    return results 

In [29]:
def compute_results(algos, old_resdf, orig_cols):
    reddata = -1
    loss_types = ['ACC']
    fairness_types = ['DP', 'EOP', 'CAL']
    sens_atts = {'adult':['race'], \
                 'german':['Personal']}     
    
    resdf = old_resdf.copy()
    
    #Get All The Results Columns of Interest: 
    res_cols = []
    for al in algos.keys():
        for m in ['train', 'test']:
            for l in loss_types:
                res_cols.append('{}-{}_error-{}'.format(al, m, l))
            for f in fairness_types:
                res_cols.append('{}-{}_fairness-{}'.format(al, m, f)) 
    for col in res_cols:  #Add cols to resultsdf
        resdf[col] = np.nan
    
    
    for al in algos.keys():  #Enumerate through algos     
        for resid, row in resdf.iterrows():
            algo_rescols = [c for c in res_cols if al in c]
            if row[algo_rescols].isnull().all():   #Check if merics for row already been computed 
                
                #Get entry of real dataset correpsonding to row 
                rel = df_subset(algos[al]['params'], row[orig_cols])  #Get row-associated entry in param dframe
                assert rel.shape[0] <= 1 #Guarentee just one (Excluding multi-index mappings)
                if rel.shape[0] == 0: 
                    continue
                
                alldata, all_y_all, d_atts = dp.data_loader(get_dset_fname(row['Dataset'], basedir), \
                                                              proc_fteng(row['Fteng']), \
                                                              dsize=reddata, \
                                                              bin=row['Bin'])
                #Split data
                train_data, train_y_all, d_atts, _, _, test_data, test_y_all = dp.train_val_test_split(\
                                                                          alldata, all_y_all, d_atts, test=row['TestSet'])
                
                #Compute Predictions  
                if al == 'icp':
                    model = models.InvariantCausalPrediction() 
                    learned_model = [pd.read_pickle(join(res_dir, rel.loc[rel.index[0], 'coeffs']))]
                    
                       
                    train_predictions = model.predict(train_data, *learned_model)
                    test_predictions = model.predict(test_data, *learned_model)

                elif (al == 'irm') or (al == 'linear_irm'):
                    if (al == 'irm'):
                        model = models.InvariantRiskMinimization()
                        try:
                            learned_model = [torch.load(join(res_dir, rel.loc[rel.index[0], 'phi']))]
                        except:
                            import pdb; pdb.set_trace()
                    elif (al == 'linear_irm'):
                        model = models.LinearInvariantRiskMinimization()
                        try:
                            learned_model = [torch.load(join(res_dir, rel.loc[rel.index[0], 'phi']))]
                        except:
                            import pdb; pdb.set_trace()
                    
       
                    train_predictions = model.predict(train_data.values, *learned_model, hid_layers=200)
                    test_predictions = model.predict(test_data.values, *learned_model, hid_layers=200)                

                elif al == 'linreg':
                    model = models.Linear()
                    learned_model = [pd.read_pickle(join(res_dir, rel.loc[rel.index[0], 'regressors']))]
                    
                    train_predictions = model.predict(train_data, *learned_model)   
                    test_predictions = model.predict(test_data, *learned_model)  
                
                elif al == 'logreg':
                    model = models.LogisticReg()
                    learned_model = [pd.read_pickle(join(res_dir, rel.loc[rel.index[0], 'regressors']))]
                    
                    train_predictions = model.predict(train_data, *learned_model)   
                    test_predictions = model.predict(test_data, *learned_model) 
                    
                #Compute Metrics on Predictions 
                for ftype in fairness_types:
                    for ltype in loss_types:
                        for r in [['train', train_predictions, train_y_all, train_data], ['test', test_predictions, test_y_all, test_data]] :
                            m, predictions, y_all, data = r[0], r[1], r[2], r[3]
                        
                            #Manage special case 
                            if predictions.empty:
                                resdf.loc[resid, '{}-{}_error-{}'.format(al, m, ltype)] = 'NA'
                                resdf.loc[resid, '{}-{}_fairness-{}'.format(al, m, ftype)] = 'NA'

                            else:
                                error = compute_loss(predictions.values, y_all.values, ltype=ltype)
                                full_fair = 0
                                for s in sens_atts[row['Dataset']]:
                                    fairness =  fairness_dp(pred_binarize(predictions.values), y_all.values,\
                                                                data, {s:d_atts[s]}, ftype=ftype)
                        
                                    if not np.isnan(fairness):
                                        full_fair += fairness/len(sens_atts[row['Dataset']])


                                #Save computed values to resdf 
                                resdf.loc[resid, '{}-{}_error-{}'.format(al, m, ltype)] = error
                                resdf.loc[resid, '{}-{}_fairness-{}'.format(al, m, ftype)] = full_fair
                                
    return resdf

# Hyperparameters

In [None]:
def split_data(row):
    alldata, all_y_all, d_atts = dp.data_loader(get_dset_fname(row['Dataset'], basedir), \
                                                              proc_fteng(row['Fteng']), \
                                                              dsize=-1, \
                                                              bin=row['Bin'])
    assert 'Val' not in row.index
    train_data, train_labels, d_atts, val_data, val_labels, test_data, test_labels = \
         dp.train_val_test_split(alldata, all_y_all, d_atts, val=0.2, test=row['TestSet'], seed=row['Seed'])
    
    return train_data, train_labels, val_data, val_labels, test_data, test_labels
        

def compute_hyperparameters(true_resdf):
    def compute_irm_loss(model, logits, labels, pen_reg):
        logits, labels = make_tensor(logits.values), make_tensor(labels.values)
        loss = model.mean_nll(logits, labels)
        pen = model.penalty(logits, labels)
        return (loss + (pen_reg * pen)).detach().numpy()
    def compute_linreg_loss(logits, labels, weight, lam):
        return ((logits - labels) ** 2).mean() + (weight * lam)
  
    
    
    resdf = true_resdf.copy(deep=True)
    resdf['training_loss'] = np.nan
    resdf['validation_loss'] = np.nan
                                                                               
    for resid, row in resdf.iterrows():
        
        #Load the data
        train_data, train_labels, val_data, val_labels, _, _ = split_data(row)
        
        if (row['Algo'] == 'irm') or (row['Algo'] == 'linear_irm') :                                                                         
            #Load the model
            if (row['Algo'] == 'irm'):
                try:
                    src = models.InvariantRiskMinimization()
                    params = torch.load(row['phi'])
                except:
                    resdf.drop(resid)
                    continue
                                      
            elif (row['Algo'] == 'linear_irm'):
                try:
                    src = models.LinearInvariantRiskMinimization()
                    params = torch.load(row['phi'])
                except:
                    resdf.drop(resid)
                    continue
                     
            train_logits = src.predict(train_data.values, params, hid_layers=row['HidLayers'])
            train_loss = compute_irm_loss(src, train_logits, \
                                      train_labels, row['PenWeight'])
            val_logits = src.predict(val_data.values, params, hid_layers=row['HidLayers'])
            val_loss = compute_irm_loss(src, val_logits, \
                                      val_labels, row['PenWeight'])
            
        elif (row['Algo'] == 'linreg'):
            src = models.Linear()
            coeffs = pd.read_pickle(row['regressors'])
            weight = src.get_weight_norm(coeffs)
            
            train_logits = src.predict(train_data, coeffs)
            train_loss = compute_linreg_loss(train_logits.values, train_labels.values, weight, row['Reg'])
            val_logits = src.predict(val_data, coeffs)
            val_loss = compute_linreg_loss(val_logits.values, val_labels.values, weight, row['Reg'])
              
        elif (row['Algo'] == 'logreg'):
            src = models.LogisticReg()
            coeffs = pd.read_pickle(row['regressors'])
            weight = src.get_weight_norm(coeffs)
            
            train_logits = src.predict(train_data, coeffs)
            train_loss = compute_linreg_loss(train_logits.values, train_labels.values, weight, row['Reg'])
            val_logits = src.predict(val_data, coeffs)
            val_loss = compute_linreg_loss(val_logits.values, val_labels.values, weight, row['Reg'])
                                                                               
        resdf.loc[resid, 'training_loss'] = train_loss    
        resdf.loc[resid, 'validation_loss'] = val_loss
    return resdf


In [None]:
tuning_algo = 'linear-irm'
res = compute_hyperparameters(invariance_algos[tuning_algo]['params'])

if (tuning_algo == 'irm') or (tuning_algo == 'linear-irm'):
    res.drop('phi', axis=1, inplace=True)
elif (tuning_algo == 'linreg') or (tuning_algo == 'logreg'):
    res.drop('regressors', axis=1, inplace=True)
else: 
    raise Exception('Unimplemented Algo')

In [None]:
adult = res[res['Dataset'] == 'adult']
at = adult.sort_values(by=['training_loss'])
av = adult.sort_values(by=['validation_loss'])

german = res[res['Dataset'] == 'german']
gt = german.sort_values(by=['training_loss'])
gv = german.sort_values(by=['validation_loss'])

In [None]:
drop_cols = ['Algo', 'Fteng', 'Dataset', 'ReduceDsize', 'Bin', 'Eq_Estrat', 'Envs']  # ['Algo', 'Fteng', 'Dataset', 'ReduceDsize', 'Bin']
view_cols = ["TestSet", 'LR', 'N_Iterations', 'PenWeight', 'HidLayers']  # ["TestSet", 'Reg']

tmp = av.drop(drop_cols, axis=1)
tmp = av.groupby(["TestSet", 'Reg'])[['training_loss', 'validation_loss']].mean()
tmp.head(200)

# Evaluating on Invariance Algorithms 

In [None]:
invar_FIXED = [['Dataset', 'adult'], \
               ['ReduceDsize', 10000], \
               ['Eq_Estrat', -1]] 

invar_COMPARED =  {'Envs':['workclass', 'native-country'], \
                   'Seed':[147, 256, 304],
                   'Fteng':['1', '12'], \
                   'Bin':[1]}

invar_orig_cols = [a[0] for a in invar_FIXED] + list(invar_COMPARED.keys()) + ['TestSet']
orig_invar_results = generate_all_existing_results(invar_orig_cols, \
                                             [invariance_algos[a]['params'] for a in list(invariance_algos.keys())])    
#invar_results = generate_results(invar_FIXED, invar_COMPARED)

# invar_results = invar_results[invar_results['Seed'] == 1000]
orig_invar_results.head(25)

In [None]:
# ##TMPPPPPPPPPP

# a = invariance_algos['linear_irm']['params']
# invariance_algos['linear_irm']['params'] = a[(a['Seed'] == 1000) & (a['LR'] == 0.01) & (a['N_Iterations'] == 1000) \
#                                             & (a['PenWeight'] == 1000) & (a['HidLayers'] == 100)]

In [None]:
invar_results = compute_results(invariance_algos, orig_invar_results, invar_orig_cols)

In [None]:
pd.options.display.max_colwidth = 4000
invar_results.head(20)

# Evaluating on Non-Invariance Algorithms

In [27]:
var_FIXED = [['Dataset', 'adult'], \
               ['ReduceDsize', 10000], \
               ['Bin', 1]] 

var_COMPARED =  {'Fteng':['1', '12'], \
                 'Seed':[147, 256, 304]}

var_orig_cols = [a[0] for a in var_FIXED] + list(var_COMPARED.keys()) + ['TestSet']
orig_var_results = generate_all_existing_results(var_orig_cols, \
                                             [non_invariance_algos[a]['params'] for a in list(non_invariance_algos.keys())]) 
# var_results = generate_results(var_FIXED, var_COMPARED)

orig_var_results.head(10)

Unnamed: 0_level_0,Dataset,ReduceDsize,Bin,Fteng,Seed,TestSet
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,adult,-1,0,-1,52,workclass_DUMmY
1,adult,-1,0,-1,52,native-country_DUMmY
2,adult,-1,0,-1,52,relationship_DUMmY
3,german,-1,0,-1,52,Purpose_DUMmY
4,german,-1,0,-1,52,Housing_DUMmY


In [30]:
var_results = compute_results(non_invariance_algos, orig_var_results, var_orig_cols)

(45086, 91)
(43696, 1)




(43696, 1)
(1390, 1)
(1390, 1)
(43696, 1)




(43696, 1)
(1390, 1)
(1390, 1)
(43696, 1)
(43696, 1)
(1390, 1)
(1390, 1)
(45086, 91)
(45060, 1)




(45060, 1)
(26, 1)
(26, 1)
(45060, 1)




(45060, 1)
(26, 1)
(26, 1)
(45060, 1)
(45060, 1)
(26, 1)
(26, 1)
(45086, 91)
(26472, 1)




(26472, 1)
(18614, 1)




(18614, 1)
(26472, 1)
(26472, 1)
(18614, 1)
(18614, 1)
(26472, 1)
(26472, 1)
(18614, 1)
(18614, 1)
(988, 34)
(754, 1)
(754, 1)
(234, 1)
(234, 1)
(754, 1)
(754, 1)
(234, 1)
(234, 1)
(754, 1)
(754, 1)
(234, 1)
(234, 1)




(988, 34)
(809, 1)
(809, 1)
(179, 1)
(179, 1)
(809, 1)
(809, 1)
(179, 1)
(179, 1)
(809, 1)
(809, 1)
(179, 1)
(179, 1)




In [31]:
pd.options.display.max_colwidth = 400
var_results.head(10)

Unnamed: 0_level_0,Dataset,ReduceDsize,Bin,Fteng,Seed,TestSet,linreg-train_error-ACC,linreg-train_fairness-DP,linreg-train_fairness-EOP,linreg-train_fairness-CAL,linreg-test_error-ACC,linreg-test_fairness-DP,linreg-test_fairness-EOP,linreg-test_fairness-CAL
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
0,adult,-1,0,-1,52,workclass_DUMmY,0.671958,0.202992,0.404121,0.151672,0.562679,0.378427,0.688473,0.342465
1,adult,-1,0,-1,52,native-country_DUMmY,0.665347,0.217094,0.406197,0.164073,0.642012,0.0,0.0,0.0
2,adult,-1,0,-1,52,relationship_DUMmY,0.870632,0.039946,0.144541,0.027652,0.537267,0.150493,0.226601,0.124686
3,german,-1,0,-1,52,Purpose_DUMmY,0.645579,0.234262,0.125865,0.211262,0.584886,0.129931,0.110048,0.140645
4,german,-1,0,-1,52,Housing_DUMmY,0.640148,0.138253,0.043706,0.087629,0.574857,0.150439,0.033929,0.441788


In [None]:
non_invariance_algos.keys()

# Save To Excel

In [32]:
excel_fname = '{}_results.xlsx'.format(res_dir)
var_results.to_excel(join(join(os.getcwd(), res_dir), excel_fname))

FileNotFoundError: [Errno 2] No such file or directory: '/Users/RobertAdragna/Documents/School/Fourth_Year/ESC499-Thesis/codebases/causal_discovery/results/0610_baseline_adultgerman/testing/0610_baseline_adultgerman/testing_results.xlsx'