In [None]:
import copy 
import os 
from os.path import join
import shutil
import itertools
from collections import Counter
import json
import pickle
import pprint
import pandas as pd
import numpy as np
import pprint
import torch 
import torch.nn.functional as F 
import math 

import matplotlib.pyplot as plt 
# Notebook-wide default figure size (matplotlib reads this as width, height in inches).
plt.rcParams['figure.figsize'] = [6, 8]


In [None]:
import sys
# Root of the causal_discovery code base; it is put on sys.path so the project modules
# imported below (data_processing, environment_processing, models, utils) resolve.
# Set the CAUSAL_DISCOVERY_BASEDIR environment variable to run on another machine;
# the original author's checkout location remains the default.
basedir = os.environ.get(
    'CAUSAL_DISCOVERY_BASEDIR',
    '/Users/RobertAdragna/Documents/School/Fourth_Year/ESC499-Thesis/codebases/causal_discovery',
)
# Re-running this cell must not stack duplicate entries on sys.path.
if basedir not in sys.path:
    sys.path.append(basedir)

import data_processing as dp 
import environment_processing as eproc 
import models 
from utils import proc_fteng, make_tensor

In [None]:
# Sanity check: show what sits in the working directory (results folders are resolved against it).
print(os.listdir())

In [None]:
# Experiment folder (relative to the working directory) and the algorithm analysed below.
res_dir = '0610_baseline_adultgerman/hyperparam_tuning'
algo = 'logreg'

# Parameter table of the chosen algorithm, read from <res_dir>/<algo>/<algo>_paramfile.pkl.
algo_params = pd.read_pickle(
    join(os.getcwd(), res_dir, algo, '{}_paramfile.pkl'.format(algo))
)

# The notebook refuses to continue unless <res_dir>/<algo>/processed_results exists.
algo_pdir = join(os.getcwd(), res_dir, algo, 'processed_results')
if not os.path.exists(algo_pdir):
    raise Exception('Directory has not yet been processed')

# Utilities

In [None]:
def pred_binarize(v):
    '''Threshold predictions at 0.5: values >= 0.5 become 1, everything else 0.

    :param v: predictions, either a 1-D vector or an (n, 1) column (array-like of floats)
    :return: 1-D integer numpy array of 0/1 labels for vector / single-column input;
             any other shape is thresholded element-wise and keeps its shape
    '''
    scores = np.asarray(v)
    # A single-column matrix is flattened so callers always get a plain vector back.
    if scores.ndim == 2 and scores.shape[1] == 1:
        scores = scores[:, 0]
    # Vectorised comparison; also handles empty input, which apply_along_axis rejects.
    return (scores >= 0.5).astype(int)
    
        
def compute_loss(pred, ground, ltype='MSE'):
    '''Compute loss between two prediction vectors

    :param pred: predictions (array-like of floats), vector or single column
    :param ground: ground-truth values with the same number of entries as ``pred``
    :param ltype: 'MSE' for mean squared error, or 'ACC' for accuracy, computed as
                  1 - MSE between the 0.5-thresholded predictions and ``ground``
    :return: the loss as a 0-d numpy value
    :raises ValueError: if ``ltype`` is neither 'MSE' nor 'ACC'
    '''
    if ltype not in ('MSE', 'ACC'):
        # Previously an unknown type fell through and silently returned None.
        raise ValueError('Unknown loss type: {}'.format(ltype))

    if ltype == 'ACC':
        pred = pred_binarize(pred)

    # Flatten both sides: a (n,) prediction against a (n, 1) target would otherwise be
    # broadcast by mse_loss to an (n, n) comparison and yield a meaningless value.
    pred_t = torch.tensor(pred).float().reshape(-1)
    ground_t = torch.tensor(ground).float().reshape(-1)
    mse = F.mse_loss(pred_t, ground_t).numpy()

    if ltype == 'ACC':
        return 1 - mse
    return mse
    
def fairness_dp(pred, ground, d, patts, ftype='DP'):
    '''Average pairwise gap of a group-fairness rate across the groups of one protected attribute.

    A rate is computed for every group (column) of the protected attribute:
      * 'DP'  - P(y_hat = 1 | a)          (demographic parity)
      * 'EOP' - P(y_hat = 1 | a, y = 1)   (equal opportunity)
      * 'CAL' - P(y = 1 | a, y_hat = 1)   (calibration of positive predictions)
    and the mean absolute difference over all unordered pairs of groups is returned.

    A group is left out when it has no members or no members with ground == 1, and,
    for 'CAL', when it has no positive predictions (its rate would be 0/0).
    A column whose name contains '_DUMmY' stands for the rows where every other
    protected column equals 0.

    :param pred: vector, binary entries (np[float])
    :param ground: vector, binary entries (np[float])
    :param d: dataset (pandas df) containing the protected columns
    :param patts: datts dict {cat:[all orig columns]}; exactly one category is allowed
    :param ftype: rate to compare: 'DP', 'EOP' or 'CAL'
    :return: float, or np.nan when fewer than two groups produced a rate
    '''

    def avg_diff_scores(p):
        '''Given a dictionary of scores for different sensitive attributes p.keys,
           return the average absolute difference over all pairs of these values'''
        scores = list(p.values())
        if len(scores) <= 1:  # Nothing to compare
            return np.nan

        gaps = [abs(first - second) for first, second in itertools.combinations(scores, 2)]
        # Divide by the number of pairs (not the number of groups) to get a true mean.
        return float(sum(gaps) / len(gaps))

    # Get the protected attribute columns
    assert len(patts.keys()) == 1
    protected = list(patts.values())[0]
    real_cols = [a for a in protected if '_DUMmY' not in a]

    # Work on flat vectors so boolean masks line up whether inputs are (n,) or (n, 1).
    pred = np.asarray(pred).reshape(-1)
    ground = np.asarray(ground).reshape(-1)
    truly_positive = ground == 1
    predicted_positive = pred == 1

    probs = {}
    for aval in protected:
        if '_DUMmY' in aval:
            subpop = (d[real_cols] == 0).all(1).values
        else:
            subpop = (d[aval] == 1).values

        # Make sure that there are samples in the group of interest
        if (subpop.sum() == 0) or ((subpop & truly_positive).sum() == 0):
            continue

        if ftype == 'DP':
            probs[aval] = pred[subpop].mean()

        elif ftype == 'EOP':
            probs[aval] = pred[subpop & truly_positive].mean()

        elif ftype == 'CAL':
            selected = subpop & predicted_positive
            if selected.sum() == 0:
                continue
            # Condition on the group's positive predictions, mirroring the EOP form.
            probs[aval] = ground[selected].mean()

    return avg_diff_scores(probs)

In [None]:
def df_subset(df, subset):
    '''Get a subset of df rows whose columns specified in subset equal their respective values
    :param df: Dataframe (pandas)
    :param subset: Series (or dict) of col_name:value pairs (pandas series)
    :return: new dataframe with the matching rows; ``df`` itself is left untouched
    '''
    keep = np.ones(len(df), dtype=bool)
    # .items() replaces Series.iteritems, which was removed in pandas 2.0.
    for col, val in subset.items():
        keep &= (df[col] == val).values
    # Boolean indexing returns a new frame, so no up-front deep copy is needed.
    return df[keep]

def get_dset_fname(dset, b):
    '''Return the path of the CSV file of dataset ``dset`` inside ``<b>/data``.

    :param dset: dataset name, 'adult' or 'german'
    :param b: base directory that contains the ``data`` folder
    :raises Exception: for any other dataset name
    '''
    if dset == 'adult':
        csv_name = 'adult.csv'
    elif dset == 'german':
        csv_name = 'germanCredit.csv'
    else:
        raise Exception('Dataset unimplemented')

    return join(b, 'data', csv_name)

In [None]:
def generate_all_existing_results(allcols, ags):
    ''' Collect the distinct ``allcols`` configurations found across several parameter frames.

        param allcols: A list of the features to be included
        param ags: A list of paramdfs for each algorithm
        return: dataframe with one row per distinct combination of ``allcols``. When a
                single non-empty frame contributes, its index is kept; when several are
                stacked, rows are renumbered before duplicates are removed.'''
    frames = [param_df[allcols] for param_df in ags]
    frames = [frame for frame in frames if not frame.empty]

    if not frames:
        return pd.DataFrame(columns=allcols)

    if len(frames) == 1:
        combined = frames[0]
    else:
        # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
        combined = pd.concat(frames, ignore_index=True)

    return combined.drop_duplicates()

In [None]:
def compute_results(al, al_params, old_resdf, orig_cols):
    '''Score the stored model of algorithm ``al`` for every configuration in ``old_resdf``.

    For each row the matching entry of ``al_params`` is looked up (rows without one are
    skipped), the dataset is reloaded and split, the saved model is read from the
    module-level ``res_dir`` and the train/test predictions are scored for accuracy
    and for the DP / EOP / CAL fairness gaps.

    :param al: algorithm name: 'icp', 'irm', 'linear_irm', 'linreg', 'logreg', 'mlp' or 'constant'
    :param al_params: parameter dataframe of the algorithm; supplies the model path
                      column ('coeffs', 'phi', 'regressors' or 'weights')
    :param old_resdf: dataframe with one row per configuration
    :param orig_cols: names of the columns that identify a configuration in both frames
    :return: copy of ``old_resdf`` with '<al>-<split>_error-<loss>' and
             '<al>-<split>_fairness-<metric>' columns added
    :raises ValueError: if ``al`` is not one of the implemented algorithms
    '''
    reddata = -1  # forwarded as dsize to the loader -- presumably "use the full dataset"; TODO confirm
    loss_types = ['ACC']
    fairness_types = ['DP', 'EOP', 'CAL']
    sens_atts = {'adult': ['race'],
                 'german': ['Personal']}

    resdf = old_resdf.copy()

    # One error column per loss type and one fairness column per metric, for each split.
    res_cols = []
    for m in ['train', 'test']:
        for l in loss_types:
            res_cols.append('{}-{}_error-{}'.format(al, m, l))
        for f in fairness_types:
            res_cols.append('{}-{}_fairness-{}'.format(al, m, f))
    for col in res_cols:
        resdf[col] = np.nan

    for resid, row in resdf.iterrows():
        # Only rows whose metric columns are still empty are computed.
        if not row[res_cols].isnull().all():
            continue

        # Entry of the parameter frame that belongs to this configuration.
        rel = df_subset(al_params, row[orig_cols])
        assert rel.shape[0] <= 1  # Guarantee just one (excluding multi-index mappings)
        if rel.shape[0] == 0:
            continue
        rel_id = rel.index[0]

        alldata, all_y_all, d_atts = dp.data_loader(get_dset_fname(row['Dataset'], basedir),
                                                    proc_fteng(row['Fteng']),
                                                    dsize=reddata,
                                                    bin=row['Bin'])
        train_data, train_y_all, d_atts, _, _, test_data, test_y_all = dp.train_val_test_split(
            alldata, all_y_all, d_atts, test=row['TestSet'])

        # Load the stored model and predict on both splits. Load failures now propagate:
        # the former bare `except: pdb.set_trace()` left the model undefined or stale
        # from the previous row. NOTE: read_pickle / torch.load execute pickled code,
        # so only point res_dir at trusted result files.
        if al == 'icp':
            model = models.InvariantCausalPrediction()
            learned_model = pd.read_pickle(join(res_dir, rel.loc[rel_id, 'coeffs']))
            train_predictions = model.predict(train_data, learned_model)
            test_predictions = model.predict(test_data, learned_model)

        elif al in ('irm', 'linear_irm'):
            if al == 'irm':
                model = models.InvariantRiskMinimization()
            else:
                model = models.LinearInvariantRiskMinimization()
            learned_model = torch.load(join(res_dir, rel.loc[rel_id, 'phi']))
            train_predictions = model.predict(train_data.values, learned_model, hid_layers=200)
            test_predictions = model.predict(test_data.values, learned_model, hid_layers=200)

        elif al == 'linreg':
            model = models.Linear()
            learned_model = pd.read_pickle(join(res_dir, rel.loc[rel_id, 'regressors']))
            train_predictions = model.predict(train_data, learned_model)
            test_predictions = model.predict(test_data, learned_model)

        elif al == 'logreg':
            model = models.LogisticReg()
            learned_model = pd.read_pickle(join(res_dir, rel.loc[rel_id, 'regressors']))
            train_predictions = model.predict(train_data, learned_model)
            test_predictions = model.predict(test_data, learned_model)

        elif al == 'mlp':
            model = models.MLP()
            learned_model = torch.load(join(res_dir, rel.loc[rel_id, 'weights']))
            train_predictions = model.predict(train_data.values, learned_model, hid_layers=50)
            test_predictions = model.predict(test_data.values, learned_model, hid_layers=50)

        elif al == 'constant':
            model = models.Constant()
            train_predictions = model.predict(train_data)
            test_predictions = model.predict(test_data)

        else:
            # Without this guard an unknown name would reuse the previous row's predictions.
            raise ValueError('Algo not implemented: {}'.format(al))

        # Compute metrics on predictions, once per split.
        splits = [('train', train_predictions, train_y_all, train_data),
                  ('test', test_predictions, test_y_all, test_data)]
        for m, predictions, y_all, data in splits:
            error_cols = ['{}-{}_error-{}'.format(al, m, ltype) for ltype in loss_types]
            fair_cols = ['{}-{}_fairness-{}'.format(al, m, ftype) for ftype in fairness_types]

            # Special case: no predictions for this split -> mark every metric as 'NA'.
            if predictions.empty:
                for col in error_cols + fair_cols:
                    # The marker is a string, so the column has to hold objects.
                    resdf[col] = resdf[col].astype(object)
                    resdf.loc[resid, col] = 'NA'
                continue

            for ltype, col in zip(loss_types, error_cols):
                resdf.loc[resid, col] = compute_loss(predictions.values, y_all.values, ltype=ltype)

            binary_predictions = pred_binarize(predictions.values)
            attributes = sens_atts[row['Dataset']]
            for ftype, col in zip(fairness_types, fair_cols):
                # Mean over the sensitive attributes; an undefined (nan) gap contributes 0.
                full_fair = 0
                for s in attributes:
                    fairness = fairness_dp(binary_predictions, y_all.values,
                                           data, {s: d_atts[s]}, ftype=ftype)
                    if not np.isnan(fairness):
                        full_fair += fairness / len(attributes)
                resdf.loc[resid, col] = full_fair

    return resdf

# Hyperparameters

In [None]:
def split_data(row):
    '''Reload the dataset described by ``row`` and split it into train / validation / test.

    Reads the 'Dataset', 'Fteng', 'Bin', 'TestSet' and 'Seed' entries of ``row`` and the
    module-level ``basedir``. The split is requested with val=0.2, so ``row`` must not
    carry its own 'Val' entry.

    :param row: one row of a parameter dataframe (pandas series)
    :return: (train_data, train_labels, val_data, val_labels, test_data, test_labels)
    '''
    features, targets, d_atts = dp.data_loader(
        get_dset_fname(row['Dataset'], basedir),
        proc_fteng(row['Fteng']),
        dsize=-1,
        bin=row['Bin'],
    )

    assert 'Val' not in row.index

    # The splitter also hands back the attribute mapping (third item), which is not needed here.
    train_data, train_labels, _, val_data, val_labels, test_data, test_labels = dp.train_val_test_split(
        features, targets, d_atts, val=0.2, test=row['TestSet'], seed=row['Seed']
    )

    return train_data, train_labels, val_data, val_labels, test_data, test_labels
        

def compute_hyperparameters(true_resdf):
    '''Recompute the penalised training and validation loss of every stored model.

    Each row of ``true_resdf`` describes one trained model: its 'Algo', the dataset
    settings consumed by ``split_data``, its hyperparameters and the path of the saved
    parameters relative to the module-level ``res_dir``. The data is re-split, the model
    is reloaded and its loss plus regularisation term is evaluated on both splits.

    :param true_resdf: parameter dataframe, one row per trained model (pandas df)
    :return: deep copy of ``true_resdf`` with 'training_loss' and 'validation_loss'
             columns; rows whose IRM model could not be loaded are removed
    :raises ValueError: if a row names an algorithm that is not handled here
    '''
    def compute_irm_loss(model, logits, labels, pen_reg):
        '''Mean NLL plus pen_reg times the model's invariance penalty.'''
        logits, labels = make_tensor(logits.values), make_tensor(labels.values)
        loss = model.mean_nll(logits, labels)
        pen = model.penalty(logits, labels)
        return (loss + (pen_reg * pen)).detach().numpy()

    def compute_linreg_loss(logits, labels, weight, lam):
        '''Mean squared error plus lam times the weight norm.'''
        return ((logits - labels) ** 2).mean() + (weight * lam)

    def compute_logreg_loss(logits, labels, weight, lam):
        '''Binary cross-entropy (on logits) plus lam times the weight norm.'''
        return F.binary_cross_entropy_with_logits(logits, labels).detach().numpy() + (weight * lam)

    def compute_mlp_loss(logits, labels, weight_pen, lam):
        '''Binary cross-entropy (on logits) plus lam times the weight penalty.'''
        logits, labels = make_tensor(logits.values), make_tensor(labels.values)
        return F.binary_cross_entropy_with_logits(logits, labels).detach().numpy() + (weight_pen * lam).detach().numpy()

    resdf = true_resdf.copy(deep=True)
    resdf['training_loss'] = np.nan
    resdf['validation_loss'] = np.nan

    unloadable = []  # index labels of rows whose stored model could not be read

    for resid, row in resdf.iterrows():

        # Load the data
        train_data, train_labels, val_data, val_labels, _, _ = split_data(row)
        algo_name = row['Algo']

        if algo_name in ('irm', 'linear_irm'):
            # Best effort: a model that cannot be loaded removes its row from the result.
            # The original called resdf.drop(resid) without keeping the returned frame,
            # so such rows were never actually dropped.
            try:
                if algo_name == 'irm':
                    src = models.InvariantRiskMinimization()
                else:
                    src = models.LinearInvariantRiskMinimization()
                params = torch.load(join(res_dir, row['phi']))
            except Exception:
                unloadable.append(resid)
                continue

            train_logits = src.predict(train_data.values, params, hid_layers=row['HidLayers'])
            train_loss = compute_irm_loss(src, train_logits, train_labels, row['PenWeight'])
            val_logits = src.predict(val_data.values, params, hid_layers=row['HidLayers'])
            val_loss = compute_irm_loss(src, val_logits, val_labels, row['PenWeight'])

        elif algo_name == 'linreg':
            src = models.Linear()
            coeffs = pd.read_pickle(join(res_dir, row['regressors']))
            weight = src.get_weight_norm(coeffs)

            train_logits = src.predict(train_data, coeffs)
            train_loss = compute_linreg_loss(train_logits.values, train_labels.values, weight, row['Reg'])
            val_logits = src.predict(val_data, coeffs)
            val_loss = compute_linreg_loss(val_logits.values, val_labels.values, weight, row['Reg'])

        elif algo_name == 'logreg':
            src = models.LogisticReg()
            coeffs = pd.read_pickle(join(res_dir, row['regressors']))
            weight = src.get_weight_norm(coeffs)

            # NOTE(review): this branch scores logistic regression with the squared-error
            # helper while compute_logreg_loss is never called. Left unchanged because it
            # is not visible here whether predict() returns logits or probabilities.
            train_logits = src.predict(train_data, coeffs)
            train_loss = compute_linreg_loss(train_logits.values, train_labels.values, weight, row['Reg'])
            val_logits = src.predict(val_data, coeffs)
            val_loss = compute_linreg_loss(val_logits.values, val_labels.values, weight, row['Reg'])

        elif algo_name == 'mlp':
            src = models.MLP()
            weights = torch.load(join(res_dir, row['weights']))
            w_norm = src.get_weight_norm(weights, dsize=train_data.shape[1], hid_layers=row['HidLayers'])

            train_logits = src.predict(train_data.values, weights, hid_layers=row['HidLayers'])
            train_loss = compute_mlp_loss(train_logits, train_labels, w_norm, row['L2_WeightPen'])
            val_logits = src.predict(val_data.values, weights, hid_layers=row['HidLayers'])
            val_loss = compute_mlp_loss(val_logits, val_labels, w_norm, row['L2_WeightPen'])

        else:
            # Otherwise the losses of the previous row would be written for this one.
            raise ValueError('Unimplemented Algo: {}'.format(algo_name))

        resdf.loc[resid, 'training_loss'] = train_loss
        resdf.loc[resid, 'validation_loss'] = val_loss

    return resdf.drop(index=unloadable)


In [None]:
res = compute_hyperparameters(algo_params)

# Remove the column that only holds the path of the stored model; it is not needed for
# inspecting the losses. 'linear_irm' is the spelling used by the functions above; the
# hyphenated form is still accepted so existing settings keep working.
if algo in ('irm', 'linear_irm', 'linear-irm'):
    res = res.drop('phi', axis=1)
elif algo in ('linreg', 'logreg'):
    res = res.drop('regressors', axis=1)
elif algo == 'mlp':
    res = res.drop('weights', axis=1)
else:
    raise Exception('Unimplemented Algo')

In [None]:
# Per-dataset views of the loss table: *t is ordered by training loss, *v by validation loss
# (ascending, so the lowest loss comes first).
adult = res.loc[res['Dataset'] == 'adult']
at = adult.sort_values(by='training_loss')
av = adult.sort_values(by='validation_loss')

german = res.loc[res['Dataset'] == 'german']
gt = german.sort_values(by='training_loss')
gv = german.sort_values(by='validation_loss')

In [None]:
# German runs ordered by validation loss, capped at the first 1000 rows.
gv.iloc[:1000]

In [None]:
# Columns removed before summarising. Alternative noted by the author:
#   ['Algo', 'Fteng', 'Dataset', 'ReduceDsize', 'Bin', 'Eq_Estrat', 'Envs']
drop_cols = ['Algo', 'Fteng', 'Dataset', 'ReduceDsize', 'Bin']
# Columns of interest when browsing. Alternative noted by the author: ["TestSet", 'Reg']
view_cols = ["TestSet", 'LR', 'N_Iterations', 'L2_WeightPen', 'HidLayers']

# Mean training / validation loss of the German runs per (TestSet, N_Iterations) pair.
tmp = (
    gv.drop(drop_cols, axis=1)
      .groupby(["TestSet", 'N_Iterations'])[['training_loss', 'validation_loss']]
      .mean()
)
tmp.iloc[:200]

# Test Evaluation

In [None]:
# Columns that identify one experiment configuration. The environment-based algorithms
# additionally carry 'Eq_Estrat' and 'Envs'. 'linear_irm' is the spelling used by the
# functions above; the hyphenated form is still accepted.
if algo in ['irm', 'linear_irm', 'linear-irm', 'icp']:
    orig_cols = ['Dataset', 'ReduceDsize', 'Eq_Estrat', 'Envs', 'Seed', 'Fteng', 'Bin', 'TestSet']
elif algo in ['linreg', 'logreg', 'mlp', 'constant']:
    orig_cols = ['Dataset', 'ReduceDsize', 'Seed', 'Fteng', 'Bin', 'TestSet']
else:
    raise Exception('Algo not implemented')

orig_results = generate_all_existing_results(orig_cols, [algo_params])
orig_results.head(25)

In [None]:
# Accuracy and fairness metrics of the chosen algorithm for every configuration.
results = compute_results(
    al=algo,
    al_params=algo_params,
    old_resdf=orig_results,
    orig_cols=orig_cols,
)

In [None]:
# Allow very wide cells so long values are not truncated in the rendered table.
pd.options.display.max_colwidth = 4000
results.iloc[:20]

# Save To Excel

In [None]:
excel_fname = '{}_results.xlsx'.format(algo)
analysis_dir = join(os.getcwd(), res_dir, algo, 'analysis')
# Nothing earlier in the notebook creates the analysis folder, and to_excel does not
# create missing directories, so make sure it exists before writing.
os.makedirs(analysis_dir, exist_ok=True)
results.to_excel(join(analysis_dir, excel_fname))