### Import Packages and Set Global Variables

In [None]:
import time
import math
import copy
import torch
import pickle
import random
import warnings
import numpy as np
import pandas as pd
import scienceplots
import torch.nn as nn
import torch.optim as opt
import matplotlib.cm as cm
import matplotlib.pyplot as plt

from copy import deepcopy
from torch.autograd import grad
from scipy.stats import spearmanr
from sklearn.linear_model import SGDClassifier
from matplotlib.ticker import FormatStrFormatter
from folktables import ACSDataSource, ACSPublicCoverage
from sklearn.metrics import mean_absolute_error, log_loss, accuracy_score

# Suppress every warning for the rest of the session.
warnings.filterwarnings("ignore")

# Euler's number; used below as E ** epsilon when turning a privacy budget
# into randomized-response probabilities.
E = math.e

### Utility Functions

#### Plot results

In [None]:
def visualize_result(e_k_actual, e_k_estimated, ep, k_):
    """Scatter estimated against actual loss differences with a linear fit.

    Prints the mean absolute error and the Spearman rank correlation between
    the two sequences, then shows the figure.

    Args:
        e_k_actual: Sequence of actual loss differences (x-axis).
        e_k_estimated: Sequence of estimated loss differences (y-axis),
            aligned element-wise with ``e_k_actual``.
        ep: Not used by the function; kept so existing callers keep working.
        k_: Value shown in the title as ``k=<k_>%``.
    """
    plt.rcParams['figure.dpi'] = 300
    plt.style.use(['science'])
    # One colour per point, spread evenly over the 'cool' colormap.
    colors = cm.cool(np.linspace(0, 1, len(e_k_estimated)))
    fig, ax = plt.subplots()

    ax.yaxis.set_major_formatter(FormatStrFormatter('%.4f'))
    ax.xaxis.set_major_formatter(FormatStrFormatter('%.4f'))

    margin = .0001
    min_x, max_x = np.min(e_k_actual), np.max(e_k_actual)
    min_y, max_y = np.min(e_k_estimated), np.max(e_k_estimated)

    # Least-squares straight line through the points. It is drawn over the
    # visible x-range; the previous start point, -p(2)/p(1), was a ratio of
    # two polynomial evaluations rather than an x-coordinate, so the line
    # could begin anywhere relative to the plotted window.
    fit = np.poly1d(np.polyfit(e_k_actual, e_k_estimated, 1))
    xx = np.linspace(min_x - margin, max_x + margin)
    ax.plot(xx, fit(xx), ls="-", color='k')

    for idx in range(len(e_k_actual)):
        ax.scatter(e_k_actual[idx], e_k_estimated[idx], zorder=2, s=15, color=colors[idx])

    ax.set_title(f'Actual vs. Estimated loss for k={k_:.2f}%', fontsize=8)
    ax.set_xlabel('Actual loss difference', fontsize=8)
    ax.set_ylabel('Estimated loss difference', fontsize=8)

    ax.set_xlim(min_x - margin, max_x + margin)
    ax.set_ylim(min_y - margin, max_y + margin)

    mae = mean_absolute_error(e_k_actual, e_k_estimated)
    rank_corr = spearmanr(e_k_actual, e_k_estimated).correlation
    text = 'MAE = {:.03}\nP = {:.03}'.format(mae, rank_corr)
    print(text)
    plt.xticks(rotation = 45, fontsize=7, visible=True)
    plt.yticks(fontsize=7)

    plt.show()

#### Select a random k% of the training data

In [None]:
def get_data_group(dfTrain, feature_set, label, k):
    """Randomly split ``dfTrain`` into a k% sample and the remaining rows.

    The sample is drawn uniformly, without replacement, from the whole frame;
    no column (e.g. ``'sex'``) is used to restrict the candidates. The former
    ``dfTrain['sex'] == 0`` selection was computed but never used, so it has
    been removed.

    Args:
        dfTrain: Training DataFrame. Rows are removed from the remainder by
            index label, so index labels are assumed to be unique.
        feature_set: Column names to return as features.
        label: Name of the label column.
        k: Percentage (0-100) of ``len(dfTrain)`` to sample; the resulting
            row count is truncated to an integer.

    Returns:
        Tuple ``(selected_X, selected_y, not_selected_X, not_selected_y)``.
    """
    num_to_sample = int((k / 100) * len(dfTrain))

    sampled_group = dfTrain.sample(n=num_to_sample, ignore_index=False)
    not_selected = dfTrain.drop(sampled_group.index)

    return (
        sampled_group[feature_set],
        sampled_group[label],
        not_selected[feature_set],
        not_selected[label],
    )


#### Get and clean Adult dataset

In [None]:
# Substring recodes applied in this order as (column, old, new). The patterns
# contain no regex metacharacters, so they are replaced literally.
_ADULT_RECODES = (
    ('workclass', 'State-gov', 'Government'),
    ('workclass', 'Federal-gov', 'Government'),
    ('workclass', 'Local-gov', 'Government'),
    ('workclass', 'Self-emp-not-inc', 'Self-Employed'),
    ('workclass', 'Self-emp-inc', 'Self-Employed'),
    ('workclass', 'Private', 'Privately-Employed'),
    ('occupation', 'Armed-Forces', 'Protective-serv'),
    ('marital-status', 'Married-AF-spouse', 'Married'),
    ('marital-status', 'Married-civ-spouse', 'Married'),
    ('marital-status', 'Married-spouse-absent', 'Married'),
    ('marital-status', 'Divorced', 'Not-married'),
    ('marital-status', 'Never-married', 'Not-married'),
    ('marital-status', 'Separated', 'Not-married'),
    ('marital-status', 'Widowed', 'Not-married'),
)

# Columns expanded into one-hot indicator columns.
_ADULT_DUMMY_COLUMNS = ['workclass', 'education-num', 'marital-status', 'occupation',
                        'relationship', 'native-country']

# Indicator columns for the "unknown" category, dropped after one-hot encoding.
_ADULT_UNKNOWN_DUMMIES = ['workclass_ ?', 'native-country_ ?', 'occupation_ ?']

# Numeric columns replaced by a 0/1 "at or above the column mean" flag.
_ADULT_BINARIZED = ['age', 'hours-per-week', 'capital-gain', 'capital-loss']


def _clean_adult_frame(df, extra_drop=()):
    """Apply the shared Adult cleaning/encoding steps to one raw frame.

    Args:
        df: Raw frame with the 15 named Adult columns. Its ``income-class``
            column is modified in place before the frame is copied.
        extra_drop: Additional one-hot column names to drop; every name must
            exist after encoding, otherwise ``drop`` raises ``KeyError``.

    Returns:
        The cleaned, fully numeric DataFrame.
    """
    # Strip the literal '.' from the labels. regex=False is essential: as a
    # regular expression '.' matches every character, which on pandas >= 2.0
    # would blank out every label.
    df["income-class"] = df["income-class"].str.replace(".", "", regex=False)

    # Remove question mark
    # NOTE(review): this compares against the bare string '?'. If the raw
    # values carry a leading space (as ' White' and the '_ ?' column names
    # below suggest), no row is removed here and unknowns are handled only by
    # dropping the '_ ?' indicator columns - confirm this is intended.
    df = df[(df.values != '?').all(axis=1)]

    df = df.drop(['fnlwgt', 'education'], axis=1)
    df = df.drop_duplicates()
    df = df.dropna(how='any', axis=0)

    for column, old, new in _ADULT_RECODES:
        df[column] = df[column].str.replace(old, new, regex=False)

    # Integer category codes, assigned per frame.
    df['sex'] = df['sex'].astype('category').cat.codes
    df['income-class'] = df['income-class'].astype('category').cat.codes
    df['race'] = np.where(df['race'] == ' White', 1, 0)

    # dtype is pinned to uint8 (the pandas 1.x default) so that newer pandas,
    # which defaults to bool, still yields an all-numeric frame.
    df = pd.get_dummies(df, columns=_ADULT_DUMMY_COLUMNS, drop_first=False, dtype=np.uint8)
    df = df.drop(list(extra_drop) + _ADULT_UNKNOWN_DUMMIES, axis=1)

    df[_ADULT_BINARIZED] = np.where(df[_ADULT_BINARIZED] >= df[_ADULT_BINARIZED].mean(), 1, 0)
    return df


def get_adult():
    """Load, clean and re-split the Adult data set.

    Reads ``data/adult.data`` and ``data/adult.test`` (the first line of the
    test file is skipped), cleans both frames with the same steps, pools and
    shuffles them, and draws a fresh random 80/20 train/test split.

    Returns:
        Tuple ``(dfTrain, dfTest, label)`` where ``label`` is the name of the
        target column, ``'income-class'``.
    """
    train_url = 'data/adult.data'
    test_url = 'data/adult.test'

    dfTrain = pd.read_csv(train_url, header=None, sep=',')
    dfTest = pd.read_csv(test_url, header=None, sep=',', skiprows=[0])

    # assign column names
    columns = ['age', 'workclass', 'fnlwgt', 'education', 'education-num', 'marital-status', 'occupation',
               'relationship', 'race', 'sex', 'capital-gain', 'capital-loss', 'hours-per-week', 'native-country',
               'income-class']

    dfTrain.columns, dfTest.columns = columns, columns

    # This indicator is dropped from the training frame only; the test frame
    # is not expected to contain it.
    dfTrain = _clean_adult_frame(dfTrain, extra_drop=['native-country_ Holand-Netherlands'])
    dfTest = _clean_adult_frame(dfTest)

    # Pool both parts, shuffle (resetting the index), then re-split 80/20.
    data = pd.concat([dfTrain, dfTest], axis=0, ignore_index=False)
    data = data.sample(frac=1, ignore_index=True)

    num_train = int(len(data) * .8)
    dfTrain = data.sample(n=num_train, replace=False, axis=0, ignore_index=False)
    dfTest = data.drop(dfTrain.index, axis=0)

    # Rows get NaNs when a one-hot column exists in only one of the two parts.
    dfTrain = dfTrain.dropna(how='any', axis=0)
    dfTest = dfTest.dropna(how='any', axis=0)

    label = 'income-class'
    return dfTrain, dfTest, label

#### Sigmoid function

In [None]:
def sigmoid(z):
    """Return the logistic function 1 / (1 + exp(-z)) of ``z``.

    ``z`` may be a scalar or a NumPy array; arrays are handled element-wise.
    """
    exp_neg = np.exp(-z)
    return 1.0 / (1 + exp_neg)

### Randomized Response
Get the corresponding p and q values based on an epsilon value

In [None]:
def get_p(epsilon):
    """Return the randomized-response probabilities for ``epsilon``.

    Computes ``p = e^epsilon / (1 + e^epsilon)`` and ``q = 1 - p``. The value
    is evaluated in a numerically stable form so that very large or very
    negative ``epsilon`` saturates at 1 or 0 instead of overflowing.

    Args:
        epsilon: Real-valued privacy parameter.

    Returns:
        Tuple ``(p, q)`` of floats with ``p + q == 1``.
    """
    if epsilon >= 0:
        # Algebraically identical to e^eps / (1 + e^eps), but exp(-eps) can
        # only underflow to 0, never overflow.
        p = 1.0 / (1.0 + math.exp(-epsilon))
    else:
        exp_eps = math.exp(epsilon)
        p = exp_eps / (1.0 + exp_eps)
    q = 1 - p

    return p, q

### Models
Pytorch logistic regression model used in calculating the influence function

In [None]:
class LogReg(torch.nn.Module):
    """Linear logit model whose parameters are copied from a fitted estimator.

    ``scikit_model`` must expose ``coef_`` and ``intercept_``; both are
    converted to float32 and installed as the weight and bias of ``fc1``.
    The layer is created without a bias, but a bias parameter is attached
    afterwards from ``intercept_``, so ``parameters()`` yields both tensors.
    """

    def __init__(self, num_features, scikit_model):
        super().__init__()

        self.fc1 = torch.nn.Linear(num_features, 1, bias=False)
        self.criterion = torch.nn.BCEWithLogitsLoss(reduction='mean')

        coef = torch.from_numpy(scikit_model.coef_).float()
        intercept = torch.from_numpy(np.array(scikit_model.intercept_)).float()

        # Replace the randomly initialised weight and attach the intercept.
        self.fc1.weight = nn.Parameter(coef)
        self.fc1.bias = nn.Parameter(intercept)

    def forward(self, x):
        """Return raw logits (no sigmoid applied) for the inputs ``x``."""
        return self.fc1(x)

### Influence Calculation Functions


#### Main function for calling all required parts to calculate the influence score

In [None]:
def calc_influence_single(scikit_model, epsilon, train_data, test_data, group_data, device, num_features, criterion):
    """Estimate the test-loss change caused by perturbing a group's labels.

    Args:
        scikit_model: Fitted estimator exposing ``coef_`` / ``intercept_``.
        epsilon: Privacy parameter forwarded to ``grad_training``.
        train_data: ``[X, y]`` tensors used to build the Hessian.
        test_data: ``[X, y]`` tensors used for the test-loss gradient.
        group_data: ``[selected_X, selected_y, not_selected_X,
            not_selected_y]`` as pandas objects.
        device: Torch device the model and tensors live on.
        num_features: Input dimension passed to ``LogReg``.
        criterion: Loss used for the Hessian and the test gradient.

    Returns:
        Tuple ``(influence, seconds)``: the estimate as a NumPy value and the
        wall-clock time spent computing it.
    """
    torch_model = LogReg(num_features, scikit_model)
    torch_model.to(device)

    start = time.time()
    # explicit_hess already returns the *inverse* Hessian.
    inv_hess = explicit_hess(torch_model, train_data, device, criterion)

    grad_test = grad_z([test_data[0], test_data[1]], torch_model, device, criterion)

    s_test_vec = torch.mm(grad_test[0], inv_hess.to(device))

    # Every selected label is flipped (0 <-> 1); grad_training scales the
    # resulting gradient difference by 1 / (1 + e^epsilon).
    S_pert = 1 - group_data[1]
    grad_z_vec = grad_training([group_data[0], group_data[1]], S_pert, torch_model, device, epsilon)

    # Total number of training rows (selected + not selected). The frames were
    # previously concatenated just to take this length, which added pandas
    # copying time to the measured duration.
    num_rows = len(group_data[2]) + len(group_data[0])

    # NOTE(review): the textbook first-order estimate carries a leading minus
    # sign (-grad_test^T H^-1 grad_diff / n); here the product is taken without
    # it - confirm the intended sign convention.
    influence = torch.dot(s_test_vec.flatten(), grad_z_vec[0].flatten()) * (1 / num_rows)
    end = time.time() - start

    return influence.cpu().detach().numpy(), end

#### Explicitly calculate the Hessian matrix

In [None]:
def explicit_hess(model, train_data, device, criterion):
    """Return the inverse Hessian of the loss w.r.t. the ``fc1`` weights.

    The Hessian is built one row at a time by differentiating each component
    of the weight gradient again. Only the first parameter returned by
    ``model.parameters()`` is used, which is assumed to be ``fc1.weight``.

    Args:
        model: Module with an ``fc1`` layer whose weight has shape (1, d).
        train_data: ``[X, y]`` tensors fed to the model and the criterion.
        device: Not used here; the result is allocated on the default device.
        criterion: Loss applied to the flattened logits and ``y``.

    Returns:
        A (d, d) tensor holding the inverse of the Hessian.
    """
    outputs = model(train_data[0])
    loss = criterion(outputs.ravel(), train_data[1])  # uses the criterion's own reduction

    # create_graph=True keeps the gradient differentiable for the second pass.
    first_order = grad(loss, model.parameters(), retain_graph=True, create_graph=True)
    weight_grad = first_order[0][0]

    dim = len(model.fc1.weight[0])
    hess_params = torch.zeros(dim, dim)

    for row_idx in range(dim):
        second_order = grad(weight_grad[row_idx], model.parameters(), retain_graph=True)
        hess_params[row_idx] = second_order[0][0]

    return torch.linalg.inv(hess_params)

#### Get the gradient of the test data

In [None]:
def grad_z(test_data, model, device, criterion):
    """Return the gradient of the loss on ``test_data`` w.r.t. the parameters.

    Args:
        test_data: ``[X, y]`` tensors.
        model: Module producing logits; it is switched to eval mode.
        device: Not used here.
        criterion: Loss applied to the flattened logits and ``y``.

    Returns:
        Tuple of gradient tensors, one per entry of ``model.parameters()``.
    """
    model.eval()

    features, targets = test_data[0], test_data[1]
    loss = criterion(model(features).ravel(), targets)  # uses the criterion's own reduction

    return grad(loss, model.parameters())

#### Get the gradient of the training data

In [None]:
def grad_training(train_data, y_perts, model, device, epsilon):
    """Return the scaled gradient difference between perturbed and original labels.

    The summed BCE-with-logits loss is evaluated on the same logits once with
    ``y_perts`` and once with the original labels; the difference is
    multiplied by ``1 / (1 + e^epsilon)`` and differentiated w.r.t. the
    model parameters.

    Args:
        train_data: ``[X, y]`` pandas objects (their ``.values`` are used).
        y_perts: Perturbed labels as a pandas object aligned with ``y``.
        model: Module producing logits; it is switched to eval mode.
        device: Torch device the tensors are moved to.
        epsilon: Privacy parameter controlling the scale factor.

    Returns:
        Tuple of gradient tensors, one per entry of ``model.parameters()``.
    """
    def _to_device(frame):
        # float32 tensor built from the underlying NumPy values
        return torch.FloatTensor(frame.values).to(device)

    features = _to_device(train_data[0])
    original_labels = _to_device(train_data[1])
    perturbed_labels = _to_device(y_perts)

    summed_bce = torch.nn.BCEWithLogitsLoss(reduction='sum')

    model.eval()
    logits = model(features)

    loss_original = summed_bce(logits.ravel(), original_labels)
    loss_perturbed = summed_bce(logits.ravel(), perturbed_labels)

    scale = float(1/(1 + (E ** epsilon)))
    loss = scale*(loss_perturbed - loss_original)

    return grad(loss, model.parameters())

### Main Function

In [None]:
def Main(dataset, epsilons, ks, num_rounds):
    """Compare influence-estimated and retrained test-loss changes.

    For every round the Adult data is reloaded and a logistic-regression
    model is fitted. Then, for every ``(epsilon, k)`` pair, a random k% of
    the training rows is selected, the loss change caused by perturbing their
    labels is (a) estimated with ``calc_influence_single`` and (b) measured by
    retraining on labels flipped with probability ``q = 1 - p``.

    Args:
        dataset: Not used; the Adult data set is always loaded.
        epsilons: Iterable of privacy parameters.
        ks: Iterable of group sizes in percent of the training set.
        num_rounds: Number of independent repetitions.

    Returns:
        Tuple ``(actual, estimated, times)`` of nested lists indexed as
        ``[round][epsilon][k]``: measured loss differences, estimated loss
        differences, and seconds spent on each estimate.
    """
    if torch.cuda.is_available():
        # GPU index 7 is kept as the preferred device; fall back to the first
        # GPU on hosts that have fewer than eight instead of failing.
        device = 'cuda:7' if torch.cuda.device_count() > 7 else 'cuda:0'
    else:
        device = 'cpu'
    criterion = torch.nn.BCEWithLogitsLoss(reduction='mean')

    all_orig_loss_e_k = []
    all_est_loss_e_k = []
    all_time = []

    for nr in range(num_rounds):
        print(f'\nRound {nr+1}')
        ############
        # Get data #
        ############
        print('\nGetting Data...')

        dfTrain, dfTest, label = get_adult()

        feature_set = list(set(dfTrain.columns) - {label})
        num_features = len(feature_set)

        X_train, X_test = dfTrain[feature_set].values, dfTest[feature_set].values
        y_train, y_test = dfTrain[label].values, dfTest[label].values

        x_test_input = torch.FloatTensor(X_test).to(device)
        y_test_input = torch.FloatTensor(y_test).to(device)

        x_train_input = torch.FloatTensor(X_train).to(device)
        y_train_input = torch.FloatTensor(y_train).to(device)

        ##############################################
        # Train original model and get original loss #
        ##############################################
        print('Training original model...')
        # penalty=None (not the string 'None'): recent scikit-learn releases
        # reject the string, while None disables regularisation everywhere.
        LR = SGDClassifier(loss='log_loss', penalty=None, eta0=0.01, fit_intercept=False, learning_rate='constant')
        LR.fit(X_train, y_train)

        model_to_send = deepcopy(LR)

        predictions = LR.predict_proba(X_test)
        # The former eps=1e-15 argument is omitted: it equalled the old
        # default and has been removed from recent scikit-learn releases.
        test_loss_ori = log_loss(y_test, predictions, labels=[0,1])

        e_k_act_losses = []
        e_k_est_losses = []
        influence_time = []

        ##################################################################
        # Perform influence and retraining for all epsilons and k values #
        ##################################################################
        print('\nBegining epsilon and k rounds')
        print('-----------------------------')
        for ep in epsilons:
            print(f'\nEpsilon: {ep}')

            k_act_losses = []
            k_est_losses = []
            inf_time = []

            for k in ks:
                # Influence
                print(f'k: {k:.2f}')
                selected_group_X, selected_group_y, not_selected_group_X, not_selected_group_y = get_data_group(dfTrain, feature_set, label, k)
                loss_diff_approx, tot_time = calc_influence_single(model_to_send, ep, [x_train_input, y_train_input], [x_test_input, y_test_input], [selected_group_X, selected_group_y, not_selected_group_X, not_selected_group_y], device, num_features, criterion)
                print(f'Approx difference: {loss_diff_approx:.5f}')

                ###########
                # Retrain #
                ###########

                p, q = get_p(ep)

                # Randomized response: one uniform draw per selected label;
                # the label is kept when the draw is <= p, flipped otherwise.
                selected_labels = selected_group_y.values
                keep = np.random.random(len(selected_labels)) <= p
                pert_selected_group_y = np.where(keep, selected_labels, 1 - selected_labels)

                # Same row order for labels and features: untouched rows
                # first, then the selected group.
                y_w_group_pert = pd.concat([not_selected_group_y, pd.Series(pert_selected_group_y)], axis = 0, ignore_index=True)
                reconstructed_x = pd.concat([not_selected_group_X, selected_group_X], axis = 0, ignore_index=True)

                pert_LR = SGDClassifier(loss='log_loss', penalty=None, eta0=0.01, fit_intercept=False, learning_rate='constant')
                pert_LR.fit(reconstructed_x, y_w_group_pert)

                pert_predictions = pert_LR.predict_proba(X_test)
                test_loss_retrain = log_loss(y_test, pert_predictions, labels=[0,1])

                # get true loss diff
                loss_diff_true = test_loss_retrain - test_loss_ori
                print(f'True difference: {loss_diff_true:.5f}')
                k_act_losses.append(loss_diff_true)
                k_est_losses.append(loss_diff_approx)
                inf_time.append(tot_time)

            e_k_act_losses.append(k_act_losses)
            e_k_est_losses.append(k_est_losses)
            influence_time.append(inf_time)

        all_orig_loss_e_k.append(e_k_act_losses)
        all_est_loss_e_k.append(e_k_est_losses)
        all_time.append(influence_time)

    return all_orig_loss_e_k, all_est_loss_e_k, all_time

### Perform Experiment 

#### Constants

In [None]:
# 30 evenly spaced privacy parameters between 0.001 and 5 (inclusive).
epsilons = np.linspace(0.001, 5, num=30)
# 10 evenly spaced group sizes, in percent, between 1 and 30 (inclusive).
k = np.linspace(1, 30, num=10)
# Number of repetitions of the whole experiment.
rounds = 10

#### Run experiment and save results to pickle file

In [None]:
import os

# Create the output directory up front so a missing folder is detected (and
# fixed) before the long-running experiment starts, not after it finishes.
os.makedirs('results/adult', exist_ok=True)

all_orig_loss_e_k, all_est_loss_e_k, all_time = Main('adult', epsilons, k, rounds)

# Each result is pickled (binary), despite the .txt extension.
for _path, _payload in (
    ('results/adult/all_orig_loss_e_k_adult.txt', all_orig_loss_e_k),
    ('results/adult/all_est_loss_e_k_adult.txt', all_est_loss_e_k),
    ('results/adult/all_time_adult.txt', all_time),
):
    with open(_path, "wb") as _handle:   #Pickling
        pickle.dump(_payload, _handle)

#### Reorganize results for visualization

In [None]:
# Grid dimensions: one row per epsilon, one column per k.
num_eps = len(epsilons)
num_k = len(k)
num_rounds_done = len(all_orig_loss_e_k)

# Running totals over rounds, indexed as [epsilon][k].
sum_orig_loss_e_k = [[0] * num_k for _ in range(num_eps)]
sum_est_loss_e_k = [[0] * num_k for _ in range(num_eps)]
sum_time = [[0] * num_k for _ in range(num_eps)]  # allocated but not filled here

avg_time = []  # not filled here

for round_ in range(num_rounds_done):
    orig_round = all_orig_loss_e_k[round_]
    est_round = all_est_loss_e_k[round_]
    for e in range(num_eps):
        for k_ in range(num_k):
            sum_orig_loss_e_k[e][k_] = sum_orig_loss_e_k[e][k_] + orig_round[e][k_]
            sum_est_loss_e_k[e][k_] = sum_est_loss_e_k[e][k_] + est_round[e][k_]

# Per-cell mean over rounds, still indexed as [epsilon][k].
avg_orig_loss = [[total / num_rounds_done for total in row] for row in sum_orig_loss_e_k]
avg_est_loss = [[total / num_rounds_done for total in row] for row in sum_est_loss_e_k]

# Transposed view indexed as [k][epsilon], one series per k for plotting.
k_e_orig = [[avg_orig_loss[eps_idx][k_idx] for eps_idx in range(num_eps)] for k_idx in range(num_k)]
k_e_est = [[avg_est_loss[eps_idx][k_idx] for eps_idx in range(num_eps)] for k_idx in range(num_k)]


#### Visualize results

In [None]:
# One figure per k value: actual vs. estimated loss differences across epsilons.
for i, actual_series in enumerate(k_e_orig):
    visualize_result(actual_series, k_e_est[i], epsilons, k[i])