In [1]:
import time
import math
import copy
import torch
import pickle
import random
import logging
import warnings
import datetime
import pandas as pd
import numpy as np
import seaborn as sns
import torch.nn as nn
import torch.optim as opt
import matplotlib.pyplot as plt

from tqdm import tqdm
from sklearn import linear_model
from sklearn.model_selection import train_test_split
from torch.autograd import grad
from torch.autograd.functional import vhp
from get_datasets import get_diabetes, get_adult, get_law
from torch.utils.data import Subset, DataLoader
from sklearn.metrics import mean_absolute_error, r2_score, accuracy_score
from scipy.stats import spearmanr

plt.rcParams['figure.dpi'] = 300
warnings.filterwarnings("ignore")

E = math.e

### Utility Functions

In [2]:
def graph_k(data, ep, real_est):
    title = f'{real_est} Loss vs. k for epsilon = {ep}'
    sns.set(font_scale=1)
    palette = sns.color_palette("colorblind")
    ticks = np.arange(0, len(data[-1]), step=1)
    
    plt.xticks(ticks=ticks, labels=data[-1], rotation='vertical')

    if real_est == 'Real':
        plt.plot([x[0] for x in data[0][0]], color=palette[0], linestyle='-', linewidth=2.0, label='G1')
        #plt.plot([x[0] for x in data[0][1]], color=palette[1], linestyle='-', linewidth=2.0, label='G2')
#         plt.plot([x[0] for x in data[0][2]], color=palette[2], linestyle='-', linewidth=2.0, label='G3')
#         plt.plot([x[0] for x in data[0][3]], color=palette[3], linestyle='-', linewidth=2.0, label='G4')
#         plt.plot([x[0] for x in data[0][4]], color=palette[4], linestyle='-', linewidth=2.0, label='G5')
    else:
        plt.plot([x[1] for x in data[0][0]], color=palette[0], linestyle='-', linewidth=2.0, label='G1')
        #plt.plot([x[1] for x in data[0][1]], color=palette[1], linestyle='-', linewidth=2.0, label='G2')
#         plt.plot([x[1] for x in data[0][2]], color=palette[2], linestyle='-', linewidth=2.0, label='G3')
#         plt.plot([x[1] for x in data[0][3]], color=palette[3], linestyle='-', linewidth=2.0, label='G4')
#         plt.plot([x[1] for x in data[0][4]], color=palette[4], linestyle='-', linewidth=2.0, label='G5')
        
    plt.xlabel('k')
    plt.ylabel('Loss')
    plt.title(title)
    plt.legend()
    plt.tight_layout()
    plt.show()

In [3]:
def graph_ep(data, k, real_est):
    
    title = f'{real_est} Loss vs. epsilon for k = {k}'
    sns.set(font_scale=1)
    palette = sns.color_palette("colorblind")
    ticks = np.arange(0, len(data[-1]), step=1)
    plt.xticks(ticks=ticks, labels=data[-1], rotation='vertical')
    
    if real_est == 'Real':
        plt.plot([x[0] for x in data[0][0]], color=palette[0], linestyle='-', linewidth=2.0, label='G1')
        #plt.plot([x[0] for x in data[0][1]], color=palette[1], linestyle='-', linewidth=2.0, label='G2')
#         plt.plot([x[0] for x in data[0][2]], color=palette[2], linestyle='-', linewidth=2.0, label='G3')
#         plt.plot([x[0] for x in data[0][3]], color=palette[3], linestyle='-', linewidth=2.0, label='G4')
#         plt.plot([x[0] for x in data[0][4]], color=palette[4], linestyle='-', linewidth=2.0, label='G5')
    else:
        plt.plot([x[1] for x in data[0][0]], color=palette[0], linestyle='-', linewidth=2.0, label='G1')
        #plt.plot([x[1] for x in data[0][1]], color=palette[1], linestyle='-', linewidth=2.0, label='G2')
#         plt.plot([x[1] for x in data[0][2]], color=palette[2], linestyle='-', linewidth=2.0, label='G3')
#         plt.plot([x[1] for x in data[0][3]], color=palette[3], linestyle='-', linewidth=2.0, label='G4')
#         plt.plot([x[1] for x in data[0][4]], color=palette[4], linestyle='-', linewidth=2.0, label='G5')
     
    plt.xlabel('epsilon')
    plt.ylabel('Loss')
    plt.title(title)
    plt.legend()
    plt.tight_layout()
    plt.show()

In [4]:
def visualize_result(e_g_k, ep, title):
    
    # actual_loss_diff = [group 1: [k=1, k=12, ....], group 2: [k=1, k=12, ...]]
    fig, ax = plt.subplots()
    palette = sns.color_palette("colorblind")
    
    actual = [[x[0] for x in e_g_k[0]]]#, [x[0] for x in e_g_k[1]]]#, [x[0] for x in e_g_k[2]], [x[0] for x in e_g_k[3]], [x[0] for x in e_g_k[4]]]
    estimated = [[x[1] for x in e_g_k[0]]]#, [x[1] for x in e_g_k[1]]]#, [x[1] for x in e_g_k[2]], [x[1] for x in e_g_k[3]], [x[1] for x in e_g_k[4]]]

    actual_all = []
    estimated_all = []
    spearman_all = []
    mae_all = []
    
    for x in range(len(actual)):
        actual_all.extend(actual[x])
        estimated_all.extend(estimated[x])
    
    for i in range(2):
        spearman_all.append(spearmanr(actual[i], estimated[i]).correlation)
        mae_all.append(mean_absolute_error(actual[i], estimated[i]))
        
    max_abs = np.max([np.abs(actual_all), np.abs(estimated_all)])
    min_, max_ = -max_abs * 1.1, max_abs * 1.1
    
    plt.rcParams['figure.figsize'] = 6, 5
    
    ax.scatter(actual[0], estimated[0], zorder=2, s=10, color = palette[0], label='G1')
    #ax.scatter(actual[1], estimated[1], zorder=2, s=10, color = palette[1], label='G2')
#     ax.scatter(actual[2], estimated[2], zorder=2, s=10, color = palette[2], label='G3')
#     ax.scatter(actual[3], estimated[3], zorder=2, s=10, color = palette[3], label='G4')
#     ax.scatter(actual[4], estimated[4], zorder=2, s=10, color = palette[4], label='G5')

    ax.set_title(f'Actual vs. Estimated loss for {title} = {ep}')
    ax.set_xlabel('Actual loss diff')
    ax.set_ylabel('Estimated loss diff')
   
    ax.set_xlim(-(np.max(np.abs(actual_all))+.05), .1)
    ax.set_ylim(-(np.max(np.abs(estimated_all))+.005), .01)
    ax.plot(ax.get_xlim(), ax.get_ylim(), ls="-", color=palette[7])
    text = 'MAE = {:.03}\nP = {:.03}'.format(sum(mae_all)/5, sum(spearman_all)/5)
    ax.text(.1-.02, -(np.max(np.abs(estimated_all))+.005)+.001, text, verticalalignment='bottom', horizontalalignment='right')
    plt.legend()
    plt.tight_layout()
    plt.show()

In [5]:
 class CreateData(torch.utils.data.Dataset):
    def __init__(self, data, targets, pert_status):
        self.data = data
        self.targets = targets
        self.pert = pert_status

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        out_data = self.data[idx]
        out_label = self.targets[idx]
        pert_label = self.pert[idx]

        return out_data, out_label, pert_label

In [6]:
def get_data(group_method, new_train_df, feature_set, label, k):    
    if group_method == 1:
        # based on Race or Gender
        selected_group = new_train_df.loc[new_train_df['sex'] == 0]
        
        num_to_sample = round((k / 100)*len(selected_group))
       
        sampled_group = selected_group.sample(n=num_to_sample)
        not_selected = new_train_df.drop(sampled_group.index)
        
        selected_group_X = sampled_group[feature_set]
        selected_group_y = sampled_group[label]
        
        not_selected_group_X = not_selected[feature_set]
        not_selected_group_y = not_selected[label]   
    elif group_method == 2:
        # based on Race and Gender
        selected_group = new_train_df.loc[new_train_df['sex'] == 0]
        selected_group = selected_group.loc[selected_group['race_White'] == 0]
        
        num_to_sample = round((k / 100)*len(selected_group))
       
        sampled_group = selected_group.sample(n=num_to_sample)
        not_selected = new_train_df.drop(sampled_group.index)
        
        selected_group_X = sampled_group[feature_set]
        selected_group_y = sampled_group[label]
        
        not_selected_group_X = not_selected[feature_set]
        not_selected_group_y = not_selected[label]
    elif group_method == 3:
        # based on Class
        selected_group = new_train_df.loc[new_train_df[label] == 0]
        
        num_to_sample = round((k / 100)*len(selected_group))
       
        sampled_group = selected_group.sample(n=num_to_sample)
        not_selected = new_train_df.drop(sampled_group.index)
        
        selected_group_X = sampled_group[feature_set]
        selected_group_y = sampled_group[label]
        
        not_selected_group_X = not_selected[feature_set]
        not_selected_group_y = not_selected[label]
    elif group_method == 4:
        # based on Race or Gender and Class
        selected_group = new_train_df.loc[new_train_df['sex'] == 0]
        selected_group = selected_group.loc[selected_group[label] == 0]
        
        num_to_sample = round((k / 100)*len(selected_group))
       
        sampled_group = selected_group.sample(n=num_to_sample)
        not_selected = new_train_df.drop(sampled_group.index)
        
        selected_group_X = sampled_group[feature_set]
        selected_group_y = sampled_group[label]
        
        not_selected_group_X = not_selected[feature_set]
        not_selected_group_y = not_selected[label]
    elif group_method == 5: 
        # based on Race and Gender and Class
        selected_group = new_train_df.loc[new_train_df['sex'] == 0]
        selected_group = selected_group.loc[selected_group['race_White'] == 0]
        selected_group = selected_group.loc[selected_group[label] == 0]
        
        num_to_sample = round((k / 100)*len(selected_group))
       
        sampled_group = selected_group.sample(n=num_to_sample)
        not_selected = new_train_df.drop(sampled_group.index)
        
        selected_group_X = sampled_group[feature_set]
        selected_group_y = sampled_group[label]
        
        not_selected_group_X = not_selected[feature_set]
        not_selected_group_y = not_selected[label]
    
    return selected_group_X, selected_group_y, not_selected_group_X, not_selected_group_y

### Randomized Response

In [7]:
def randomize_resp(label, epsilon):

    probability = float(E ** epsilon) / float(1 + (E ** epsilon))
    
    if label == 0:
        new_label = np.random.choice([0,1], p=[probability, 1-probability])
    else:
        new_label = np.random.choice([0,1], p=[1-probability, probability])

    return new_label

In [8]:
def get_p(epsilon):
    probability = float(E ** epsilon) / float(1 + (E ** epsilon))
    p = torch.FloatTensor([[probability, 1-probability], [1-probability, probability]])
    
    return p

### Models

In [9]:
class LogisticRegression(torch.nn.Module):
    def __init__(self, num_features):
        super(LogisticRegression, self).__init__()
        
        self.fc1 = torch.nn.Linear(num_features, 1, bias=False)
        self.criterion = torch.nn.BCEWithLogitsLoss()
        
    def forward(self, x):
        logits = self.fc1(x)

        return logits
    
    def loss(self, logits, y):
        loss = self.criterion(logits.ravel(), y)
        
        probabilities = torch.sigmoid(logits)
        thresh_results = []
        
        for p in probabilities:
            if p>.5:
                thresh_results.append(1)
            else:
                thresh_results.append(0)
                
        num_correct = 0
        for r,y_ in zip(thresh_results, y):
            if r == y_:
                num_correct += 1
                
        acc = num_correct / len(y)
        
        return loss, acc

In [10]:
def train(model, dataset, epsilon, lengths):
    model.train()
    opt = torch.optim.SGD(model.parameters(), lr=.005, weight_decay=0)
    
    criterion1 = torch.nn.BCEWithLogitsLoss()
    
    pert_status = np.zeros(len(dataset[0]))
            
    train_data = CreateData(dataset[0], dataset[1], pert_status)
    train_dataloader = DataLoader(train_data, batch_size=1, shuffle=True)

    for itr in range(0, 7):
        for i, [x,y,p] in enumerate(train_dataloader):
            opt.zero_grad()
            oupt = model(x)
            
            try:
                loss_val = criterion1(oupt.ravel(), y)
            except ValueError:
                loss_val = criterion1(oupt, y)
            
            loss_val.backward()
            opt.step() 
            
    return model

### Influence Calculation Functions


In [11]:
def calc_influence_single(model, epsilon, train_data, test_data, group_data, device, rec_depth, r, damp, scale, est_hess, num_features, weight_decay, criterion):
    est_hess = inv_hess(model, [train_data[0], train_data[1]], device, rec_depth, r, criterion)
    
    grad_test = grad_z([test_data[0], test_data[1]], model, device, criterion)
    s_test_vec = torch.mm(grad_test[0], est_hess)

    P = get_p(epsilon)
    
    p_01, p_10 = P[0][1].item(), P[1][0].item()
    
    pi_1 = sum(list(group_data[1]))
    pi_0 = len(group_data[1]) - pi_1
    
    lam_0 = round(p_01 * pi_1)
    lam_1 = round(p_10 * pi_0)

    S_pert = 1 - group_data[1]
    
    y_w_group_pert = pd.concat([group_data[3], S_pert], axis = 0, ignore_index=True)
    y_wo_pert = pd.concat([group_data[3], group_data[1]], axis = 0, ignore_index=True)
    reconstructed_x = pd.concat([group_data[2], group_data[0]], axis = 0, ignore_index=True)
  
    assert len(S_pert) == len(group_data[1])
    grad_z_vec = grad_training([group_data[0],group_data[1]], S_pert, [model], device, [lam_0, lam_1], criterion)
  
    with torch.no_grad():
        influence = -sum([torch.sum(k * j).data for k, j in zip(grad_z_vec, s_test_vec)]) * ((lam_0 + lam_1)/len(train_data[0]))
       
    return influence.cpu(), est_hess

In [12]:
def inv_hess(model, train_data, device, rec_depth, r, criterion):
    
    H_inv = []
    
    i_hess = torch.zeros([len(model.fc1.weight), len(model.fc1.weight)]).to(device)
    
    logits = model(train_data[0])
    loss = criterion(logits, torch.atleast_2d(train_data[1]).T)
    
    v = grad(loss, model.parameters())
    
    for i in range(r):
        
        inv_hess = v[0]
        
        for j in range(rec_depth):
         
            rand_index = random.sample(range(len(train_data[0])), 1)
            rand_point = [train_data[0][rand_index], train_data[1][rand_index]]
            
            a = -1.0*rand_point[1]*torch.matmul(v[0], rand_point[0].T) 
            output = rand_point[1]*((torch.pow(torch.exp(a),0.5))/(torch.exp(a)+1))*rand_point[0]
            
            batch_hess = torch.outer(output.ravel(), output.ravel()) 
            batch_hess = torch.add(batch_hess, 2*1e-4*torch.eye(len(v[0])).to(device))
            
            inv_hess = torch.add(inv_hess, batch_hess)
        
        H_inv.append(inv_hess)

    with torch.no_grad():
        for i in range(r):
            i_hess = torch.add(i_hess, H_inv[i])
        
        i_hess = i_hess / r

    return i_hess

In [13]:
def grad_z(test_data, model, device, criterion):

    model.eval()

    test_data_features = test_data[0]
    test_data_labels = test_data[1]

    logits = model(test_data_features)
    loss = criterion(logits, torch.atleast_2d(test_data_labels).T)
    
    return grad(loss, model.parameters())

In [14]:
def grad_training(train_data, y_perts, parameters, device, epsilon, criterion):
    
    lam_0, lam_1 = epsilon
    lam = lam_0 + lam_1
    len_s = len(y_perts)
    
    train_data_features = torch.FloatTensor(train_data[0].values).to(device)
    train_data_labels = torch.FloatTensor(train_data[1].values).to(device)
    train_pert_data_labels = torch.FloatTensor(y_perts.values).to(device)
    
    model = parameters[0]
    model.eval()

    logits = model(train_data_features)

    orig_loss = criterion(logits, torch.atleast_2d(train_data_labels).T)
    pert_loss = criterion(logits, torch.atleast_2d(train_pert_data_labels).T)
    loss = (lam / len_s)*(pert_loss -  orig_loss)

    to_return = grad(loss, model.parameters())
    
        
    return to_return

### Main Function

In [15]:
def Main(epsilon, weight_decay, rec_depth, r, scale, damp, est_hess, dataset, group_method, k):

    if dataset == 'adult':
        data = get_adult()
        label = 'income_class'
    elif dataset == 'diabetes':
        data = get_diabetes()
        label = 'readmitted'
    else:
        data = get_law()
        label = 'admit'

    device = 'cuda:3' if torch.cuda.is_available() else 'cpu'
    criterion = torch.nn.BCEWithLogitsLoss()
    
    feature_set = set(data.columns) - {label}
    num_features = len(feature_set)
    
    X = data[feature_set]
    y = data[label]

    if dataset == 'diabetes':
        undersample = RandomUnderSampler(random_state=42)
        new_X, new_y = undersample.fit_resample(X, y)
    else:
        new_X = X
        new_y = y

    X_train, X_test, y_train, y_test = train_test_split(new_X, new_y, test_size=0.20, random_state=42)
  
    new_train_df = pd.concat([X_train, y_train], axis=1)
  
    train_sample_num = len(X_train)

    selected_group_X, selected_group_y, not_selected_group_X, not_selected_group_y = get_data(group_method, new_train_df, feature_set, label, k)
    
    x_test_input = torch.FloatTensor(X_test.values).to(device)
    y_test_input = torch.FloatTensor(y_test.values).to(device)

    x_train_input = torch.FloatTensor(X_train.values).to(device)
    y_train_input = torch.FloatTensor(y_train.values).to(device)
   
    torch_model = LogisticRegression(num_features)
    torch.save(torch_model.state_dict(), 'models/initial_config.pth')
    torch_model.to(device)
    torch_model = train(torch_model, [x_train_input, y_train_input], epsilon, None)
    
    test_loss_ori, acc_ori = torch_model.loss(torch_model(x_test_input), y_test_input)

    loss_diff_approx, est_hess = calc_influence_single(torch_model, epsilon, [x_train_input, y_train_input], [x_test_input, y_test_input], [selected_group_X, selected_group_y, not_selected_group_X, not_selected_group_y], device, rec_depth, r, damp, scale, est_hess, num_features, weight_decay, criterion)
    loss_diff_approx = - torch.FloatTensor(loss_diff_approx).cpu().numpy()

    # retrain
    P = get_p(epsilon)
    
    p_01, p_10 = P[0][1].item(), P[1][0].item()
    
    pi_1 = sum(list(selected_group_y))
    pi_0 = len(selected_group_y) - pi_1
    
    lam_0 = round(p_01 * pi_1)
    lam_1 = round(p_10 * pi_0)

    S = pd.concat([selected_group_X, selected_group_y], axis=1, ignore_index=False)

    G0 = S[label][S[label].eq(1)].sample(lam_0).index
    G1 = S[label][S[label].eq(0)].sample(lam_1).index
  
    G = S.loc[G0.union(G1)]
    not_g = S.drop(G0.union(G1))
    
    G_pert = 1 - selected_group_y
    
    y_w_group_pert = pd.concat([not_selected_group_y, not_g[label], G_pert], axis = 0, ignore_index=True)
    y_wo_pert = pd.concat([not_selected_group_y, not_g[label], G[label]], axis = 0, ignore_index=True)
    reconstructed_x = pd.concat([not_selected_group_X, not_g[feature_set], G[feature_set]], axis = 0, ignore_index=True)
    
    model_pert = LogisticRegression(num_features)
    model_pert.load_state_dict(torch.load('models/initial_config.pth'))
    model_pert.to(device)
    model_pert = train(model_pert, [torch.FloatTensor(reconstructed_x.values).to(device), torch.FloatTensor(y_w_group_pert.values).to(device)], epsilon, None)
    test_loss_retrain, acc_retrain = model_pert.loss(model_pert(x_test_input), y_test_input)

     # get true loss diff
    loss_diff_true = test_loss_retrain - test_loss_ori
    print(f"Original/Retrain Loss: {test_loss_ori:.3f}/{test_loss_retrain:.3f}")
    est_loss_diff = loss_diff_approx
    avg_loss_diff = loss_diff_true.detach().cpu().numpy()

    return avg_loss_diff, est_loss_diff, est_hess

### Perform Experiment 

#### Constants

In [16]:
epsilons = [.01, .1, 1] #.05, .1, .15, .2, .25, .3, .35, .4, .45, .5, .55, .6, .65, .7, .75, .8, .85, .9, .95, 1]

k = [25]
k_3 = np.linspace(1, 25, 5)

group_method = [1]#, 2]#, 3, 4, 5]

weight_decay = 0.01
rec_depth = 5350
r_ = 5
scale = 10
damp = 0.01 
rounds = 5

In [17]:
print(k)

[25]


In [18]:
est_hess = None

e_k_g_avg_losses = []
e_k_g_est_losses = []


for e in epsilons:
    print('\nWorking on epsilon: ', e)
    
    k_g_avg_losses = []
    k_g_est_losses = []
    
    for i, k_ in enumerate(k):
        g_avg_losses = []
        g_est_losses = []
        
        for group in group_method:
            avg_losses = []
            est_losses = []
                
            for r in range(rounds):
                avg_loss_diff, est_loss_diff, est_hess = Main(e, weight_decay, rec_depth, r_, scale, damp, est_hess, 'adult', group, k_)
                print('Round: ', r, ' - ', avg_loss_diff, est_loss_diff)
                avg_losses.append(avg_loss_diff)
                est_losses.append(est_loss_diff)
            
            g_avg_losses.append(np.mean(avg_losses))
            g_est_losses.append(np.mean(est_losses))
            print('Avg. Loss: ', np.mean(avg_losses))
            print('Est. Loss: ', np.mean(est_losses))  
                  
        k_g_avg_losses.append(g_avg_losses)
        k_g_est_losses.append(g_est_losses)
    
    e_k_g_avg_losses.append(k_g_avg_losses)
    e_k_g_est_losses.append(k_g_est_losses)
    
    with open('e_k_g_avg_losses.txt', "wb") as file:   #Pickling
        pickle.dump(e_k_g_avg_losses, file)
        
    with open('e_k_g_est_losses.txt', "wb") as file2:   #Pickling
        pickle.dump(e_k_g_est_losses, file2)


Working on epsilon:  0.01
Original/Retrain Loss: 0.341/0.361
Round:  0  -  0.020061463 780.223


KeyboardInterrupt: 

In [None]:
with open('e_k_g_est_losses.txt', 'rb') as f:
    e_k_g_est_losses = pickle.load(f)
    
with open('e_k_g_avg_losses.txt', 'rb') as f:
    e_k_g_avg_losses = pickle.load(f)

In [None]:
# [actual, estimate]
e_g_k = [[[[[] for R in range(2)] for u in range(len(k))] for _ in range(len(group_method))] for p in range(len(epsilons))]
k_g_e = [[[[[] for R in range(2)] for u in range(len(epsilons))] for _ in range(len(group_method))] for p in range(len(k))]

for e_ in range(len(epsilons)):
    k_g_avg = e_k_g_avg_losses[e_]
    k_g_est = e_k_g_est_losses[e_]
    
    for k_ in range(len(k_g_avg)):
        
        g_avg = k_g_avg[k_]
        g_est = k_g_est[k_]
        
        for g_ in range(len(g_avg)):
                e_g_k[e_][g_][k_][0] = g_avg[g_]
                e_g_k[e_][g_][k_][1] = g_est[g_]
                    
                k_g_e[k_][g_][e_][0] = g_avg[g_]
                k_g_e[k_][g_][e_][1] = g_est[g_]

In [None]:
with open('label-only-egk.txt', "wb") as fp:   #Pickling
    pickle.dump(e_g_k, fp)
    
with open('label-only-egk.txt', "wb") as p:   #Pickling
    pickle.dump(k_g_e, p)

In [None]:
# graph_k([e_g_k[5], k], str(epsilons[5]), 'Real')
# graph_k([e_g_k[5], k], str(epsilons[5]), 'Est.')
# graph_ep([k_g_e[5], epsilons], str(k[5]), 'Real')
# graph_ep([k_g_e[5], epsilons], str(k[5]), 'Est.')

graph_k([e_g_k[0], k], str(epsilons[0]), 'Real')
graph_k([e_g_k[0], k], str(epsilons[0]), 'Est.')
graph_ep([k_g_e[0], epsilons], str(k[0]), 'Real')
graph_ep([k_g_e[0], epsilons], str(k[0]), 'Est.')

# graph_k([e_g_k[-1], k], str(epsilons[-1]), 'Real')
# graph_k([e_g_k[-1], k], str(epsilons[-1]), 'Est.')
# graph_ep([k_g_e[-1], epsilons], str(k[-1]), 'Real')
# graph_ep([k_g_e[-1], epsilons], str(k[-1]), 'Est.')

In [None]:
graph_k([e_g_k[0], k], str(epsilons[0]), 'Real')
graph_k([e_g_k[0], k], str(epsilons[0]), 'Est.')

In [None]:
# r2_s = visualize_result(e_g_k[5], str(epsilons[5]), 'epsilon')
# r2_s = visualize_result(k_g_e[5], str(k[5]), 'k')

r2_s = visualize_result(e_g_k[0], str(epsilons[0]), 'epsilon')
r2_s = visualize_result(k_g_e[0], str(k[0]), 'k')

# r2_s = visualize_result(e_g_k[-1], str(epsilons[-1]), 'epsilon')
# r2_s = visualize_result(k_g_e[-1], str(k[-1]), 'k')

# r2_s = visualize_result(e_g_k[12], str(epsilons[12]), 'epsilon')

In [None]:
spearman_g_ep = []
spearman_g_k = []

mae_g_ep = []
mae_g_k = []
    
for i, egk in enumerate(e_g_k):
    actual = [[x[0] for x in egk[0]], [x[0] for x in egk[1]]]#, [x[0] for x in egk[2]], [x[0] for x in egk[3]], [x[0] for x in egk[4]]]
    estimated = [[x[1] for x in egk[0]], [x[1] for x in egk[1]]]#, [x[1] for x in egk[2]], [x[1] for x in egk[3]], [x[1] for x in egk[4]]]

    spearman = []
    mae = []
    
    for j in range(len(actual)):
        spearman.append(spearmanr(actual[j], estimated[j]).correlation)
        mae.append(mean_absolute_error(actual[j], estimated[j]))
        
    spearman_g_ep.append(spearman)
    mae_g_ep.append(mae)
    
for i, kge in enumerate(k_g_e):
    actual = [[x[0] for x in kge[0]], [x[0] for x in kge[1]]]#, [x[0] for x in kge[2]], [x[0] for x in kge[3]], [x[0] for x in kge[4]]]
    estimated = [[x[1] for x in kge[0]], [x[1] for x in kge[1]]]#, [x[1] for x in kge[2]], [x[1] for x in kge[3]], [x[1] for x in kge[4]]]

    spearman = []
    mae = []
    
    for j in range(len(actual)):
        spearman.append(spearmanr(actual[j], estimated[j]).correlation)
        mae.append(mean_absolute_error(actual[j], estimated[j]))
        
    spearman_g_k.append(spearman)
    mae_g_k.append(mae)


In [None]:
len(spearman_g_ep), len(spearman_g_ep[0])
spearman_g_ep

In [None]:
len(spearman_g_k), len(spearman_g_k[0])
spearman_g_k

In [None]:
len(mae_g_ep), len(mae_g_ep[0])
mae_g_ep

In [None]:
len(mae_g_k), len(mae_g_k[0])
mae_g_k

In [None]:
rank_est = [22, 20,23,21,19,18,16,17,15,14,12,13,11,10,9,8,7,6,4,5,3,2,1]
rank_act = [23,20,21,22,17,18,16,19,14,15,13,12,11,10,8,7,9,6,4,5,2,3,1]
a= np.cov(np.array(rank_est), np.array(rank_act), bias=True)[0][1]

In [None]:
b = np.std(np.array(rank_est))

In [None]:
c = np.std(np.array(rank_act))

In [None]:
a /(b*c)

In [None]:
print(a, b, c)