In [4]:
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier

#%matplotlib nbagg
#%matplotlib notebook
#%matplotlib qt
#%pylab qt

Populating the interactive namespace from numpy and matplotlib


In [85]:
def plot_results(expname, xcoeffs, ycoeffs, lr_baseline, rf_baseline, title, n_runs=5):
    """Plot run-averaged LAFTR test/validation error surfaces with baseline planes.

    Parameters
    ----------
    expname : str
        Experiment name. Score matrices are read from
        ``results/<expname>_<i>_score_mat.npy`` and
        ``results/valid/<expname>_<i>_valid_score_mat.npy`` for every ``i`` in
        ``range(n_runs)`` and averaged element-wise.
    xcoeffs, ycoeffs : sequence of float
        Grid values for the x axis ("Fairness coefficient") and the y axis
        ("Reconstruction coefficient"). They are replaced by legacy grids when
        the loaded matrices have 7 or 8 rows.
    lr_baseline, rf_baseline : float
        Scalar errors drawn as flat wireframe planes (green / purple).
    title : str
        Figure title.
    n_runs : int, optional
        Number of per-run score matrices to average (default 5).

    Raises
    ------
    ValueError
        If ``n_runs`` is smaller than 1, or the averaged matrices do not have
        shape ``(len(xcoeffs), len(ycoeffs))``.
    """
    if n_runs < 1:
        raise ValueError('n_runs must be at least 1, got {}'.format(n_runs))

    def _mean_over_runs(path_template):
        # Sum left to right, then divide: the same arithmetic as averaging by hand.
        mats = [np.load(path_template.format(expname, run)) for run in range(n_runs)]
        return sum(mats[1:], mats[0]) / n_runs

    results = _mean_over_runs("results/{}_{}_score_mat.npy")
    validresults = _mean_over_runs("results/valid/{}_{}_valid_score_mat.npy")

    if results.shape[0] == 7:  # revert to old xcoeffs and ycoeffs
        xcoeffs = [0.0, 0.5, 1.0, 3.0, 5.0, 10.0, 15.0]
        ycoeffs = [0.0, 0.002, 0.005, 0.01, 0.03, 0.05, 0.07, 0.1]
    elif results.shape[0] == 8:
        xcoeffs = [0.0, 0.1, 0.5, 1.0, 3.0, 5.0, 10.0, 15.0]
        ycoeffs = [0.0, 0.0005, 0.001, 0.002, 0.003, 0.005, 0.007, 0.01, 0.03, 0.05, 0.1]

    # Fail early with a readable message instead of a broadcasting error deep
    # inside plot_surface.
    grid_shape = (len(xcoeffs), len(ycoeffs))
    for label, mat in (('test', results), ('validation', validresults)):
        if mat.shape != grid_shape:
            raise ValueError(
                '{} score matrix for {!r} has shape {} but the coefficient grid is {}'.format(
                    label, expname, mat.shape, grid_shape))

    # indexing='ij' gives X[i, j] = xcoeffs[i] and Y[i, j] = ycoeffs[j], matching
    # the (x, y) layout of the score matrices.
    X, Y = np.meshgrid(xcoeffs, ycoeffs, indexing='ij')

    # Finer grid (np.linspace default of 50 points per axis) for the flat baseline planes.
    xmesh = np.linspace(xcoeffs[0], xcoeffs[-1])
    ymesh = np.linspace(ycoeffs[0], ycoeffs[-1])
    xmeshgrid, ymeshgrid = np.meshgrid(xmesh, ymesh, indexing='ij')
    logreg = np.full(xmeshgrid.shape, lr_baseline, dtype=float)
    rfsurface = np.full(xmeshgrid.shape, rf_baseline, dtype=float)

    fig = plt.figure(figsize=(11, 6))
    # Passing projection= to Figure.gca() was deprecated in Matplotlib 3.4 and
    # later removed; add_subplot works on old and new versions alike.
    ax = fig.add_subplot(111, projection='3d')
    surf = ax.plot_surface(X, Y, results, cmap=cm.coolwarm, linewidth=0,
                           antialiased=False, edgecolor='black')
    validsurf = ax.plot_surface(X, Y, validresults, cmap=cm.gist_gray, linewidth=0,
                                antialiased=False, edgecolor='black')
    plane = ax.plot_wireframe(xmeshgrid, ymeshgrid, logreg, color='green',
                              linewidth=.5, antialiased=False)
    rfplane = ax.plot_wireframe(xmeshgrid, ymeshgrid, rfsurface, color='purple',
                                linewidth=.5, antialiased=False)
    #surf.set_label('Multi-Study LAFTR')
    plane.set_label('Logistic regression baseline')
    rfplane.set_label('Random forest baseline')

    # Address the 3D axes explicitly rather than relying on pyplot's notion of
    # the "current" axes, which is easy to disturb once colorbars are added.
    ax.set_xlabel('Fairness coefficient')
    ax.set_ylabel('Reconstruction coefficient')
    ax.set_zlabel('Error')
    ax.set_title(title)

    cbar1 = fig.colorbar(surf, shrink=0.5, aspect=5)
    cbar2 = fig.colorbar(validsurf, shrink=0.5, aspect=5)
    cbar1.ax.set_ylabel('LAFTR test error', rotation=90)
    cbar2.ax.set_ylabel('LAFTR validation error', rotation=90)

    ax.legend(loc='lower right')
    plt.show()

def lr_baseline(dataset, data_dir='/Users/Frances/Documents/seas-fellowship/rvr/data'):
    """Return the test error of a default LogisticRegression fit on `dataset`.

    Parameters
    ----------
    dataset : str
        Dataset path relative to `data_dir`, without the ``.npz`` extension.
    data_dir : str, optional
        Directory holding the ``.npz`` archives. The default is the original
        author's local path; pass your own directory on any other machine.

    Returns
    -------
    float
        ``1 - accuracy`` on the archive's test split, for label column 0.
    """
    # Context manager so the archive's file handle is closed once the arrays
    # have been read (NpzFile loads lazily and otherwise stays open).
    with np.load('{}/{}.npz'.format(data_dir, dataset)) as data:
        train_inds = data['train_inds']
        x_train = data['x_train'][train_inds]
        y_train = data['y_train'][train_inds]
        x_test = data['x_test']
        y_test = data['y_test']

    yidx = 0  # only the first label column is modelled

    modelall = LogisticRegression()
    modelall.fit(x_train, y_train[:, yidx])
    return 1 - modelall.score(x_test, y_test[:, yidx])

def rf_baseline(dataset, n_estimators=250, min_samples_split=2, max_features=12,
                random_state=0, data_dir='/Users/Frances/Documents/seas-fellowship/rvr/data'):
    """Fit a RandomForestClassifier on `dataset` and return its split errors.

    Parameters
    ----------
    dataset : str
        Dataset path relative to `data_dir`, without the ``.npz`` extension.
    n_estimators, min_samples_split, max_features, random_state
        Forwarded unchanged to ``RandomForestClassifier``.
    data_dir : str, optional
        Directory holding the ``.npz`` archives. The default is the original
        author's local path; pass your own directory on any other machine.

    Returns
    -------
    tuple of float
        ``(train_error, valid_error, test_error)``, each ``1 - accuracy`` for
        label column 0. The train/validation splits are the rows of
        ``x_train``/``y_train`` selected by ``train_inds``/``valid_inds``.
    """
    # Context manager so the archive's file handle is closed once the arrays
    # have been read (NpzFile loads lazily and otherwise stays open).
    with np.load('{}/{}.npz'.format(data_dir, dataset)) as data:
        x_all, y_all = data['x_train'], data['y_train']
        train_inds, valid_inds = data['train_inds'], data['valid_inds']
        x_train, y_train = x_all[train_inds], y_all[train_inds]
        x_valid, y_valid = x_all[valid_inds], y_all[valid_inds]
        x_test, y_test = data['x_test'], data['y_test']

    yidx = 0  # only the first label column is modelled

    rf = RandomForestClassifier(n_estimators=n_estimators, min_samples_split=min_samples_split,
                                max_features=max_features, random_state=random_state)
    rf.fit(x_train, y_train[:, yidx])

    trainerr = 1 - rf.score(x_train, y_train[:, yidx])
    validerr = 1 - rf.score(x_valid, y_valid[:, yidx])
    testerr = 1 - rf.score(x_test, y_test[:, yidx])
    return (trainerr, validerr, testerr)

In [6]:
# Plot one experiment against freshly computed logistic-regression and
# random-forest baselines.
dataset = 'runorfunc/run_orfunc_051319' #'runagree/run_agree_interact_042919_thresh'
expname = 'runorfunc_all6060_051319'#'runagree_large_interact_042919_thresh'
title = 'LARGE network (all components [60,60] neurons), dataset with complex OR 05/13/19'

xcoeffs = [0.0, 1.0, 3.0, 5.0, 10.0, 15.0]
ycoeffs = [0.0, 0.005, 0.01, 0.03, 0.05, 0.07, 0.1]

lrbaseline = lr_baseline(dataset)
# rf_baseline returns (train_error, valid_error, test_error); plot_results needs
# a single scalar for the baseline plane, so take the test error. Passing the
# whole tuple fails when it is broadcast against the plane's grid.
rfbaseline = rf_baseline(dataset)[2]

plot_results(expname, xcoeffs, ycoeffs, lrbaseline, rfbaseline, title)







In [48]:
# Experiment registries. Each one maps an experiment name (the prefix of the
# saved score-matrix files) to a (dataset path, plot title) pair.

# Datasets without interaction terms.
dict_no_int = {
    'runagree_seed_p1_2_042019': (
        'runagree/run_agree_p1_2_042019',
        'Small network, dataset with no interactions 04/20/19',
    ),
    'runagree_no_interact_050919': (
        'runagree/run_agree_no_interact_050919',
        'Small network, dataset with no interactions 05/09/19',
    ),
    'runagree_large_no_interact_050919': (
        'runagree/run_agree_no_interact_050919',
        'Large encoder network, dataset with no interactions 05/09/19',
    ),
    'runagree_all6060_no_interact_050919': (
        'runagree/run_agree_no_interact_050919',
        'LARGE network (all components [60,60] neurons), dataset with no interactions 05/09/19',
    ),
}

# Datasets with a single threshold interaction.
dict_thresh = {
    'runagree_interact_042919_thresh': (
        'runagree/run_agree_interact_042919_thresh',
        'Small network, dataset with 1 threshold interaction 04/29/19',
    ),
    'runagree_large_interact_042919_thresh': (
        'runagree/run_agree_interact_042919_thresh',
        'Large encoder network, dataset 1 threshold interaction 04/29/19',
    ),
    'runagree_all6060_interact_042919_thresh': (
        'runagree/run_agree_interact_042919_thresh',
        'LARGE network (all components [60,60] neurons), dataset 1 threshold interaction 04/29/19',
    ),
    'runagree_interact_051319_thresh': (
        'runagree/run_agree_interact_051319_thresh',
        'Small network, dataset with 1 threshold interaction 05/13/19',
    ),
}

# Datasets with a single product interaction.
dict_prod = {
    'runagree_interact_050919_prod': (
        'runagree/run_agree_interact_050919_prod',
        'Small network, dataset with 1 product interaction 05/09/19',
    ),
    'runagree_interact_051019_prod': (
        'runagree/run_agree_interact_051019_prod',
        'Small network, dataset with 1 product interaction 05/10/19',
    ),
    'runagree_large_interact_051019_prod': (
        'runagree/run_agree_interact_051019_prod',
        'Large encoder network, dataset with 1 product interaction 05/10/19',
    ),
    'runagree_all6060_interact_052619_prod_10': (
        'runagree/run_agree_interact_052619_prod_10',
        'All network parts LARGE, 10-study dataset, 1 product interaction 05/26/19',
    ),
}

# Dataset combining product and threshold interactions.
dict_prod_thresh = {
    'runagree_interact_042919_prod_thresh': (
        'runagree/run_agree_interact_042919_prod_thresh',
        'Small network, dataset with 2 product and 2 threshold interactions 04/29/19',
    ),
}

# Datasets whose label is the OR of two linear functions.
dict_or = {
    'runorfunc_051319': (
        'runorfunc/run_orfunc_051319',
        'Small network, dataset with label determined by the OR of two linear functions 05/13/19',
    ),
    'runorfunc_all6060_051319': (
        'runorfunc/run_orfunc_051319',
        'LARGE network (all [60,60] neurons), dataset determined by the OR of two linear functions 05/13/19',
    ),
    'runorfunc_051419': (
        'runorfunc/run_orfunc_051419',
        'Small network, dataset with label determined by the OR of two linear functions 05/14/19',
    ),
    'runorfunc_large_051419': (
        'runorfunc/run_orfunc_051419',
        'Large encoder network, dataset with label determined by the OR of two linear functions 05/14/19',
    ),
    'runorfunc_all6060_051419': (
        'runorfunc/run_orfunc_051419',
        'LARGE network (all [60,60] neurons), dataset determined by the OR of two linear functions 05/14/19',
    ),
    'runorfunc_all6060_052619_10': (
        'runorfunc/run_orfunc_052619_10',
        'All network parts LARGE, 10-study dataset labels from OR function 05/26/19',
    ),
}



In [98]:
# Plot every OR-function experiment against its baselines.
# dict_single is not used by the loop below; it is kept as a one-entry registry
# that can be swapped in for dict_or to plot a single experiment.
dict_single = {
    'runorfunc_all6060_052619_10': ('runorfunc/run_orfunc_052619_10', 'All network parts LARGE, 10-study dataset labels from OR function 05/26/19'),
}

# Define the cache path here instead of relying on a later cell having run.
# NOTE(review): this is the file the baseline-caching cells write to -- confirm
# it is the cache this plot is meant to read.
rf_filename = 'random_forest_baselines_n_100_minsamp_2_maxfeat_5.npy'

# The cache is a pickled dict {dataset: (train_err, valid_err, test_err)};
# NumPy >= 1.16.3 refuses to unpickle unless allow_pickle=True. Load it once
# rather than once per experiment.
rfbase = np.load(rf_filename, allow_pickle=True).item()

for expname, (dataset, title) in dict_or.items():
    print(expname)

    # Check the cheap precondition before fitting the logistic regression.
    if dataset not in rfbase:
        raise Exception('must add random forest baseline for this dataset')
    rfbaseline = rfbase[dataset][2]  # index 2 is the test error

    lrbaseline = lr_baseline(dataset)
    plot_results(expname, xcoeffs, ycoeffs, lrbaseline, rfbaseline, title)


runorfunc_051319




runorfunc_all6060_051319




runorfunc_051419




runorfunc_large_051419




runorfunc_all6060_051419




runorfunc_all6060_052619_10




In [104]:
# Cache random-forest baseline errors for the OR-function datasets.
rf_filename = 'random_forest_baselines_n_100_minsamp_2_maxfeat_5.npy'
#np.save(rf_filename, {'runagree/run_agree_interact_052619_prod_10' : (0.0, 0.0654, 0.1296) })

# The cache is a pickled dict {dataset: (train_err, valid_err, test_err)}.
# NumPy >= 1.16.3 needs allow_pickle=True to read it. Start from an empty cache
# when the file does not exist yet so the cell works on a fresh checkout.
try:
    cur = np.load(rf_filename, allow_pickle=True).item()
except FileNotFoundError:
    cur = {}

for expname, (dataset, title) in dict_or.items():
    print(dataset)
    if dataset not in cur:  # several experiments share one dataset; fit it only once
        trainerr, validerr, testerr = rf_baseline(dataset, n_estimators=100,
                                                  min_samples_split=2, max_features=5)
        cur[dataset] = (trainerr, validerr, testerr)
        # Persist after every new fit so an interrupted run keeps its progress.
        np.save(rf_filename, cur)
    

runorfunc/run_orfunc_051319
runorfunc/run_orfunc_051319
runorfunc/run_orfunc_051419
runorfunc/run_orfunc_051419
runorfunc/run_orfunc_051419
runorfunc/run_orfunc_052619_10


In [100]:
# Cache random-forest baseline errors for the no-interaction datasets.
rf_filename = 'random_forest_baselines_n_100_minsamp_2_maxfeat_5.npy'

# This cell used to reset the cache unconditionally with np.save(rf_filename, {}),
# which discards entries written by any cell run before it. Reuse the existing
# cache and only start empty when the file is missing; delete the file by hand
# to force a full recomputation.
# The cache is a pickled dict, so NumPy >= 1.16.3 needs allow_pickle=True.
try:
    cur = np.load(rf_filename, allow_pickle=True).item()
except FileNotFoundError:
    cur = {}

for expname, (dataset, title) in dict_no_int.items():
    print(dataset)
    if dataset not in cur:  # several experiments share one dataset; fit it only once
        trainerr, validerr, testerr = rf_baseline(dataset, n_estimators=100,
                                                  min_samples_split=2, max_features=5)
        cur[dataset] = (trainerr, validerr, testerr)
        # Persist after every new fit so an interrupted run keeps its progress.
        np.save(rf_filename, cur)

runagree/run_agree_p1_2_042019
runagree/run_agree_no_interact_050919
runagree/run_agree_no_interact_050919
runagree/run_agree_no_interact_050919


In [101]:
# Cache random-forest baseline errors for the threshold-interaction datasets.
rf_filename = 'random_forest_baselines_n_100_minsamp_2_maxfeat_5.npy'
#np.save(rf_filename, {'runagree/run_agree_interact_052619_prod_10' : (0.0, 0.0654, 0.1296) })

# The cache is a pickled dict {dataset: (train_err, valid_err, test_err)}.
# NumPy >= 1.16.3 needs allow_pickle=True to read it. Start from an empty cache
# when the file does not exist yet so the cell works on a fresh checkout.
try:
    cur = np.load(rf_filename, allow_pickle=True).item()
except FileNotFoundError:
    cur = {}

for expname, (dataset, title) in dict_thresh.items():
    print(dataset)
    if dataset not in cur:  # several experiments share one dataset; fit it only once
        trainerr, validerr, testerr = rf_baseline(dataset, n_estimators=100,
                                                  min_samples_split=2, max_features=5)
        cur[dataset] = (trainerr, validerr, testerr)
        # Persist after every new fit so an interrupted run keeps its progress.
        np.save(rf_filename, cur)

runagree/run_agree_interact_042919_thresh
runagree/run_agree_interact_042919_thresh
runagree/run_agree_interact_042919_thresh
runagree/run_agree_interact_051319_thresh


In [102]:
# Cache random-forest baseline errors for the product-interaction datasets.
rf_filename = 'random_forest_baselines_n_100_minsamp_2_maxfeat_5.npy'
#np.save(rf_filename, {'runagree/run_agree_interact_052619_prod_10' : (0.0, 0.0654, 0.1296) })

# The cache is a pickled dict {dataset: (train_err, valid_err, test_err)}.
# NumPy >= 1.16.3 needs allow_pickle=True to read it. Start from an empty cache
# when the file does not exist yet so the cell works on a fresh checkout.
try:
    cur = np.load(rf_filename, allow_pickle=True).item()
except FileNotFoundError:
    cur = {}

for expname, (dataset, title) in dict_prod.items():
    print(dataset)
    if dataset not in cur:  # several experiments share one dataset; fit it only once
        trainerr, validerr, testerr = rf_baseline(dataset, n_estimators=100,
                                                  min_samples_split=2, max_features=5)
        cur[dataset] = (trainerr, validerr, testerr)
        # Persist after every new fit so an interrupted run keeps its progress.
        np.save(rf_filename, cur)

runagree/run_agree_interact_050919_prod
runagree/run_agree_interact_051019_prod
runagree/run_agree_interact_051019_prod
runagree/run_agree_interact_052619_prod_10


In [103]:
# Cache random-forest baseline errors for the product+threshold dataset.
rf_filename = 'random_forest_baselines_n_100_minsamp_2_maxfeat_5.npy'
#np.save(rf_filename, {'runagree/run_agree_interact_052619_prod_10' : (0.0, 0.0654, 0.1296) })

# The cache is a pickled dict {dataset: (train_err, valid_err, test_err)}.
# NumPy >= 1.16.3 needs allow_pickle=True to read it. Start from an empty cache
# when the file does not exist yet so the cell works on a fresh checkout.
try:
    cur = np.load(rf_filename, allow_pickle=True).item()
except FileNotFoundError:
    cur = {}

for expname, (dataset, title) in dict_prod_thresh.items():
    print(dataset)
    if dataset not in cur:  # fit each dataset only once
        trainerr, validerr, testerr = rf_baseline(dataset, n_estimators=100,
                                                  min_samples_split=2, max_features=5)
        cur[dataset] = (trainerr, validerr, testerr)
        # Persist after every new fit so an interrupted run keeps its progress.
        np.save(rf_filename, cur)

runagree/run_agree_interact_042919_prod_thresh


In [None]:
# Prints a fixed marker string; presumably a visual cue that the cells queued
# before this one have finished running -- confirm before relying on it.
print('DONEEEEEEEEEEEEEE')

In [59]:
# Inspect the cached random-forest baselines. The file holds a pickled dict
# wrapped in a 0-d object array, so NumPy >= 1.16.3 needs allow_pickle=True and
# .item() unwraps the dict.
baselines = np.load('random_forest_baselines.npy', allow_pickle=True).item()
print(baselines)
print('runagree/run_agree_interact_052619_prod_10' in baselines)

{'runagree/run_agree_interact_052619_prod_10': (0.0, 0.0654, 0.1296), 'runagree/run_agree_p1_2_042019': (0.0, 0.062000000000000055, 0.08279999999999998), 'runagree/run_agree_no_interact_050919': (0.0, 0.0605, 0.18600000000000005), 'runagree/run_agree_interact_042919_thresh': (0.0, 0.06299999999999994, 0.07120000000000004), 'runagree/run_agree_interact_051319_thresh': (0.0, 0.05625000000000002, 0.14380000000000004), 'runagree/run_agree_interact_050919_prod': (0.0, 0.058499999999999996, 0.1532), 'runagree/run_agree_interact_051019_prod': (0.0, 0.07225000000000004, 0.13939999999999997), 'runagree/run_agree_interact_042919_prod_thresh': (0.0, 0.058499999999999996, 0.09660000000000002), 'runorfunc/run_orfunc_051319': (0.0, 0.07799999999999996, 0.11060000000000003), 'runorfunc/run_orfunc_051419': (0.0, 0.08374999999999999, 0.19899999999999995), 'runorfunc/run_orfunc_052619_10': (0.0, 0.06140000000000001, 0.06059999999999999)}
True


In [51]:
# Cache logistic-regression baseline errors for the no-interaction datasets.
lr_filename = 'testtest.npy'
#np.save(lr_filename, {'runagree/run_agree_interact_052619_prod_10' : (0.0, 0.0654, 0.1296) })

# The cache is a pickled dict, so NumPy >= 1.16.3 needs allow_pickle=True.
# Start from an empty cache when the file does not exist yet.
try:
    cur = np.load(lr_filename, allow_pickle=True).item()
except FileNotFoundError:
    cur = {}

for expname, (dataset, title) in dict_no_int.items():
    print(dataset)
    if dataset not in cur:  # several experiments share one dataset; fit it only once
        testerr = lr_baseline(dataset)
        # NOTE(review): the leading 5 is an unexplained placeholder stored next
        # to the test error -- confirm its meaning before relying on this file.
        cur[dataset] = (5, testerr)
        # Persist after every new fit so an interrupted run keeps its progress.
        np.save(lr_filename, cur)

runagree/run_agree_p1_2_042019
runagree/run_agree_no_interact_050919
runagree/run_agree_no_interact_050919
runagree/run_agree_no_interact_050919


In [52]:
# Show the cached logistic-regression baselines. The file holds a pickled dict,
# so NumPy >= 1.16.3 needs allow_pickle=True; .item() unwraps the dict.
cur = np.load(lr_filename, allow_pickle=True).item()
print(cur)

{'runagree/run_agree_interact_052619_prod_10': 0.4578, 'new': 5, 'runagree/run_agree_p1_2_042019': (5, 0.05359999999999998), 'runagree/run_agree_no_interact_050919': (5, 0.0706)}


In [13]:
#results = np.load("runhet_recon_sweep_score_mat.npy")
#results = np.load("runp_1_2_sweep_dp_score_mat.npy")
#results = np.load("runp_1_2_sweep_eo_041719_prod_score_mat.npy")

# Load the five per-run test-score matrices (file indices 0-4) of the
# large-encoder threshold-interaction experiment.
score_mat_path = "results/runagree_large_interact_042919_thresh_{}_score_mat.npy"
results0, results1, results2, results3, results4 = [
    np.load(score_mat_path.format(run_idx))  #[:,:-1]
    for run_idx in range(5)
]

In [15]:
#results = results[:, :]
# Element-wise mean of the five per-run matrices, added left to right and then
# divided by their count.
run_results = (results0, results1, results2, results3, results4)
results = sum(run_results[1:], run_results[0]) / len(run_results)
print(results.shape)
print(results)

(6, 7)
[[0.0677484  0.0657452  0.06666668 0.07700324 0.0630609  0.07604166
  0.07359774]
 [0.0694311  0.07676282 0.076242   0.06858974 0.06891026 0.06434294
  0.0676282 ]
 [0.06959136 0.0685096  0.06814906 0.07852562 0.07728364 0.07479968
  0.06722758]
 [0.06951124 0.06794872 0.07171476 0.06033652 0.07179488 0.06754808
  0.070633  ]
 [0.07247594 0.0711939  0.06738782 0.06402244 0.06806892 0.067508
  0.07047278]
 [0.06466348 0.07455928 0.06237982 0.06221956 0.07019232 0.0647436
  0.06434296]]


In [None]:
# Coefficient grids used as the x / y axes of the result surface. The
# commented-out lists below are grids from earlier sweeps, kept for reference;
# only the final "6 by 7" pair is active.
#coeffs = np.array(['0_0', '0_005', '0_01', '0_05', '0_1', '0_2', '0_5', '1_0', '2_0', '4_0', '6_0', '10_0'])
#coeffs = np.array([0. , 0.005, 0.01, 0.05, 0.1, 0.2, 0.5, 1., 2., 4., 6., 10.])
#xcoeffs = [0.0, 0.01, 0.05, 0.1, 0.15, 0.2, 0.3, 0.5, 1.0, 4.0]
#ycoeffs = [0.0, 0.001, 0.005, 0.01] #, 0.03]#, 0.05, 0.1, 0.15, 0.2, 0.3]
#xcoeffs = [0.0, 0.1, 0.5, 1.0, 3.0, 5.0, 10.0, 15.0]
#ycoeffs = [0.0, 0.0005, 0.001, 0.002, 0.003, 0.005, 0.007, 0.01, 0.03, 0.05, 0.1] #, 0.7]
#xcoeffs = [0.0, 0.1, 0.5, 1.0, 3.0, 5.0, 10.0, 15.0]
#ycoeffs = [0.0, 0.001, 0.005, 0.01, 0.025, 0.05, 0.075, 0.1, 0.3, 0.5]
#n = len(coeffs)

#7 by 8 run
#xcoeffs = [0.0, 0.5, 1.0, 3.0, 5.0, 10.0, 15.0]
#ycoeffs = [0.0, 0.002, 0.005, 0.01, 0.03, 0.05, 0.07, 0.1]

#6 by 7 run
xcoeffs = [0.0, 1.0, 3.0, 5.0, 10.0, 15.0]
ycoeffs = [0.0, 0.005, 0.01, 0.03, 0.05, 0.07, 0.1]



In [None]:
# Coordinate matrices of shape (len(xcoeffs), len(ycoeffs)):
# X[i, j] = xcoeffs[i] and Y[i, j] = ycoeffs[j].
X, Y = np.meshgrid(xcoeffs, ycoeffs, indexing='ij')
print(X)
print(Y)

In [None]:
# Finer grid (np.linspace default of 50 points per axis) spanning the same
# coefficient ranges, used to draw the flat baseline plane.
xmesh = np.linspace(xcoeffs[0], xcoeffs[-1])
ymesh = np.linspace(ycoeffs[0], ycoeffs[-1])
xmeshgrid, ymeshgrid = np.meshgrid(xmesh, ymesh, indexing='ij')
#xmeshgrid
#ymeshgrid

# Constant-height plane at the logistic-regression error; the trailing values
# are alternatives tried earlier.
logreg = np.full(xmeshgrid.shape, 0.0464) #0.1872 #0.238 #0.16500 #0.132 #0.0464 #0.1872 #0.0706 #0.2004 #0.0536 #0.1932


In [None]:
# Draw the averaged error surface together with the logistic-regression plane.
fig = plt.figure(figsize=(10, 6))
# Passing projection= to Figure.gca() was deprecated in Matplotlib 3.4 and later
# removed; add_subplot works on old and new versions alike.
ax = fig.add_subplot(111, projection='3d')
surf = ax.plot_surface(X, Y, results, cmap=cm.coolwarm, linewidth=0, antialiased=False, edgecolor='black')
plane = ax.plot_wireframe(xmeshgrid, ymeshgrid, logreg, color='green', linewidth=.5, antialiased=False)
#surf.set_label('Multi-Study LAFTR')
plane.set_label('Logistic regression baseline')

# Label the 3D axes explicitly rather than through pyplot's "current axes".
ax.set_xlabel('Fairness coefficient')
ax.set_ylabel('Reconstruction coefficient')
ax.set_zlabel('Error')
ax.set_title('LARGE network trained on 1 threshold interaction 04/29/19')
fig.colorbar(surf, shrink=0.5, aspect=5)

ax.legend(loc='lower right')
plt.show()