In [1]:
import numpy as np
from itertools import product

This notebook is used to determine data about the optimal hyperparameters for vanilla boosting and projecting on the German credit data set.  Submit jobs using `submit_baseline.py` and `submit_fair.py` to collect the required data (hyperparameter grids can be found in the associated scripts).

Here we process those data to find the optimal hyperparameters.  Then, using the function `collect_xgb_fairness_data` in `german_proc.py` we collected all the fairness information from ten new seeds using these optimal hyperparameters (in a shell).  Those files are called things like `fairness-metrics.npz`. 

We then process those `npz` files to produce the data found in the table in the main text of the BuDRO paper.  Everything under the heading "Old stuff using a different fair distance on German" near the bottom is old, and corresponds to playing with a different fair distance on German.



Add paths to your collected data here.  

In [2]:
# Placeholder locations of the collected result files; replace with real paths.
# The loading cells build each file name as `path + exp_name` (plain string
# concatenation), so whichever value is assigned to `path` must end with a
# path separator.
bpath = 'PATHS'
bbpath = 'TO BASELINE DATA'
ppath = 'PATHS'
bppath = 'TO PROJECTING DATA'

In [81]:
# SET THIS FOR PROJECT OR BASELINE
# `path` is the prefix every result file is loaded from in the search cells.
# NOTE(review): `bbpath` selects the baseline data; presumably `bppath` is the
# value to use for the projecting data -- confirm.
path = bbpath

In [4]:
# for baseline
# Hyperparameter grids; the trailing `#k` on each line is the grid size
# (4 * 6 * 5 * 12 * 1 = 1440 combinations in total).
# NOTE: the "For project" cell assigns the same variable names, so whichever
# of the two grid cells was executed last is the grid the search cells use.
lambda_grid = [0.1, 10, 100, 250, 500, 750, 1000, 
            1250, 1500, 1750, 2000, 2500] #12

depth_grid = [4,7,10,13] #4
eta_grid = [0.005, 0.01, 0.05, 0.1, 0.3, 0.5] #6
weight_grid = [0.01, 0.1, 1, 2, 5] #5

pos_grid = [0.0]  #1
# NOTE(review): presumably the number of boosting iterations per run; the
# search cells visible in this notebook use their own `n_steps` instead.
n_iter = 1000

In [82]:
# For project
# Hyperparameter grids; the trailing `#k` on each line is the grid size
# (3 * 4 * 5 * 8 * 1 = 480 combinations in total).
# NOTE: the "for baseline" cell assigns the same variable names, so whichever
# of the two grid cells was executed last is the grid the search cells use.
lambda_grid = [0.1, 10, 100, 250, 500, 1000, 1500, 2000] #8

depth_grid = [4,7,10] #3
eta_grid = [0.05, 0.1, 0.3, 0.5] #4
weight_grid = [0.01, 0.1, 1, 2, 5] #5

pos_grid = [0.0]  #1
# NOTE(review): presumably the number of boosting iterations per run; the
# search cells visible in this notebook use their own `n_steps` instead.
n_iter = 1000

In [83]:
seeds = np.load("german-seeds.npz")['seeds']

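For each hyperparameter combination, average the test balanced accuracy (balacc) over all seeds at every boosting step.  Then pick the combination and step whose seed-averaged test balacc is highest.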

This should be a lower bound on most actual CV values, I think.  In any case, it is a reasonably even-handed way to compare accuracy across methods.

In [85]:
seeds[:5]

array([56499, 87062, 69956, 18074, 35277])

In [86]:
%%time
hypers = [depth_grid, eta_grid, weight_grid, lambda_grid, pos_grid]
names = ['depth', 'eta', 'weight', 'lamb', 'pos']

names += ['seed']

# test seeds
tseeds = seeds

# number of boosting steps in each run
n_steps = 1000

max_average = 0
max_step = 0
best_params = dict()

# some sort of parity check
num_files_processed = 0

for pack in product(*hypers):
    pack_aves = np.zeros(n_steps)
    for seed in tseeds:
        
        # load data from current seed and current param selection
        res = dict()

        values = list(pack)
        values.append(seed)

        exp_descriptor = []
        for n, v in zip(names, values):
            exp_descriptor.append(':'.join([n,str(v)]))

        exp_name = '_'.join(exp_descriptor)
        #print(exp_name)

        current = np.loadtxt(path + exp_name)
        pack_aves += current[:,3]
        
        num_files_processed += 1
    
    pack_aves = pack_aves/tseeds.shape[0]
    current_max_step= np.argmax(pack_aves)
    current_max_average = pack_aves[current_max_step]
    
    if current_max_average > max_average:
        max_average = current_max_average
        max_step = current_max_step
        best_params = pack
    
        

CPU times: user 28 s, sys: 1.45 s, total: 29.4 s
Wall time: 4min 44s


In [87]:
num_files_processed

4800

### Vanilla without agebin

In [88]:
max_average

0.7233335

In [89]:
best_params

(10, 0.5, 2, 1000, 0.0)

In [90]:
max_step

105

### Vanilla including agebin

In [78]:
max_average

0.7233335

In [79]:
best_params

(10, 0.5, 2, 1000, 0.0)

In [80]:
max_step

105

Run the function `collect_xgb_fairness_data` (found in `german_proc.py`) with the optimal parameters to make these npz files

In [3]:
stuff = np.load('fairness-metrics.npz')
bin_stuff = np.load('fairness-metrics-agebin.npz')

In [4]:
for thing in stuff: print(thing)

finfo
consinfo


In [13]:
# Per-column standard deviation of the `finfo` metrics (one value per column).
# The full vector is shown, as in the matching `proj_stuff` cell; indexing with
# `fcolnames.index(quant)` here would rely on names that are only defined in
# later cells of the notebook.
np.std(stuff['finfo'], axis=0)

array([0.02095914, 0.0397562 , 0.01850568, 0.0709734 , 0.15164638,
       0.04000149, 0.03284115, 0.1090465 , 0.15924682, 0.08411454,
       0.06286552, 0.09528521, 0.06143636, 0.05967943, 0.05515644,
       0.0289074 , 0.03967105, 0.04107026, 0.07118135, 0.08618137,
       0.07058435])

In [92]:
stuff['finfo'].mean(axis=0)

array([0.745     , 0.70166667, 0.72333333, 0.81267288, 0.49050912,
       0.6740715 , 0.78286864, 0.24078228, 0.31006255, 0.21548045,
       0.13860138, 0.28575532, 0.72530657, 0.72221505, 0.68818484,
       0.75511239, 0.0788494 , 0.10293774, 0.03500954, 0.03712174,
       0.06468702])

In [11]:
quant = 'age-gap-MAX'
stuff['finfo'].mean(axis=0)[ fcolnames.index(quant) ]

0.31006254910417663

In [14]:
np.std(stuff['consinfo'])

0.022135943621178638

In [98]:
stuff['consinfo'].mean()

0.9200000000000002

In [103]:
bin_stuff['finfo'].mean(axis=0)

array([0.745     , 0.70166667, 0.72333333, 0.81267288, 0.49050912,
       0.6740715 , 0.78286864, 0.24078228, 0.31006255, 0.21548045,
       0.13860138, 0.28575532, 0.72530657, 0.72221505, 0.68818484,
       0.75511239, 0.0788494 , 0.10293774, 0.03500954, 0.03712174,
       0.06468702])

In [128]:
quant = 'sex-gap-RMS'
bin_stuff['finfo'].mean(axis=0)[ fcolnames.index(quant) ]

0.07884940330037588

In [106]:
bin_stuff['consinfo'].mean(axis=0)

array([0.92, 1.  ])

### Project without agebin

In [70]:
max_average

0.6984522999999999

In [71]:
best_params

(7, 0.5, 5, 2000, 0.0)

In [72]:
max_step

111

### Project including agebin

In [52]:
max_average

0.7001191000000001

In [53]:
best_params

(4, 0.5, 2, 1000, 0.0)

In [54]:
max_step

128

## Project 

In [16]:
proj_stuff = np.load('fairness-metrics-project.npz')
bin_proj_stuff = np.load('fairness-metrics-agebin-project.npz')

In [17]:
for name in proj_stuff: print(name)

finfo
consinfo


In [15]:
# Names of the 21 columns of the `finfo` arrays.  `fcolnames.index(name)` is
# used throughout the notebook to pick a single metric out of the per-column
# mean/std vectors.
# NOTE: several cells that call `fcolnames.index(...)` appear earlier in the
# notebook than this cell, so this cell has to be executed before them.
fcolnames = [
 'p0',
 'p1',
 'balanced',
 'age-TPR-prot',
 'age-TNR-prot',
 'age-TPR-priv',
 'age-TNR-priv',
 'age-gap-RMS',
 'age-gap-MAX',
 'age-ave-odds-diff',
 'age-eq-opp-diff',
 'age-stat-parity',
 'sex-TPR-prot',
 'sex-TNR-prot',
 'sex-TPR-priv',
 'sex-TNR-priv',
 'sex-gap-RMS',
 'sex-gap-MAX',
 'sex-ave-odds-diff',
 'sex-eq-opp-diff',
 'sex-stat-parity',
]

# NOTE(review): presumably labels for the two columns of the agebin
# `consinfo` arrays; not referenced by any cell visible here -- confirm.
consnames = [
    'status-cons',
    'age bin cons'
]

In [21]:
np.std(proj_stuff['finfo'], axis=0)

array([0.02889107, 0.05580223, 0.0235585 , 0.08348402, 0.07473133,
       0.06191372, 0.03672258, 0.06425389, 0.08648318, 0.05440983,
       0.09484856, 0.06318125, 0.09626979, 0.04321556, 0.0634305 ,
       0.03452309, 0.05030082, 0.06504226, 0.074437  , 0.11272148,
       0.0693004 ])

In [18]:
proj_stuff['finfo'].mean(axis=0)

array([0.70857143, 0.68833333, 0.69845238, 0.74228827, 0.56620356,
       0.67448625, 0.72942463, 0.14396456, 0.18793748, 0.11551154,
       0.06780202, 0.1733528 , 0.7056817 , 0.68971673, 0.67703943,
       0.71615381, 0.07708957, 0.10087642, 0.02753968, 0.02864227,
       0.05481401])

In [119]:
quant = 'age-gap-RMS'
proj_stuff['finfo'].mean(axis=0)[ fcolnames.index(quant) ]

0.14396456233448296

In [22]:
# Standard deviation of the rescaled `consinfo` values.
# NOTE(review): the 800/200 factor is not explained in this notebook --
# presumably a train/test size correction; confirm against
# `collect_xgb_fairness_data` in `german_proc.py`.
np.std(proj_stuff['consinfo']*800/200)

0.028722813232690134

In [73]:
# Mean of the `consinfo` values, rescaled by 800/200.
# NOTE(review): the 800/200 factor is not explained in this notebook --
# presumably a train/test size correction; confirm against
# `collect_xgb_fairness_data` in `german_proc.py`.
proj_stuff['consinfo'].mean()*800/200

0.96

In [60]:
bin_proj_stuff['finfo'].mean(axis=0)

array([0.72857143, 0.67166667, 0.70011905, 0.80433955, 0.48985853,
       0.63931054, 0.76489484, 0.23933503, 0.27719621, 0.22003266,
       0.16502901, 0.2787748 , 0.73536478, 0.67795176, 0.63521444,
       0.74970527, 0.10700295, 0.12889258, 0.08595192, 0.10015034,
       0.10809416])

In [124]:
quant = 'balanced'
bin_proj_stuff['finfo'].mean(axis=0)[ fcolnames.index(quant) ]

0.7001190476190476

In [66]:
bin_proj_stuff['consinfo'].mean(axis=0)

array([0.9255, 0.9825])

## Old stuff using a different fair distance on German

## Baseline

First five seeds

In [43]:
max_average

0.7228572

In [44]:
best_params

(10, 0.5, 5, 750, 0.0)

The above works when `tseeds = [seeds[0]]`.  So that's nice.

Obviously no guarantees when I up the number of seeds

In [21]:
max_average

0.767857

In [22]:
max_step

37

In [23]:
best_params

(13, 0.3, 2, 0.1, 0.0)

Here is what the computation spits out for the first 3 seeds.  It takes 2 minutes only.

In [25]:
max_average

0.7373016666666667

In [26]:
max_step

315

In [27]:
best_params

(7, 0.5, 1, 500, 0.0)

The final results for all seeds

In [30]:
max_average

0.7229761

In [31]:
max_step

503

In [32]:
best_params

(7, 0.3, 0.01, 1750, 0.0)

In [122]:
maxine = []

In [194]:
len(maxine)

10

In [190]:
%%time
seed = seeds[0]
hypers = [depth_grid, eta_grid, weight_grid, lambda_grid, pos_grid]
names = ['depth', 'eta', 'weight', 'lamb', 'pos']

names += ['seed']

# where did we get the best?
# where else did we come close?
overall_max = 0
params_max = None
steps_max = None
steps_near_max = None

tol = 10**-8
buff = 0.005

for pack in product(*hypers):

    res = dict()

    values = list(pack)
    values.append(seed)

    exp_descriptor = []
    for n, v in zip(names, values):
        exp_descriptor.append(':'.join([n,str(v)]))
        
    exp_name = '_'.join(exp_descriptor)
    #print(exp_name)
        
    current = np.loadtxt(path + exp_name)
    max_step = np.argmax(current[:,3])

    if current[max_step,3] > overall_max:
        overall_max = current[max_step, 3]
        params_max = exp_descriptor
        
        steps_max = np.where(current[:,3] > overall_max - tol)[0]
        steps_near_max = np.where(current[:,3] > overall_max - buff)
    

CPU times: user 7.63 s, sys: 194 ms, total: 7.82 s
Wall time: 9.76 s


In [173]:
maxine.append(overall_max)

In [174]:
overall_max

0.772619

Seed 9

In [175]:
steps_max

array([410])

In [176]:
steps_near_max

(array([407, 410]),)

In [177]:
params_max

['depth:7', 'eta:0.3', 'weight:1', 'lamb:1250', 'pos:0.0', 'seed:17049']

Seed 8

In [169]:
steps_max

array([190, 198])

In [170]:
steps_near_max

(array([189, 190, 193, 194, 196, 198]),)

In [171]:
params_max

['depth:4', 'eta:0.5', 'weight:5', 'lamb:1250', 'pos:0.0', 'seed:91582']

Seed 7

In [196]:
maxine[7]

0.75119

In [163]:
steps_max

array([771, 772, 795, 796, 797, 799, 800, 801, 803, 804, 805, 806, 807,
       808, 810, 812, 813, 814, 815, 816, 817, 818, 819, 820, 821, 822,
       823, 824, 825, 826, 827, 828, 829, 830, 831, 832, 833, 834, 835,
       836, 838, 841, 842, 844, 845, 846, 847, 848, 849, 850, 851, 853,
       854, 855, 856, 858, 859, 860, 861, 862, 864, 865, 866, 867, 868,
       869, 870, 871, 872, 873, 874, 875, 876, 877, 878, 879, 880, 881,
       882, 883, 884, 885, 886, 887, 888, 889, 890, 891, 892, 893, 894,
       895, 896, 897, 899, 900])

In [164]:
steps_near_max

(array([684, 694, 695, 719, 720, 721, 722, 723, 724, 726, 733, 734, 735,
        736, 737, 738, 739, 740, 741, 742, 743, 744, 745, 746, 747, 748,
        749, 750, 751, 752, 753, 754, 756, 757, 759, 760, 761, 768, 771,
        772, 773, 774, 775, 776, 780, 781, 782, 784, 785, 786, 787, 790,
        791, 792, 793, 794, 795, 796, 797, 798, 799, 800, 801, 802, 803,
        804, 805, 806, 807, 808, 810, 812, 813, 814, 815, 816, 817, 818,
        819, 820, 821, 822, 823, 824, 825, 826, 827, 828, 829, 830, 831,
        832, 833, 834, 835, 836, 838, 841, 842, 844, 845, 846, 847, 848,
        849, 850, 851, 853, 854, 855, 856, 858, 859, 860, 861, 862, 864,
        865, 866, 867, 868, 869, 870, 871, 872, 873, 874, 875, 876, 877,
        878, 879, 880, 881, 882, 883, 884, 885, 886, 887, 888, 889, 890,
        891, 892, 893, 894, 895, 896, 897, 899, 900]),)

In [165]:
params_max

['depth:13', 'eta:0.5', 'weight:5', 'lamb:10', 'pos:0.0', 'seed:53039']

Seed 6

In [157]:
steps_max

array([126, 127])

In [158]:
steps_near_max

(array([126, 127, 128, 129]),)

In [159]:
params_max

['depth:4', 'eta:0.5', 'weight:0.01', 'lamb:0.1', 'pos:0.0', 'seed:96481']

Seed 5

In [150]:
steps_max

array([29, 31, 32])

In [151]:
steps_near_max

(array([28, 29, 31, 32, 36, 38, 39, 40, 42, 43, 46]),)

In [152]:
params_max

['depth:10', 'eta:0.05', 'weight:1', 'lamb:0.1', 'pos:0.0', 'seed:54753']

Seed 4

In [142]:
steps_max

array([145, 148, 149])

In [143]:
steps_near_max

(array([144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154]),)

In [146]:
params_max

['depth:4', 'eta:0.1', 'weight:5', 'lamb:0.1', 'pos:0.0', 'seed:35277']

Seed 3

In [198]:
maxine[3]

0.738095

In [179]:
steps_max

array([196, 197])

In [180]:
steps_near_max

(array([196, 197, 198, 199, 200]),)

In [181]:
params_max

['depth:4', 'eta:0.5', 'weight:5', 'lamb:100', 'pos:0.0', 'seed:18074']

Seed 2

In [183]:
steps_max

array([53])

In [184]:
steps_near_max

(array([53, 54, 55]),)

In [185]:
params_max

['depth:4', 'eta:0.5', 'weight:1', 'lamb:10', 'pos:0.0', 'seed:69956']

Seed 1

In [187]:
steps_max

array([233, 235, 239, 240, 241, 242, 250, 251, 252, 253, 254, 255, 256,
       257, 258, 259, 260, 261, 262, 266, 267, 268, 269])

In [188]:
steps_near_max

(array([229, 230, 233, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244,
        245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257,
        258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 271,
        280, 281, 284, 285, 286, 287, 290, 291, 292, 300, 301, 302, 303]),)

In [189]:
params_max

['depth:10', 'eta:0.5', 'weight:5', 'lamb:500', 'pos:0.0', 'seed:87062']

Seed 0

In [191]:
steps_max

array([37])

In [192]:
steps_near_max

(array([35, 37, 65]),)

In [193]:
params_max

['depth:13', 'eta:0.3', 'weight:2', 'lamb:0.1', 'pos:0.0', 'seed:56499']

In [92]:
# Find other parameter sets that got near to the max to look for patterns
#
# Uses `hypers`, `names`, `seed`, `path` and `overall_max` as left behind by
# the single-seed search cell, so that cell must have been run first.
#
# Results left in the namespace:
#   params_near_max -- descriptor (list of 'name:value' strings) of every run
#                      whose best column-3 value is within `tol` of overall_max
#   steps_near_max  -- for each such run, the number of steps within `tol`
#                      (note: this reuses, with a different meaning, the name
#                      set by the single-seed search cell)
params_near_max = []
steps_near_max = []

tol = 0.01785

for pack in product(*hypers):

    # 'name:value' pairs; joined with '_' they form the result file name
    exp_descriptor = [n + ':' + str(v) for n, v in zip(names, list(pack) + [seed])]
    exp_name = '_'.join(exp_descriptor)

    current = np.loadtxt(path + exp_name)
    scores = current[:,3]
    # boolean mask of the steps that come within `tol` of the overall maximum
    near = scores > overall_max - tol

    # the run qualifies when its best step does
    if scores[np.argmax(scores)] > overall_max - tol:
        params_near_max.append(exp_descriptor)
        steps_near_max.append(np.count_nonzero(near))
        

## Seed 56499
Doesn't really follow the patterns of the other seeds, though we still see best performance with high lr,
higher weight, and lower lambda

In [66]:
overall_max

0.767857

In [67]:
steps_max

array([37])

In [68]:
steps_near_max

(array([35, 37, 65]),)

In [69]:
params_max

['depth:13', 'eta:0.3', 'weight:2', 'lamb:0.1', 'pos:0.0', 'seed:56499']

tol = 0.01785.  Looking for more than 75% - would definitely catch these

In [93]:
len(params_near_max)

9

In [94]:
steps_near_max

[6, 68, 4, 1, 31, 102, 22, 1, 21]

In [95]:
params_near_max

[['depth:7', 'eta:0.01', 'weight:0.01', 'lamb:10', 'pos:0.0', 'seed:56499'],
 ['depth:7', 'eta:0.01', 'weight:0.1', 'lamb:10', 'pos:0.0', 'seed:56499'],
 ['depth:7', 'eta:0.3', 'weight:1', 'lamb:2500', 'pos:0.0', 'seed:56499'],
 ['depth:7', 'eta:0.3', 'weight:2', 'lamb:0.1', 'pos:0.0', 'seed:56499'],
 ['depth:10', 'eta:0.005', 'weight:5', 'lamb:0.1', 'pos:0.0', 'seed:56499'],
 ['depth:10', 'eta:0.01', 'weight:5', 'lamb:0.1', 'pos:0.0', 'seed:56499'],
 ['depth:10', 'eta:0.5', 'weight:5', 'lamb:0.1', 'pos:0.0', 'seed:56499'],
 ['depth:13', 'eta:0.05', 'weight:2', 'lamb:0.1', 'pos:0.0', 'seed:56499'],
 ['depth:13', 'eta:0.3', 'weight:2', 'lamb:0.1', 'pos:0.0', 'seed:56499']]

tol = 0.015 - might catch these.  Potentially write out more?

In [89]:
len(params_near_max)

2

In [90]:
steps_near_max

[16, 8]

tol = 0.01 - would probably not catch these.  Potentially write out more?

In [82]:
len(params_near_max)

2

In [83]:
params_near_max

[['depth:10', 'eta:0.5', 'weight:5', 'lamb:0.1', 'pos:0.0', 'seed:56499'],
 ['depth:13', 'eta:0.3', 'weight:2', 'lamb:0.1', 'pos:0.0', 'seed:56499']]

In [84]:
steps_near_max

[4, 4]

## Seed 18074

In [57]:
overall_max

0.738095

In [58]:
steps_max

array([196, 197])

In [59]:
steps_near_max

(array([196, 197, 198, 199, 200]),)

In [60]:
params_max

['depth:4', 'eta:0.5', 'weight:5', 'lamb:100', 'pos:0.0', 'seed:18074']

tol = 0.01

In [62]:
len(params_near_max)

4

In [63]:
params_near_max

[['depth:4', 'eta:0.5', 'weight:5', 'lamb:100', 'pos:0.0', 'seed:18074'],
 ['depth:7', 'eta:0.1', 'weight:5', 'lamb:250', 'pos:0.0', 'seed:18074'],
 ['depth:7', 'eta:0.5', 'weight:5', 'lamb:750', 'pos:0.0', 'seed:18074'],
 ['depth:13', 'eta:0.5', 'weight:5', 'lamb:750', 'pos:0.0', 'seed:18074']]

In [64]:
steps_near_max

[12, 58, 23, 188]

## Seed 69956

In [39]:
overall_max

0.752381

In [41]:
steps_max

array([53])

In [42]:
steps_near_max

(array([53, 54, 55]),)

This is over 0.75 on seed 87062

In [40]:
params_max

['depth:4', 'eta:0.5', 'weight:1', 'lamb:10', 'pos:0.0', 'seed:69956']

tol = 0.01

In [53]:
len(params_near_max)

4

In [54]:
params_near_max

[['depth:4', 'eta:0.3', 'weight:2', 'lamb:10', 'pos:0.0', 'seed:69956'],
 ['depth:4', 'eta:0.5', 'weight:1', 'lamb:10', 'pos:0.0', 'seed:69956'],
 ['depth:7', 'eta:0.5', 'weight:1', 'lamb:250', 'pos:0.0', 'seed:69956'],
 ['depth:13', 'eta:0.5', 'weight:5', 'lamb:10', 'pos:0.0', 'seed:69956']]

In [55]:
steps_near_max

[16, 3, 42, 2]

## Seed 87062

In [20]:
overall_max

0.761905

In [21]:
steps_max

array([233, 235, 239, 240, 241, 242, 250, 251, 252, 253, 254, 255, 256,
       257, 258, 259, 260, 261, 262, 266, 267, 268, 269])

In [22]:
steps_near_max

(array([229, 230, 233, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244,
        245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257,
        258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 271,
        280, 281, 284, 285, 286, 287, 290, 291, 292, 300, 301, 302, 303]),)

In [18]:
params_max

['depth:10', 'eta:0.5', 'weight:5', 'lamb:500', 'pos:0.0', 'seed:87062']

tol = 0.0055

In [36]:
len(params_near_max)

29

In [37]:
params_near_max

[['depth:4', 'eta:0.5', 'weight:0.01', 'lamb:1500', 'pos:0.0', 'seed:87062'],
 ['depth:4', 'eta:0.5', 'weight:0.1', 'lamb:10', 'pos:0.0', 'seed:87062'],
 ['depth:4', 'eta:0.5', 'weight:0.1', 'lamb:1500', 'pos:0.0', 'seed:87062'],
 ['depth:4', 'eta:0.5', 'weight:1', 'lamb:1250', 'pos:0.0', 'seed:87062'],
 ['depth:4', 'eta:0.5', 'weight:2', 'lamb:100', 'pos:0.0', 'seed:87062'],
 ['depth:4', 'eta:0.5', 'weight:5', 'lamb:250', 'pos:0.0', 'seed:87062'],
 ['depth:4', 'eta:0.5', 'weight:5', 'lamb:500', 'pos:0.0', 'seed:87062'],
 ['depth:4', 'eta:0.5', 'weight:5', 'lamb:1750', 'pos:0.0', 'seed:87062'],
 ['depth:4', 'eta:0.5', 'weight:5', 'lamb:2000', 'pos:0.0', 'seed:87062'],
 ['depth:7', 'eta:0.1', 'weight:2', 'lamb:100', 'pos:0.0', 'seed:87062'],
 ['depth:7', 'eta:0.1', 'weight:5', 'lamb:250', 'pos:0.0', 'seed:87062'],
 ['depth:7', 'eta:0.5', 'weight:0.01', 'lamb:100', 'pos:0.0', 'seed:87062'],
 ['depth:7', 'eta:0.5', 'weight:0.1', 'lamb:100', 'pos:0.0', 'seed:87062'],
 ['depth:7', 'eta:0.5'

tol = 0.0075

In [31]:
len(params_near_max)

43

In [32]:
params_near_max

[['depth:4', 'eta:0.05', 'weight:0.1', 'lamb:0.1', 'pos:0.0', 'seed:87062'],
 ['depth:4', 'eta:0.3', 'weight:5', 'lamb:100', 'pos:0.0', 'seed:87062'],
 ['depth:4', 'eta:0.3', 'weight:5', 'lamb:750', 'pos:0.0', 'seed:87062'],
 ['depth:4', 'eta:0.5', 'weight:0.01', 'lamb:100', 'pos:0.0', 'seed:87062'],
 ['depth:4', 'eta:0.5', 'weight:0.01', 'lamb:1500', 'pos:0.0', 'seed:87062'],
 ['depth:4', 'eta:0.5', 'weight:0.1', 'lamb:10', 'pos:0.0', 'seed:87062'],
 ['depth:4', 'eta:0.5', 'weight:0.1', 'lamb:100', 'pos:0.0', 'seed:87062'],
 ['depth:4', 'eta:0.5', 'weight:0.1', 'lamb:1500', 'pos:0.0', 'seed:87062'],
 ['depth:4', 'eta:0.5', 'weight:1', 'lamb:500', 'pos:0.0', 'seed:87062'],
 ['depth:4', 'eta:0.5', 'weight:1', 'lamb:1250', 'pos:0.0', 'seed:87062'],
 ['depth:4', 'eta:0.5', 'weight:2', 'lamb:100', 'pos:0.0', 'seed:87062'],
 ['depth:4', 'eta:0.5', 'weight:2', 'lamb:750', 'pos:0.0', 'seed:87062'],
 ['depth:4', 'eta:0.5', 'weight:5', 'lamb:250', 'pos:0.0', 'seed:87062'],
 ['depth:4', 'eta:0.5

tol = 0.01

In [27]:
len(params_near_max)

75

In [28]:
params_near_max

[['depth:4', 'eta:0.05', 'weight:0.01', 'lamb:0.1', 'pos:0.0', 'seed:87062'],
 ['depth:4', 'eta:0.05', 'weight:0.1', 'lamb:0.1', 'pos:0.0', 'seed:87062'],
 ['depth:4', 'eta:0.3', 'weight:0.01', 'lamb:100', 'pos:0.0', 'seed:87062'],
 ['depth:4', 'eta:0.3', 'weight:0.1', 'lamb:100', 'pos:0.0', 'seed:87062'],
 ['depth:4', 'eta:0.3', 'weight:5', 'lamb:100', 'pos:0.0', 'seed:87062'],
 ['depth:4', 'eta:0.3', 'weight:5', 'lamb:250', 'pos:0.0', 'seed:87062'],
 ['depth:4', 'eta:0.3', 'weight:5', 'lamb:750', 'pos:0.0', 'seed:87062'],
 ['depth:4', 'eta:0.3', 'weight:5', 'lamb:1000', 'pos:0.0', 'seed:87062'],
 ['depth:4', 'eta:0.3', 'weight:5', 'lamb:1500', 'pos:0.0', 'seed:87062'],
 ['depth:4', 'eta:0.5', 'weight:0.01', 'lamb:10', 'pos:0.0', 'seed:87062'],
 ['depth:4', 'eta:0.5', 'weight:0.01', 'lamb:100', 'pos:0.0', 'seed:87062'],
 ['depth:4', 'eta:0.5', 'weight:0.01', 'lamb:500', 'pos:0.0', 'seed:87062'],
 ['depth:4', 'eta:0.5', 'weight:0.01', 'lamb:1500', 'pos:0.0', 'seed:87062'],
 ['depth:4',

Seed 87062:
For higher depths, we also want higher eta, higher weight, lower lamb.  Depth 4 with a high learning rate (eta) also seems a good place to look.

Other seeds: 
Depth 4, High LR, High weight, Lower lambda seems to be the sweet spot

SenSR baseline, copied in

In [208]:
# SenSR baseline results, copied in by hand (see the notebook text above).
# Each array below holds nine entries, whereas the other per-seed arrays in
# this notebook hold ten.
# NOTE(review): presumably one entry per seed, with one seed missing -- confirm.

# balanced accuracy
balacc = np.array([
    0.6714285714285715,
    0.6523809523809524,
    0.6571428571428571,
    0.6464285714285715,
    0.6857142857142857,
    0.6869047619047619,
    0.6726190476190477,
    0.6726190476190477,
    0.7095238095238094
    
])

# NOTE(review): presumably the sex/gender gap RMS -- confirm
grms = np.array([
    0.04024344361776888,
    0.06936229430483305,
    0.03634642110103115,
    0.06871806348059763,
    0.09493117482434614,
    0.09671310523831388,
    0.04265681665704136,
    0.03903317120818163,
    0.116760379648778,
    
])

# NOTE(review): presumably the sex/gender gap MAX -- confirm
gmax = np.array([
    0.04355812122802427,
    0.09808612440191383,
    0.038510911424903704,
    0.08843537414965985,
    0.10101010101010105,
    0.12899896800825583,
    0.04999999999999999,
    0.050000000000000044,
    0.15999999999999998,
    
])

# NOTE(review): presumably the age gap RMS -- confirm
arms = np.array([
    0.07036315904001197,
    0.08173221118917225,
    0.06338688321732534,
    0.10887331417526766,
    0.14683870657704515,
    0.17804758066488988,
    0.1490751918689064,
    0.06773813731813924,
    0.12494998999599802,
    
])

# NOTE(review): presumably the age gap MAX -- confirm
amax = np.array([
    0.07936507936507942,
    0.11384335154826963,
    0.07499999999999996,
    0.11190817790530849,
    0.20766129032258063,
    0.2185792349726775,
    0.1898527004909984,
    0.09166666666666667,
    0.16000000000000003
    
])


In [209]:
balacc.mean()

0.6727513227513228

In [210]:
grms.mean()

0.06719609667565464

In [211]:
gmax.mean()

0.08428884446920651

In [212]:
arms.mean()

0.11011168600519511

In [213]:
amax.mean()

0.1386529445857312

In [206]:
maxine

[0.767857,
 0.761905,
 0.752381,
 0.738095,
 0.754762,
 0.786905,
 0.758333,
 0.75119,
 0.79881,
 0.772619]

In [207]:
np.array(maxine).mean()

0.7642857000000001

In [200]:
bgrms = np.array([
    0.16111469369973225,
    0.1292026000688584,
    0.06363576197939921,
    0.07193163694745597,
    0.1319938271401391,
    0.02701424835315153,
    0.04438171013663089,
    0.0355097667998917,
    0.027663079407949268,
    0.048309458255831696
])

bgmax = np.array([
    0.19780219780219788,
    0.16028708133971292,
    0.07830551989730417,
    0.09890109890109888,
    0.13468013468013468,
    0.031100478468899517,
    0.050000000000000044,
    0.050000000000000044,
    0.03741496598639449,
    0.0581365828890581
])

barms = np.array([
    0.043989904698973015,
    0.1341382583460648,
    0.17786729645211602,
    0.25559895598702725,
    0.1086228172207549,
    0.030629151228209077,
    0.0412641275062117,
    0.20924035653802373,
    0.15103807466993213,
    0.0321720586625656
])

bamax = np.array([
    0.05350152240104389,
    0.16120218579234968,
    0.20785597381342058,
    0.27643784786641923,
    0.11290322580645162,
    0.041666666666666685,
    0.05833333333333335,
    0.292962356792144,
    0.19999999999999996,
    0.03830645161290325
])

In [201]:
barms.mean()

0.1184561001309878

In [202]:
bamax.mean()

0.14431695640847322

In [204]:
bgrms.mean()

0.074075678278904

In [205]:
bgmax.mean()

0.08966280599648006

German NN baseline, as trained by SenSR.train_nn, more notes found in txt doc

In [4]:
nnacc = np.array([
    0.719047619047619,
    0.6785714285714286,
    0.7226190476190477,
    0.6273809523809524,
    0.6928571428571428,
    0.6869047619047619,
    0.6535714285714286,
    0.65,
    0.7535714285714286,
    0.6607142857142858
])

ngrms = np.array([
    0.06856688977380215,
    0.20342798336660847,
    0.12524805845628068,
    0.09915704645861391,
    0.007536566148202717,
    0.19521280831137455,
    0.08854172458359469,
    0.017367534976511747,
    0.07737777522071354,
    0.027754100177395494
])

ngmax = np.array([
    0.07692307692307698,
    0.2870813397129187,
    0.17586649550706035,
    0.13919413919413925,
    0.010101010101010166,
    0.20123839009287925,
    0.10027100271002709,
    0.024561403508772006,
    0.08571428571428574,
    0.038588474232038594
])

narms = np.array([
    0.12310212127690572,
    0.06439952469822834,
    0.10607233065206029,
    0.0947921774625083,
    0.1707835705801741,
    0.2239355298655755,
    0.10999351197257783,
    0.042542621165714514,
    0.22553947671207258,
    0.10424056472898473
])

namax = np.array([
    0.15876468029578072,
    0.09107468123861562,
    0.15000000000000002,
    0.13151602104256332,
    0.23790322580645162,
    0.22916666666666663,
    0.14729950900163669,
    0.05833333333333324,
    0.29999999999999993,
    0.11292962356792147
])

In [3]:
import numpy as np

In [33]:
nnacc[:5].mean()

0.6880952380952381

In [34]:
ngrms[:5].mean()

0.10078730884070157

In [35]:
ngmax[:5].mean()

0.1378332122876411

In [36]:
narms[:5].mean()

0.11182994493397533

In [37]:
namax[:5].mean()

0.15385172167668226

And here are the baseline CV gap values

In [46]:
# Baseline CV gap values, ten entries per array, entered by hand.
# NOTE(review): names presumably follow the g*/a* = sex/age, *rms/*max =
# gap-RMS/gap-MAX pattern used elsewhere in the notebook -- confirm.
cgrms = np.array([
    0.12467182055088183,
    0.04688653137954779,
    0.10982658487409028,
    0.08668406714146641,
    0.022609698444608077,
    0.1485449233233704,
    0.10907291454968232,
    0.10638795567939911,
    0.017699890699629857,
    0.0705102072754589,
])

cgmax = np.array([
    0.1501831501831501,
    0.05023923444976086,
    0.15296092064253175,
    0.1208791208791209,
    0.030303030303030387,
    0.1866028708133971,
    0.15000000000000002,
    0.1499999999999999,
    0.02285714285714291,
    0.07793856308707792,
])

carms = np.array([
    0.10999670374063727,
    0.1948046445527366,
    0.17509974076319232,
    0.2165599978542021,
    0.1731277233949882,
    0.12225464143383527,
    0.13334889237035719,
    0.22414529930666846,
    0.033354160160315866,
    0.072652991728215,
])

camax = np.array([
    0.1287516311439757,
    0.20856102003642984,
    0.24166666666666659,
    0.2266857962697274,
    0.24395161290322576,
    0.1612021857923498,
    0.18333333333333335,
    0.3142389525368249,
    0.040000000000000036,
    0.10147299509001628,
])

In [47]:
cgrms.mean()

0.0842894593918135

In [48]:
cgmax.mean()

0.10919640332152118

In [49]:
carms.mean()

0.14553447953051485

In [50]:
camax.mean()

0.18498641937725496