In [96]:
import numpy as np
import sys
import os 
import dill
import importlib

sys.path.append('../experiments/experiment-pipeline')
import modelSetup

sys.path.append('../src')
import policies 
import bbDebiasing
import maxEnsembleDebias

importlib.reload(bbDebiasing)
importlib.reload(maxEnsembleDebias);

In [112]:
# Load the synthetic dataset: feature matrices and linear-label targets
# for the train and test splits, all stored as comma-separated text.
data_path = '../data/synthetic'
train_x, train_y, test_x, test_y = (
    np.loadtxt(f"{data_path}/{csv_name}", delimiter=',')
    for csv_name in (
        'features.csv',
        'linear-labels.csv',
        'features_test.csv',
        'linear-labels_test.csv',
    )
)

In [113]:
# Sanity check on the loaded test labels: display (n_rows, n_label_columns).
test_y.shape

(400, 4)

In [12]:
# Re-read the test-label CSV and count element-wise mismatches against the
# copy loaded earlier; 0 means both reads agree.
# NOTE(review): this compares two reads of the SAME file, so it confirms that
# loading is reproducible -- it does not re-run the data generation itself.
test_y2 = np.loadtxt(f"{data_path}/linear-labels_test.csv", delimiter=',')
np.sum(test_y!=test_y2)

0

In [21]:
# Load every pre-trained initial model for the chosen label version / model type.
specialization='coord'
label_version='linear-label'
model_type='gb'
path = f"../experiments/experiment-pipeline/init-models/{label_version}/{model_type}"
# os.listdir returns entries in arbitrary (filesystem-dependent) order. Sort the
# names so the order of `models` -- and in particular models[0], which is used
# to build the policies -- is the same on every run and every machine.
all_model_files = sorted(os.listdir(path))

# NOTE(security): dill.load can execute arbitrary code embedded in the file;
# only point this at model artifacts produced by this project.
models = []
for filename in all_model_files:
    with open(f"{path}/{filename}", 'rb') as file:
        models.append(dill.load(file))

In [22]:
# Smoke test: each loaded model must be callable on the full training set,
# the full test set, and a tiny two-row slice without raising.
for model in models:
    for batch in (train_x, test_x, test_x[0:2]):
        model(batch)

In [114]:
# build BB models
pred_dim = train_y.shape[1]  # number of label columns
max_depth, tolerance = 5, 0.01

# Policy choice. Simplex policies were tried first and worked:
# pols = [policies.Simplex(pred_dim, models[0])]*len(models)
# The current experiment uses linear-constraint policies instead.
gran = 0.1
linear_constraint = np.array([[1, 1, 0, 0],
                              [0, 1, 1, 0]])
max_val = np.array([0.5, 0.6])
# NOTE(review): list multiplication repeats ONE policy object, built on
# models[0], len(models) times -- every entry aliases the same instance.
# Confirm this is intended rather than one policy per model.
pols = len(models) * [policies.Linear(pred_dim, models[0], gran, linear_constraint, max_val)]

# Shrink train and test to their first 10 rows to keep this experiment fast.
train_x, train_y = train_x[0:10], train_y[0:10]
test_x, test_y = test_x[0:10], test_y[0:10]


def init_model(xs):
    """Constant baseline: return the mean training label vector once per row of xs.

    train_y is looked up when the function is called, so after the subsetting
    above this is the mean of the 10-row training subset.
    """
    mean_label = np.mean(train_y, axis=0)
    return np.tile(mean_label, (len(xs), 1))


bbModel = bbDebiasing.bbDebias(init_model, pols[0], train_x, train_y, max_depth, tolerance)
bbModel.debias(models, pols)

# build WB model
wbModel = maxEnsembleDebias.EnsembledModel(models, pols, train_x, train_y, max_depth, tolerance)
wbModel.debias()

Maximum depth reached.
Maximum depth reached.


In [115]:
# Run both debiased models on the test features. Each predict call returns a
# pair: the predictions (*Preds) and a second value (*Trans) that the payoff
# helpers consume in the following cells.
# NOTE(review): *Trans is presumably a per-sample transcript of the debiasing
# rounds -- confirm against bbDebiasing / maxEnsembleDebias.
bbPreds, bbTrans = bbModel.predict(test_x)
wbPreds, wbTrans = wbModel.predict(test_x)

In [116]:
# Payoffs for the WB model on the test set: getPredPayoff takes only the
# transcript, while getRealPayoff also takes the true test labels.
wbPred = wbModel.getPredPayoff(wbTrans)
wbReal = wbModel.getRealPayoff(wbTrans, test_y)

In [121]:
# Payoffs for the BB model on the test set: getPredPayoff takes only the
# transcript, while getRealPayoff also takes the true test labels.
bbPred = bbModel.getPredPayoff(bbTrans)
bbReal = bbModel.getRealPayoff(bbTrans, test_y)

In [125]:
# trying by loading in old models
# Load a previously saved pair of debiased models (BB and max-ensemble) from
# disk and run both on the current test features.
# NOTE: `path` is rebound here from the init-models directory string to a list
# of file names, and `bbModel` is replaced by the model loaded from disk.

path = ['linear-label_gb_group_variance_1000_subsample250_BBModel.pkl',
'linear-label_gb_group_variance_1000_subsample250_MaxEnsemble.pkl']

bbPath = f'../experiments/experiment-pipeline/debiased-models/{path[0]}'
wbPath = f'../experiments/experiment-pipeline/debiased-models/{path[1]}'
# NOTE(security): dill.load can execute arbitrary code embedded in the file;
# only load artifacts produced by this project.
with open(bbPath, 'rb') as file:
    bbModel = dill.load(file)
with open(wbPath, 'rb') as file:
    maxModel = dill.load(file)

bbModel.predict(test_x)
# Predict with the max-ensemble model that was just loaded. Calling
# wbModel.predict here would exercise the in-memory model built in an earlier
# cell and leave the loaded one untested.
# NOTE(review): the recorded run of this cell raised a ValueError about a
# 0-sample array; that comes from inside predict on the current test_x and is
# not addressed here -- investigate separately.
maxModel.predict(test_x)

ValueError: Found array with 0 sample(s) (shape=(0, 20)) while a minimum of 1 is required by GradientBoostingRegressor.

In [126]:
import json

# File names of saved debiased-model pairs, one [BBModel, MaxEnsemble] pair per
# experiment configuration (specialization x policy type).
pathsets = [
    [
        'linear-label_gb_coord_variance_5000_subsample400_BBModel.pkl',
        'linear-label_gb_coord_variance_5000_subsample400_MaxEnsemble.pkl',
    ],
    [
        'linear-label_gb_group_variance_5000_subsample400_BBModel.pkl',
        'linear-label_gb_group_variance_5000_subsample400_MaxEnsemble.pkl',
    ],
    [
        'linear-label_gb_coord_linear-constraint_5000_subsample400_BBModel.pkl',
        'linear-label_gb_coord_linear-constraint_5000_subsample400_MaxEnsemble.pkl',
    ],
    [
        'linear-label_gb_group_linear-constraint_8000_subsample400_BBModel.pkl',
        'linear-label_gb_group_linear-constraint_8000_subsample400_MaxEnsemble.pkl',
    ],
]
# Keep the individual pair names available as well.
pathA, pathB, pathC, pathD = pathsets

# Persist the list of pairs as JSON in the current working directory.
with open('pathsets.json', 'w') as f:
    f.write(json.dumps(pathsets))
