# Distributional RF

In [None]:
from drf import drf
import pandas as pd
import numpy as np

In [None]:
#| hide

data, XTrain, yTrain, XTest, yTest = loadDataBakery(returnXY = True)

LGBM = LGBMRegressor(n_jobs = 1).fit(XTrain, yTrain)
yPredTrain = LGBM.predict(XTrain)

In [None]:
XTrainDf = pd.DataFrame(XTrain)
yTrainDf = pd.DataFrame(yTrain)
XTestDf = pd.DataFrame(XTest)

# Convert all boolean columns to 0/1
for col in XTrainDf.columns:
    if np.isin(XTrainDf[col].dtype, ['bool', 'O']):
        XTrainDf[col] = XTrainDf[col].astype('uint8')

for col in XTestDf.columns:
    if np.isin(XTestDf[col].dtype, ['bool', 'O']):
        XTestDf[col] = XTestDf[col].astype('uint8')

In [None]:
DRF = drf(num_trees = 100)

In [None]:
DRF.fit(XTrainDf, yTrainDf)

In [None]:
out = DRF.predict(XTestDf)

In [None]:
sum(out.weights[1000] > 0)

586

# Counter Factuals

In [None]:
#| hide

import ipdb
from lightgbm import LGBMRegressor
from xgboost import XGBRegressor
from dddex.loadData import *
from dddex.wSAA import RandomForestWSAA, SampleAverageApproximation

import pandas as pd
import numpy as np
import time 
import psutil
import os
import sys

import gurobipy as gp
from gurobipy import GRB
import gurobipy_pandas as gppd

In [None]:
#| hide

data, XTrain, yTrain, XTest, yTest = loadDataBakery(returnXY = True)
LGBM = LGBMRegressor(n_jobs = 1).fit(XTrain, yTrain)

import time
start = time.time()
LSKDEx = LevelSetKDEx(estimator = LGBM, binSize = 100)

LSKDEx.fit(XTrain, yTrain)
weights = LSKDEx.getWeights(XTest)
print(time.time() - start)

In [None]:
lowerBounds = LSKDEx.lowerBoundPerBin
lowerBounds[0] = -2
weights = LSKDEx.getWeights(X = XTrain, outputType = 'summarized')

In [None]:
weightsDataList = list()
for bin in LSKDEx.indicesPerBin.values():
    weightsDataBin = weights[bin[0]]
    weightsDataList.append(weightsDataBin)


In [None]:
prob = 0.8
currentForecast = LGBM.predict(XTest[0].reshape(1, -1))
currentDecision = np.array(LSKDEx.predict(XTest[0], probs = prob)).flatten()

In [None]:
desiredDecision = 0

costsDiffList = list()

for weightsData in weightsDataList:

    costsCurrent = np.array([currentDecision - y if currentDecision - y > 0 else y - currentDecision for y in weightsData[1]])
    costsDesired = np.array([desiredDecision - y if desiredDecision - y > 0 else y - desiredDecision for y in weightsData[1]])

    costsDiff = costsDesired - costsCurrent
    costsDiffWeighted = costsDiff * weightsData[0]
    costsDiffList.append(costsDiffWeighted.sum())        

In [None]:
model = gp.Model()
gppd.set_interactive()
model.setParam('OutputFlag', 0)
model.setParam('Threads', 1)
model.setParam('TimeLimit', 60)
model.setParam('MIPGap', 0.1)

Set parameter Username
Academic license - for non-commercial use only - expires 2024-06-27


In [None]:
index = pd.RangeIndex(0, len(weightsDataList), name = 'index')

z = gppd.add_vars(model, index, name = 'z', vtype = GRB.BINARY)

# Ensure that only one of the binVars can be 1
rhs = pd.Series(1, index = index[0:1], name = 'rhs')
constraints = gppd.add_constrs(model, z.sum(), GRB.EQUAL, rhs, name = 'oneBinVar')

In [None]:
constraints = gppd.add_constrs(model, z * np.array(costsDiffList), GRB.LESS_EQUAL, 0.00001, name = 'costsConstr')
constraints.apply(model.getRow)
# constraints.gppd.RHS

index
0       -0.4518072289156626 z[0]
1        -1.127017731944728 z[1]
2        -0.786919542577088 z[2]
3       -0.5423644578313251 z[3]
4       -0.8030510775496351 z[4]
                 ...            
627    42.450287829922665 z[627]
628    42.908414958741616 z[628]
629     43.04376054432887 z[629]
630     42.47230042953655 z[630]
631     65.14987070205943 z[631]
Name: costsConstr, Length: 632, dtype: object

In [None]:
model.setObjective((((currentForecast - lowerBounds) ** 2) * z).sum(), GRB.MINIMIZE)
model.getObjective()

<gurobi.LinExpr: 0.0>

In [None]:
model.update()
model.optimize()
model.status

2

In [None]:
# Get Results of optimization
all_vars = model.getVars()
values = model.getAttr("X", all_vars)
names = model.getAttr("VarName", all_vars)

for name, val in zip(names, values):
    print(f"{name} = {val}")

# Find the index of the bin that was chosen
chosenBinIndex = np.where(np.array(values) == 1)[0][0]
lowerBounds[chosenBinIndex]

z[0] = 0.0
z[1] = 0.0
z[2] = 0.0
z[3] = 0.0
z[4] = 0.0
z[5] = 0.0
z[6] = 0.0
z[7] = 0.0
z[8] = 0.0
z[9] = 0.0
z[10] = 0.0
z[11] = 0.0
z[12] = 0.0
z[13] = 0.0
z[14] = 0.0
z[15] = 0.0
z[16] = 0.0
z[17] = 0.0
z[18] = 0.0
z[19] = 0.0
z[20] = 0.0
z[21] = 0.0
z[22] = 0.0
z[23] = 0.0
z[24] = 0.0
z[25] = 0.0
z[26] = 0.0
z[27] = 0.0
z[28] = 0.0
z[29] = 0.0
z[30] = 0.0
z[31] = 0.0
z[32] = 0.0
z[33] = 0.0
z[34] = 0.0
z[35] = 0.0
z[36] = 0.0
z[37] = 0.0
z[38] = 0.0
z[39] = 0.0
z[40] = 0.0
z[41] = 0.0
z[42] = 0.0
z[43] = 0.0
z[44] = 0.0
z[45] = 0.0
z[46] = 0.0
z[47] = 0.0
z[48] = 0.0
z[49] = 0.0
z[50] = 0.0
z[51] = 0.0
z[52] = 0.0
z[53] = 0.0
z[54] = 0.0
z[55] = 0.0
z[56] = 0.0
z[57] = 0.0
z[58] = 0.0
z[59] = 0.0
z[60] = 0.0
z[61] = 0.0
z[62] = 0.0
z[63] = 0.0
z[64] = 0.0
z[65] = 0.0
z[66] = 0.0
z[67] = 0.0
z[68] = 0.0
z[69] = 0.0
z[70] = 0.0
z[71] = 0.0
z[72] = 0.0
z[73] = 0.0
z[74] = 0.0
z[75] = 0.0
z[76] = 0.0
z[77] = 0.0
z[78] = 0.0
z[79] = 0.0
z[80] = 0.0
z[81] = 0.0
z[82] = 0.0
z[83] = 0.0
z[

0.24939031022490632

### Counterfactuals 2.0

In [None]:
prob = 0.8
currentForecast = LGBM.predict(XTest[0].reshape(1, -1))
currentDecision = np.array(LSKDEx.predict(XTest[0], probs = prob)).flatten()

In [None]:
desiredDecision = 1

costsDiffList = list()

for weightsData in weightsDataList:

    costsCurrent = np.array([currentDecision - y if currentDecision - y > 0 else y - currentDecision for y in weightsData[1]])
    costsDesired = np.array([desiredDecision - y if desiredDecision - y > 0 else y - desiredDecision for y in weightsData[1]])

    costsDiff = costsDesired - costsCurrent
    costsDiffWeighted = costsDiff * weightsData[0]
    costsDiffList.append(costsDiffWeighted.sum())        

In [None]:
# We remove the first element of lowerBounds and add a high number in the end to create upperBounds
upperBounds = np.append(lowerBounds[1:], 1000)

In [None]:
model = gp.Model()
gppd.set_interactive()
model.setParam('OutputFlag', 0)
model.setParam('Threads', 1)
model.setParam('TimeLimit', 60)
model.setParam('MIPGap', 0.1)

In [None]:
index = pd.RangeIndex(0, len(weightsDataList), name = 'index')

y = gppd.add_vars(model, index[0:1], name = 'y', lb = -2, ub = 2)
z = gppd.add_vars(model, index, name = 'z', vtype = GRB.BINARY)

# Ensure that only one of the binVars can be 1
rhs = pd.Series(1, index = index[0:1], name = 'rhs')
constraints = gppd.add_constrs(model, z.sum(), GRB.EQUAL, rhs, name = 'oneBinVar')

In [None]:
constraints = gppd.add_constrs(model, z * np.array(costsDiffList), GRB.LESS_EQUAL, 0.00001, name = 'costsConstr')
constraints.apply(model.getRow)
# constraints.gppd.RHS

index
0         0.5481927710843374 z[0]
1          1.859677183309509 z[1]
2         1.2085041862364712 z[2]
3          1.443468875502008 z[3]
4          1.193005260478534 z[4]
                  ...            
627     -2.864679437946392 z[627]
628    -3.2788234415380026 z[628]
629     -12.75197722106167 z[629]
630    -15.179143850514059 z[630]
631    -34.838314420487464 z[631]
Name: costsConstr, Length: 632, dtype: object

In [None]:
constraints = gppd.add_constrs(model, y[0], GRB.GREATER_EQUAL, lowerBounds - 10000 * (1 - z), name = 'lowerBoundConstr')
constraints = gppd.add_constrs(model, y[0], GRB.LESS_EQUAL, upperBounds - 0.000001 + 10000 * (1 - z), name = 'upperBoundConstr')

In [None]:
constraints.apply(model.getRow)

index
0        y[0] + 10000.0 z[0]
1        y[0] + 10000.0 z[1]
2        y[0] + 10000.0 z[2]
3        y[0] + 10000.0 z[3]
4        y[0] + 10000.0 z[4]
               ...          
627    y[0] + 10000.0 z[627]
628    y[0] + 10000.0 z[628]
629    y[0] + 10000.0 z[629]
630    y[0] + 10000.0 z[630]
631    y[0] + 10000.0 z[631]
Name: upperBoundConstr, Length: 632, dtype: object

In [None]:
dpos = gppd.add_vars(model, index[0:1], name = 'dpos', lb = 0)
dneg = gppd.add_vars(model, index[0:1], name = 'dneg', lb = 0)

In [None]:
gppd.add_constrs(model, dpos - dneg, GRB.EQUAL, y - np.float64(currentForecast))

index
0    <gurobi.Constr R1897>
dtype: object

In [None]:
model.setObjective((dpos + dneg).sum(), GRB.MINIMIZE)
model.getObjective()

<gurobi.LinExpr: 0.0>

In [None]:
model.update()
model.optimize()
model.status

2

In [None]:
# Get Results of optimization
all_vars = model.getVars()
values = model.getAttr("X", all_vars)
names = model.getAttr("VarName", all_vars)

for name, val in zip(names, values):
    print(f"{name} = {val}")

y[0] = 0.6993343523772637
z[0] = 0.0
z[1] = 0.0
z[2] = 0.0
z[3] = 0.0
z[4] = 0.0
z[5] = 0.0
z[6] = 0.0
z[7] = 0.0
z[8] = 0.0
z[9] = 0.0
z[10] = 0.0
z[11] = 0.0
z[12] = 0.0
z[13] = 0.0
z[14] = 0.0
z[15] = 0.0
z[16] = 0.0
z[17] = 0.0
z[18] = 0.0
z[19] = 0.0
z[20] = 0.0
z[21] = 0.0
z[22] = 0.0
z[23] = 0.0
z[24] = 0.0
z[25] = 0.0
z[26] = 0.0
z[27] = 0.0
z[28] = 0.0
z[29] = 0.0
z[30] = 0.0
z[31] = 0.0
z[32] = 0.0
z[33] = 0.0
z[34] = 0.0
z[35] = 0.0
z[36] = 0.0
z[37] = 0.0
z[38] = 0.0
z[39] = 0.0
z[40] = 0.0
z[41] = 0.0
z[42] = 0.0
z[43] = 0.0
z[44] = 0.0
z[45] = 0.0
z[46] = 0.0
z[47] = 0.0
z[48] = 0.0
z[49] = 0.0
z[50] = 0.0
z[51] = 0.0
z[52] = 0.0
z[53] = 0.0
z[54] = 0.0
z[55] = 0.0
z[56] = 0.0
z[57] = 0.0
z[58] = 0.0
z[59] = 0.0
z[60] = 0.0
z[61] = 0.0
z[62] = 0.0
z[63] = 0.0
z[64] = 0.0
z[65] = 0.0
z[66] = 0.0
z[67] = 0.0
z[68] = 0.0
z[69] = 0.0
z[70] = 0.0
z[71] = 0.0
z[72] = 0.0
z[73] = 0.0
z[74] = 0.0
z[75] = 0.0
z[76] = 0.0
z[77] = 0.0
z[78] = 0.0
z[79] = 0.0
z[80] = 0.0
z[81] = 0.0


### Counterfactuals 3.0

In [None]:
import gurobipy as gp
from gurobipy import GRB
import gurobipy_pandas as gppd
import gurobi_ml as gpml
from xgboost import XGBRegressor
from sklearn.neural_network import MLPRegressor
from dddex.loadData import *
from dddex.wSAA import RandomForestWSAA, SampleAverageApproximation
import ipdb

In [None]:
#| hide

data, XTrain, yTrain, XTest, yTest = loadDataBakery(returnXY = True)

XGB = XGBRegressor(max_depth = 2, 
                    n_estimators = 10, 
                    n_jobs = 1).fit(XTrain, yTrain)

LSKDEx = LevelSetKDEx(estimator = XGB, binSize = 100)
LSKDEx.fit(XTrain, yTrain)
weights = LSKDEx.getWeights(XTest)

In [None]:
data, XTrain, yTrain, XTest, yTest = loadDataBakery(returnXY = True)

nn = MLPRegressor([20]*2, max_iter = 10000, random_state=1)
nn.fit(XTrain, yTrain)

LSKDEx = LevelSetKDEx(estimator = nn, binSize = 100)
LSKDEx.fit(XTrain, yTrain)
weights = LSKDEx.getWeights(XTest)

In [None]:
lowerBounds = LSKDEx.lowerBoundPerBin
lowerBounds[0] = -2

# We remove the first element of lowerBounds and add a high number in the end to create upperBounds
upperBounds = np.append(lowerBounds[1:], 1000)

weights = LSKDEx.getWeights(X = XTrain, outputType = 'summarized')

weightsDataList = list()
for bin in LSKDEx.indicesPerBin.values():
    weightsDataBin = weights[bin[0]]
    weightsDataList.append(weightsDataBin)

In [None]:
prob = 0.8
currentForecast = nn.predict(XTest[0].reshape(1, -1))
currentDecision = np.array(LSKDEx.predict(XTest[0], probs = prob)).flatten()

desiredDecision = 0.1

costsDiffList = list()

for weightsData in weightsDataList:

    costsCurrent = np.array([currentDecision - y if currentDecision - y > 0 else y - currentDecision for y in weightsData[1]])
    costsDesired = np.array([desiredDecision - y if desiredDecision - y > 0 else y - desiredDecision for y in weightsData[1]])

    costsDiff = costsDesired - costsCurrent
    costsDiffWeighted = costsDiff * weightsData[0]
    costsDiffList.append(costsDiffWeighted.sum())        

In [None]:
model = gp.Model()
gppd.set_interactive()
model.setParam('OutputFlag', 0)
model.setParam('Threads', 1)
model.setParam('TimeLimit', 60)
model.setParam('MIPGap', 0.1)

In [None]:
index = pd.RangeIndex(0, len(weightsDataList), name = 'index')

y = gppd.add_vars(model, index[0:1], name = 'y', lb = -5, ub = 5)
z = gppd.add_vars(model, index, name = 'z', vtype = GRB.BINARY)

# Ensure that only one of the binVars can be 1
rhs = pd.Series(1, index = index[0:1], name = 'rhs')
constraints = gppd.add_constrs(model, z.sum(), GRB.EQUAL, rhs, name = 'oneBinVar')

In [None]:
constraints = gppd.add_constrs(model, z * np.array(costsDiffList), GRB.LESS_EQUAL, 0.00001, name = 'costsConstr')
# constraints.apply(model.getRow)
# constraints.gppd.RHS

In [None]:
constraints = gppd.add_constrs(model, y[0], GRB.GREATER_EQUAL, lowerBounds - 10000 * (1 - z), name = 'lowerBoundConstr')
constraints = gppd.add_constrs(model, y[0], GRB.LESS_EQUAL, upperBounds - 0.000001 + 10000 * (1 - z), name = 'upperBoundConstr')

In [None]:
# constraints.apply(model.getRow)

In [None]:
X_examples = XTest[0, :]
# y_examples = yTest[1:11]

In [None]:
# input_vars = model.addMVar(X_examples.shape, lb = X_examples - 1, ub = X_examples + 1)
# output_vars = model.addMVar(y_examples.shape, lb = -5, ub = 5)

In [None]:
# Transform all columns of XTrain to numeric
XTrain = pd.DataFrame(XTrain).apply(pd.to_numeric)
XTrain

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,53,54,55,56,57,58,59,60,61,62
0,0,2.0,False,False,False,9.4,6.7,0.0,0.0,30.0,...,0,0,0,0,1,0,0,1,0,0
1,1,2.0,False,False,False,5.6,5.0,0.0,0.0,31.0,...,0,0,0,0,1,0,0,1,0,0
2,2,2.0,False,False,False,0.1,11.2,0.0,0.0,32.0,...,0,0,0,0,1,0,0,1,0,0
3,3,2.0,False,False,False,2.8,9.9,0.0,0.0,33.0,...,0,0,0,0,1,0,0,1,0,0
4,4,2.0,False,False,False,5.9,3.7,0.0,0.0,34.0,...,0,0,0,0,1,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
63265,74639,71.0,False,False,False,1.2,5.9,0.0,1.0,691.0,...,0,0,1,0,0,1,0,0,0,1
63266,74640,71.0,False,False,False,0.0,4.4,0.0,1.0,692.0,...,0,0,1,0,0,1,0,0,0,1
63267,74641,71.0,False,False,False,0.0,5.9,0.0,0.0,693.0,...,0,0,1,0,0,1,0,0,0,1
63268,74642,71.0,False,False,False,6.3,9.5,0.0,0.0,694.0,...,0,0,1,0,0,1,0,0,0,1


In [None]:
# # Get statistics of every column of XTrain
# XTrainStats = pd.DataFrame(XTrain).describe().loc[['max', 'min']]
# XTrainStats

In [None]:
input_vars1 = model.addMVar((2, ), lb = 0, ub = X_examples[0:2] + 100)
input_vars2 = model.addMVar((3, ), vtype = GRB.BINARY)
input_vars3 = model.addMVar((58, ), lb = 0, ub = X_examples[5:63] + 1)

In [None]:
input_vars = gp.MVar(gp.MVar.tolist(input_vars1) + gp.MVar.tolist(input_vars2) + gp.MVar.tolist(input_vars3))

In [None]:
pred_constr = gpml.add_predictor_constr(model, nn, input_vars, y)

In [None]:
# dpos = gppd.add_vars(model, index[0:1], name = 'dpos', lb = 0)
# dneg = gppd.add_vars(model, index[0:1], name = 'dneg', lb = 0)

In [None]:
# gppd.add_constrs(model, dpos - dneg, GRB.EQUAL, y - np.float64(currentForecast))

In [None]:
model.setObjective((X_examples - input_vars)@(X_examples - input_vars), GRB.MINIMIZE)
model.getObjective()

<gurobi.LinExpr: 0.0>

In [None]:
model.update()
model.optimize()
model.status

2

In [None]:
# Get Results of optimization
all_vars = model.getVars()
values = model.getAttr("X", all_vars)
names = model.getAttr("VarName", all_vars)

for name, val in zip(names, values):
    print(f"{name} = {val}")

y[0] = 0.8392814691142121
z[0] = -0.0
z[1] = -0.0
z[2] = -0.0
z[3] = 0.0
z[4] = -0.0
z[5] = -0.0
z[6] = -0.0
z[7] = -0.0
z[8] = -0.0
z[9] = -0.0
z[10] = -0.0
z[11] = -0.0
z[12] = -0.0
z[13] = -0.0
z[14] = -0.0
z[15] = 0.0
z[16] = 0.0
z[17] = 0.0
z[18] = -0.0
z[19] = -0.0
z[20] = -0.0
z[21] = 0.0
z[22] = -0.0
z[23] = 0.0
z[24] = 0.0
z[25] = 0.0
z[26] = 0.0
z[27] = -0.0
z[28] = 0.0
z[29] = 0.0
z[30] = -0.0
z[31] = 0.0
z[32] = 0.0
z[33] = -0.0
z[34] = -0.0
z[35] = 0.0
z[36] = -0.0
z[37] = 0.0
z[38] = 0.0
z[39] = 0.0
z[40] = 0.0
z[41] = 0.0
z[42] = 0.0
z[43] = -0.0
z[44] = -0.0
z[45] = 0.0
z[46] = -0.0
z[47] = -0.0
z[48] = 0.0
z[49] = -0.0
z[50] = -0.0
z[51] = -0.0
z[52] = 0.0
z[53] = -0.0
z[54] = -0.0
z[55] = 0.0
z[56] = -0.0
z[57] = -0.0
z[58] = 0.0
z[59] = 0.0
z[60] = -0.0
z[61] = 0.0
z[62] = 0.0
z[63] = -0.0
z[64] = 0.0
z[65] = 0.0
z[66] = 0.0
z[67] = -0.0
z[68] = 0.0
z[69] = 0.0
z[70] = 0.0
z[71] = 0.0
z[72] = 0.0
z[73] = 0.0
z[74] = -0.0
z[75] = -0.0
z[76] = 0.0
z[77] = 0.0
z[78] = -

In [None]:
input_vars.X - XTest[0, :]

array([0.0026221249723903384, 0.0015932166293786132, -0.0, -0.0, -0.0,
       0.0, -0.0002891720091007244, 0.0006009434269702657,
       0.00043655609121629754, -0.0025880705584313546,
       5.33575162080524e-05, 0.003699569163650329, 0.0037357747688996495,
       -0.000887911758247234, -0.0003117659091556868,
       0.0021323238036478308, 0.0018556263813237217,
       0.00029951034121550313, -0.001471695923761951,
       -0.0024846809545337467, -0.003481903076736126,
       -0.0004673784338319331, -0.002547775771331584,
       0.005757935484950544, -0.0014546353979545245, 0.001512186106466168,
       0.00022281140378044295, 0.002442510933762687,
       0.0017007823509647135, 0.0012615813608922721,
       -0.0013539484558849624, 0.00488788930437975, 0.0005183005188284101,
       -0.002415754931741858, 0.0003120615359445411,
       -0.0007584183465885741, -0.0008155243508835563,
       -0.004008335551553853, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
       0.007886874756735773, 0.00094149964846051

In [None]:
predAlt = nn.predict(input_vars.X.reshape(1, -1))


In [None]:
binIndex = int((np.searchsorted(a = lowerBounds, v = predAlt, side = 'right') - 1))
weightsDataList[binIndex]

  binIndex = int((np.searchsorted(a = lowerBounds, v = predAlt, side = 'right') - 1))


(array([0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.02, 0.01, 0.01, 0.01,
        0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.02, 0.02, 0.01, 0.01, 0.01,
        0.01, 0.01, 0.01, 0.01, 0.03, 0.01, 0.01, 0.01, 0.01, 0.01, 0.04,
        0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01,
        0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01,
        0.01, 0.01, 0.01, 0.01, 0.01, 0.02, 0.01, 0.01, 0.01, 0.01, 0.01,
        0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01,
        0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01,
        0.01, 0.01, 0.01]),
 array([0.14141414, 0.14878893, 0.15037594, 0.15753425, 0.16161616,
        0.16609589, 0.183391  , 0.18796992, 0.19520548, 0.19691781,
        0.20034247, 0.20205479, 0.20588235, 0.22556391, 0.23232323,
        0.25252525, 0.25684932, 0.26262626, 0.27272727, 0.28282828,
        0.2885906 , 0.29323308, 0.29604366, 0.3030303 , 0.30872483,
        0.31164384, 0.31313131, 0.315436

In [None]:
binIndex = int((np.searchsorted(a = lowerBounds, v = y[0].X, side = 'right') - 1))
binIndex

616

In [None]:
y[0].X

0.8392814691142121

In [None]:
costsDiffList[binIndex]

-0.03701528228614137

In [None]:
lowerBounds[616]

0.8392814691141988

In [None]:
costsDiffList[binIndex]

6.674837697188485

In [None]:
costsDiffList

[1.1653765534382778,
 -0.013348267772802291,
 -2.3088845514371372,
 0.3856300996864208,
 0.650333722783639,
 -3.112225171777588,
 -0.4005016891182305,
 0.392829586772407,
 -0.5020106036265393,
 1.2357348636709624,
 -2.4882900683589377,
 -3.6471467300564373,
 1.544453587902793,
 2.9290380898319794,
 3.266081268695605,
 -0.20235372877235447,
 0.41306243157829714,
 1.5202660923499354,
 1.591742352460487,
 1.3322064340129742,
 5.302857425059902,
 2.226961646018453,
 2.0561820178057184,
 2.6472982515150907,
 3.3166618274404485,
 4.018935623068441,
 1.4429644726408104,
 5.059141377839666,
 3.216850070266389,
 4.80168043826631,
 5.251769062030984,
 4.689036428192651,
 5.7053967353379,
 7.55715148468744,
 3.961313884699441,
 4.977416451519867,
 4.398599986191661,
 3.9482762936922016]

In [None]:
XTest[0, :]

array([5.55000000e+02, 5.55000000e+02, 5.84000000e+02, 0.00000000e+00,
       0.00000000e+00, 1.00000000e+00, 3.40000000e+00, 2.60000000e+00,
       0.00000000e+00, 5.39000000e+02, 0.00000000e+00, 0.00000000e+00,
       0.00000000e+00, 1.00000000e+00, 0.00000000e+00, 0.00000000e+00,
       0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
       0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
       0.00000000e+00, 1.00000000e+00, 0.00000000e+00, 0.00000000e+00,
       0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 1.00000000e+00,
       1.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
       0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 2.50000000e+01,
       1.12000000e+00, 1.60000000e-01, 1.60000000e-01, 6.04743157e-02,
       3.65714286e-03, 1.71047195e-01, 2.40000000e-01, 2.40000000e-01,
       4.00000000e-02, 2.56000000e+00, 1.60000000e-01, 1.82857143e-01,
       1.19727582e-01, 1.43346939e-02, 2.18566760e-01, 5.60000000e-01,
      

In [None]:
pred_constr.get_error()

array([[7.23865412e-14]])

### Counterfactuals 3.1

In [None]:
import gurobipy as gp
from gurobipy import GRB
import gurobipy_pandas as gppd
import gurobi_ml as gpml

import xgboost as xgb
from xgboost import XGBRegressor
from sklearn.neural_network import MLPRegressor
from dddex.levelSetKDEx_univariate import LevelSetKDEx
from dddex.loadData import *
from dddex.wSAA import RandomForestWSAA, SampleAverageApproximation

import ipdb

In [None]:
#| hide

data, XTrain, yTrain, XTest, yTest = loadDataYaz(returnXY = True)

XGB = XGBRegressor(max_depth = 5, 
                   n_estimators = 100, 
                   n_jobs = 1).fit(XTrain, yTrain)

LSKDEx = LevelSetKDEx(estimator = XGB, binSize = 100)
LSKDEx.fit(XTrain, yTrain)
weights = LSKDEx.getWeights(XTest)

In [None]:
lowerBounds = LSKDEx.lowerBoundPerBin
lowerBounds[0] = -2

# We remove the first element of lowerBounds and add a high number in the end to create upperBounds
upperBounds = np.append(lowerBounds[1:], 1000)

weights = LSKDEx.getWeights(X = XTrain, outputType = 'summarized')

weightsDataList = list()
for bin in LSKDEx.indicesPerBin.values():
    weightsDataBin = weights[bin[0]]
    weightsDataList.append(weightsDataBin)

In [None]:
prob = 0.8
currentForecast = LSKDEx.pointPredict(XTest[0].reshape(1, -1))
currentDecision = np.array(LSKDEx.predict(XTest[0], probs = prob))[0, 0]

desiredDecision = 0.8

costsDiffList = list()

for weightsData in weightsDataList:

    costsCurrent = np.array([(1 - prob) * (currentDecision - y) if currentDecision - y > 0 else prob * (y - currentDecision) for y in weightsData[1]])
    costsDesired = np.array([(1 - prob) * (desiredDecision - y) if desiredDecision - y > 0 else prob * (y - desiredDecision) for y in weightsData[1]])

    costsCurrent = (costsCurrent * weightsData[0]).sum()
    costsDesired = (costsDesired * weightsData[0]).sum()
    # ipdb.set_trace()
    costsDiff = costsDesired - costsCurrent
    costsDiffList.append(costsDiff)  

costsDiff = np.array(costsDiffList)      

In [None]:
costsDiff

array([ 0.144     ,  0.13644706,  0.11621578,  0.0964041 ,  0.08193228,
        0.07267596,  0.06153944,  0.05030676,  0.02798327,  0.02235207,
        0.01838795,  0.00976208, -0.00985335, -0.0151143 , -0.01932676,
       -0.02503583, -0.0303601 , -0.04269365, -0.04983175, -0.06377319,
       -0.06619415, -0.0732708 , -0.07730739, -0.09066015, -0.10778555,
       -0.10795848, -0.11764781, -0.12896922, -0.14075128, -0.16013787,
       -0.17647238, -0.18460608, -0.19812567, -0.22838469, -0.25267837,
       -0.28358089, -0.32000106, -0.45340472])

In [None]:
model = gp.Model()
gppd.set_interactive()
# model.setParam('OutputFlag', 0)
model.setParam('Threads', 1)
model.setParam('TimeLimit', 1200)
model.setParam('MIPGap', 0.1)

Set parameter Threads to value 1
Set parameter TimeLimit to value 1200
Set parameter MIPGap to value 0.1


In [None]:
index = pd.RangeIndex(0, len(weightsDataList), name = 'index')

y = gppd.add_vars(model, index[0:1], name = 'y', lb = -5, ub = 5)
z = gppd.add_vars(model, index, name = 'z', vtype = GRB.BINARY)

# Ensure that only one of the binVars can be 1
rhs = pd.Series(1, index = index[0:1], name = 'rhs')
constraints = gppd.add_constrs(model, z.sum(), GRB.EQUAL, rhs, name = 'oneBinVar')

In [None]:
constraints = gppd.add_constrs(model, z * np.array(costsDiffList), GRB.LESS_EQUAL, 0.00001, name = 'costsConstr')
# constraints.apply(model.getRow)
# constraints.gppd.RHS

In [None]:
constraints = gppd.add_constrs(model, y[0], GRB.GREATER_EQUAL, lowerBounds - 10000 * (1 - z), name = 'lowerBoundConstr')
constraints = gppd.add_constrs(model, y[0], GRB.LESS_EQUAL, upperBounds - 0.000001 + 10000 * (1 - z), name = 'upperBoundConstr')

In [None]:
X_examples = XTest[0, :]
# y_examples = yTest[1:11]

In [None]:
input_vars1 = model.addMVar((3, ), lb = 0, ub = X_examples[0:3] + 100)
input_vars2 = model.addMVar((3, ), vtype = GRB.BINARY)
input_vars3 = model.addMVar((61, ), lb = 0, ub = X_examples[6:67] + 1)

input_vars = gp.MVar(gp.MVar.tolist(input_vars1) + gp.MVar.tolist(input_vars2) + gp.MVar.tolist(input_vars3))

In [None]:
pred_constr = gpml.add_predictor_constr(model, XGB._Booster, input_vars, y)

In [None]:
# dpos = gppd.add_vars(model, index[0:1], name = 'dpos', lb = 0)
# dneg = gppd.add_vars(model, index[0:1], name = 'dneg', lb = 0)

In [None]:
# gppd.add_constrs(model, dpos - dneg, GRB.EQUAL, y - np.float64(currentForecast))

In [None]:
model.setObjective((X_examples - input_vars)@(X_examples - input_vars), GRB.MINIMIZE)
model.getObjective()

<gurobi.LinExpr: 0.0>

In [None]:
model.update()
model.optimize()
model.status

Gurobi Optimizer version 10.0.2 build v10.0.2rc0 (linux64)

CPU model: AMD EPYC 7713P 64-Core Processor, instruction set [SSE2|AVX|AVX2]
Thread count: 64 physical cores, 128 logical processors, using up to 1 threads

Optimize a model with 216 rows, 2824 columns and 2947 nonzeros
Model fingerprint: 0x78f3b3c8
Model has 67 quadratic objective terms
Model has 8106 general constraints
Variable types: 165 continuous, 2659 integer (2659 binary)
Coefficient statistics:
  Matrix range     [1e-02, 1e+04]
  Objective range  [7e-03, 1e+03]
  QObjective range [2e+00, 2e+00]
  Bounds range     [1e-02, 5e+03]
  RHS range        [1e-05, 1e+04]
  GenCon rhs range [5e-07, 6e+02]
  GenCon coe range [1e+00, 1e+00]
Presolve added 2105 rows and 0 columns
Presolve removed 0 rows and 1012 columns
Presolve time: 0.31s
Presolved: 2321 rows, 1812 columns, 18974 nonzeros
Presolved model has 49 quadratic objective terms
Variable types: 149 continuous, 1663 integer (1663 binary)
Found heuristic solution: objective

2

In [None]:
# Get Results of optimization
all_vars = model.getVars()
values = model.getAttr("X", all_vars)
names = model.getAttr("VarName", all_vars)

for name, val in zip(names, values):
    print(f"{name} = {val}")

y[0] = 0.23465877201681534
z[0] = 0.0
z[1] = 0.0
z[2] = 0.0
z[3] = 0.0
z[4] = 0.0
z[5] = 0.0
z[6] = 0.0
z[7] = 0.0
z[8] = 0.0
z[9] = 0.0
z[10] = 0.0
z[11] = 0.0
z[12] = 1.0
z[13] = -0.0
z[14] = -0.0
z[15] = -0.0
z[16] = -0.0
z[17] = -0.0
z[18] = -0.0
z[19] = -0.0
z[20] = -0.0
z[21] = -0.0
z[22] = -0.0
z[23] = -0.0
z[24] = -0.0
z[25] = -0.0
z[26] = -0.0
z[27] = -0.0
z[28] = -0.0
z[29] = -0.0
z[30] = -0.0
z[31] = -0.0
z[32] = -0.0
z[33] = -0.0
z[34] = -0.0
z[35] = -0.0
z[36] = -0.0
z[37] = -0.0
C39 = 555.0
C40 = 555.0
C41 = 584.0
C42 = 0.0
C43 = 0.0
C44 = 1.0
C45 = 3.4
C46 = 2.6
C47 = 0.0
C48 = 539.0
C49 = 0.0
C50 = 0.0
C51 = 0.0
C52 = 1.0
C53 = 0.0
C54 = 0.0
C55 = 0.0
C56 = 0.0
C57 = 0.0
C58 = 0.0
C59 = 0.0
C60 = 0.0
C61 = 0.0
C62 = 0.0
C63 = 0.0
C64 = 1.0
C65 = 0.0
C66 = 0.0
C67 = 0.0
C68 = 0.0
C69 = 0.0
C70 = 1.0
C71 = 1.0
C72 = 0.0
C73 = 0.0
C74 = 0.0
C75 = 0.0
C76 = 0.0
C77 = 0.0
C78 = 25.0
C79 = 1.12
C80 = 0.16
C81 = 0.1599999999999999
C82 = 0.10447786748409271
C83 = 0.003657142857

In [None]:
input_vars.X - XTest[0, :]

array([ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.04400355,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.01971778,  0.        ,  0.02908878,
        0.01411766,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.03908598,  0.        , -0.02727416,  0.        ,
        0.        ,  0.03495935])

In [None]:
predAlt = XGB._Booster.predict(input_vars.X.reshape(1, -1))
predAlt

TypeError: ('Expecting data to be a DMatrix object, got: ', <class 'numpy.ndarray'>)

In [None]:
# Get all methods of XGB._Booster without leading underscore
[method_name for method_name in dir(XGB._Booster) if callable(getattr(XGB._Booster, method_name)) and not method_name.startswith("_")]

['attr',
 'attributes',
 'boost',
 'copy',
 'dump_model',
 'eval',
 'eval_set',
 'get_dump',
 'get_fscore',
 'get_score',
 'get_split_value_histogram',
 'inplace_predict',
 'load_config',
 'load_model',
 'num_boosted_rounds',
 'num_features',
 'predict',
 'save_config',
 'save_model',
 'save_raw',
 'set_attr',
 'set_param',
 'trees_to_dataframe',
 'update']

In [None]:
XGB._Booster.predict(xgb.DMatrix(XTest[0:1]))

array([0.08298688], dtype=float32)

In [None]:
type(XTest)

numpy.ndarray

In [None]:
LSKDEx.predict(input_vars.X, probs = prob)

Unnamed: 0,0.8
0,0.182927


In [None]:
binIndex = int((np.searchsorted(a = lowerBounds, v = predAlt, side = 'right') - 1))
# weightsDataList[binIndex]

  binIndex = int((np.searchsorted(a = lowerBounds, v = predAlt, side = 'right') - 1))


In [None]:
costsDiff[binIndex]

0.0726759579650854

In [None]:
y[0].X

0.23465877201681534

In [None]:
binIndex = int((np.searchsorted(a = lowerBounds, v = y[0].X, side = 'right') - 1))
binIndex

616

In [None]:
pred_constr.get_error()

array([[0.06318196]])

### Counterfactuals 3.2

In [None]:
import gurobipy as gp
from gurobipy import GRB
import gurobipy_pandas as gppd
import gurobi_ml as gpml

import xgboost as xgb
from xgboost import XGBRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.neural_network import MLPRegressor
from dddex.levelSetKDEx_univariate import LevelSetKDEx
from dddex.loadData import *
from dddex.wSAA import RandomForestWSAA, SampleAverageApproximation

import ipdb

In [None]:
#| hide

data, XTrain, yTrain, XTest, yTest = loadDataYaz(returnXY = True)

GBR = GradientBoostingRegressor(max_depth = 10, 
                                n_estimators = 200).fit(XTrain, yTrain)

LSKDEx = LevelSetKDEx(estimator = GBR, binSize = 100)
LSKDEx.fit(XTrain, yTrain)
weights = LSKDEx.getWeights(XTest)

In [None]:
lowerBounds = LSKDEx.lowerBoundPerBin
lowerBounds[0] = -2

# We remove the first element of lowerBounds and add a high number in the end to create upperBounds
upperBounds = np.append(lowerBounds[1:], 1000)

weights = LSKDEx.getWeights(X = XTrain, outputType = 'summarized')

weightsDataList = list()
for bin in LSKDEx.indicesPerBin.values():
    weightsDataBin = weights[bin[0]]
    weightsDataList.append(weightsDataBin)

In [None]:
prob = 0.8
currentForecast = LSKDEx.pointPredict(XTest[0].reshape(1, -1))
currentDecision = np.array(LSKDEx.predict(XTest[0], probs = prob))[0, 0]

desiredDecision = 0.8

costsDiffList = list()

for weightsData in weightsDataList:

    costsCurrent = np.array([(1 - prob) * (currentDecision - y) if currentDecision - y > 0 else prob * (y - currentDecision) for y in weightsData[1]])
    costsDesired = np.array([(1 - prob) * (desiredDecision - y) if desiredDecision - y > 0 else prob * (y - desiredDecision) for y in weightsData[1]])

    costsCurrent = (costsCurrent * weightsData[0]).sum()
    costsDesired = (costsDesired * weightsData[0]).sum()
    # ipdb.set_trace()
    costsDiff = costsDesired - costsCurrent
    costsDiffList.append(costsDiff)  

costsDiff = np.array(costsDiffList)      

In [None]:
costsDiff

array([ 0.14823529,  0.14561858,  0.12190135,  0.08955398,  0.08382251,
        0.06099144,  0.04733772,  0.03729278,  0.02883985,  0.01274387,
        0.00761468, -0.00437806, -0.02047233, -0.02743952, -0.02985309,
       -0.03464629, -0.04915976, -0.0606195 , -0.06813088, -0.07746985,
       -0.08575814, -0.09237576, -0.10127067, -0.11279205, -0.12252492,
       -0.13028636, -0.14231188, -0.15069313, -0.16136911, -0.17914299,
       -0.19371399, -0.20600445, -0.2290631 , -0.25476357, -0.28011964,
       -0.31438903, -0.35414858, -0.48145747])

In [None]:
model = gp.Model()
gppd.set_interactive()
# model.setParam('OutputFlag', 0)
model.setParam('Threads', 1)
model.setParam('TimeLimit', 400)
model.setParam('MIPGap', 0.01)

Set parameter Threads to value 1
Set parameter TimeLimit to value 400
Set parameter MIPGap to value 0.01


In [None]:
index = pd.RangeIndex(0, len(weightsDataList), name = 'index')

y = gppd.add_vars(model, index[0:1], name = 'y', lb = -5, ub = 5)
z = gppd.add_vars(model, index, name = 'z', vtype = GRB.BINARY)

# Ensure that only one of the binVars can be 1
rhs = pd.Series(1, index = index[0:1], name = 'rhs')
constraints = gppd.add_constrs(model, z.sum(), GRB.EQUAL, rhs, name = 'oneBinVar')

In [None]:
constraints = gppd.add_constrs(model, z * np.array(costsDiffList), GRB.LESS_EQUAL, 0.00001, name = 'costsConstr')
# constraints.apply(model.getRow)
# constraints.gppd.RHS

In [None]:
constraints = gppd.add_constrs(model, y[0], GRB.GREATER_EQUAL, lowerBounds - 10000 * (1 - z), name = 'lowerBoundConstr')
constraints = gppd.add_constrs(model, y[0], GRB.LESS_EQUAL, upperBounds - 0.000001 + 10000 * (1 - z), name = 'upperBoundConstr')

In [None]:
X_examples = XTest[0, :]
# y_examples = yTest[1:11]

In [None]:
input_vars1 = model.addMVar((3, ), lb = 0, ub = X_examples[0:3] + 100)
input_vars2 = model.addMVar((3, ), vtype = GRB.BINARY)
input_vars3 = model.addMVar((61, ), lb = 0, ub = X_examples[6:67] + 1)

input_vars = gp.MVar(gp.MVar.tolist(input_vars1) + gp.MVar.tolist(input_vars2) + gp.MVar.tolist(input_vars3))

In [None]:
pred_constr = gpml.add_predictor_constr(model, GBR, input_vars, y, epsilon = 0.0001)

In [None]:
# dpos = gppd.add_vars(model, index[0:1], name = 'dpos', lb = 0)
# dneg = gppd.add_vars(model, index[0:1], name = 'dneg', lb = 0)

In [None]:
# gppd.add_constrs(model, dpos - dneg, GRB.EQUAL, y - np.float64(currentForecast))

In [None]:
model.setObjective((X_examples - input_vars)@(X_examples - input_vars), GRB.MINIMIZE)
model.getObjective()

<gurobi.LinExpr: 0.0>

In [None]:
model.update()
model.optimize()
model.status

Gurobi Optimizer version 10.0.2 build v10.0.2rc0 (linux64)

CPU model: AMD EPYC 7713P 64-Core Processor, instruction set [SSE2|AVX|AVX2]
Thread count: 64 physical cores, 128 logical processors, using up to 1 threads

Optimize a model with 316 rows, 52918 columns and 53041 nonzeros
Model fingerprint: 0xc30e54d5
Model has 67 quadratic objective terms
Model has 255806 general constraints
Variable types: 265 continuous, 52653 integer (52653 binary)
Coefficient statistics:
  Matrix range     [4e-03, 1e+04]
  Objective range  [7e-03, 1e+03]
  QObjective range [2e+00, 2e+00]
  Bounds range     [1e-02, 7e+02]
  RHS range        [1e-05, 1e+04]
  GenCon rhs range [8e-07, 6e+02]
  GenCon coe range [1e+00, 1e+00]
Presolve removed 285776 rows and 308429 columns (presolve time = 5s) ...
Presolve added 23108 rows and 0 columns
Presolve removed 0 rows and 22814 columns
Presolve time: 9.11s
Presolved: 23424 rows, 30104 columns, 338054 nonzeros
Presolved model has 64 quadratic objective terms
Variable t

9

In [None]:
# Get Results of optimization
all_vars = model.getVars()
values = model.getAttr("X", all_vars)
names = model.getAttr("VarName", all_vars)

for name, val in zip(names, values):
    print(f"{name} = {val}")

y[0] = 0.20884371365620655
z[0] = 0.0
z[1] = 0.0
z[2] = 0.0
z[3] = 0.0
z[4] = 0.0
z[5] = 0.0
z[6] = 0.0
z[7] = 0.0
z[8] = 0.0
z[9] = 0.0
z[10] = 0.0
z[11] = 1.0
z[12] = -0.0
z[13] = -0.0
z[14] = -0.0
z[15] = -0.0
z[16] = -0.0
z[17] = -0.0
z[18] = -0.0
z[19] = -0.0
z[20] = -0.0
z[21] = -0.0
z[22] = -0.0
z[23] = -0.0
z[24] = -0.0
z[25] = -0.0
z[26] = -0.0
z[27] = -0.0
z[28] = -0.0
z[29] = -0.0
z[30] = -0.0
z[31] = -0.0
z[32] = -0.0
z[33] = -0.0
z[34] = -0.0
z[35] = -0.0
z[36] = -0.0
z[37] = -0.0
C39 = 555.0
C40 = 554.9999993929999
C41 = 583.9999998786092
C42 = 0.0
C43 = 0.0
C44 = 1.0
C45 = 3.4
C46 = 2.599999988085123
C47 = 0.0
C48 = 538.999999726425
C49 = 0.0
C50 = 0.0
C51 = 0.0
C52 = 1.0
C53 = 0.0
C54 = 0.0
C55 = 0.0
C56 = 0.0
C57 = 0.0
C58 = 0.0
C59 = 0.0
C60 = 0.0
C61 = 0.0
C62 = 0.0
C63 = 0.0
C64 = 1.0
C65 = 0.0
C66 = 0.0
C67 = 0.0
C68 = 0.0
C69 = 0.0
C70 = 1.0
C71 = 1.0
C72 = 0.0
C73 = 0.0
C74 = 0.0
C75 = 0.0
C76 = 0.0
C77 = 0.0
C78 = 25.0
C79 = 1.12
C80 = 0.18009999970197677
C81 = 

In [None]:
input_vars.X - XTest[0, :]

array([ 0.00000000e+00, -6.07000061e-07, -1.21390826e-07,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00, -1.19148771e-08,
        0.00000000e+00, -2.73574983e-07,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  2.00999997e-02,  0.00000000e+00,  0.00000000e+00,
        3.62653047e-03,  0.00000000e+00,  2.00999979e-02,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
       -1.62793512e-02,  

In [None]:
predAlt = GBR.predict(input_vars.X.reshape(1, -1))
predAlt

array([0.20884371])

In [None]:
LSKDEx.predict(input_vars.X, probs = prob)

Unnamed: 0,0.8
0,0.219512


In [None]:
binIndex = int((np.searchsorted(a = lowerBounds, v = predAlt, side = 'right') - 1))
# weightsDataList[binIndex]

  binIndex = int((np.searchsorted(a = lowerBounds, v = predAlt, side = 'right') - 1))


In [None]:
costsDiff[binIndex]

-0.004378058654522718

In [None]:
binIndex = int((np.searchsorted(a = lowerBounds, v = y[0].X, side = 'right') - 1))
costsDiff[binIndex]

-0.004378058654522718

In [None]:
pred_constr.get_error()

array([[4.16333634e-16]])

### Counterfactuals 3.3

In [None]:
import gurobipy as gp
from gurobipy import GRB
import gurobipy_pandas as gppd
import gurobi_ml as gpml

import xgboost as xgb
from xgboost import XGBRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.neural_network import MLPRegressor
from dddex.levelSetKDEx_univariate import LevelSetKDEx
from dddex.loadData import *
from dddex.wSAA import RandomForestWSAA, SampleAverageApproximation

import ipdb

In [None]:
#| hide

data, XTrain, yTrain, XTest, yTest = loadDataBakery(returnXY = True)

GBR = GradientBoostingRegressor(max_depth = 10, 
                                n_estimators = 200).fit(XTrain, yTrain)

LSKDEx = LevelSetKDEx(estimator = GBR, binSize = 100)
LSKDEx.fit(XTrain, yTrain)
weights = LSKDEx.getWeights(XTest)

In [None]:
lowerBounds = LSKDEx.lowerBoundPerBin
lowerBounds[0] = -2

# We remove the first element of lowerBounds and add a high number in the end to create upperBounds
upperBounds = np.append(lowerBounds[1:], 1000)

weights = LSKDEx.getWeights(X = XTrain, outputType = 'summarized')

weightsDataList = list()
for bin in LSKDEx.indicesPerBin.values():
    weightsDataBin = weights[bin[0]]
    weightsDataList.append(weightsDataBin)

In [None]:
prob = 0.8
currentForecast = LSKDEx.pointPredict(XTest[0].reshape(1, -1))
currentDecision = np.array(LSKDEx.predict(XTest[0], probs = prob))[0, 0]

desiredDecision = 0.8

costsDiffList = list()

for weightsData in weightsDataList:

    costsCurrent = np.array([(1 - prob) * (currentDecision - y) if currentDecision - y > 0 else prob * (y - currentDecision) for y in weightsData[1]])
    costsDesired = np.array([(1 - prob) * (desiredDecision - y) if desiredDecision - y > 0 else prob * (y - desiredDecision) for y in weightsData[1]])

    costsCurrent = (costsCurrent * weightsData[0]).sum()
    costsDesired = (costsDesired * weightsData[0]).sum()
    # ipdb.set_trace()
    costsDiff = costsDesired - costsCurrent
    costsDiffList.append(costsDiff)  

costsDiff = np.array(costsDiffList)      

In [None]:
costsDiff

array([ 7.46813265e-02,  7.56069393e-02,  8.83018868e-02,  8.83018868e-02,
        8.10846599e-02,  7.35965064e-02,  8.32001011e-02,  7.02167514e-02,
        8.68020943e-02,  8.68778557e-02,  8.72916133e-02,  8.42017304e-02,
        7.81616281e-02,  7.91676802e-02,  7.80867727e-02,  7.27714259e-02,
        8.83018868e-02,  8.52024648e-02,  8.66190725e-02,  8.40639298e-02,
        8.41955170e-02,  8.40400455e-02,  8.40242586e-02,  8.04362450e-02,
        8.52652640e-02,  8.20525583e-02,  8.72960200e-02,  8.04341540e-02,
        8.35214933e-02,  8.20197129e-02,  7.22453808e-02,  8.39610680e-02,
        8.31044125e-02,  7.91522959e-02,  7.52175001e-02,  7.62724755e-02,
        7.77719874e-02,  8.09964899e-02,  7.76835465e-02,  7.86390989e-02,
        7.87805020e-02,  7.67294073e-02,  8.38390114e-02,  8.55905857e-02,
        7.81574118e-02,  6.89399414e-02,  8.22212083e-02,  7.13687469e-02,
        8.14414546e-02,  8.46439508e-02,  7.16030469e-02,  8.43224000e-02,
        8.58019286e-02,  

In [None]:
model = gp.Model()
gppd.set_interactive()
# model.setParam('OutputFlag', 0)
model.setParam('Threads', 1)
model.setParam('TimeLimit', 600)
model.setParam('MIPGap', 0.01)

Set parameter Threads to value 1
Set parameter TimeLimit to value 600
Set parameter MIPGap to value 0.01


In [None]:
index = pd.RangeIndex(0, len(weightsDataList), name = 'index')

y = gppd.add_vars(model, index[0:1], name = 'y', lb = -5, ub = 5)
z = gppd.add_vars(model, index, name = 'z', vtype = GRB.BINARY)

# Ensure that only one of the binVars can be 1
rhs = pd.Series(1, index = index[0:1], name = 'rhs')
constraints = gppd.add_constrs(model, z.sum(), GRB.EQUAL, rhs, name = 'oneBinVar')

In [None]:
constraints = gppd.add_constrs(model, z * np.array(costsDiffList), GRB.LESS_EQUAL, 0.00001, name = 'costsConstr')
# constraints.apply(model.getRow)
# constraints.gppd.RHS

In [None]:
constraints = gppd.add_constrs(model, y[0], GRB.GREATER_EQUAL, lowerBounds - 10000 * (1 - z), name = 'lowerBoundConstr')
constraints = gppd.add_constrs(model, y[0], GRB.LESS_EQUAL, upperBounds - 0.000001 + 10000 * (1 - z), name = 'upperBoundConstr')

In [None]:
X_examples = XTest[0, :]
# y_examples = yTest[1:11]

In [None]:
input_vars1 = model.addMVar((2, ), lb = 0, ub = X_examples[0:2] + 100)
input_vars2 = model.addMVar((3, ), vtype = GRB.BINARY)
input_vars3 = model.addMVar((58, ), lb = 0, ub = X_examples[5:63] + 1)

input_vars = gp.MVar(gp.MVar.tolist(input_vars1) + gp.MVar.tolist(input_vars2) + gp.MVar.tolist(input_vars3))

In [None]:
pred_constr = gpml.add_predictor_constr(model, GBR, input_vars, y, epsilon = 0.0001)

In [None]:
# dpos = gppd.add_vars(model, index[0:1], name = 'dpos', lb = 0)
# dneg = gppd.add_vars(model, index[0:1], name = 'dneg', lb = 0)

In [None]:
# gppd.add_constrs(model, dpos - dneg, GRB.EQUAL, y - np.float64(currentForecast))

In [None]:
model.setObjective((X_examples - input_vars)@(X_examples - input_vars), GRB.MINIMIZE)
model.getObjective()

<gurobi.LinExpr: 0.0>

In [None]:
model.update()
model.optimize()
model.status

Gurobi Optimizer version 10.0.2 build v10.0.2rc0 (linux64)

CPU model: AMD EPYC 7713P 64-Core Processor, instruction set [SSE2|AVX|AVX2]
Thread count: 64 physical cores, 128 logical processors, using up to 1 threads

Optimize a model with 1930 rows, 1140 columns and 4207 nonzeros
Model fingerprint: 0x1722f592
Model has 63 quadratic objective terms
Model has 1422 general constraints
Variable types: 111 continuous, 1029 integer (1029 binary)
Coefficient statistics:
  Matrix range     [1e-04, 1e+04]
  Objective range  [3e-02, 2e+03]
  QObjective range [2e+00, 2e+00]
  Bounds range     [2e-02, 1e+03]
  RHS range        [1e-05, 1e+04]
  GenCon rhs range [4e-04, 6e+02]
  GenCon coe range [1e+00, 1e+00]
Presolve removed 1237 rows and 489 columns
Presolve time: 0.07s
Presolved: 693 rows, 651 columns, 3808 nonzeros
Presolved model has 32 quadratic objective terms
Variable types: 83 continuous, 568 integer (568 binary)

Root relaxation: objective -4.656613e-10, 485 iterations, 0.01 seconds (0.01

2

In [None]:
# Get Results of optimization
all_vars = model.getVars()
values = model.getAttr("X", all_vars)
names = model.getAttr("VarName", all_vars)

for name, val in zip(names, values):
    print(f"{name} = {val}")

y[0] = 0.40387524540609887
z[0] = 0.0
z[1] = 0.0
z[2] = 0.0
z[3] = 0.0
z[4] = 0.0
z[5] = 0.0
z[6] = 0.0
z[7] = 0.0
z[8] = 0.0
z[9] = 0.0
z[10] = 0.0
z[11] = 0.0
z[12] = 0.0
z[13] = 0.0
z[14] = 0.0
z[15] = 0.0
z[16] = 0.0
z[17] = 0.0
z[18] = 0.0
z[19] = 0.0
z[20] = 0.0
z[21] = 0.0
z[22] = 0.0
z[23] = 0.0
z[24] = 0.0
z[25] = 0.0
z[26] = 0.0
z[27] = 0.0
z[28] = 0.0
z[29] = 0.0
z[30] = 0.0
z[31] = 0.0
z[32] = 0.0
z[33] = 0.0
z[34] = 0.0
z[35] = 0.0
z[36] = 0.0
z[37] = 0.0
z[38] = 0.0
z[39] = 0.0
z[40] = 0.0
z[41] = 0.0
z[42] = 0.0
z[43] = 0.0
z[44] = 0.0
z[45] = 0.0
z[46] = 0.0
z[47] = 0.0
z[48] = 0.0
z[49] = 0.0
z[50] = 0.0
z[51] = 0.0
z[52] = 0.0
z[53] = 0.0
z[54] = 0.0
z[55] = 0.0
z[56] = 0.0
z[57] = 0.0
z[58] = 0.0
z[59] = 0.0
z[60] = 0.0
z[61] = 0.0
z[62] = 0.0
z[63] = 0.0
z[64] = 0.0
z[65] = 0.0
z[66] = 0.0
z[67] = 0.0
z[68] = 0.0
z[69] = 0.0
z[70] = 0.0
z[71] = 0.0
z[72] = 0.0
z[73] = 0.0
z[74] = 0.0
z[75] = 0.0
z[76] = 0.0
z[77] = 0.0
z[78] = 0.0
z[79] = 0.0
z[80] = 0.0
z[81] = 0.0

In [None]:
input_vars.X - XTest[0, :]

array([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
       0.0655800717719533, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
       -0.06601371296066089, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
       0.23357748708793508, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
       0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
       0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], dtype=object)

In [None]:
predAlt = GBR.predict(input_vars.X.reshape(1, -1))
predAlt

array([0.40387525])

In [None]:
LSKDEx.predict(input_vars.X, probs = prob)

Unnamed: 0,0.8
0,0.540284


In [None]:
binIndex = int((np.searchsorted(a = lowerBounds, v = predAlt, side = 'right') - 1))
costsDiff[binIndex]

  binIndex = int((np.searchsorted(a = lowerBounds, v = predAlt, side = 'right') - 1))


-0.015342920573476984

In [None]:
binIndex = int((np.searchsorted(a = lowerBounds, v = y[0].X, side = 'right') - 1))
costsDiff[binIndex]

-0.0004004502172679364