In [2]:
import torch
import random
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.utils.data import TensorDataset
from torch.optim import AdamW
from xgboost import XGBClassifier

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import networkx as nx
import pickle as pkl
import scipy
import os

from torch.nn import Linear, ReLU, Dropout
from torch.nn.functional import relu
from sklearn.model_selection import train_test_split

from sklearn.metrics import f1_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import confusion_matrix

import gurobipy as gb
import time

from operator import itemgetter

In [3]:
os.environ['CUBLAS_WORKSPACE_CONFIG'] = ":4096:8"

In [4]:
def set_seeds(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.benchmark = (
        False  # Force cuDNN to use a consistent convolution algorithm
    )
    torch.backends.cudnn.deterministic = (
        True  # Force cuDNN to use deterministic algorithms if available
    )
    torch.use_deterministic_algorithms(
        True
    )  # Force torch to use deterministic algorithms if available


In [5]:
try:
    corlat_dataset = pkl.load(open("Data/corlat/corlat_preprocessed.pickle", "rb"))
except:
    # move dir to /ibm/gpfs/home/yjin0055/Project/DayAheadForecast
    os.chdir("/ibm/gpfs/home/yjin0055/Project/DayAheadForecast")
    corlat_dataset = pkl.load(open("Data/corlat/corlat_preprocessed.pickle", "rb"))

In [6]:
# for each solution convert the dictionary to a list of values
solutions = [
    list(corlat_dataset[i]["solution"].values())
    for i in range(len(corlat_dataset))
]

In [7]:
# convert solutions_list to numpy array
solutions = np.array(solutions)

In [8]:
model_files = os.listdir("instances/mip/data/COR-LAT")

In [9]:
# get the indices of the binary variables
indices = []
for i in range(len(corlat_dataset)):
    indices.append(list(corlat_dataset[i]["solution"].keys()))

In [10]:
# convert indices to numpy array
indices = np.array(indices)

In [11]:
# read X_train, X_test, y_train, y_test from Data/corlat/ using numpy.load
X_train = np.load("Data/corlat/X_train.npy")
X_test = np.load("Data/corlat/X_test.npy")
y_train = np.load("Data/corlat/y_train.npy")
y_test = np.load("Data/corlat/y_test.npy")

In [12]:
# train and test indices
train_indices = np.load("Data/corlat/train_idx.npy")
test_indices = np.load("Data/corlat/test_idx.npy")

In [13]:
# load the xgboost model
with open("Models/Tabular/xgboost_model_corlat.pkl", "rb") as f:
    xgb_model = pkl.load(f)

In [14]:
y_pred = xgb_model.predict(X_test)

In [15]:
y_test = y_test.astype(np.int)
print("F1 score: ", f1_score(y_test, y_pred, average="micro"))
print("Precision: ", precision_score(y_test, y_pred, average="micro"))
print("Recall: ", recall_score(y_test, y_pred, average="micro"))

F1 score:  0.9237003241685676
Precision:  0.9232569302772111
Recall:  0.9241441441441441


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_test = y_test.astype(np.int)


In [16]:
# save xgboost model
xgb_model.save_model("Models/Tabular/xgboost_model_corlat.json")

# now test feasibility of the solutions

In [16]:
# basic optimization solving time
firstInstanceTest = gb.read("instances/mip/data/COR-LAT/" + model_files[test_indices[0]])
firstInstanceTest.Params.Threads = 1
firstInstanceTest.optimize()

Set parameter Username
Academic license - for non-commercial use only - expires 2024-04-18
Read LP format model from file instances/mip/data/COR-LAT/cor-lat-2f+r-u-10-10-10-5-100-3.462.b208.000000.prune2.lp
Reading time = 0.01 seconds
obj: 470 rows, 466 columns, 1751 nonzeros
Set parameter Threads to value 1
Gurobi Optimizer version 10.0.1 build v10.0.1rc0 (linux64)

CPU model: AMD Ryzen Threadripper 1920X 12-Core Processor, instruction set [SSE2|AVX|AVX2]
Thread count: 12 physical cores, 24 logical processors, using up to 1 threads

Optimize a model with 470 rows, 466 columns and 1751 nonzeros
Model fingerprint: 0x3db7ce21
Variable types: 366 continuous, 100 integer (100 binary)
Coefficient statistics:
  Matrix range     [1e+00, 1e+02]
  Objective range  [1e+00, 1e+01]
  Bounds range     [1e+00, 1e+00]
  RHS range        [1e+00, 2e+02]
Presolve removed 17 rows and 9 columns
Presolve time: 0.00s
Presolved: 453 rows, 457 columns, 1715 nonzeros
Variable types: 360 continuous, 97 integer 

In [16]:
# we are going to use first instance of test data
firstInstanceTest = gb.read("instances/mip/data/COR-LAT/" + model_files[test_indices[0]])
firstInstanceTest.Params.Threads = 1

# get indices of binary variables
firstInstanceTestBinaryIndices = indices[test_indices[0]]

# for this first instance of test data, we are going to use the xgboost prediction and fix the binary variables' values
# to the values predicted by xgboost

# get predictions from xgboost model
xgb_pred = xgb_model.predict(X_test[0].reshape(1, -1)).reshape(-1)

# get variables from the model
modelVars = firstInstanceTest.getVars()

# need to relax the binary variables to continuous variables with bounds of 0 and 1, we can use the setAttr method to change their vtype attribute
for i in range(len(firstInstanceTestBinaryIndices)):
    modelVars[firstInstanceTestBinaryIndices[i]].setAttr("VType", "C")

    # for each index in firstInstanceTestBinaryIndices, set the value of the corresponding variable to the value predicted by xgboost
    modelVars[firstInstanceTestBinaryIndices[i]].setAttr("LB", xgb_pred[i])
    modelVars[firstInstanceTestBinaryIndices[i]].setAttr("UB", xgb_pred[i])
    
# After relaxing or fixing the binary variables, we can compute the IIS as before
firstInstanceTest.computeIIS()
# # Print the conflicting variables and constraints
# for v in firstInstanceTest.getVars():
#   if v.IISLB > 0 or v.IISUB > 0:
#     print(v.varName, "is part of the IIS")
# for c in firstInstanceTest.getConstrs():
#   if c.IISConstr > 0:
#     print(c.ConstrName, "is part of the IIS")

# only assign the predicted variables that are not in the IIS to warm start the model
for i, v in enumerate(firstInstanceTest.getVars()):
    if v.IISLB == 0 and v.IISUB == 0:
        if i in firstInstanceTestBinaryIndices:
            # print(v.varName, "is not part of the IIS")
            v.setAttr("VType", "B")
            v.setAttr("LB", 0)
            v.setAttr("UB", 1)
            v.setAttr("Start", xgb_pred[i])
            
    
    # else if the variable is in the IIS, 
    # get the relaxed variable and 
    # set the bounds to 0 and 1 for the relaxed binary variables
    else:
        if i in firstInstanceTestBinaryIndices:
            # print(v.varName, "is part of the IIS")
            v.setAttr("VType", "B")
            v.setAttr("LB", 0)
            v.setAttr("UB", 1)
            

firstInstanceTest.optimize()

Read LP format model from file instances/mip/data/COR-LAT/cor-lat-2f+r-u-10-10-10-5-100-3.462.b208.000000.prune2.lp
Reading time = 0.01 seconds
obj: 470 rows, 466 columns, 1751 nonzeros
Set parameter Threads to value 1
Iteration    Objective       Primal Inf.    Dual Inf.      Time
       0    3.9700000e+02   2.880000e+02   0.000000e+00      0s

IIS computed: 56 constraints and 73 bounds
IIS runtime: 0.01 seconds (0.00 work units)
Gurobi Optimizer version 10.0.1 build v10.0.1rc0 (linux64)

CPU model: AMD Ryzen Threadripper 1920X 12-Core Processor, instruction set [SSE2|AVX|AVX2]
Thread count: 12 physical cores, 24 logical processors, using up to 1 threads

Optimize a model with 470 rows, 466 columns and 1751 nonzeros
Model fingerprint: 0x499154c7
Variable types: 366 continuous, 100 integer (100 binary)
Coefficient statistics:
  Matrix range     [1e+00, 1e+02]
  Objective range  [1e+00, 1e+01]
  Bounds range     [1e+00, 1e+00]
  RHS range        [1e+00, 2e+02]

User MIP start did not pr

In [17]:
# # we are going to use first instance of test data
# firstInstanceTest = gb.read("instances/mip/data/COR-LAT/" + model_files[test_indices[0]])
# firstInstanceTest.Params.Threads = 1

# # get indices of binary variables
# firstInstanceTestBinaryIndices = indices[test_indices[0]]

# # for this first instance of test data, we are going to use the xgboost prediction and fix the binary variables' values
# # to the values predicted by xgboost

# # get predictions from xgboost model
# xgb_pred = xgb_model.predict(X_test[0].reshape(1, -1)).reshape(-1)

# # get variables from the model
# modelVars = firstInstanceTest.getVars()

# # need to relax the binary variables to continuous variables with bounds of 0 and 1, we can use the setAttr method to change their vtype attribute
# for i in range(len(firstInstanceTestBinaryIndices)):
#     modelVars[firstInstanceTestBinaryIndices[i]].setAttr("VType", "C")

#     # for each index in firstInstanceTestBinaryIndices, set the value of the corresponding variable to the value predicted by xgboost
#     modelVars[firstInstanceTestBinaryIndices[i]].setAttr("LB", xgb_pred[i])
#     modelVars[firstInstanceTestBinaryIndices[i]].setAttr("UB", xgb_pred[i])
    
# # After relaxing or fixing the binary variables, we can compute the IIS as before
# firstInstanceTest.computeIIS()

# # for each constraint, if constraint in IIS, get the slack value. Implement this.
# for c in firstInstanceTest.getConstrs():
#     if c.IISConstr > 0:
#         print(c.ConstrName, "is part of the IIS")
#         print(c.Slack)

In [18]:
# firstInstanceTest.getVars()[0]

In [19]:
# # only assign the predicted variables that are not in the IIS to warm start the model
# for i, v in enumerate(firstInstanceTest.getVars()):
#     if v.IISLB == 0 and v.IISUB == 0:
#         if i in firstInstanceTestBinaryIndices:
#             print(v.varName, "is not part of the IIS")
#             v.setAttr("VType", "B")
#             v.setAttr("LB", 0)
#             v.setAttr("UB", 1)
            
    
#     # else if the variable is in the IIS, 
#     # get the relaxed variable and 
#     # set the bounds to 0 and 1 for the relaxed binary variables
#     else:
#         if i in firstInstanceTestBinaryIndices:
#             print(v.varName, "is part of the IIS")
#             v.setAttr("VType", "B")
#             v.setAttr("LB", 0)
#             v.setAttr("UB", 1)

In [20]:
# # continue solving the model
# firstInstanceTest.optimize()

In [31]:
# the weights for each variable in the loss function should take the form of
# w_{ij} = exp(-c_i^T x^{i, j}) / sum(exp(-c_i^T x^{i, k})) for k = 1, ..., N_i
# where c_i is the vector of cost coefficient for training instance i, j is the index of the training instance, and N_i is the number of training instances


# we are going to use first instance of test data
# firstInstanceTest = gb.read("instances/mip/data/COR-LAT/" + model_files[test_indices[0]])

def custom_obj(model_files: list, indices, train_indices, y_true: np.ndarray, y_pred: np.ndarray):
    
    instance_weights = []
    
    gurobi_env = gb.Env()
    gurobi_env.setParam("OutputFlag", 0)
    
    # convert logit predictions to probabilities
    y_pred = 1.0 / (1.0 + np.exp(-y_pred))

    # convert predictions of N_samples, N_variables to binary
    y_pred_binary = np.where(y_pred > 0.5, 1, 0)
    
    # Compute the weights for each training instance
    for i in range(y_true.shape[0]):
        
        model = gb.read("instances/mip/data/COR-LAT/" + model_files[train_indices[i]], env=gurobi_env)
        
        modelVars = model.getVars()
        
        instanceBinaryIndices = indices[train_indices[i]]

        # need to relax the binary variables to continuous variables with bounds of 0 and 1, we can use the setAttr method to change their vtype attribute
        for j in range(len(instanceBinaryIndices)):
            modelVars[instanceBinaryIndices[j]].setAttr("VType", "C")

            # for each index in firstInstanceTestBinaryIndices, set the value of the corresponding variable to the value predicted by xgboost
            modelVars[instanceBinaryIndices[j]].setAttr("LB", y_pred_binary[i, j])
            modelVars[instanceBinaryIndices[j]].setAttr("UB", y_pred_binary[i, j])
        
        
        # Compute the IIS to find the list of violated constraints and variables
        model.computeIIS()
        
        # Initialize the weights
        weights = np.zeros_like(y_true[i])
        
        c = model.getAttr("Obj", model.getVars())
        
        # get violated variables indices
        violated_vars_indices = [k for k, v in enumerate(model.getVars()) if (v.IISLB > 0 or v.IISUB > 0) and k in instanceBinaryIndices]
        
        for j, v in enumerate(model.getVars()):
            # not violated
            if (v.IISLB == 0 and v.IISUB == 0) and j in instanceBinaryIndices:
                weights[j] = np.exp(np.dot(c[j], y_pred_binary[i, j]))
        
                
        denominator = sum(  np.exp(np.dot(c[k], y_pred_binary[i, k])) for k in range(y_pred_binary[i].shape[0]) if not k in violated_vars_indices  )
        
        weights /= denominator    
        
        instance_weights.append(weights)
    
    
    # y_pred is of shape (N_samples, N_binary_variables)
    # weights is of shape (N_samples, N_binary_variables)
    # each element in weights is the weight for the corresponding element in y_pred
    # multiply the weights by the loss
    
    grad = y_pred - y_true
    hess = y_pred * (1.0 - y_pred)
    
    # multiply the weights by the gradient and hessian
    instance_weights = np.array(instance_weights)
    grad = np.multiply(grad, instance_weights)
    hess = np.multiply(hess, instance_weights)
    
    grad = grad.reshape(-1, 1)
    hess = hess.reshape(-1, 1)

    return grad, hess

# Define a wrapper function that takes only y_true and y_pred as arguments
def custom_obj_wrapper_train(y_true: np.ndarray, y_pred: np.ndarray):
    # reshape y_true to be of shape (N_samples, N_binary_variables)
    y_true = y_true.reshape(y_pred.shape)
    return custom_obj(model_files, indices, train_indices, y_true, y_pred)

# Initialize an XGBClassifier model with the custom objective function
   

In [32]:
xgbmodel = XGBClassifier(objective=custom_obj_wrapper_train, tree_method="hist")

In [33]:
y_train = y_train.astype(np.int)
xgbmodel.fit(X_train, y_train)

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_train = y_train.astype(np.int)


Set parameter Username
Academic license - for non-commercial use only - expires 2024-04-18
Set parameter Username
Academic license - for non-commercial use only - expires 2024-04-18
Set parameter Username
Academic license - for non-commercial use only - expires 2024-04-18
Set parameter Username
Academic license - for non-commercial use only - expires 2024-04-18
Set parameter Username
Academic license - for non-commercial use only - expires 2024-04-18
Set parameter Username
Academic license - for non-commercial use only - expires 2024-04-18
Set parameter Username
Academic license - for non-commercial use only - expires 2024-04-18
Set parameter Username
Academic license - for non-commercial use only - expires 2024-04-18
Set parameter Username
Academic license - for non-commercial use only - expires 2024-04-18
Set parameter Username
Academic license - for non-commercial use only - expires 2024-04-18
Set parameter Username
Academic license - for non-commercial use only - expires 2024-04-18

In [305]:
# save the feasibility constrained model
xgbmodel.save_model("Models/Tabular/xgbmodel_feasibility_constrained.json")



In [39]:
# load original xgboost model
xgbmodel_ori = XGBClassifier(tree_method="hist")
xgbmodel_ori.load_model("Models/Tabular/xgboost_model_corlat.json")

In [40]:
# predict on test data and calculate average number of infeasible assignments
y_pred = xgbmodel.predict(X_test)
y_pred_ori = xgbmodel_ori.predict(X_test)

In [43]:
# calculate the average number of infeasible assignments
gurobi_env = gb.Env()
gurobi_env.setParam("OutputFlag", 0)

n_infeasible = 0
n_infeasible_ori = 0

# Compute the weights for each training instance
for i in range(y_pred_ori.shape[0]):
    
    model = gb.read("instances/mip/data/COR-LAT/" + model_files[test_indices[i]], env=gurobi_env)
    
    modelVars = model.getVars()
    
    instanceBinaryIndices = indices[test_indices[i]]

    # need to relax the binary variables to continuous variables with bounds of 0 and 1, we can use the setAttr method to change their vtype attribute
    for j in range(len(instanceBinaryIndices)):
        modelVars[instanceBinaryIndices[j]].setAttr("VType", "C")

        # for each index in firstInstanceTestBinaryIndices, set the value of the corresponding variable to the value predicted by xgboost
        modelVars[instanceBinaryIndices[j]].setAttr("LB", y_pred_ori[i, j])
        modelVars[instanceBinaryIndices[j]].setAttr("UB", y_pred_ori[i, j])
    
    
    # Compute the IIS to find the list of violated constraints and variables
    try:
        model.computeIIS()
        infeasible_flag = True
    except:
        infeasible_flag = False
        continue
    
    if infeasible_flag:
        # count the number of violated variables
        for j, v in enumerate(model.getVars()):
            # violated
            if (v.IISLB > 0 or v.IISUB > 0) and j in instanceBinaryIndices:
                n_infeasible += 1
        
    print(n_infeasible)

print("Average number of infeasible assignments for original xgboost model: ", n_infeasible / y_pred.shape[0])

Set parameter Username
Academic license - for non-commercial use only - expires 2024-04-18
59
72
169
222
303
400
446
543
617
645
704
801
870
892
917
963
966
978
1008
1039
1122
1201
1275
1372
1469
1508
1577
1591
1637
1733
1822
1860
1957
2054
2060
2078
2170
2239
2306
2365
2387
2415
2429
2458
2555
2611
2675
2700
2746
2838
2928
2969
3066
3152
3159
3229
3315
3321
3418
3515
3551
3640
3654
3751
3795
3890
3987
4061
4158
4250
4273
4362
4410
4492
4574
4588
4673
4770
4778
4875
4890
4965
5062
5159
5163
5212
5302
5399
5482
5576
5602
5632
5703
5714
5811
5845
5929
6011
6108
6194
6291
6299
6396
6478
6569
6666
6693
6790
6808
6905
6983
7070
7150
7185
7253
7321
7335
7351
7386
7483
7575
7671
7718
7811
7886
7982
8062
8159
8256
8312
8409
8474
8571
8587
8684
8781
8878
8914
9011
9098
9159
9256
9260
9357
9431
9434
9531
9574
9659
9740
9749
9780
9838
9904
9998
10095
10122
10212
10231
10290
10378
10397
10458
10522
10558
10626
10713
10810
10892
10989
11072
11159
11249
11275
11320
11387
11418
11515
11610
11663
1176

In [45]:
# calculate the average number of infeasible assignments
gurobi_env = gb.Env()
gurobi_env.setParam("OutputFlag", 0)

n_infeasible = 0

# Compute the weights for each training instance
for i in range(y_pred_ori.shape[0]):
    
    model = gb.read("instances/mip/data/COR-LAT/" + model_files[test_indices[i]], env=gurobi_env)
    
    modelVars = model.getVars()
    
    instanceBinaryIndices = indices[test_indices[i]]

    # need to relax the binary variables to continuous variables with bounds of 0 and 1, we can use the setAttr method to change their vtype attribute
    for j in range(len(instanceBinaryIndices)):
        modelVars[instanceBinaryIndices[j]].setAttr("VType", "C")

        # for each index in firstInstanceTestBinaryIndices, set the value of the corresponding variable to the value predicted by xgboost
        modelVars[instanceBinaryIndices[j]].setAttr("LB", y_pred[i, j])
        modelVars[instanceBinaryIndices[j]].setAttr("UB", y_pred[i, j])
    
    
    # Compute the IIS to find the list of violated constraints and variables
    try:
        model.computeIIS()
        infeasible_flag = True
    except:
        infeasible_flag = False
        continue
    
    if infeasible_flag:
        # count the number of violated variables
        for j, v in enumerate(model.getVars()):
            # violated
            if (v.IISLB > 0 or v.IISUB > 0) and j in instanceBinaryIndices:
                n_infeasible += 1
        
    print(n_infeasible)

print("Average number of infeasible assignments for constrained xgboost model: ", n_infeasible / y_pred.shape[0])

Set parameter Username
Academic license - for non-commercial use only - expires 2024-04-18
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
2

In [46]:
# f1, precision, recall for constrained xgboost model
print("F1 score: ", f1_score(y_test, y_pred, average="micro"))
print("Precision: ", precision_score(y_test, y_pred, average="micro"))
print("Recall: ", recall_score(y_test, y_pred, average="micro"))

F1 score:  0.8429579591948815
Precision:  0.7764961805028583
Recall:  0.9218618618618618


# Neural Network Comparison

In [17]:
n_features = X_train.shape[1]
out_channels = solutions.shape[1]

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [80]:
def get_feasibility_constrain_weights(models, indices, train_indices, y_true, y_pred):
    
    instance_weights = []
    
    # gurobi_env = gb.Env()
    # gurobi_env.setParam("OutputFlag", 0)

    # convert predictions of N_samples, N_variables to binary
    y_pred_binary = torch.where(y_pred > 0.5, 1, 0)
    
    # Compute the weights for each training instance
    
    for i in range(y_true.shape[0]):
        
        # t1 = time.time()
        model = models[i]
        
        
        # t2 = time.time()
        modelVars = model.getVars()
        
        # t3 = time.time()
        instanceBinaryIndices = indices[i]
        
        modelVarsBinary = list(itemgetter(*instanceBinaryIndices)(modelVars))
        
        
        # need to relax the binary variables to continuous variables with bounds of 0 and 1, we can use the setAttr method to change their vtype attribute
        # for j in range(len(instanceBinaryIndices)):
        #     modelVars[instanceBinaryIndices[j]].setAttr("VType", "C")

        #     # for each index in instanceBinaryIndices, set the value of the corresponding variable to the value predicted by the neural network
        #     modelVars[instanceBinaryIndices[j]].setAttr("LB", y_pred_binary[i, j].item())
        #     modelVars[instanceBinaryIndices[j]].setAttr("UB", y_pred_binary[i, j].item())
        
        model.setAttr("VType", modelVarsBinary, "C")
        model.setAttr("LB", modelVarsBinary, y_pred_binary[i])
        model.setAttr("UB", modelVarsBinary, y_pred_binary[i])
        
        
        # t4 = time.time()
        # Compute the IIS to find the list of violated constraints and variables
        try:
            model.computeIIS()
            feasible_flag = False
        except  Exception as e:
            print(e)
            print("Model is feasible")
            feasible_flag = True
            continue
        
        if not feasible_flag:
            # t5 = time.time()
            # Initialize the weights
            # weights = torch.zeros_like(y_true[i], dtype=torch.float32)
            c = torch.tensor(model.getAttr("Obj", modelVarsBinary), device=device)
            
            # get violated variables indices
            # violated_vars_indices = [k for k, v in enumerate(model.getVars()) if (v.IISLB > 0 or v.IISUB > 0) and k in instanceBinaryIndices]


            # t6 = time.time()
            # for j, v in enumerate(model.getVars()):
            #     # not violated
            #     if (v.IISLB == 0 and v.IISUB == 0) and j in instanceBinaryIndices:
            #         weights[j] = np.exp(np.dot(c[j], y_pred_binary[i, j].item()))
            
            IISLB = np.array(model.getAttr("IISLB", modelVarsBinary))
            IISUB = np.array(model.getAttr("IISUB", modelVarsBinary))
            violated_vars_indices = IISLB | IISUB
            # violated_vars_indices = [k for k, v in enumerate(modelVarsBinary) if not (IISLB[k] == 0 and IISUB[k] == 0)]
            # not_violated_vars_indices = [k for k, v in enumerate(modelVarsBinary) if (IISLB[k] == 0 and IISUB[k] == 0)]
            
            mask = torch.zeros_like(y_true[i], dtype=torch.bool)
            mask[violated_vars_indices] = True
                        
            # t7 = time.time()        
            weights_not_violated = torch.exp(torch.multiply(c.T, y_pred_binary[i]))
            weights_not_violated[mask] = 0.0
            

            

            # t8 = time.time()
            # denominator = sum(  np.exp(np.dot(c[k], y_pred_binary[i, k].item())) for k in range(y_pred_binary[i].shape[0]) if not k in violated_vars_indices  )
            denominator = sum(weights_not_violated)
            
            # convert denominator to torch float and move to cuda
            denominator = torch.tensor([denominator], device=device)
            # weights = weights.float().to("cpu")
            
            # weights /= denominator    
            weights_not_violated /= denominator
        
            # sort the time in descending order
            
            # t = {"Time for reading model": t2 - t1,
            #         "Time for getting model variables": t3 - t2,
            #             "Time for setting variable bounds": t4 - t3,
            #                 "Time for computing IIS": t5 - t4,
            #                     "Time for getting violated variables": t6 - t5,
            #                         "Time for computing weights": t7 - t6,
            #                             "Time for calculating masked weights": t8 - t7}
            
            # sort the time in descending order
            # t = sorted(t.items(), key=lambda x: x[1], reverse=True)
            # print(t)
        
        else:
            
            # weights are all 1.0
            weights_not_violated = torch.ones_like(y_true[i], dtype=torch.float32)
            denominator = sum(weights_not_violated)
            weights_not_violated /= denominator
        
        instance_weights.append(weights_not_violated)
    
    # convert list of tensors to tensor
    instance_weights = torch.stack(instance_weights)
           
    return instance_weights
        
        

In [86]:
# custom loss for neural network

# @torch.compile
def custom_loss(models: list, binary_indices, indices, y_pred: torch.tensor, y_true: torch.tensor):
    
    time_start = time.time()
    instance_weights = get_feasibility_constrain_weights(models, binary_indices, indices, y_true, y_pred)
    time_end = time.time()
    
    # print("Time for computing weights: ", time_end - time_start)
    
    loss_fn = nn.BCELoss(reduction='none')
    
    t1 = time.time()
    loss = loss_fn(y_pred.float(), y_true.float())
    
    t2 = time.time()
    # multiply the weights by the loss
    loss = torch.multiply(loss, instance_weights)
    
    # sum the loss
    loss = torch.mean(loss)
    
    t3 = time.time()
    mean_loss = torch.tensor(loss, device=device, requires_grad=True)
    
    # print("Time for computing loss: ", t2 - t1)
    # print("Time for multiplying weights: ", t3 - t2)
    # print("Time for computing mean loss: ", time.time() - t3)
    
    return mean_loss

In [82]:
class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.fc1 = nn.Linear(n_features, n_features//8)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(n_features//8, n_features//16)
        self.fc3 = nn.Linear(n_features//16, n_features//32)
        self.fc4 = nn.Linear(n_features//32, out_channels)
        self.sigmoid = nn.Sigmoid()
        
        # add regularization
        self.dropout = nn.Dropout(p=0.2)

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc2(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc3(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc4(x)
        x = self.sigmoid(x)
        
        return x

In [73]:
def set_seeds(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.benchmark = (
        False  # Force cuDNN to use a consistent convolution algorithm
    )
    torch.backends.cudnn.deterministic = (
        True  # Force cuDNN to use deterministic algorithms if available
    )
    torch.use_deterministic_algorithms(
        True
    )  # Force torch to use deterministic algorithms if available


config = {
        'train_val_split': [0.80, 0.20], # These must sum to 1.0
        'batch_size' : 32, # Num samples to average over for gradient updates
        'EPOCHS' : 100, # Num times to iterate over the entire dataset
        'LEARNING_RATE' : 1e-3, # Learning rate for the optimizer
        'BETA1' : 0.9, # Beta1 parameter for the Adam optimizer
        'BETA2' : 0.999, # Beta2 parameter for the Adam optimizer
        'WEIGHT_DECAY' : 1e-4, # Weight decay parameter for the Adam optimizer
    }

In [74]:
set_seeds(42)


In [83]:
net = NeuralNetwork()
net = torch.compile(net)

batch_size = 32

# optimizer = optim.SGD(net.parameters(), lr=0.001)

# create the dataloader for X and solutions
train_loader = DataLoader(
    TensorDataset(torch.tensor(X_train), torch.tensor(y_train)),
    batch_size=batch_size,
    shuffle=True,
)

valid_loader = DataLoader(
    TensorDataset(torch.tensor(X_test), torch.tensor(y_test)),
    batch_size=batch_size,
    shuffle=True,
)

params = list(net.parameters())

# optimizer = AdamW(params, lr=config['LEARNING_RATE'], weight_decay=1e-4)
optimizer = optim.Adam(net.parameters(), lr=0.0001)
# optimizer = dadaptation.DAdaptAdam(params, lr=1, log_every=5, betas=(BETA1, BETA2), weight_decay=1e-4, decouple=True)
# scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=gamma)
total_steps = len(train_loader)

scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=config['LEARNING_RATE'], steps_per_epoch=total_steps, epochs=config['EPOCHS'])

In [84]:
# convert to cuda
net = net.to(device)

In [77]:
models = []
# load the models
gurobi_env = gb.Env()
gurobi_env.setParam("OutputFlag", 0)
for i in range(len(model_files)):
    models.append(gb.read("instances/mip/data/COR-LAT/" + model_files[i], env=gurobi_env))
    

Set parameter Username
Academic license - for non-commercial use only - expires 2024-04-18


In [87]:
for epoch in range(100):
    running_loss = 0.0
    curr_lr = optimizer.param_groups[0]['lr']
    for i, data in enumerate(train_loader, 0):
        inputs, labels = data
        
        inputs = inputs.to(device)
        labels = labels.to(device)
        
        optimizer.zero_grad()
        outputs = net(inputs)
        
        # time to calculate the loss
        start = time.time()
        loss = custom_loss(models, 
                           binary_indices=indices,
                           indices=train_indices,
                            y_pred=outputs,
                            y_true=labels)
        end = time.time()
        print("Time to calculate loss: ", end - start)
                           
        loss.backward()
        optimizer.step()
        scheduler.step()
        running_loss += loss.item()
    print('Epoch %d loss: %.3f lr: %.6f' % (epoch + 1, running_loss / len(train_loader), curr_lr))

  mean_loss = torch.tensor(loss, device=device, requires_grad=True)


Time to calculate loss:  25053.184173107147
