In [2]:
import torch
import random
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.utils.data import TensorDataset
from torch.optim import AdamW
from xgboost import XGBClassifier

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import networkx as nx
import pickle as pkl
import scipy
import os

from torch.nn import Linear, ReLU, Dropout
from torch.nn.functional import relu
from sklearn.model_selection import train_test_split

from sklearn.metrics import f1_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import confusion_matrix

In [3]:
def set_seeds(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.benchmark = (
        False  # Force cuDNN to use a consistent convolution algorithm
    )
    torch.backends.cudnn.deterministic = (
        True  # Force cuDNN to use deterministic algorithms if available
    )
    torch.use_deterministic_algorithms(
        True
    )  # Force torch to use deterministic algorithms if available


In [4]:
set_seeds(42)

In [5]:
config = {
        'train_val_split': [0.80, 0.20], # These must sum to 1.0
        'batch_size' : 32, # Num samples to average over for gradient updates
        'EPOCHS' : 100, # Num times to iterate over the entire dataset
        'LEARNING_RATE' : 1e-3, # Learning rate for the optimizer
        'BETA1' : 0.9, # Beta1 parameter for the Adam optimizer
        'BETA2' : 0.999, # Beta2 parameter for the Adam optimizer
        'WEIGHT_DECAY' : 1e-4, # Weight decay parameter for the Adam optimizer
    }

In [6]:
try:
    corlat_dataset = pkl.load(open("Data/corlat/corlat_preprocessed.pickle", "rb"))
except:
    # move dir to /ibm/gpfs/home/yjin0055/Project/DayAheadForecast
    os.chdir("/ibm/gpfs/home/yjin0055/Project/DayAheadForecast")
    corlat_dataset = pkl.load(open("Data/corlat/corlat_preprocessed.pickle", "rb"))

In [7]:
corlat_dataset[0]["input"]["var_node_features"].head()

Unnamed: 0,var_obj_coef,num_nonzero_coef,lp_relax_val,is_lp_relax_val_frac,lp_sol_val_eq_lb,lp_sol_val_eq_ub,has_lb,has_ub,mean_degree,std_degree,min_degree,max_degree,mean_coef,std_coef,min_coef,max_coef,var_type_B,var_type_C
0,5.0,6.0,1.0,True,True,True,True,True,19.0,36.706039,1.0,101.0,-50.166667,49.837792,-100.0,1.0,1,0
1,4.0,6.0,0.0,True,True,True,True,True,35.333333,45.415367,2.0,101.0,-48.833333,51.275129,-100.0,9.0,1,0
2,6.0,6.0,0.442327,True,True,True,True,True,35.333333,45.415367,2.0,101.0,-50.166667,49.837792,-100.0,1.0,1,0
3,5.0,6.0,-0.0,True,True,True,True,True,35.333333,45.415367,2.0,101.0,-48.833333,51.275129,-100.0,9.0,1,0
4,3.0,6.0,-0.0,True,True,True,True,True,35.333333,45.415367,2.0,101.0,-48.833333,51.275129,-100.0,9.0,1,0


In [8]:
# Obtain the maximum size of N_constraints and N_variables across the dataset.

max_N_constraints = max(
    len(x["input"]["constraint_node_features"]) for x in corlat_dataset
)

max_N_variables = max(
    len(x["input"]["var_node_features"]) for x in corlat_dataset
)

min_N_constraints = min(
    len(x["input"]["constraint_node_features"]) for x in corlat_dataset
)

min_N_variables = min(
    len(x["input"]["var_node_features"]) for x in corlat_dataset
)

In [9]:
print("Maximum number of variables: ", max_N_variables)
print("Maximum number of constraints: ", max_N_constraints)
print("Minimum number of variables: ", min_N_variables)
print("Minimum number of constraints: ", min_N_constraints)

Maximum number of variables:  466
Maximum number of constraints:  551
Minimum number of variables:  466
Minimum number of constraints:  470


In [10]:
print("Number of variable node features: ", len(corlat_dataset[0]["input"]["var_node_features"].columns))
print("Number of constraint node features: ", len(corlat_dataset[0]["input"]["constraint_node_features"].columns))

Number of variable node features:  18
Number of constraint node features:  10


In [11]:
# for each variable node features, pad with 0.0s to make it the same length as the maximum number of variables

var_node_features = np.stack(
    [
        np.pad(
            x["input"]["var_node_features"].values,
            ((0, max_N_variables - len(x["input"]["var_node_features"])), (0, 0)),
            "constant",
            constant_values=0.0,
        )
        for x in corlat_dataset
    ]
)
            

In [12]:
var_node_features.shape

(2000, 466, 18)

In [13]:
constraint_node_features = np.stack(
    [
        np.pad(
            x["input"]["constraint_node_features"].values,
            ((0, max_N_constraints - len(x["input"]["constraint_node_features"])), (0, 0)),
            "constant",
            constant_values=0.0,
        )
        for x in corlat_dataset
    ]   
)

In [14]:
constraint_node_features.shape

(2000, 551, 10)

In [15]:
# for var_node_features and constraint_node_features, reshape to (N_samples, -1) to feed into the neural network
var_input = var_node_features.reshape(var_node_features.shape[0], -1)
constraint_input = constraint_node_features.reshape(constraint_node_features.shape[0], -1)

In [16]:
print("Shape of variable features input: ", var_input.shape)
print("Shape of constraint features input: ", constraint_input.shape)

Shape of variable features input:  (2000, 8388)
Shape of constraint features input:  (2000, 5510)


In [17]:
# get A matrix input by stacking the csr_matrix of each sample getting shape of N_samples x (A.shape[0] x A.shape[1])
A_input = np.vstack([x["input"]["A"] for x in corlat_dataset])

In [18]:
A_input.shape

(2000, 1)

In [19]:
A_feature_list = []
for i in range(len(corlat_dataset)):
    n_cons = corlat_dataset[i]["input"]["A"].shape[0]

    # for row in range(n_vars):
    #     for col in range(n_cons):
    #         if input_dict_list[i]["A"][row, col] != 0:
    #             adj_matrix[row, n_vars + col] = input_dict_list[i]["A"][row, col]
    #             adj_matrix[n_vars + col, row] = input_dict_list[i]["A"][row, col]

    I, J, V = scipy.sparse.find(corlat_dataset[i]["input"]["A"])
    # adj_matrix[I, n_vars + J] = V
    # adj_matrix[n_vars + J, I] = V

    # # convert to COO format
    edge_index = torch.stack([torch.tensor(I), torch.tensor(n_cons + J)], dim=0)

    # expand V to 2D
    edge_attr = torch.tensor(V).unsqueeze(1)

    tmp_dict = {"edge_index": edge_index, "edge_attr": edge_attr}
    A_feature_list.append(tmp_dict)

In [20]:
# for each sample, pad the edge_index and edge_attr to make it the same length as the maximum length of edge_index and edge_attr
max_edge_index_len = max([len(x["edge_index"][0]) for x in A_feature_list])
max_edge_attr_len = max([len(x["edge_attr"]) for x in A_feature_list])

for i in range(len(A_feature_list)):
    edge_index = A_feature_list[i]["edge_index"]
    edge_attr = A_feature_list[i]["edge_attr"]

    # pad edge_index
    edge_index = torch.cat(
        [
            edge_index,
            torch.zeros(
                2, max_edge_index_len - len(edge_index[0]), dtype=torch.long
            ),
        ],
        dim=1,
    )

    # pad edge_attr
    edge_attr = torch.cat(
        [
            edge_attr,
            torch.zeros(
                max_edge_attr_len - len(edge_attr), 1, dtype=torch.float32
            ),
        ],
        dim=0,
    )

    A_feature_list[i]["edge_index"] = edge_index
    A_feature_list[i]["edge_attr"] = edge_attr

In [21]:
# check if the padding is correct by checking the shape of edge_index and edge_attr
for i in range(len(A_feature_list)):
    assert A_feature_list[i]["edge_index"].shape == (2, max_edge_index_len)
    assert A_feature_list[i]["edge_attr"].shape == (max_edge_attr_len, 1)

In [22]:
print("Shape of A matrix input: ", A_input.shape)

Shape of A matrix input:  (2000, 1)


In [23]:
corlat_dataset[0].keys()

dict_keys(['solution', 'indices', 'input'])

In [24]:
# for each solution convert the dictionary to a list of values
solutions = [
    list(corlat_dataset[i]["solution"].values())
    for i in range(len(corlat_dataset))
]

In [25]:
# convert solutions_list to numpy array
solutions = np.array(solutions)

In [26]:
# combine the variable features and constraint features into a single input
X = np.hstack([var_input, constraint_input])

In [27]:
n_features = X.shape[1]
out_channels = solutions.shape[1]

In [28]:
out_channels

100

In [29]:
X.shape

(2000, 13898)

In [30]:
class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.fc1 = nn.Linear(n_features, n_features//4)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(n_features//2, n_features//2)
        self.fc3 = nn.Linear(n_features//2, n_features//2)
        self.fc4 = nn.Linear(n_features//2, out_channels)
        self.sigmoid = nn.Sigmoid()
        
        # add regularization
        self.dropout = nn.Dropout(p=0.2)

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc2(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc3(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc4(x)
        x = self.sigmoid(x)
        
        return x

In [31]:
# check type for one sample of solutions
solutions[0].dtype

dtype('float64')

In [32]:
# convert X and solutions to float32
X = X.astype(np.float32)
solutions = solutions.astype(np.float32)

In [33]:
# check all samples have same dimension
for i in range(len(solutions)):
    assert solutions[i].shape == (100,)

for i in range(len(X)):
    assert X[i].shape == (13898,)

In [36]:
# train test split to get indices for train and test
train_idx, test_idx = train_test_split(
    np.arange(len(solutions)), test_size=0.2, random_state=42
)

X_train = X[train_idx]
X_test = X[test_idx]
y_train = solutions[train_idx]
y_test = solutions[test_idx]

In [38]:
# X_train, X_test, y_train, y_test = train_test_split(X, solutions, test_size=0.2, random_state=42, shuffle=True)
# save the train and test data in the directory Data/corlat/
np.save("Data/corlat/X_train.npy", X_train)
np.save("Data/corlat/X_test.npy", X_test)
np.save("Data/corlat/y_train.npy", y_train)
np.save("Data/corlat/y_test.npy", y_test)
np.save("Data/corlat/train_idx.npy", train_idx)
np.save("Data/corlat/test_idx.npy", test_idx)

In [36]:
net = NeuralNetwork()
net = torch.compile(net)

batch_size = 32

criterion = nn.BCELoss()
# optimizer = optim.SGD(net.parameters(), lr=0.001)

# create the dataloader for X and solutions
train_loader = DataLoader(
    TensorDataset(torch.tensor(X_train), torch.tensor(y_train)),
    batch_size=batch_size,
    shuffle=True,
)

valid_loader = DataLoader(
    TensorDataset(torch.tensor(X_test), torch.tensor(y_test)),
    batch_size=batch_size,
    shuffle=True,
)

params = list(net.parameters())

# optimizer = AdamW(params, lr=config['LEARNING_RATE'], weight_decay=1e-4)
optimizer = optim.SGD(net.parameters(), lr=0.001)
# optimizer = dadaptation.DAdaptAdam(params, lr=1, log_every=5, betas=(BETA1, BETA2), weight_decay=1e-4, decouple=True)
# scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=gamma)
total_steps = len(train_loader)

scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=config['LEARNING_RATE'], steps_per_epoch=total_steps, epochs=config['EPOCHS'])

In [37]:
for epoch in range(100):
    running_loss = 0.0
    curr_lr = optimizer.param_groups[0]['lr']
    for i, data in enumerate(train_loader, 0):
        inputs, labels = data
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        scheduler.step()
        running_loss += loss.item()
    print('Epoch %d loss: %.3f lr: %.6f' % (epoch + 1, running_loss / len(train_loader), curr_lr))

Epoch 1 loss: 0.804 lr: 0.000040
Epoch 2 loss: 0.696 lr: 0.000043
Epoch 3 loss: 0.674 lr: 0.000051
Epoch 4 loss: 0.659 lr: 0.000064
Epoch 5 loss: 0.648 lr: 0.000082
Epoch 6 loss: 0.635 lr: 0.000104
Epoch 7 loss: 0.621 lr: 0.000132
Epoch 8 loss: 0.605 lr: 0.000163
Epoch 9 loss: 0.592 lr: 0.000199
Epoch 10 loss: 0.580 lr: 0.000238
Epoch 11 loss: 0.568 lr: 0.000280
Epoch 12 loss: 0.559 lr: 0.000325
Epoch 13 loss: 0.552 lr: 0.000372
Epoch 14 loss: 0.547 lr: 0.000421
Epoch 15 loss: 0.544 lr: 0.000470
Epoch 16 loss: 0.541 lr: 0.000521
Epoch 17 loss: 0.540 lr: 0.000571
Epoch 18 loss: 0.539 lr: 0.000620
Epoch 19 loss: 0.536 lr: 0.000669
Epoch 20 loss: 0.536 lr: 0.000716
Epoch 21 loss: 0.535 lr: 0.000761
Epoch 22 loss: 0.533 lr: 0.000803
Epoch 23 loss: 0.533 lr: 0.000842
Epoch 24 loss: 0.532 lr: 0.000877
Epoch 25 loss: 0.531 lr: 0.000909
Epoch 26 loss: 0.530 lr: 0.000936
Epoch 27 loss: 0.530 lr: 0.000959
Epoch 28 loss: 0.529 lr: 0.000977
Epoch 29 loss: 0.528 lr: 0.000990
Epoch 30 loss: 0.528 lr

In [38]:
# validation of the model using f1 score, precision and recall
y_pred = net(torch.tensor(X_test))
y_pred = y_pred.detach().numpy()
y_pred = np.where(y_pred > 0.5, 1, 0)

y_test = y_test.astype(np.int)

print("F1 score: ", f1_score(y_test, y_pred, average="micro"))
print("Precision: ", precision_score(y_test, y_pred, average="micro"))
print("Recall: ", recall_score(y_test, y_pred, average="micro"))


F1 score:  0.6333832131682764
Precision:  0.7301238827034656
Recall:  0.5592792792792792


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_test = y_test.astype(np.int)


In [39]:
# XGBoost model
y_test = y_test.astype(np.int)
y_train = y_train.astype(np.int)
clf = XGBClassifier(tree_method='hist')
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_test = y_test.astype(np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_train = y_train.astype(np.int)


In [40]:
print("F1 score: ", f1_score(y_test, y_pred, average="micro"))
print("Precision: ", precision_score(y_test, y_pred, average="micro"))
print("Recall: ", recall_score(y_test, y_pred, average="micro"))

F1 score:  0.9237003241685676
Precision:  0.9232569302772111
Recall:  0.9241441441441441


In [43]:
# save xgboost model
print("Saving model...")
pkl.dump(clf, open("Models/Tabular/xgboost_model_corlat.pkl", "wb"))

Saving model...


In [44]:
# save neural network model
print("Saving model...")
# statedict
torch.save(net.state_dict(), "Models/Tabular/neural_network_model_corlat.pt")

Saving model...


In [47]:
# try to load the model net = NeuralNetwork()
net.load_state_dict(torch.load("Models/Tabular/neural_network_model_corlat.pt"))

<All keys matched successfully>