In [82]:
import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import networkx as nx
import pickle as pkl
import scipy
import os

from torch.nn import Linear, ReLU, Dropout
from torch.nn.functional import relu

# f1 score
from sklearn.metrics import f1_score

In [83]:
# Load the preprocessed CORLAT dataset from a path relative to the working
# directory. If the file is not found there, switch to the project root on
# the cluster and retry once.
# NOTE(security): pickle.load can execute arbitrary code; only load files
# from a trusted source.
try:
    with open("Data/corlat/corlat_preprocessed.pickle", "rb") as dataset_file:
        corlat_dataset = pkl.load(dataset_file)
except FileNotFoundError:
    # Only a missing file triggers the fallback; other errors (e.g. a corrupt
    # pickle) are surfaced instead of being silently retried.
    # move dir to /ibm/gpfs/home/yjin0055/Project/DayAheadForecast
    os.chdir("/ibm/gpfs/home/yjin0055/Project/DayAheadForecast")
    with open("Data/corlat/corlat_preprocessed.pickle", "rb") as dataset_file:
        corlat_dataset = pkl.load(dataset_file)

In [84]:
# Preview the first rows of the variable-node feature table of the first instance.
corlat_dataset[0]["input"]["var_node_features"].head()

Unnamed: 0,var_obj_coef,num_nonzero_coef,lp_relax_val,is_lp_relax_val_frac,lp_sol_val_eq_lb,lp_sol_val_eq_ub,has_lb,has_ub,mean_degree,std_degree,min_degree,max_degree,mean_coef,std_coef,min_coef,max_coef,var_type_B,var_type_C
0,5.0,6.0,1.0,True,True,True,True,True,19.0,36.706039,1.0,101.0,-50.166667,49.837792,-100.0,1.0,1,0
1,4.0,6.0,0.0,True,True,True,True,True,35.333333,45.415367,2.0,101.0,-48.833333,51.275129,-100.0,9.0,1,0
2,6.0,6.0,0.442327,True,True,True,True,True,35.333333,45.415367,2.0,101.0,-50.166667,49.837792,-100.0,1.0,1,0
3,5.0,6.0,-0.0,True,True,True,True,True,35.333333,45.415367,2.0,101.0,-48.833333,51.275129,-100.0,9.0,1,0
4,3.0,6.0,-0.0,True,True,True,True,True,35.333333,45.415367,2.0,101.0,-48.833333,51.275129,-100.0,9.0,1,0


In [85]:
# Gather the number of constraint rows and variable rows of every instance
# once, then derive the largest and smallest sizes across the dataset.
constraint_counts = [
    len(sample["input"]["constraint_node_features"]) for sample in corlat_dataset
]
variable_counts = [
    len(sample["input"]["var_node_features"]) for sample in corlat_dataset
]

max_N_constraints = max(constraint_counts)
max_N_variables = max(variable_counts)
min_N_constraints = min(constraint_counts)
min_N_variables = min(variable_counts)

In [86]:
# Report the extreme instance sizes, one line per statistic.
for size_label, size_value in (
    ("Maximum number of variables: ", max_N_variables),
    ("Maximum number of constraints: ", max_N_constraints),
    ("Minimum number of variables: ", min_N_variables),
    ("Minimum number of constraints: ", min_N_constraints),
):
    print(size_label, size_value)

Maximum number of variables:  466
Maximum number of constraints:  551
Minimum number of variables:  466
Minimum number of constraints:  470


In [87]:
# Feature counts are read from the column lists of the first instance's tables.
first_instance_input = corlat_dataset[0]["input"]
n_var_features = len(first_instance_input["var_node_features"].columns)
n_constraint_features = len(first_instance_input["constraint_node_features"].columns)
print("Number of variable node features: ", n_var_features)
print("Number of constraint node features: ", n_constraint_features)

Number of variable node features:  18
Number of constraint node features:  10


In [88]:
# Zero-pad each instance's variable-feature matrix with extra rows (at the
# bottom only) up to max_N_variables, then stack all instances into one array.
# NOTE(review): the table appears to mix bool/float/int columns, so `.values`
# may yield an object-dtype array -- confirm before feeding it to torch.
padded_var_features = []
for sample in corlat_dataset:
    var_features = sample["input"]["var_node_features"].values
    missing_rows = max_N_variables - len(var_features)
    padded_var_features.append(
        np.pad(var_features, ((0, missing_rows), (0, 0)), "constant")
    )

var_node_features = np.stack(padded_var_features)
            

In [89]:
# Shape of the stacked array: (instances, padded variable rows, features per row).
var_node_features.shape

(2000, 466, 18)

In [90]:
# Zero-pad each instance's constraint-feature matrix with extra rows (at the
# bottom only) up to max_N_constraints, then stack all instances into one array.
padded_constraint_features = []
for sample in corlat_dataset:
    constraint_features = sample["input"]["constraint_node_features"].values
    missing_rows = max_N_constraints - len(constraint_features)
    padded_constraint_features.append(
        np.pad(constraint_features, ((0, missing_rows), (0, 0)), "constant")
    )

constraint_node_features = np.stack(padded_constraint_features)

In [91]:
# Shape of the stacked array: (instances, padded constraint rows, features per row).
constraint_node_features.shape

(2000, 551, 10)

In [92]:
# Collapse each instance's (rows, features) matrix into a single flat vector,
# giving 2-D arrays of shape (N_samples, -1) to feed into the neural network.
var_input = var_node_features.reshape(len(var_node_features), -1)
constraint_input = constraint_node_features.reshape(len(constraint_node_features), -1)

In [93]:
# Show the shapes of the flattened feature inputs.
for shape_label, flat_input in (
    ("Shape of variable features input: ", var_input),
    ("Shape of constraint features input: ", constraint_input),
):
    print(shape_label, flat_input.shape)

Shape of variable features input:  (2000, 8388)
Shape of constraint features input:  (2000, 5510)


In [124]:
# Collect the sparse A matrix of every sample into one array.
# NOTE: np.vstack does not concatenate or flatten the scipy sparse matrices
# here; per the recorded outputs below, the result is an object-dtype array of
# shape (N_samples, 1) whose entries are the individual sparse matrices, NOT a
# numeric array of shape N_samples x (A.shape[0] x A.shape[1]).
# TODO(review): decide on the intended representation; the per-sample
# edge_index / edge_attr tensors built later may supersede this.
A_input = np.vstack([x["input"]["A"] for x in corlat_dataset])

In [125]:
# Inspect the shape of the stacked A array (one object entry per sample, see output).
A_input.shape

(2000, 1)

In [126]:
# For every instance, turn the nonzero entries of its sparse A matrix into an
# edge list: edge_index is a 2 x nnz tensor and edge_attr an nnz x 1 tensor.
# The first row of edge_index holds the row indices of A; the second row holds
# the column indices shifted by the number of rows of A, so that row nodes and
# column nodes occupy disjoint index ranges.
# NOTE(review): rows of A look like constraints and columns like variables
# (A is 470x466 in the recorded output) -- confirm the node ordering expected
# by the downstream model.
A_feature_list = []
for sample in corlat_dataset:
    coef_matrix = sample["input"]["A"]
    n_cons = coef_matrix.shape[0]

    # Row indices, column indices and values of the nonzero entries.
    row_idx, col_idx, nonzero_vals = scipy.sparse.find(coef_matrix)

    edge_index = torch.stack(
        [torch.tensor(row_idx), torch.tensor(n_cons + col_idx)],
        dim=0,
    )

    # One weight per edge, stored as a column vector.
    edge_attr = torch.tensor(nonzero_vals).unsqueeze(1)

    A_feature_list.append({"edge_index": edge_index, "edge_attr": edge_attr})

In [130]:
# Bring every sample's edge tensors to a common size by appending zeros:
# edge_index is padded with extra columns, edge_attr with extra rows, up to
# the longest edge list found in A_feature_list.
# NOTE(review): the padded columns are (0, 0) index pairs with weight 0 --
# confirm the downstream model masks or tolerates these entries.
max_edge_index_len = max(len(entry["edge_index"][0]) for entry in A_feature_list)
max_edge_attr_len = max(len(entry["edge_attr"]) for entry in A_feature_list)

for entry in A_feature_list:
    current_index = entry["edge_index"]
    current_attr = entry["edge_attr"]

    # Zero blocks that fill the gap up to the common length.
    index_filler = torch.zeros(
        2, max_edge_index_len - current_index.shape[1], dtype=torch.long
    )
    attr_filler = torch.zeros(
        max_edge_attr_len - current_attr.shape[0], 1, dtype=torch.float32
    )

    padded_index = torch.cat([current_index, index_filler], dim=1)
    padded_attr = torch.cat([current_attr, attr_filler], dim=0)

    # Store the padded tensors back into the same dict (in-place update).
    entry["edge_index"] = padded_index
    entry["edge_attr"] = padded_attr

In [132]:
# Sanity check: after padding, every sample must have identically shaped
# edge_index (2 x max length) and edge_attr (max length x 1) tensors.
for entry in A_feature_list:
    assert entry["edge_index"].shape == (2, max_edge_index_len)
    assert entry["edge_attr"].shape == (max_edge_attr_len, 1)

In [95]:
# Print the shape of the stacked A array (an object array with one entry per sample, see output).
print("Shape of A matrix input: ", A_input.shape)

Shape of A matrix input:  (2000, 1)


In [96]:
# List the top-level keys stored for each instance of the dataset.
corlat_dataset[0].keys()

dict_keys(['solution', 'indices', 'input'])

In [97]:
# For each instance, convert its solution dictionary to a list of values,
# producing one row per instance.
# The loop runs over every instance in the dataset; the previous bound,
# len(corlat_dataset[0]["solution"]), was the number of entries in the first
# solution rather than the number of instances.
solutions = [list(sample["solution"].values()) for sample in corlat_dataset]

In [98]:
# Convert the list of per-instance solution value lists to a numpy array
# (rebinds `solutions`; re-running this cell on the array is harmless).
solutions = np.array(solutions)

In [99]:
# Inspect the first row of A_input: per the output, an object array wrapping one sparse matrix.
A_input[0]

array([<470x466 sparse matrix of type '<class 'numpy.float64'>'
       	with 1751 stored elements in Compressed Sparse Row format>],
      dtype=object)