In [34]:
import torch
import pandas as pd
import numpy as np
import pickle as pkl
import scipy
import os

In [35]:
!nvidia-smi

Thu Apr 13 11:18:03 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 515.43.04    Driver Version: 515.43.04    CUDA Version: 11.7     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA A100 80G...  On   | 00000000:17:00.0 Off |                   On |
| N/A   34C    P0    42W / 300W |     24MiB / 81920MiB |     N/A      Default |
|                               |                      |              Enabled |
+-------------------------------+----------------------+----------------------+
|   1  NVIDIA A100 80G...  On   | 00000000:65:00.0 Off |                   On |
| N/A   34C    P0    43W / 300W |     26MiB / 81920MiB |     N/A      Default |
|       

In [75]:
try:
    corlat_dataset = pkl.load(open("Data/corlat/corlat.pickle", "rb"))
except:
    # move dir to /ibm/gpfs/home/yjin0055/Project/DayAheadForecast
    os.chdir("/ibm/gpfs/home/yjin0055/Project/DayAheadForecast")
    corlat_dataset = pkl.load(open("Data/corlat/corlat.pickle", "rb"))

In [76]:
# print the keys of the dataset
print("keys: ", corlat_dataset[0].keys())
print("input keys: ", corlat_dataset[0]["input"].keys())


keys:  dict_keys(['solution', 'indices', 'input'])
input keys:  dict_keys(['A', 'var_node_features', 'constraint_node_features'])


In [77]:
print("Var node features shape: ", corlat_dataset[0]["input"]["var_node_features"].shape)
print("Constraint node features shape: ", corlat_dataset[0]["input"]["constraint_node_features"].shape)

Var node features shape:  (466, 17)
Constraint node features shape:  (470, 9)


In [78]:
# names of the variable features
# 1. Variable objective coefficient
# 2. Variable type
# 3. Number of non-zero coefficients in the constraint
# 4. LP relaxation value at root node
# 5. Is LP relaxation value fractional
# 6. LP solution value equals lower bound
# 7. LP solution value equals upper bound
# 8. Has lower bound
# 9. Has upper bound
# 10. Mean degree of the constraint nodes connected to the variable
# 11. Std. deviation of the degree of the constraint nodes connected to the variable
# 12. Min. degree of the constraint nodes connected to the variable
# 13. Max. degree of the constraint nodes connected to the variable
# 14. Mean coefficient of the constraint nodes connected to the variable
# 15. Std. deviation of the coefficient of the constraint nodes connected to the variable
# 16. Min. coefficient of the constraint nodes connected to the variable
# 17. Max. coefficient of the constraint nodes connected to the variable

In [79]:
# print feature type for variable node
for i in range(corlat_dataset[0]["input"]["var_node_features"].shape[1]):
    print("feature", i, ":", type(corlat_dataset[0]["input"]["var_node_features"][0][i]))

feature 0 : <class 'numpy.str_'>
feature 1 : <class 'numpy.str_'>
feature 2 : <class 'numpy.str_'>
feature 3 : <class 'numpy.str_'>
feature 4 : <class 'numpy.str_'>
feature 5 : <class 'numpy.str_'>
feature 6 : <class 'numpy.str_'>
feature 7 : <class 'numpy.str_'>
feature 8 : <class 'numpy.str_'>
feature 9 : <class 'numpy.str_'>
feature 10 : <class 'numpy.str_'>
feature 11 : <class 'numpy.str_'>
feature 12 : <class 'numpy.str_'>
feature 13 : <class 'numpy.str_'>
feature 14 : <class 'numpy.str_'>
feature 15 : <class 'numpy.str_'>
feature 16 : <class 'numpy.str_'>


In [80]:
# print one row of the variable node features
print(corlat_dataset[0]["input"]["var_node_features"][0])

['5.0' 'B' '6' '1.0' '0.0' '0.0' '1.0' '1.0' '1.0' '19.0'
 '36.706039466732626' '1.0' '101.0' '-50.166666666666664'
 '49.83779244263891' '-100.0' '1.0']


In [81]:
# make dataframe for variable node features
for i in range(len(corlat_dataset)):
    corlat_dataset[i]["input"]["var_node_features"] = pd.DataFrame(
        corlat_dataset[i]["input"]["var_node_features"]
    )
    corlat_dataset[i]["input"]["var_node_features"].columns = [
        "var_obj_coef",
        "var_type",
        "num_nonzero_coef",
        "lp_relax_val",
        "is_lp_relax_val_frac",
        "lp_sol_val_eq_lb",
        "lp_sol_val_eq_ub",
        "has_lb",
        "has_ub",
        "mean_degree",
        "std_degree",
        "min_degree",
        "max_degree",
        "mean_coef",
        "std_coef",
        "min_coef",
        "max_coef",
    ]

In [82]:
# print head of the variable node features
corlat_dataset[0]["input"]["var_node_features"].head()

Unnamed: 0,var_obj_coef,var_type,num_nonzero_coef,lp_relax_val,is_lp_relax_val_frac,lp_sol_val_eq_lb,lp_sol_val_eq_ub,has_lb,has_ub,mean_degree,std_degree,min_degree,max_degree,mean_coef,std_coef,min_coef,max_coef
0,5.0,B,6,1.0,0.0,0.0,1.0,1.0,1.0,19.0,36.70603946673263,1.0,101.0,-50.16666666666666,49.83779244263891,-100.0,1.0
1,4.0,B,6,0.0,0.0,1.0,0.0,1.0,1.0,35.333333333333336,45.415366953879776,2.0,101.0,-48.833333333333336,51.27512934053788,-100.0,9.0
2,6.0,B,6,0.4423271989566729,1.0,0.0,0.0,1.0,1.0,35.333333333333336,45.415366953879776,2.0,101.0,-50.16666666666666,49.83779244263891,-100.0,1.0
3,5.0,B,6,-0.0,0.0,1.0,0.0,1.0,1.0,35.333333333333336,45.415366953879776,2.0,101.0,-48.833333333333336,51.27512934053788,-100.0,9.0
4,3.0,B,6,-0.0,0.0,1.0,0.0,1.0,1.0,35.333333333333336,45.415366953879776,2.0,101.0,-48.833333333333336,51.27512934053788,-100.0,9.0


In [83]:
# convert column types
# dtype for variable node features
# 1. float
# 2. str
# 3. float
# 4. float
# 5. bool
# 6. bool
# 7. bool
# 8. bool
# 9. bool
# 10. float
# 11. float
# 12. float
# 13. float
# 14. float
# 15. float
# 16. float
# 17. float
var_column_types = {
    "var_obj_coef": float,
    "var_type": str,
    "num_nonzero_coef": float,
    "lp_relax_val": float,
    "is_lp_relax_val_frac": bool,
    "lp_sol_val_eq_lb": bool,
    "lp_sol_val_eq_ub": bool,
    "has_lb": bool,
    "has_ub": bool,
    "mean_degree": float,
    "std_degree": float,
    "min_degree": float,
    "max_degree": float,
    "mean_coef": float,
    "std_coef": float,
    "min_coef": float,
    "max_coef": float,
}
for i in range(len(corlat_dataset)):
    corlat_dataset[i]["input"]["var_node_features"] = corlat_dataset[i][
        "input"
    ]["var_node_features"].astype(var_column_types)

In [84]:
# head of the variable node features
corlat_dataset[0]["input"]["var_node_features"].head()

Unnamed: 0,var_obj_coef,var_type,num_nonzero_coef,lp_relax_val,is_lp_relax_val_frac,lp_sol_val_eq_lb,lp_sol_val_eq_ub,has_lb,has_ub,mean_degree,std_degree,min_degree,max_degree,mean_coef,std_coef,min_coef,max_coef
0,5.0,B,6.0,1.0,True,True,True,True,True,19.0,36.706039,1.0,101.0,-50.166667,49.837792,-100.0,1.0
1,4.0,B,6.0,0.0,True,True,True,True,True,35.333333,45.415367,2.0,101.0,-48.833333,51.275129,-100.0,9.0
2,6.0,B,6.0,0.442327,True,True,True,True,True,35.333333,45.415367,2.0,101.0,-50.166667,49.837792,-100.0,1.0
3,5.0,B,6.0,-0.0,True,True,True,True,True,35.333333,45.415367,2.0,101.0,-48.833333,51.275129,-100.0,9.0
4,3.0,B,6.0,-0.0,True,True,True,True,True,35.333333,45.415367,2.0,101.0,-48.833333,51.275129,-100.0,9.0


In [85]:
# now one hot encode var_type column
# ('C' for continuous, 'B' for binary, 'I' for integer, 'S' for semi-continuous, or 'N' for semi-integer).
# Therefore we result in a total of 5 columns for var_type after one hot encoding
# The dataset only contains 'C' and 'B' for var_type so we only need 2 columns
for i in range(len(corlat_dataset)):
    dummies = pd.get_dummies(corlat_dataset[i]["input"]["var_node_features"]["var_type"], prefix="var_type", prefix_sep="_")
    
    # simple unit test to make sure that the one hot encoding converts the var_type column to 2 columns
    assert dummies.shape[1] == 2
    
    # add the one hot encoded columns to the dataframe
    corlat_dataset[i]["input"]["var_node_features"] = pd.concat(
        [corlat_dataset[i]["input"]["var_node_features"], dummies], axis=1
    )
    
    # drop the original var_type column
    corlat_dataset[i]["input"]["var_node_features"] = corlat_dataset[i]["input"][
        "var_node_features"
    ].drop("var_type", axis=1) 

In [86]:
corlat_dataset[0]["input"]["var_node_features"].head()

Unnamed: 0,var_obj_coef,num_nonzero_coef,lp_relax_val,is_lp_relax_val_frac,lp_sol_val_eq_lb,lp_sol_val_eq_ub,has_lb,has_ub,mean_degree,std_degree,min_degree,max_degree,mean_coef,std_coef,min_coef,max_coef,var_type_B,var_type_C
0,5.0,6.0,1.0,True,True,True,True,True,19.0,36.706039,1.0,101.0,-50.166667,49.837792,-100.0,1.0,1,0
1,4.0,6.0,0.0,True,True,True,True,True,35.333333,45.415367,2.0,101.0,-48.833333,51.275129,-100.0,9.0,1,0
2,6.0,6.0,0.442327,True,True,True,True,True,35.333333,45.415367,2.0,101.0,-50.166667,49.837792,-100.0,1.0,1,0
3,5.0,6.0,-0.0,True,True,True,True,True,35.333333,45.415367,2.0,101.0,-48.833333,51.275129,-100.0,9.0,1,0
4,3.0,6.0,-0.0,True,True,True,True,True,35.333333,45.415367,2.0,101.0,-48.833333,51.275129,-100.0,9.0,1,0
