In [1]:
import torch
import pandas as pd
import numpy as np
import pickle as pkl
import scipy
import os

In [None]:
!nvidia-smi

In [2]:
try:
    corlat_dataset = pkl.load(open("Data/corlat/corlat.pickle", "rb"))
except:
    # move dir to /ibm/gpfs/home/yjin0055/Project/DayAheadForecast
    os.chdir("/ibm/gpfs/home/yjin0055/Project/DayAheadForecast")
    corlat_dataset = pkl.load(open("Data/corlat/corlat.pickle", "rb"))

In [8]:
# print the keys of the dataset
print("keys: ", corlat_dataset[0].keys())
print("input keys: ", corlat_dataset[0]["input"].keys())


keys:  dict_keys(['solution', 'indices', 'input'])
input keys:  dict_keys(['A', 'var_node_features', 'constraint_node_features'])


In [11]:
print("Var node features shape: ", corlat_dataset[0]["input"]["var_node_features"].shape)
print("Constraint node features shape: ", corlat_dataset[0]["input"]["constraint_node_features"].shape)

Var node features shape:  (466, 17)
Constraint node features shape:  (470, 9)


In [12]:
# names of the variable features
# 1. Variable objective coefficient
# 2. Variable type
# 3. Number of non-zero coefficients in the constraint
# 4. LP relaxation value at root node
# 5. Is LP relaxation value fractional
# 6. LP solution value equals lower bound
# 7. LP solution value equals upper bound
# 8. Has lower bound
# 9. Has upper bound
# 10. Mean degree of the constraint nodes connected to the variable
# 11. Std. deviation of the degree of the constraint nodes connected to the variable
# 12. Min. degree of the constraint nodes connected to the variable
# 13. Max. degree of the constraint nodes connected to the variable
# 14. Mean coefficient of the constraint nodes connected to the variable
# 15. Std. deviation of the coefficient of the constraint nodes connected to the variable
# 16. Min. coefficient of the constraint nodes connected to the variable
# 17. Max. coefficient of the constraint nodes connected to the variable

In [16]:
# print feature type for variable node
for i in range(corlat_dataset[0]["input"]["var_node_features"].shape[1]):
    print("feature", i, ":", type(corlat_dataset[0]["input"]["var_node_features"][0][i]))

feature 0 : <class 'numpy.str_'>
feature 1 : <class 'numpy.str_'>
feature 2 : <class 'numpy.str_'>
feature 3 : <class 'numpy.str_'>
feature 4 : <class 'numpy.str_'>
feature 5 : <class 'numpy.str_'>
feature 6 : <class 'numpy.str_'>
feature 7 : <class 'numpy.str_'>
feature 8 : <class 'numpy.str_'>
feature 9 : <class 'numpy.str_'>
feature 10 : <class 'numpy.str_'>
feature 11 : <class 'numpy.str_'>
feature 12 : <class 'numpy.str_'>
feature 13 : <class 'numpy.str_'>
feature 14 : <class 'numpy.str_'>
feature 15 : <class 'numpy.str_'>
feature 16 : <class 'numpy.str_'>


In [27]:
# print one row of the variable node features
print(corlat_dataset[0]["input"]["var_node_features"][0])

['5.0' 'B' '6' '1.0' '0.0' '0.0' '1.0' '1.0' '1.0' '-50.166666666666664'
 '49.83779244263891' '-100.0' '1.0' '-50.166666666666664'
 '49.83779244263891' '-100.0' '1.0']


In [29]:
# make dataframe for variable node features
for i in range(len(corlat_dataset)):
    corlat_dataset[i]["input"]["var_node_features"] = pd.DataFrame(
        corlat_dataset[i]["input"]["var_node_features"]
    )
    corlat_dataset[i]["input"]["var_node_features"].columns = [
        "var_obj_coef",
        "var_type",
        "num_nonzero_coef",
        "lp_relax_val",
        "is_lp_relax_val_frac",
        "lp_sol_val_eq_lb",
        "lp_sol_val_eq_ub",
        "has_lb",
        "has_ub",
        "mean_degree",
        "std_degree",
        "min_degree",
        "max_degree",
        "mean_coef",
        "std_coef",
        "min_coef",
        "max_coef",
    ]

In [33]:
# print head of the variable node features
corlat_dataset[0]["input"]["var_node_features"].head()

Unnamed: 0,var_obj_coef,var_type,num_nonzero_coef,lp_relax_val,is_lp_relax_val_frac,lp_sol_val_eq_lb,lp_sol_val_eq_ub,has_lb,has_ub,mean_degree,std_degree,min_degree,max_degree,mean_coef,std_coef,min_coef,max_coef
0,5.0,B,6,1.0,0.0,0.0,1.0,1.0,1.0,-50.16666666666666,49.83779244263891,-100.0,1.0,-50.16666666666666,49.83779244263891,-100.0,1.0
1,4.0,B,6,0.0,0.0,1.0,0.0,1.0,1.0,-48.833333333333336,51.27512934053788,-100.0,9.0,-48.833333333333336,51.27512934053788,-100.0,9.0
2,6.0,B,6,-0.0,0.0,1.0,0.0,1.0,1.0,-50.16666666666666,49.83779244263891,-100.0,1.0,-50.16666666666666,49.83779244263891,-100.0,1.0
3,5.0,B,6,-0.0,0.0,1.0,0.0,1.0,1.0,-48.833333333333336,51.27512934053788,-100.0,9.0,-48.833333333333336,51.27512934053788,-100.0,9.0
4,3.0,B,6,-0.0,0.0,1.0,0.0,1.0,1.0,-48.833333333333336,51.27512934053788,-100.0,9.0,-48.833333333333336,51.27512934053788,-100.0,9.0


In [31]:
# convert column types
# dtype for variable node features
# 1. float
# 2. str
# 3. int
# 4. float
# 5. bool
# 6. bool
# 7. bool
# 8. bool
# 9. bool
# 10. float
# 11. float
# 12. int
# 13. int
# 14. float
# 15. float
# 16. float
# 17. float
var_column_types = {
    "var_obj_coef": float,
    "var_type": str,
    "num_nonzero_coef": int,
    "lp_relax_val": float,
    "is_lp_relax_val_frac": bool,
    "lp_sol_val_eq_lb": bool,
    "lp_sol_val_eq_ub": bool,
    "has_lb": bool,
    "has_ub": bool,
    "mean_degree": float,
    "std_degree": float,
    "min_degree": int,
    "max_degree": int,
    "mean_coef": float,
    "std_coef": float,
    "min_coef": float,
    "max_coef": float,
}
for i in range(len(corlat_dataset)):
    corlat_dataset[i]["input"]["var_node_features"] = corlat_dataset[i][
        "input"
    ]["var_node_features"].astype(var_column_types)

ValueError: invalid literal for int() with base 10: '-100.0'