In [1]:
import torch
import torch_geometric as tg
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import networkx as nx
import pickle as pkl
import scipy
import os

from torch.nn import Linear, ReLU, Dropout
from torch.nn.functional import relu
from torch_geometric.nn import Sequential, GCNConv, JumpingKnowledge
from torch_geometric.nn import global_mean_pool

# f1 score
from sklearn.metrics import f1_score

In [2]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [3]:
try:
    corlat_dataset = pkl.load(open("Data/corlat/corlat.pickle", "rb"))
except:
    # move dir to /ibm/gpfs/home/yjin0055/Project/DayAheadForecast
    os.chdir("/ibm/gpfs/home/yjin0055/Project/DayAheadForecast")
    corlat_dataset = pkl.load(open("Data/corlat/corlat.pickle", "rb"))
    


In [4]:
!nvidia-smi

Wed Apr 12 15:37:18 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 515.43.04    Driver Version: 515.43.04    CUDA Version: 11.7     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA A100 80G...  On   | 00000000:17:00.0 Off |                   On |
| N/A   41C    P0    64W / 300W |   1523MiB / 81920MiB |     N/A      Default |
|                               |                      |              Enabled |
+-------------------------------+----------------------+----------------------+
|   1  NVIDIA A100 80G...  On   | 00000000:65:00.0 Off |                   On |
| N/A   34C    P0    43W / 300W |     26MiB / 81920MiB |     N/A      Default |
|       

In [5]:
corlat_dataset[0].keys()

dict_keys(['solution', 'indices', 'input'])

In [6]:
corlat_dataset[0]['solution'].keys()

dict_keys([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99])

In [7]:
corlat_dataset[0]['indices'].keys()

dict_keys(['indices'])

In [8]:
corlat_dataset[0]['input'].keys()

dict_keys(['A', 'cost_vectors', 'rhs'])

In [9]:
n_vars = corlat_dataset[0]["input"]["A"].shape[1]
n_cons = corlat_dataset[0]["input"]["A"].shape[0]

N_list = []
n_vars_list = []
n_cons_list = []

for i in range(len(corlat_dataset)):
    n_vars_list.append(corlat_dataset[i]["input"]["A"].shape[1])
    n_cons_list.append(corlat_dataset[i]["input"]["A"].shape[0])
    N_list.append(corlat_dataset[i]["input"]["A"].shape[1] + corlat_dataset[i]["input"]["A"].shape[0])

In [10]:
n_sample = len(corlat_dataset)

In [11]:
input_data = []

In [12]:
for i in range(n_sample):

    tmp_dict = {}

    # for row in range(n_vars):
    #     for col in range(n_cons):
    #         if input_dict_list[i]["A"][row, col] != 0:
    #             adj_matrix[row, n_vars + col] = input_dict_list[i]["A"][row, col]
    #             adj_matrix[n_vars + col, row] = input_dict_list[i]["A"][row, col]

    I, J, V = scipy.sparse.find(corlat_dataset[i]["input"]["A"])
    # adj_matrix[I, n_vars + J] = V
    # adj_matrix[n_vars + J, I] = V

    # # convert to COO format
    edge_index = torch.stack([torch.tensor(I), torch.tensor(n_cons + J)], dim=0)

    # expand V to 2D
    edge_attr = torch.tensor(V).unsqueeze(1)

    tmp_dict["edge_index"] = edge_index
    tmp_dict["edge_attr"] = edge_attr

    input_data.append(tmp_dict)

In [13]:
# print shape of edge_index and edge_attr
print("shape of edge_index: ", input_data[0]["edge_index"].shape)
print("shape of edge_attr: ", input_data[0]["edge_attr"].shape)

shape of edge_index:  torch.Size([2, 1751])
shape of edge_attr:  torch.Size([1751, 1])


In [14]:
torch.tensor(corlat_dataset[0]["input"]["rhs"]).shape

  torch.tensor(corlat_dataset[0]["input"]["rhs"]).shape


torch.Size([470])

In [15]:
torch.tensor(corlat_dataset[i]["input"]["cost_vectors"]).shape

torch.Size([466])

In [16]:
type(corlat_dataset[1]["input"]["rhs"])

torch.Tensor

In [17]:
torch.tensor(corlat_dataset[1]["input"]["rhs"]).shape

  torch.tensor(corlat_dataset[1]["input"]["rhs"]).shape


torch.Size([470])

In [18]:
# node features
for i in range(n_sample):
    input_data[i]["x"] = torch.cat(
        [torch.tensor(corlat_dataset[i]["input"]["rhs"]), torch.tensor(corlat_dataset[i]["input"]["cost_vectors"])]
    )

    # expand dimension
    input_data[i]["x"] = input_data[i]["x"].unsqueeze(1)    
    
    input_data[i]["batch"] = torch.tensor([i] * input_data[i]["x"].shape[0])

  [torch.tensor(corlat_dataset[i]["input"]["rhs"]), torch.tensor(corlat_dataset[i]["input"]["cost_vectors"])]


In [19]:
variable_nodes = []
BATCH_SIZE = 2
variable_nodes = [
    range(n_cons_list[i] + i * N_list[i], (i + 1) * N_list[i]) for i in range(BATCH_SIZE)]

In [20]:
variable_nodes

[range(470, 936), range(1406, 1872)]

In [21]:
# variable nodes are located at range(n_cons + i * N, (i + 1) * N) for i in range(BATCH_SIZE)

BinaryNodes = [
    range(n_cons_list[i] + i * N_list[i] + corlat_dataset[0]["indices"]["indices"][0], n_cons_list[i] + i * N_list[i] + corlat_dataset[0]["indices"]["indices"][0] + (corlat_dataset[0]["indices"]["indices"][-1] - corlat_dataset[0]["indices"]["indices"][0]) + 1) for i in range(BATCH_SIZE)]

assert np.array_equal(n_cons + np.array(corlat_dataset[0]["indices"]["indices"]), np.array(BinaryNodes[0]))

In [22]:
# make output nodes indices array
output_nodes = BinaryNodes  
# flatten output_nodes
output_nodes = np.array(output_nodes).flatten()

In [23]:
# output keys are DayAheadBuySellStatus and DayAheadOnOffChargingStatus
output_data_dict = []
for i in range(n_sample):
    BinarySolution = torch.Tensor(list(corlat_dataset[i]["solution"].values()))

    # convert to binary
    BinarySolution = torch.where(BinarySolution > 0.5, 1, 0)
    
    
    tmp_dict = {"y": torch.from_numpy(np.array(BinarySolution))}
    output_data_dict.append(tmp_dict)


In [24]:
y = output_data_dict[0]["y"]

In [25]:
y.shape

torch.Size([100])

In [26]:
torch.is_floating_point(y)

False

In [27]:
torch.unique(y).numel()

2

In [28]:
int(y.max()) + 1

2

In [29]:
y.size(-1)

100

In [30]:
"""
Create a pytorch geometric dataset
1. Graph - Pass in edge_index, edge_attr
2. Node - Pass in the node features tensor for x
3. Create a dataset by subclassing PyTorch Geometric's Dataset class. At a minimum you need to implement:

    len - Returns the number of graphs in the dataset
    get - Retrieves a graph object by its index

4. You can also add additional functionality like transforms, downloading data, etc.
"""

class MIPDataset(tg.data.InMemoryDataset):
    def __init__(self, root, input_data_dict, output_data_dict, transform=None, pre_transform=None):
        self.input_data_dict = input_data_dict
        self.output_data_dict = output_data_dict
        super(MIPDataset, self).__init__(root, transform, pre_transform)
        self.data, self.slices = torch.load(self.processed_paths[0])
        

    @property
    def raw_file_names(self):
        return []

    @property
    def processed_file_names(self):
        return ["data.pt"]

    def download(self):
        pass

    def process(self):
        data_list = []
        for i in range(n_sample):

            data = tg.data.Data(
                x= self.input_data_dict[i]["x"],
                edge_index=self.input_data_dict[i]["edge_index"],
                edge_attr=self.input_data_dict[i]["edge_attr"],
                y=self.output_data_dict[i]["y"],
                batch=self.input_data_dict[i]["batch"],
            )
            data_list.append(data)

        data, slices = self.collate(data_list)
        
        torch.save((data, slices), self.processed_paths[0])

In [31]:
"""
Implement a GCN model

Modification to the GCN model:
1. Extend the node embeddings for layer l + 1 by concatenating the node embeddings from layer l. Specifically, we now define the embedding for layer l + 1 to be  ̃ Z(l+1) = (Z(l+1),  ̃ Z(l)), i.e., the concatenation of the matrices row-wise, with  ̃ Z(0) = Z0
2. Apply layer norm at the output of each layer
3.  modification made to a Multi-Layer Perceptron (MLP) function called fθ. 
The original function was a linear mapping followed by a fixed nonlinearity in a standard Graph Convolutional Network (GCN) developed by Kipf and Welling in 2016. 
However, in this paper, the researchers have generalized fθ to be an MLP,
"""

class GCN(torch.nn.Module):
    def __init__(self, in_channels, out_channels, hidden_channels, output_nodes):
        super(GCN, self).__init__()

        self.conv1 = tg.nn.GCNConv(
            in_channels, hidden_channels, cached=False, normalize=False, add_self_loops=False
        )


        self.conv2 = tg.nn.GCNConv(
            hidden_channels, hidden_channels, cached=False, normalize=False, add_self_loops=False
        )

        self.conv3 = tg.nn.GCNConv(
            2*hidden_channels, hidden_channels, cached=False, normalize=False, add_self_loops=False
        )

        self.mlp3 = torch.nn.Sequential(
            torch.nn.Linear(hidden_channels, hidden_channels),
            torch.nn.ReLU(),
            torch.nn.Linear(hidden_channels, out_channels),
        )

        self.output_nodes = output_nodes


    def forward(self, x, edge_index, edge_attr):
        
        # concatenate the node embeddings from layer l. Specifically, we now define the embedding for layer l + 1 to be  ̃ Z(l+1) = (Z(l+1),  ̃ Z(l)), i.e., the concatenation of the matrices row-wise, with  ̃ Z(0) = Z0 (the first layer )

        # use a prev_x to store the previous layer's node embeddings

        x = self.conv1(x, edge_index, edge_attr)
        Z_tilde_0 = x
        relu(Z_tilde_0, inplace=True)
        # print("Z_tilde_0 shape after mlp1: ", Z_tilde_0.shape)

        Z_tilde = self.conv2(Z_tilde_0, edge_index, edge_attr)
        relu(Z_tilde, inplace=True)
        # print("Z_tilde shape after mlp2: ", Z_tilde.shape)

        Z_tilde = torch.cat([Z_tilde, Z_tilde_0], dim=-1)
        relu(Z_tilde, inplace=True)
        # print("Z_tilde shape after cat: ", Z_tilde.shape)

        Z_tilde = self.conv3(Z_tilde, edge_index, edge_attr)
        relu(Z_tilde, inplace=True)

        # print("Z_tilde shape after conv3: ", Z_tilde.shape)

        # out = Z_tilde[self.output_nodes]
        out = Z_tilde

        # print("out shape: ", out.shape)

        out = self.mlp3(out)[self.output_nodes]

        return torch.sigmoid(out)


In [32]:
# create dataloader
data_root_dir = "./Data/input_data_corlat/"
dataset = MIPDataset(root=data_root_dir, input_data_dict=input_data, output_data_dict=output_data_dict, transform=None, pre_transform=None)
dataloader = tg.data.DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)



In [33]:
output_nodes.shape

(200,)

In [34]:
dataset.num_classes

2

In [35]:
# list available cuda device
torch.cuda.device_count()

1

In [45]:
# create model
out_channels=1
num_classes = 100
model = GCN(in_channels=1, out_channels=out_channels, hidden_channels=8, output_nodes=output_nodes).to(device)


model_test = Sequential('x, edge_index, edge_attr, batch', [
    (Dropout(p=0.5), 'x -> x'),
    (GCNConv(dataset.num_features, 64), 'x, edge_index -> x1'),
    ReLU(inplace=True),
    (GCNConv(64, 64), 'x1, edge_index -> x2'),
    ReLU(inplace=True),
    (lambda x1, x2: [x1, x2], 'x1, x2 -> xs'),
    (JumpingKnowledge("cat", 64, num_layers=2), 'xs -> x'),
    (global_mean_pool, 'x, batch -> x'),
    Linear(2 * 64, num_classes),
    ReLU(inplace=True),
    Dropout(p=0.5),
    Linear(num_classes, num_classes),
    torch.nn.Sigmoid(),
]).to(device)

print(model_test)

Sequential(
  (0): Dropout(p=0.5, inplace=False)
  (1): GCNConv(1, 64)
  (2): ReLU(inplace=True)
  (3): GCNConv(64, 64)
  (4): ReLU(inplace=True)
  (5): <function <lambda> at 0x1555518704c0>
  (6): JumpingKnowledge(cat)
  (7): <function global_mean_pool at 0x15548dcc69e0>
  (8): Linear(in_features=128, out_features=100, bias=True)
  (9): ReLU(inplace=True)
  (10): Dropout(p=0.5, inplace=False)
  (11): Linear(in_features=100, out_features=100, bias=True)
  (12): Sigmoid()
)


In [42]:
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)

In [43]:
print(model)

GCN(
  (conv1): GCNConv(1, 8)
  (conv2): GCNConv(8, 8)
  (conv3): GCNConv(16, 8)
  (mlp3): Sequential(
    (0): Linear(in_features=8, out_features=8, bias=True)
    (1): ReLU()
    (2): Linear(in_features=8, out_features=1, bias=True)
  )
)


In [44]:
# test model
EPOCHS = 10
loss_fn = torch.nn.BCELoss()
for epoch in range(EPOCHS):

    total_loss = 0

    for data in dataloader:

        # zero the parameter gradients
        optimizer.zero_grad()

        # ensure x is of shape (N, 1), if not, reshape, use try except
        # ensure edge_index is of shape (2, E), if not, reshape, use try except
        # ensure edge_attr is of shape (E, 1), if not, reshape, use try except

        try:
            assert data.x.shape[1] == 1

            assert data.edge_index.shape[0] == 2

            assert data.edge_attr.shape[1] == 1

        except:
            data.x = data.x.reshape(-1, 1)
            data.edge_index = data.edge_index.reshape(2, -1)
            data.edge_attr = data.edge_attr.reshape(-1, 1)

        # convert to float
        data.x = data.x.float().to(device)
        data.edge_index = data.edge_index.long().to(device)
        data.edge_attr = data.edge_attr.float().to(device)
        data.y = data.y.float().to(device)
        data.batch = data.batch.long().to(device)
        
        # print("device of x: ", data.x.device)
        # print("device of edge_index: ", data.edge_index.device)
        # print("device of edge_attr: ", data.edge_attr.device)
        # print("device of y: ", data.y.device)
        # print("device of batch: ", data.batch.device)
        
        

        # print("Shape of x: ", data.x.shape)
        # print("Shape of edge_index: ", data.edge_index.shape)
        # print("Shape of edge_attr: ", data.edge_attr.shape)
        # out = model(data.x, data.edge_index, data.edge_attr, data.batch).squeeze().cpu()
        out = model(data.x, data.edge_index, data.edge_attr).squeeze().cpu()

        # require_grad = True
        
        loss = loss_fn(out.reshape(-1), data.y.cpu())    
        # loss.requires_grad = True    

        optimizer.zero_grad(set_to_none=True)
        optimizer.step()

        loss.backward()

        total_loss += loss.item()

        # # convert out to binary
        # binary_out = torch.where(out > 0.5, 1, 0)

        # # F1 score
        # # type cast data.y from float of 0s and 1s to binary
        # target_y = torch.where(data.y.cpu() > 0.5, 1, 0)
        # f1 = f1_score(target_y, binary_out, average='macro')

        # print("f1 score: ", f1)
    
    total_loss /= len(dataloader)
    print(f"Epoch {epoch+1}, Loss: {total_loss}")


Epoch 1, Loss: 0.7083542866110801
Epoch 2, Loss: 0.7083561928272247
Epoch 3, Loss: 0.7083542426228523
Epoch 4, Loss: 0.7083551099896431
Epoch 5, Loss: 0.7083535273075103
Epoch 6, Loss: 0.7083550285100937
Epoch 7, Loss: 0.7083536430597305
Epoch 8, Loss: 0.7083549926280975
Epoch 9, Loss: 0.7083534873723983
Epoch 10, Loss: 0.7083545327186584
