In [1]:
import torch
import torch_geometric as tg
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import networkx as nx
import pickle as pkl
import scipy

# Torch device handle; fixed to the CPU for this notebook.
device = torch.device("cpu")

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Location and naming scheme of the pickled problem instances.
data_path = "./Data/output/"
n_sample = 30

input_name = "input_dict_"
solution_name = "solution_dict_"

input_dict_list = []
solution_dict_list = []

# Files on disk are numbered 1..n_sample; load the input/solution pair of each sample.
# NOTE(review): pickle.load executes arbitrary code from the file - only use trusted data.
for sample_id in range(1, n_sample + 1):
    input_file = f"{data_path}{input_name}{sample_id}.pkl"
    solution_file = f"{data_path}{solution_name}{sample_id}.pkl"

    with open(input_file, "rb") as fh:
        input_dict_list.append(pkl.load(fh))

    with open(solution_file, "rb") as fh:
        solution_dict_list.append(pkl.load(fh))

In [3]:
# Display the repr of the first sample's "A" entry (a sparse matrix, per the output below).
input_dict_list[0]["A"]

<1119720x577440 sparse matrix of type '<class 'numpy.float64'>'
	with 2392970 stored elements in Compressed Sparse Row format>

In [4]:
# Rows of A are counted as constraints and columns as variables
# (sizes are taken from the first sample).
n_cons, n_vars = input_dict_list[0]["A"].shape

# Total number of nodes in the bipartite graph: one per variable plus one per constraint.
N = n_vars + n_cons

print(N)

1697160


In [5]:
# Convert the A matrix of every sample into a bipartite graph in COO format.
#
# Node numbering follows the node-feature layout built later in this notebook
# (`b` entries first, then `c` entries): the rows of A (constraints) take the
# ids [0, n_rows) and the columns of A (variables) take the ids
# [n_rows, n_rows + n_cols).

input_data_dict = []

for i in range(n_sample):
    A = input_dict_list[i]["A"]
    n_rows = A.shape[0]  # per-sample size, so samples of different shape stay correct

    # Row indices, column indices and values of the non-zero entries of A.
    I, J, V = scipy.sparse.find(A)

    # Shift the column ids past the row ids. The offset must be the number of
    # rows: offsetting by the number of columns makes the two id ranges overlap
    # whenever A has more rows than columns.
    # PyTorch Geometric expects edge_index to have dtype torch.long.
    edge_index = torch.stack(
        [
            torch.tensor(I, dtype=torch.long),
            torch.tensor(n_rows + J, dtype=torch.long),
        ],
        dim=0,
    )
    edge_attr = torch.tensor(V)

    input_data_dict.append({"edge_index": edge_index, "edge_attr": edge_attr})


In [6]:
# print shape of edge_index and edge_attr
print("shape of edge_index: ", input_data_dict[0]["edge_index"].shape)
print("shape of edge_attr: ", input_data_dict[0]["edge_attr"].shape)

shape of edge_index:  torch.Size([2, 2392970])
shape of edge_attr:  torch.Size([2392970])


In [7]:
# Report the shapes of the "b" and "c" vectors of the first sample.
first_problem = input_dict_list[0]
print(first_problem["b"].shape)
print(first_problem["c"].shape)

(1119720,)
(577440,)


In [8]:
# Node features: one scalar per node, with the `b` entries first and the `c`
# entries after them.
# PyTorch Geometric expects `x` to be a matrix of shape
# [num_nodes, num_node_features], so the concatenated vector gets an explicit
# feature axis -> shape (len(b) + len(c), 1).
for i in range(n_sample):
    b_feat = torch.tensor(input_dict_list[i]["b"])
    c_feat = torch.tensor(input_dict_list[i]["c"])
    input_data_dict[i]["x"] = torch.cat([b_feat, c_feat]).unsqueeze(1)

In [9]:
"""
Create a pytorch geometric dataset
1. Graph - Pass in edge_index, edge_attr
2. Node - Pass in the node features tensor for x
3. Create a dataset by subclassing PyTorch Geometric's Dataset class. At a minimum you need to implement:

    len - Returns the number of graphs in the dataset
    get - Retrieves a graph object by its index

4. You can also add additional functionality like transforms, downloading data, etc.
"""

class MIPDataset(tg.data.Dataset):
    """
    An in-memory PyTorch Geometric Dataset for MIP graphs.

    Every entry of ``input_data_dict`` is converted once, at construction
    time, into a ``tg.data.Data`` graph and kept in ``self.data``. Nothing is
    read from or written to disk, so the dataset is created without a root
    directory and deliberately does not override the ``download()`` /
    ``process()`` hooks of the base class.

    Parameters
    ----------
    input_data_dict : list of dict
        One dict per graph with the keys "x", "edge_index" and "edge_attr".
    transform : callable, optional
        A function/transform that takes in a graph and returns a transformed version.
        It is applied by the base class each time a graph is accessed.
    pre_transform : callable, optional
        A function/transform that takes in a graph and returns a transformed version.
        It is applied once per graph while the dataset is being built.
    """

    def __init__(self, input_data_dict, transform=None, pre_transform=None):
        # The base-class signature is (root, transform, pre_transform, ...):
        # root has to be passed explicitly, otherwise `transform` ends up in
        # the root slot and `pre_transform` in the transform slot.
        super().__init__(None, transform, pre_transform)
        self.data = self._build_data_list(input_data_dict, pre_transform)

    @staticmethod
    def _build_data_list(input_data_dict, pre_transform=None):
        """
        Turn the raw dicts into a list of ``tg.data.Data`` graphs.

        Named with a leading underscore (and not ``process``) on purpose: a
        subclass method called ``process`` is treated by the base class as the
        on-disk processing hook and would be invoked without arguments.
        """
        data_list = []
        for entry in input_data_dict:
            graph = tg.data.Data(
                x=entry["x"],
                edge_index=entry["edge_index"],
                edge_attr=entry["edge_attr"],
            )
            if pre_transform is not None:
                graph = pre_transform(graph)
            data_list.append(graph)
        return data_list

    def len(self):
        """Return the number of graphs in the dataset."""
        # `len(dataset)` keeps working: the base class implements __len__ on
        # top of this method.
        return len(self.data)

    def get(self, idx):
        """Return the graph stored at position ``idx``."""
        return self.data[idx]


In [None]:
"""
Implement a GCN model.

Modifications to the standard GCN model:
1. Extend the node embeddings of layer l + 1 by concatenating them with the node embeddings of layer l. Specifically, the embedding for layer l + 1 is defined as Z~(l+1) = (Z(l+1), Z~(l)), i.e., the row-wise concatenation of the two matrices, with Z~(0) = Z(0).
2. Apply layer norm at the output of each layer.
3. Generalize the function f_theta.
In the standard Graph Convolutional Network (GCN) developed by Kipf and Welling in 2016, f_theta is a linear mapping followed by a fixed nonlinearity.
In the paper this notebook follows, the researchers generalize f_theta to be a Multi-Layer Perceptron (MLP).
"""

class GCN(torch.nn.Module):
    """
    GCN with skip concatenation, layer norm and a per-layer MLP.

    Each of the ``num_layers`` layers maps its input embedding ``z`` to::

        h = GCNConv(z)            # graph convolution, edge_attr used as edge weight
        h = cat([h, z], dim=1)    # keep the previous embedding next to the new one
        h = LayerNorm(h)
        z = MLP(h)                # Linear -> ReLU -> Linear, back to the layer width

    A sigmoid is applied to the output of the last layer.

    Parameters
    ----------
    in_channels : int
        Number of input features per node.
    out_channels : int
        Number of output features per node.
    hidden_channels : int
        Width of every layer except the last one.
    num_layers : int
        Number of graph-convolution layers (at least 1).

    Raises
    ------
    ValueError
        If ``num_layers`` is smaller than 1.
    """

    def __init__(self, in_channels, out_channels, hidden_channels, num_layers):
        super().__init__()

        if num_layers < 1:
            raise ValueError("num_layers must be at least 1")

        # Output width of every layer, and the input width that follows from it.
        layer_out = [hidden_channels] * (num_layers - 1) + [out_channels]
        layer_in = [in_channels] + layer_out[:-1]

        self.convs = torch.nn.ModuleList()
        self.norms = torch.nn.ModuleList()
        self.mlps = torch.nn.ModuleList()

        for width_in, width_out in zip(layer_in, layer_out):
            # Width after concatenating the conv output with the layer input.
            cat_width = width_out + width_in

            # `cached` is left at its default: it is meant for a single fixed
            # graph, whereas this model is applied to many different graphs.
            self.convs.append(tg.nn.GCNConv(width_in, width_out, normalize=False))
            self.norms.append(torch.nn.LayerNorm(cat_width))
            self.mlps.append(
                torch.nn.Sequential(
                    torch.nn.Linear(cat_width, width_out),
                    torch.nn.ReLU(),
                    torch.nn.Linear(width_out, width_out),
                )
            )

    def forward(self, x, edge_index, edge_attr):
        """
        Run the network on one graph.

        Parameters
        ----------
        x : torch.Tensor
            Node features of shape (num_nodes, in_channels).
        edge_index : torch.Tensor
            Edge list of shape (2, num_edges).
        edge_attr : torch.Tensor
            One weight per edge, passed to the convolutions as edge weight.

        Returns
        -------
        torch.Tensor
            Values in [0, 1] of shape (num_nodes, out_channels).
        """
        for conv, norm, mlp in zip(self.convs, self.norms, self.mlps):
            # No copy is needed for the skip connection: nothing below
            # modifies the layer input in place.
            layer_input = x
            x = conv(x, edge_index, edge_attr)
            x = torch.cat([x, layer_input], dim=1)
            x = norm(x)
            x = mlp(x)

        return torch.sigmoid(x)
