In [60]:
# Import pytorch-geometric
import torch
from torch_geometric.data import Data
from torch_geometric.loader import DataLoader
from torch_geometric.data import Dataset
from scipy.io import loadmat
import pickle
import numpy as np
from tqdm import tqdm

In [8]:
# Loading data
data_file = loadmat('./Data/Amazon.mat')
labels = data_file['label'].flatten()
feat_data = data_file['features'].todense().A

with open('./Data/amz_homo_adjlists.pickle', 'rb') as file:
    homo = pickle.load(file)

Data class atributes:
 - data.x: Node feature matrix with shape [num_nodes, num_node_features]

 - data.edge_index: Graph connectivity in COO format with shape [2, num_edges] and type torch.long

 - data.edge_attr: Edge feature matrix with shape [num_edges, num_edge_features]

 - data.y: Target to train against (may have arbitrary shape), e.g., node-level targets of shape [num_nodes, *] or graph-level targets of shape [1, *]

 - data.pos: Node position matrix with shape [num_nodes, num_dimensions]

In [30]:
edges_list = []
for i in range(len(homo)):
    edges_list.extend([(i, node) for node in homo[i]])

edges_list = np.array(edges_list)
edges_list = edges_list.transpose()

In [85]:
graph = Data(x=torch.tensor(feat_data), 
            edge_index=torch.tensor(edges_list), 
            y=torch.tensor(labels.reshape(1, len(labels))))

In [59]:
graph

Data(x=[11944, 25], edge_index=[2, 8808728], y=[1, 11944])

In [61]:
# Creating a dataset with the graph

class Amz_dataset(Dataset):
    def __init__(self, graph_list):
        super().__init__(graph_list)
        self.graphs = graph_list

    def len(self):
        return len(self.graphs)

    def get(self, idx):
        data = self.graphs[idx]
        return data
    
dataset = Amz_dataset([graph])

In [62]:
dataloader = DataLoader(dataset, batch_size=1, shuffle=True)

In [65]:
for batch in dataloader:
    print(batch)
    break

DataBatch(x=[11944, 25], edge_index=[2, 8808728], y=[1, 11944], batch=[11944], ptr=[2])


In [78]:
import torch_geometric.transforms as T

split = T.RandomNodeSplit(num_val=0.2, num_test=0.2)
graph = split(graph)
graph

Data(x=[11944, 25], edge_index=[2, 8808728], y=[1, 11944], train_mask=[11944], val_mask=[11944], test_mask=[11944])

In [82]:
from torch_geometric.nn import GCNConv
import torch.nn.functional as F

# Simple GCN model (https://towardsdatascience.com/graph-neural-networks-with-pyg-on-node-classification-link-prediction-and-anomaly-detection-14aa38fe1275)
class GCN(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = GCNConv(25, 16)
        self.conv2 = GCNConv(16, 2)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index

        x = self.conv1(x, edge_index)
        x = F.relu(x)
        output = self.conv2(x, edge_index)

        return output
    
simple_model = GCN().double()

In [86]:
simple_model(graph).shape

torch.Size([11944, 2])