In [11]:
import torch
from torch_geometric.data import Data
import networkx as nx

In [12]:
graphs = []

ngraphs = 100
for idx in range(ngraphs):
    n = 10 # Graph size
    g = nx.path_graph(n) # Path graph
    edge_index = torch.tensor(list(g.edges())).t().contiguous()

    # Data and labels are mirrors
    x = torch.randperm(n)
    y = torch.flip(x, [0])
    x = x.to(dtype=torch.float32).unsqueeze(dim=1)

    # Mask
    k = n//10
    mask = torch.cat((torch.ones(k), -torch.ones(k), torch.zeros(n - 2 * k)))
    mask = mask[torch.randperm(n)]
    train_mask = mask == 0
    val_mask = mask == 1
    test_mask = mask == -1

    val_mask[(idx+1)%3] = 1
    test_mask[(idx+2)%3] = 1
    train_mask[val_mask | test_mask] = 0

    # Create the graph
    data = Data(edge_index=edge_index, x=x, y=y, train_mask=train_mask, val_mask=val_mask, test_mask=test_mask)
    graphs.append(data)

In [13]:
import pickle

fname = "mirror.pkl"

with open(fname, "wb") as f:
    pickle.dump(graphs, f)

In [14]:
import hashlib

def calculate_md5(file_path):
    # Create an MD5 hash object
    md5 = hashlib.md5()

    # Open the file in binary mode
    with open(file_path, "rb") as file:
        # Read the file in chunks
        chunk_size = 8192
        for chunk in iter(lambda: file.read(chunk_size), b""):
            # Update the hash with the current chunk
            md5.update(chunk)

    # Return the hexadecimal representation of the digest
    return md5.hexdigest()

In [15]:
from torch_geometric.data import InMemoryDataset, download_url
import os
import os.path as osp
import shutil
import fsspec
import io

def torch_save(data, path) -> None:
    buffer = io.BytesIO()
    torch.save(data, buffer)
    with fsspec.open(path, 'wb') as f:
        f.write(buffer.getvalue())

def torch_load(path):
    with fsspec.open(path, 'rb') as f:
        return torch.load(f)

class Mirror(InMemoryDataset):
    def __init__(self):
        super().__init__('/tmp/Mirror')
        path = osp.join(self.processed_dir, self.processed_file_names[0])
        self.load(path)

    def download(self):
        src = "/nobackup/vbalivada/GraphGPS/cs762/mirror.pkl"
        dst = osp.join(self.raw_dir, "mirror.pkl")
        shutil.copy(src, dst)

    @property
    def raw_file_names(self):
        return ['mirror.pkl']

    @property
    def processed_file_names(self):
        return ['data.pt']

    def process(self):
        with open(osp.join(self.raw_dir, "mirror.pkl"), "rb") as f:
            graphs = pickle.load(f)
        self.save(self.__class__, graphs, osp.join(self.processed_dir, self.processed_file_names[0]))

    @property
    def processed_file_names(self):
        return ["data.pt"]

    @staticmethod
    def save(cls, data_list, path):
        r"""Saves a list of data objects to the file path :obj:`path`."""
        data, slices = cls.collate(data_list)
        torch_save((data.to_dict(), slices), path)
    
    def load(self, path):
        r"""Loads the dataset from the file path :obj:`path`."""
        data, self.slices = torch_load(path)
        if isinstance(data, dict):  # Backward compatibility.
            data = Data.from_dict(data)
        self.data = data

In [16]:
dataset = Mirror()
g = dataset[0]

Processing...
Done!


In [7]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

class GCN(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = GCNConv(dataset.num_node_features, 16)
        self.conv2 = GCNConv(16, dataset.num_classes)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index

        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, edge_index)

        return F.log_softmax(x, dim=1)

In [8]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = GCN().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

model.train()
for epoch in range(200):
    for data in dataset:
        data = data.to(device)
        optimizer.zero_grad()
        out = model(data)
        loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
        loss.backward()
        optimizer.step()

In [9]:
model.eval()
pred = model(data).argmax(dim=1)
correct = (pred[data.test_mask] == data.y[data.test_mask]).sum()
acc = int(correct) / int(data.test_mask.sum())
print(f'Accuracy: {acc:.4f}')

Accuracy: 0.5000


In [10]:
pred[data.test_mask], data.y[data.test_mask]

(tensor([3, 6], device='cuda:0'), tensor([5, 6], device='cuda:0'))