In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import networkx as nx
from skimage.util import montage
from IPython.display import Image, display, SVG, clear_output, HTML

import matplotlib.pyplot as plt
%matplotlib inline
# Notebook-wide plotting defaults: square 6x6 figures at 125 dpi, 14pt DejaVu Sans text.
plt.rcParams["figure.figsize"] = (6, 6)
plt.rcParams["figure.dpi"] = 125
plt.rcParams["font.size"] = 14
plt.rcParams["font.sans-serif"] = ['DejaVu Sans']
# NOTE(review): the style sheet is applied after the rcParams assignments above and may
# override some of them -- confirm this ordering is intended.
plt.style.use('ggplot')
# seaborn "whitegrid" style with the axes grid switched off.
sns.set_style("whitegrid", {'axes.grid':False})
# Default colormap for images is grayscale.
plt.rcParams['image.cmap'] = 'gray'

import scipy.sparse as sp
from sklearn.neighbors import kneighbors_graph
from tensorflow.keras.datasets import mnist as m
from sklearn.model_selection import train_test_split

# Side length in pixels of an MNIST image (the arrays loaded below have shape (N, 28, 28)).
# NOTE(review): this constant is not referenced in the visible cells.
MNIST_SIZE = 28

In [2]:
class ImageGrid(object):
    """Graph view of a single image: one node per pixel plus two "master" nodes.

    Pixel nodes come from ``nx.grid_2d_graph(height, width)`` and are keyed
    ``(row, col)``.  Every pixel is additionally linked to the two master nodes
    ``(-1, 0)`` and ``(0, -1)``.  Node values and edge values are both stored
    under the ``'weight'`` attribute.
    """

    # Master nodes mapped to the node weight each receives when no explicit
    # weight is passed to ``set_nodes``.
    MASTER_NODES = {(-1, 0): 1.0, (0, -1): 0.0}

    def __init__(self, array=None, diff_edge=False, seed=32):
        """Build the graph for ``array``.

        Args:
            array (numpy array): image of shape (H, W) or (H, W, C).  Required;
                the ``None`` default is kept only for signature compatibility.
            diff_edge (bool or str, optional): ``"mean"`` or ``"diff"`` selects
                how edge weights are derived from the endpoint values (see
                ``set_edges``); any other value keeps the constant 0.5.
                Defaults to False.
            seed (int, optional): seed for the random extra edges added by
                ``set_edges`` when ``cc == 8``. Defaults to 32.

        Raises:
            ValueError: if ``array`` is None.
        """
        self.reset()
        if array is None:
            raise ValueError(
                "ImageGrid requires an image array of shape (H, W) or (H, W, C)")
        # NOTE(review): only 3-D input is cast to float32; 2-D input keeps its
        # original dtype.  Left unchanged because downstream code may rely on
        # the resulting dtype -- confirm before unifying.
        self.array = array.astype(np.float32) if array.ndim == 3 else np.expand_dims(array, axis=2)
        self.height, self.width = array.shape[:2]
        self.seed = seed
        # Create the graph: nodes are (row, col) tuples.
        self.graph = nx.grid_2d_graph(self.height, self.width)

        # Nodes first, so the master nodes carry a weight before edges are derived.
        self.set_nodes(weight=None)
        self.set_edges(weight=None, diff_edge=diff_edge)
        self.number_of_nodes = self.graph.number_of_nodes()
        self.number_of_edges = self.graph.number_of_edges()

    def reset(self):
        """Clear the image, its dimensions and the graph."""
        self.height = 0
        self.width = 0
        self.array = None
        self.graph = None

    def _endpoint_value(self, node):
        """Return the value used for ``node`` when deriving an edge weight.

        Pixel nodes use the image value at ``(row, col)``.  Master nodes have
        no pixel, so their node weight is used instead; indexing the image
        with their negative coordinates would wrap around to the last
        row/column.
        """
        if node in self.MASTER_NODES:
            attrs = self.graph.nodes[node]
            if 'weight' in attrs:
                return attrs['weight']
            return np.array([self.MASTER_NODES[node]], dtype=np.float32)
        return self.array[node]

    def set_edges(self, weight=None, diff_edge=True, cc=8):
        """Add edges and assign their ``'weight'`` attribute.

        Args:
            weight: unused; kept for interface compatibility.
            diff_edge (bool or str): ``"mean"`` sets each edge weight to the
                mean of its two endpoint values; ``"diff"`` sets it to
                ``1 - |a - b|``; any other value leaves the constant 0.5.
            cc (int): connectivity.  ``8`` weights the 4-neighbour grid edges,
                adds both diagonals and adds ``height * width // 4`` seeded
                random edges.  ``4`` only weights the 4-neighbour grid edges.
                Any other value adds nothing beyond the master edges.
        """
        rows, cols = self.height, self.width

        if cc in (4, 8):
            # The 4-neighbour edges already exist (grid_2d_graph); this only
            # attaches the default weight to every one of them.
            self.graph.add_edges_from(
                [((r, c), (r, c + 1)) for r in range(rows) for c in range(cols - 1)]
                + [((r, c), (r + 1, c)) for r in range(rows - 1) for c in range(cols)],
                weight=0.5)

        if cc == 8:
            # Diagonal edges: main diagonals first, then anti-diagonals.
            self.graph.add_edges_from(
                [((r, c), (r + 1, c + 1))
                 for r in range(rows - 1) for c in range(cols - 1)]
                + [((r + 1, c), (r, c + 1))
                   for r in range(rows - 1) for c in range(cols - 1)],
                weight=0.5)

            # Random extra edges between arbitrary pixel pairs.  A private
            # RandomState yields the same draws as seeding the global
            # generator, without disturbing global random state.
            rng = np.random.RandomState(self.seed)
            count = rows * cols // 4
            src_rows = rng.choice(rows, size=count, replace=True)
            src_cols = rng.choice(cols, size=count, replace=True)
            dst_rows = rng.choice(rows, size=count, replace=True)
            dst_cols = rng.choice(cols, size=count, replace=True)
            self.graph.add_edges_from(
                [((r0, c0), (r1, c1))
                 for r0, c0, r1, c1 in zip(src_rows.tolist(), src_cols.tolist(),
                                           dst_rows.tolist(), dst_cols.tolist())],
                weight=0.5)

        # Connect every pixel to each master node.
        for master in self.MASTER_NODES:
            self.graph.add_edges_from(
                [((r, c), master) for r in range(rows) for c in range(cols)],
                weight=0.5)

        if diff_edge == "mean":
            for edge in self.graph.edges:
                # Mean of the two endpoint values.
                self.graph.edges[edge]['weight'] = (
                    self._endpoint_value(edge[0]) + self._endpoint_value(edge[1])) * 0.5
        elif diff_edge == "diff":
            for edge in self.graph.edges:
                # Similar endpoints give a weight close to 1.
                self.graph.edges[edge]['weight'] = (
                    1 - np.abs(self._endpoint_value(edge[0]) - self._endpoint_value(edge[1])))

    def set_nodes(self, weight=None):
        """Assign the ``'weight'`` attribute of every node and add the master nodes.

        Args:
            weight (float, optional): if given, every node (pixels and master
                nodes) gets ``np.array([weight], dtype=np.float32)``.  If None,
                pixel nodes get their image value ``array[row, col, :]`` and
                the master nodes get the defaults in ``MASTER_NODES``.
        """
        for node in self.graph.nodes:
            if node in self.MASTER_NODES:
                # Master nodes have no pixel; they are handled below.
                continue
            row, col = node
            self.graph.nodes[node]['weight'] = self.array[row, col, :]\
                if weight is None else np.array([weight], dtype=np.float32)
        # Set master nodes
        for master, default in self.MASTER_NODES.items():
            value = default if weight is None else weight
            self.graph.add_node(master, weight=np.array([value], dtype=np.float32))

In [3]:
# Load MNIST through the Keras helper.
(X_train, y_train), (X_test, y_test) = m.load_data()

# Divide by 255 (presumably mapping 8-bit intensities into [0, 1] -- confirm).
X_train = X_train / 255.
X_test = X_test / 255.

# Hold out 10000 training samples as a validation split, with a fixed seed.
X_train, X_val, y_train, y_val = train_test_split(
    X_train,
    y_train,
    test_size=10000,
    random_state=0,
)

In [4]:
# Shapes of the training and test image arrays, one per line.
print(X_train.shape, X_test.shape, sep="\n")

(50000, 28, 28)
(10000, 28, 28)


In [5]:
# !pip install torch-scatter torch-sparse torch-cluster torch-spline-conv torch-geometric -f https://data.pyg.org/whl/torch-1.11.0+cu113.html

In [6]:
import torch_geometric

# Sanity check: turn the first training image into a graph and convert it
# to a PyG object, grouping the 'weight' attributes of nodes and edges.
grid = ImageGrid(X_train[0], diff_edge="diff", seed=42)
G = torch_geometric.utils.from_networkx(
    grid.graph,
    group_node_attrs=['weight'],
    group_edge_attrs=['weight'],
)

# Attach the label (as a one-element list) and display it.
G.y = [y_train[0]]
G.y[0]

  data[key] = torch.tensor(value)


7

In [7]:
import os
import torch
from tqdm.notebook import tqdm
from torch_geometric.data import Data, Dataset

class ImageDataset(Dataset):
  """Graph dataset with one saved graph file per MNIST image.

  Reads the notebook-level arrays ``X_train``/``y_train`` (or
  ``X_test``/``y_test`` when ``test=True``), converts every image to a graph
  with ``ImageGrid`` and stores each graph as its own ``.pt`` file in
  ``self.processed_dir``.

  Args:
    root: dataset root directory, forwarded to the base class.
    test: if True use the test arrays and the ``data_test_*.pt`` files,
      otherwise the training arrays and the ``data_*.pt`` files.
    transform, pre_transform: forwarded to the base class.
      NOTE(review): ``pre_transform`` is not applied inside ``process``.
  """

  def __init__(self, root, test=False, transform=None, pre_transform=None):
    # Must be set before the base constructor runs: the properties and
    # methods below read it.
    self.test = test
    super(ImageDataset, self).__init__(root, transform, pre_transform)

  def _split_arrays(self):
    """Return the ``(images, labels)`` arrays of the selected split."""
    return (X_test, y_test) if self.test else (X_train, y_train)

  def _file_name(self, idx):
    """Return the processed file name for sample ``idx`` of the selected split."""
    return f'data_test_{idx}.pt' if self.test else f'data_{idx}.pt'

  @property
  def raw_file_names(self):
    return "GraphMNIST.csv"

  @property
  def processed_file_names(self):
    images, _ = self._split_arrays()
    return [self._file_name(i) for i in range(len(images))]

  def download(self):
    # Nothing to download: the images come from the in-memory arrays.
    pass

  def process(self):
    """Convert every image of the selected split to a graph and save it."""
    images, labels = self._split_arrays()
    for i, image in tqdm(enumerate(images), total=len(images)):
      grid = ImageGrid(image, diff_edge="diff", seed=42)
      G = torch_geometric.utils.from_networkx(grid.graph,
                                              group_node_attrs=['weight'],
                                              group_edge_attrs=['weight'])

      # One int64 class label per graph.
      G.y = torch.tensor(np.asarray([labels[i]]), dtype=torch.int64)

      torch.save(G, os.path.join(self.processed_dir, self._file_name(i)))

  def len(self):
    """Number of samples in the selected split."""
    return len(self._split_arrays()[0])

  def get(self, idx):
    """Load and return the saved graph with index ``idx``."""
    return torch.load(os.path.join(self.processed_dir, self._file_name(idx)))

In [8]:
# Training-split dataset rooted at 'data/'.
train_dataset = ImageDataset(root='data/')

In [9]:
# Test-split dataset under the same root; its files use the 'data_test_' prefix.
test_dataset = ImageDataset(root='data/', test=True)

In [10]:
# Inspect the first training graph.
train_dataset[0]

Data(edge_index=[2, 9466], x=[786, 1], edge_attr=[9466, 1], y=[1])

In [11]:
# Inspect the first test graph.
test_dataset[0]

Data(edge_index=[2, 9466], x=[786, 1], edge_attr=[9466, 1], y=[1])

In [12]:
# Label stored with the first training graph.
train_dataset[0].y[0]

tensor(7)

In [13]:
# !zip -r /content/GraphMNIST.zip /content/data/

In [14]:
# from google.colab import files
# files.download("/content/GraphMNIST.zip")

In [15]:
# `torch_geometric.data.DataLoader` is deprecated in PyG 2.x; the graph-aware
# loader lives in `torch_geometric.loader`.
from torch_geometric.loader import DataLoader
NUM_GRAPHS_PER_BATCH = 32
# Training batches are reshuffled on every pass.
train_loader = DataLoader(train_dataset, batch_size=NUM_GRAPHS_PER_BATCH, shuffle=True)
# Evaluation does not depend on sample order, so the test loader is not shuffled.
test_loader = DataLoader(test_dataset, batch_size=NUM_GRAPHS_PER_BATCH, shuffle=False)



In [16]:
# Preview the first ten mini-batches produced by the training loader.
for step, data in enumerate(train_loader):
    # Ten batches have been reported once the zero-based index reaches 10.
    if step == 10:
      break
    print(f'Step {step + 1}:', '=================', sep='\n')
    print(f'Number of graphs in the current batch: {data.num_graphs}')
    print(data, data.x.shape, data.y.shape, sep='\n')

Step 1:
Number of graphs in the current batch: 32
DataBatch(edge_index=[2, 302912], x=[25152, 1], edge_attr=[302912, 1], y=[32], batch=[25152], ptr=[33])
torch.Size([25152, 1])
torch.Size([32])
Step 2:
Number of graphs in the current batch: 32
DataBatch(edge_index=[2, 302912], x=[25152, 1], edge_attr=[302912, 1], y=[32], batch=[25152], ptr=[33])
torch.Size([25152, 1])
torch.Size([32])
Step 3:
Number of graphs in the current batch: 32
DataBatch(edge_index=[2, 302912], x=[25152, 1], edge_attr=[302912, 1], y=[32], batch=[25152], ptr=[33])
torch.Size([25152, 1])
torch.Size([32])
Step 4:
Number of graphs in the current batch: 32
DataBatch(edge_index=[2, 302912], x=[25152, 1], edge_attr=[302912, 1], y=[32], batch=[25152], ptr=[33])
torch.Size([25152, 1])
torch.Size([32])
Step 5:
Number of graphs in the current batch: 32
DataBatch(edge_index=[2, 302912], x=[25152, 1], edge_attr=[302912, 1], y=[32], batch=[25152], ptr=[33])
torch.Size([25152, 1])
torch.Size([32])
Step 6:
Number of graphs in th

In [17]:
from torch.nn import Linear, Flatten
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.nn import global_mean_pool

class GCN(torch.nn.Module):
  """Graph classifier: three GCNConv layers, mean pooling per graph, linear head.

  Args:
    hidden_channels: width of the three graph-convolution layers.

  The input feature size is read from the notebook-level ``train_dataset`` and
  the head outputs 10 scores per graph.
  """

  def __init__(self, hidden_channels):
    super().__init__()
    # Fixed seed so the layers created below are initialised reproducibly.
    torch.manual_seed(12345)
    in_channels = train_dataset.num_features
    self.conv1 = GCNConv(in_channels, hidden_channels)
    self.conv2 = GCNConv(hidden_channels, hidden_channels)
    self.conv3 = GCNConv(hidden_channels, hidden_channels)
    self.flatten = Flatten()
    self.lin = Linear(hidden_channels, 10)

  def forward(self, x, edge_index, batch):
    """Return one row of 10 class scores per graph in the batch."""
    # Node embeddings: ReLU after the first two convolutions only.
    hidden = F.relu(self.conv1(x, edge_index))
    hidden = F.relu(self.conv2(hidden, edge_index))
    hidden = self.conv3(hidden, edge_index)

    # Graph-level readout: average node embeddings per graph, then flatten.
    pooled = self.flatten(global_mean_pool(hidden, batch))

    # Dropout is active only in training mode.
    pooled = F.dropout(pooled, p=0.5, training=self.training)
    return self.lin(pooled)

# Instantiate the network with 32 hidden channels and print its layer summary.
model = GCN(hidden_channels=32)
print(model)

GCN(
  (conv1): GCNConv(1, 32)
  (conv2): GCNConv(32, 32)
  (conv3): GCNConv(32, 32)
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (lin): Linear(in_features=32, out_features=10, bias=True)
)


In [18]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(device)

cuda


In [19]:
NUM_EPOCHS = 200

model = GCN(hidden_channels=32)
# Run the network in double precision (presumably because the stored node
# features are float64 -- confirm).  Cast once here instead of on every epoch.
model.double()
model.to(device)
print(model)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = torch.nn.CrossEntropyLoss()

def _forward_batch(data):
    """Move a batch to ``device`` and return ``(class scores, labels)``."""
    # One `.to(device)` on the whole batch replaces per-tensor `.pin_memory()`
    # calls, which need CUDA and therefore break the CPU fallback.
    data = data.to(device)
    # `.double()` is a no-op when the features are already float64.
    out = model(data.x.double(), data.edge_index, data.batch)
    return out, data.y

def train():
    """Run one optimisation pass over ``train_loader``."""
    model.train()
    for data in train_loader:
        optimizer.zero_grad()
        out, target = _forward_batch(data)
        loss = criterion(out, target)
        loss.backward()
        optimizer.step()

def test(loader):
    """Return the classification accuracy of ``model`` over ``loader``."""
    model.eval()
    correct = 0
    # No gradients are needed for evaluation; skipping them saves memory.
    with torch.no_grad():
        for data in loader:
            out, target = _forward_batch(data)
            pred = out.argmax(dim=1)
            correct += int((pred == target).sum())
    return correct / len(loader.dataset)

for epoch in range(0, NUM_EPOCHS):
    train()
    train_acc = test(train_loader)
    test_acc = test(test_loader)
    print(f'Epoch: {epoch:03d}, Train Acc: {train_acc:.4f}, Test Acc: {test_acc:.4f}')

GCN(
  (conv1): GCNConv(1, 32)
  (conv2): GCNConv(32, 32)
  (conv3): GCNConv(32, 32)
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (lin): Linear(in_features=32, out_features=10, bias=True)
)


TypeError: can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.