In [5]:
import os
import os.path as osp

import networkx as nx
import numpy as np
import pandas as pd
import torch
from torch_geometric.data import InMemoryDataset, DataLoader
from torch_geometric.utils import from_networkx


class MaxIndDataset(InMemoryDataset):
    def __init__(self, root):
        self.label_frame = pd.read_csv(osp.join(root, "label.csv"))
        self.weight_frame = pd.read_csv(osp.join(root, "weight.csv"))
        self.root_dir = root
        self.num_graphs = len(self.label_frame)
        super(MaxIndDataset, self).__init__(root)
        self.data, self.slices = torch.load(self.processed_paths[0])
   
    @property   
    def processed_file_names(self):
        return ['data.pt']
    
    def _download(self):
        pass
    
    def process(self):
        data_list = []
        for i in range(self.num_graphs):
            graph_name = os.path.join(self.root_dir, self.label_frame.iloc[i, 0])
            graph = nx.read_adjlist(graph_name, nodetype=int)
   
            weight = torch.tensor(self.weight_frame.iloc[i, 1:], dtype=torch.float)
            label = torch.tensor(self.label_frame.iloc[i, 1:], dtype=torch.long)
        
            data = from_networkx(graph)
            data.x = weight
            data.y = label
            
            data_list.append(data)
        data, slices = self.collate(data_list)
        torch.save((data, slices), self.processed_paths[0])

In [6]:
from torch_geometric.utils import to_networkx
import matplotlib.pyplot as plt
import torch
def draw_entry(entry):
    g = to_networkx(entry)
    label = entry["y"]
    
    # Create color map from selected nodes, green for selected, grey for unselected.
    color_map = ["grey"] * len(g.nodes)

    for i in np.flatnonzero(label):
        color_map[i] = "green"
        
    node_labels = entry["x"]
    
    if not torch.equal(node_labels, torch.ones(len(g.nodes))):
        node_labels = {k: "{0}:\n{1:.3f}".format(k, v) for (k, v) in enumerate(node_labels)}
    else:
        node_labels = {k: k for k in g.nodes}
    
    plt.figure()
    pos = nx.circular_layout(g)
    nx.draw(g, pos, node_size=2000, width = 1, node_color = color_map)
    nx.draw_networkx_labels(g, pos, node_labels)
    plt.show()  
    

In [14]:
def split_loader(dataset, train_size, test_size, batch_size):
    dataset.shuffle()
    size = len(dataset)
    
    tr_i = int(size * train_size)
    val_i = tr_i + int(size * test_size)
    train_loader = DataLoader(test[:tr_i], batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(test[tr_i: val_i])
    test_loader = DataLoader(test[val_i:])
    return train_loader, val_loader, test_loader

In [15]:
bin_80 = MaxIndDataset('data/binomial_80')

train_loader, val_loader, test_loader = split_loader(bin_80, .7, .2, 20)



In [30]:

for b in train_loader:
    print(b)
    print(b.x.size())
    print(b.edge_index.size())
    break

Batch(batch=[1600], edge_index=[2, 25948], x=[1600], y=[1600])
torch.Size([1600])
torch.Size([2, 25948])


In [31]:
train_loader.dataset

MaxIndDataset(5600)

In [27]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = GCNConv(1, 16)
        self.conv2 = GCNConv(16, 1)

    def forward(self, data):
        weight = data["x"].view(-1, 1)
        edge_index = data["edge_index"]
        x = self.conv1(weight, edge_index)
        x = F.relu(x)
        x = self.conv2(x, edge_index)
        x = torch.sigmoid(x)
        return x

In [37]:
from tqdm import tqdm, tqdm_notebook

### Runs but losses dont go down, seems to converge to around 0.48

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

model = Net().to(device)
criterion = torch.nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

def train(epoch):
    model.train()
    running_loss = 0.0
    for i, item in enumerate(train_loader):
        optimizer.zero_grad()
        item = item.to(device)
        outputs = model(item)
        
        loss = criterion(outputs, item["y"].float())
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    return running_loss / len(train_loader.dataset)

def evaluate(loader):
    model.eval()
    running_acc = 0.0 # Implement later
    running_loss = 0.0
    for i, item in enumerate(loader):
        item = item.to(device)
        outputs = model(item)
        loss = criterion(outputs.squeeze(), item["y"].float().squeeze())
        running_loss += loss.item()
    return running_loss / len(loader.dataset)
        
for epoch in tqdm_notebook(range(30)):
    train_loss = train(epoch)
    val_loss = evaluate(val_loader)
    print(('Epoch: {:03d}, Train Loss: {:.3f}, Val Loss: {:.3f}').format(epoch, train_loss, val_loss))

print('Finished Training')        

HBox(children=(IntProgress(value=0, max=30), HTML(value='')))

Epoch: 000, Train Loss: 0.025, Val Loss: 0.478


KeyboardInterrupt: 