In [18]:
import os
import torch
import pandas as pd
import numpy as np
import csv
import networkx as nx
from torch.utils.data import Dataset, DataLoader
from collections import OrderedDict 

## Drawing Utility

Takes in a undirected graph, and a color map, and draw the graph out in a circular format. 

In [22]:
def draw_entry(entry):
    g = entry["graph"]
    label = entry["label"]
    
    # Create color map from selected nodes, green for selected, grey for unselected.
    color_map = ["grey"] * len(g.nodes)
    for i in np.nonzero(label):
        color_map[i] = "green"

    node_lables = nx.get_node_attributes(g, "weight")
    
    if node_lables:
        node_lables = {k: "{0}: {1}".format(k, v) for (k, v) in node_lables.items()}
    else:
        node_lables = {k: k for k in g.nodes}
    
    plt.figure()
    pos = nx.circular_layout(g)
    nx.draw(g, pos, node_size=2000, width = 1, node_color = color_map)
    nx.draw_networkx_labels(g, pos, node_lables)
    plt.show()  
    # plt.savefig("graph.png", dpi=1000)


In [15]:
dataset_name = "generic_binomial"
root_dir = "data/" + dataset_name
label_filename = "label.csv"

label_dict = OrderedDict()
with open(os.path.join(root_dir, label_filename), 'r') as label_file:
    label_reader = csv.reader(label_file, delimiter = ",")
    for row in label_reader:
        label_dict[row[0]] = sorted(row[1:])
label_frame = pd.DataFrame.from_dict(label_dict, "index")
label_frame.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
binomial0.txt,10,16,20,22,25,31,36,44,49.0,,
binomial1.txt,14,15,19,2,22,26,28,38,40.0,,
binomial2.txt,15,2,24,25,26,3,38,40,,,
binomial3.txt,10,15,22,29,33,35,36,45,7.0,,
binomial4.txt,0,1,10,12,17,19,21,35,48.0,9.0,


In [24]:
class MaxIndSetDataset(Dataset):
    """Graphs labelled with its Maximum Independent Set"""
    
    def __init__(self, csv_file = None, root_dir = None, tar_dir = None):
        if csv_file and root_dir:
            # Read CSV File
            label_dict = OrderedDict()
            with open(os.path.join(root_dir, label_filename), 'r') as label_file:
                label_reader = csv.reader(label_file, delimiter = ",")
                for row in label_reader:
                    label_dict[row[0]] = sorted(row[1:])
            self.label_frame = pd.DataFrame.from_dict(label_dict, "index")
            self.root_dir = root_dir
        else:
            raise NotImplementedError("Need to supply csv_file and root_dir")
    def __len__(self):
        return len(self.label_frame)
    
    def __getitem__(self, idx):
        graph_name = os.path.join(self.root_dir, self.label_frame.iloc[idx, 0])
        graph = nx.read_multiline_adjlist(graph_name)
        label_array = np.array(self.label_frame.iloc[idx, 1:])
        
        # A binary vector of the nodes used
        label_bin = np.zeros(len(graph.nodes))
        label_bin[label_array] = 1
        
        sample = {"graph": graph, "label": label_bin}
        return sample

In [25]:
generic_bin_dataset = MaxIndSetDataset(csv_file='data/generic_binomial/label.csv', 
                                       root_dir='data/generic_binomial/')
draw_entry(generic_bin_dataset[0])

FileNotFoundError: [Errno 2] No such file or directory: 'data/generic_binomial/10'