In [2]:
from gurobipy import *
import networkx as nx
import matplotlib.pyplot as plt
import random
import warnings
import os
warnings.filterwarnings("ignore")

## Graph Representation

Definitions:
We have a graph $G = (V, E, w)$, where $V = \{1, 2, ..., n\}$ is the set of vertices, $E$ is the set of edges, and $w : V \rightarrow \mathbb{R}^{+}$ is the function that assigns a positive weight to each vertex.

## Drawing Utility

Takes in an undirected graph and an optional color map, and draws the graph in a circular layout.

In [4]:
def draw_reduced(g, color_map = None):
    """Draw graph ``g`` on a circular layout and show the figure.

    Nodes that carry a "weight" attribute are labelled "<node>: <weight>".
    If no node has that attribute, every node is labelled with its own
    identifier.

    Parameters
    ----------
    g : networkx graph
        The graph to draw.
    color_map : optional
        Passed straight through to ``nx.draw`` as ``node_color``.
    """
    weight_by_node = nx.get_node_attributes(g, "weight")

    if not weight_by_node:
        # Unweighted graph: label each node with its identifier only.
        labels = {node: node for node in g.nodes}
    else:
        labels = {
            node: "{0}: {1}".format(node, weight)
            for node, weight in weight_by_node.items()
        }

    plt.figure()
    layout = nx.circular_layout(g)
    nx.draw(g, layout, node_size=2000, width=1, node_color=color_map)
    nx.draw_networkx_labels(g, layout, labels)
    plt.show()
    # To save the figure instead of showing it:
    # plt.savefig("graph.png", dpi=1000)


## Gurobi maximum independent set solver

Takes in an undirected graph, with an optional weight assigned to each vertex. Returns a maximum (weighted) independent set, computed by the Gurobi solver.

When `display=True` is used, `draw_reduced()` (from the Drawing Utility section) must already be defined.

In [5]:
def compute_max_ind_set(graph, display = False, debug = False):
    """Compute a maximum (weighted) independent set of ``graph`` with Gurobi.

    The problem is modelled as a binary program: one indicator variable per
    node, and one constraint per edge that forbids selecting both endpoints.

    Parameters
    ----------
    graph : networkx graph
        Undirected graph. If nodes carry a "weight" attribute, the weighted
        sum of the selected nodes is maximized; otherwise the number of
        selected nodes is maximized.
        NOTE(review): when weights are present, every node is expected to
        carry one (a missing weight raises KeyError) -- confirm with callers.
    display : bool, optional
        If True, draw the graph with ``draw_reduced()``, colouring selected
        nodes green and the others grey.
    debug : bool, optional
        If False (default), Gurobi's log output is suppressed.

    Returns
    -------
    list
        Labels of the selected nodes, in the order of ``graph.nodes``.
    """
    nodes = list(graph.nodes)
    model = Model('Maximum Independent Set')

    # Suppress solver output unless debugging.
    if not debug:
        model.setParam('OutputFlag', 0)

    # One binary indicator per node, keyed by the node label itself.
    indicators = model.addVars(nodes, vtype=GRB.BINARY, name = "x")

    # Two nodes joined by an edge cannot both be chosen.
    model.addConstrs(((indicators[i] + indicators[j] <= 1) for (i, j) in graph.edges), "Max")

    # Objective: maximize the weighted sum of chosen nodes, or the plain
    # cardinality when the graph has no weights.
    weights = nx.get_node_attributes(graph, "weight")
    if weights:
        obj = sum(indicators[i] * weights[i] for i in nodes)
    else:
        obj = sum(indicators[i] for i in nodes)

    model.setObjective(obj, GRB.MAXIMIZE)
    model.optimize()

    # Collect the selected nodes. Iterate over the actual node labels (they
    # need not be 0..n-1), and threshold at 0.5 because the solver only
    # guarantees integrality up to a tolerance (values like 1e-9 or 0.999999
    # are possible), so an exact comparison with 0 is unreliable.
    soln = [node for node in nodes if indicators[node].x > 0.5]

    if display:
        # Colours follow the order of graph.nodes, which is the order in
        # which the nodes are drawn: green for selected, grey otherwise.
        selected = set(soln)
        color_map = ["green" if node in selected else "grey" for node in nodes]
        draw_reduced(graph, color_map)
    return soln

### One example: Random Binomial Graph

In [6]:
# Generate a random sparse graph (100 nodes, edge probability 0.20).
# A fixed seed keeps this example reproducible across kernel restarts;
# change RAND_GRAPH_SEED to draw a different graph.
RAND_GRAPH_SEED = 0

rand_graph = nx.fast_gnp_random_graph(100, 0.20, seed=RAND_GRAPH_SEED, directed=False)

# Optional: assign random weights to each vertex.
# for v in rand_graph.nodes():
#     rand_graph.nodes[v]['weight'] = round(random.uniform(0, 1), 2)

#draw_reduced(rand_graph)

In [7]:
# Solve the example graph. debug=True leaves Gurobi's log output enabled,
# display=False skips drawing the result.
rand_output = compute_max_ind_set(rand_graph, display=False, debug=True)

Optimize a model with 996 rows, 100 columns and 1992 nonzeros
Variable types: 0 continuous, 100 integer (100 binary)
Coefficient statistics:
  Matrix range     [1e+00, 1e+00]
  Objective range  [1e+00, 1e+00]
  Bounds range     [1e+00, 1e+00]
  RHS range        [1e+00, 1e+00]
Found heuristic solution: objective 14.0000000
Presolve removed 580 rows and 0 columns
Presolve time: 0.00s
Presolved: 416 rows, 100 columns, 1374 nonzeros
Variable types: 0 continuous, 100 integer (100 binary)

Root relaxation: objective 2.645081e+01, 323 iterations, 0.01 seconds

    Nodes    |    Current Node    |     Objective Bounds      |     Work
 Expl Unexpl |  Obj  Depth IntInf | Incumbent    BestBd   Gap | It/Node Time

     0     0   26.45081    0   98   14.00000   26.45081  88.9%     -    0s
H    0     0                      17.0000000   26.45081  55.6%     -    0s
H    0     0                      18.0000000   26.45081  46.9%     -    0s
     0     0   26.26182    0   99   18.00000   26.26182  45.9%  

In [8]:
# Number of nodes in the independent set returned for the example graph.
len(rand_output)

20

## Generic Dataset Generator

Generates random graphs and computes their maximum independent sets.  
Stores the data under `data/[dataset_name]` (`data/binomial_80` with the settings below).  
Each random graph is stored in its own file named `binomial[ID].txt`, where `[ID]` is the ID of the graph. 
The ID ranges from 0 to n - 1, where n is the size of the dataset.

Two CSV files are also written to the directory: `label.csv` (the solutions) and `weight.csv` (the vertex weights). After a header row, each file consists of rows of:  
```
filename, node1, node2, ... node[n]
```
The graph is stored at `filename`, and the solution is given as one 0/1 entry per node (1 if the node belongs to the independent set). Note that only one solution is recorded per graph in this training set; other maximum independent sets may exist.


In [9]:
import os
# Show the current working directory; the relative "data/..." paths used
# below are resolved against it.
print(os.getcwd())

/home/zach/Documents/summer/DeepKidney


In [3]:
import tarfile

def make_tarfile(output_filename, source_dir):
    """Pack ``source_dir`` into a gzip-compressed tar archive.

    The archive contains a single top-level entry named after the last
    component of ``source_dir``, so it extracts into one folder.

    Parameters
    ----------
    output_filename : str or path-like
        Path of the ``.tar.gz`` file to create (overwritten if it exists).
    source_dir : str or path-like
        File or directory to archive. A trailing path separator is allowed.

    Raises
    ------
    FileNotFoundError
        If ``source_dir`` does not exist. The check happens before the
        archive is opened, so no empty archive is left behind.
    """
    if not os.path.lexists(source_dir):
        raise FileNotFoundError("No such file or directory: {!r}".format(source_dir))

    # normpath drops a trailing separator; without it, basename("data/x/")
    # is '' and the contents would land at the archive root.
    top_level_name = os.path.basename(os.path.normpath(source_dir))
    with tarfile.open(output_filename, "w:gz") as tar:
        tar.add(source_dir, arcname=top_level_name)

In [11]:
from tqdm import tqdm_notebook
import os
import csv
import pathlib
import numpy as np
import networkx as nx
# --- Dataset configuration ---
dataset_name = "binomial_80"
root_dir = "data/" + dataset_name
label_filename = "label.csv"
weight_filename = 'weight.csv'

num_examples = 8000   # number of graphs to generate
num_nodes = 80        # nodes per graph
edge_prob = 0.2       # edge probability of the binomial random graph

# --- Dataset generation ---
# For every example: sample a random graph, save it as an adjacency list,
# solve it, and append one row to each CSV (0/1 solution labels and
# per-node weights), keyed by the graph's filename.
os.makedirs(root_dir, exist_ok=True)
label_path = os.path.join(root_dir, label_filename)
weight_path = os.path.join(root_dir, weight_filename)

# newline='' is what the csv module expects for files it writes to; without
# it, extra blank rows appear on platforms that translate "\n" to "\r\n".
with open(label_path, 'w', newline='') as label_file, \
        open(weight_path, 'w', newline='') as weight_file:
    label_writer = csv.writer(label_file, delimiter=',')
    weight_writer = csv.writer(weight_file, delimiter=',')

    # Header row: filename column followed by one column per node id.
    label_writer.writerow(["Filename"] + list(range(num_nodes)))
    weight_writer.writerow(["Filename"] + list(range(num_nodes)))

    for i in tqdm_notebook(range(num_examples)):
        graph_filename = "binomial" + str(i) + ".txt"
        graph = nx.fast_gnp_random_graph(num_nodes, edge_prob, directed=False)

        # Unit weights for every node; swap in the commented line for
        # random weights.
        # weights = np.round(np.random.rand(num_nodes), 3)
        weights = np.ones(num_nodes)
        for v in graph.nodes():
            graph.nodes[v]['weight'] = weights[v]

        with open(os.path.join(root_dir, graph_filename), 'wb+') as graph_file:
            nx.write_adjlist(graph, graph_file)

        output_list = compute_max_ind_set(graph)

        # Turn the list of selected nodes into a 0/1 indicator vector.
        output = np.zeros(num_nodes, dtype=int)
        output[output_list] = 1
        label_writer.writerow([graph_filename] + list(output))
        weight_writer.writerow([graph_filename] + list(weights))

        # Flush both files after every example so that the two CSVs stay in
        # sync on disk if the (long) run is interrupted.
        label_file.flush()
        weight_file.flush()

make_tarfile(root_dir + '.tar.gz', root_dir)

HBox(children=(IntProgress(value=0, max=8000), HTML(value='')))




In [5]:
# Archive the data/weighted_binomial_80 directory as data/weighted_binomial.tar.gz.
# NOTE(review): the archive name drops the "_80" suffix of the source
# directory -- confirm this is intended.
make_tarfile("data/weighted_binomial" + '.tar.gz', "data/weighted_binomial_80")

In [12]:
# Compress dataset for Git
# import os
# dataset_name = "generic_binomial_100"
# root_dir = "data/" + dataset_name
