# Graph Generation With Python

## Imports

In [1]:
# nk: speed++ (openMP backend)
import networkit as nk
# nx: complete++ (pure Python)
import networkx as nx

import numpy as np
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt

In [7]:
import pathlib
base_dir = pathlib.Path('../inputs')

## Manual Graph
Tiny little graph for testing my readers

In [4]:
# Tiny hand-built graph, ONLY for testing the graph readers.
# It is disconnected on purpose (3-6 is its own component, 4 is isolated);
# we don't handle unconnected components.
#
# Graph diagram:
#   0 --- 1     3 --- 6
#   | \   |
#   |  \  |     4
#   |   \ |
#   2     5
tiny_edges = [(0, 1), (0, 2), (0, 5), (1, 5), (3, 6)]
g = nk.graph.Graph(7)
for u, v in tiny_edges:
    g.addEdge(u, v)

# Write the same graph once per on-disk format.
tiny_outputs = (
    ('../inputs/test_graph/tiny_py_GML', nk.Format.GML),
    ('../inputs/test_graph/tiny_py_BIN', nk.Format.NetworkitBinary),
)
for out_path, out_format in tiny_outputs:
    nk.writeGraph(g, out_path, out_format)



In [20]:
# Load the `tiny_cpp_*` files (presumably produced by a C++ writer -- confirm)
# and compare their sizes against the in-memory graph `g`, once per format.
# NOTE: both lines of each pair are labelled 'n:'; the first is the
# edge-count comparison, the second the node-count comparison.
cpp_inputs = (
    ('../inputs/test_graph/tiny_cpp_GML', nk.Format.GML),
    ('../inputs/test_graph/tiny_cpp_BIN', nk.Format.NetworkitBinary),
)
for cpp_path, cpp_format in cpp_inputs:
    g_cpp = nk.readGraph(cpp_path, cpp_format)
    same_edge_count = g_cpp.numberOfEdges() == g.numberOfEdges()
    print('n: {}'.format(same_edge_count))
    same_node_count = g_cpp.numberOfNodes() == g.numberOfNodes()
    print('n: {}'.format(same_node_count))

n: True
n: True
n: True
n: True


In [14]:
# Minimal graph, ONLY for testing: a triangle on three nodes.
#
# Graph diagram:
#   0 --- 1
#     \   |
#      \  |
#       \ |
#         2
triangle_edges = ((0, 1), (0, 2), (1, 2))
g = nk.graph.Graph(3)
for u, v in triangle_edges:
    g.addEdge(u, v)

# Write the same graph once per on-disk format.
min_outputs = (
    ('../inputs/test_graph/min_py_GML', nk.Format.GML),
    ('../inputs/test_graph/min_py_BIN', nk.Format.GraphToolBinary),
)
for out_path, out_format in min_outputs:
    nk.writeGraph(g, out_path, out_format)



## Generate
You should try to generate any kind of graph you like into a variable `g`, and then we'll take its biggest connected component as the final graph.

In [4]:
# =======================================================================
# Any graph generator can be plugged in here, from networkx or networkit:
#   https://networkx.org/documentation/stable/reference/generators.html
#   https://networkit.github.io/dev-docs/python_api/generators.html
# Call your own generator below and keep `algo_name` in sync with it.
# =======================================================================
num_nodes = 10_000_000
expected_degree = 20  # edge probability is expected_degree / num_nodes
algo_name = 'ErdosRenyi'
g = nk.generators.ErdosRenyiGenerator(
    num_nodes, expected_degree / num_nodes
).generate()

In [5]:
# The component extraction below is done by networkit, so a networkx graph is
# converted first. isinstance() also matches subclasses of nx.Graph, which an
# exact type() comparison would skip and pass on unconverted.
if isinstance(g, nx.Graph):
    g = nk.nxadapter.nx2nk(g)
print('Picking the largest connected component!')
cc = nk.components.ConnectedComponents(g)
# The second argument (True) is networkit's `compactGraph` flag.
g_connect = cc.extractLargestConnectedComponent(g, True)
# n and m describe the extracted component, not the original graph `g`.
n = g_connect.numberOfNodes()
m = g_connect.numberOfEdges()
print('Connected: {} nodes and {} egdes'.format(n, m))

Picking the largest connected component!
Connected: 10000000 nodes and 100004838 egdes


In [5]:
def gen_graph(g, algo_name):
    """Reduce a graph to its largest connected component.

    Args:
        g: A networkx graph (converted to networkit first) or a networkit
            graph.
        algo_name: Label for the generator that produced ``g``; returned
            unchanged.

    Returns:
        A tuple ``(g_connect, algo_name, n, m)`` where ``g_connect`` is the
        largest connected component and ``n`` / ``m`` are its node and edge
        counts.
    """
    # isinstance() also matches subclasses of nx.Graph, which an exact
    # type() comparison would skip and pass on unconverted.
    if isinstance(g, nx.Graph):
        g = nk.nxadapter.nx2nk(g)
    cc = nk.components.ConnectedComponents(g)
    # The second argument (True) is networkit's `compactGraph` flag.
    g_connect = cc.extractLargestConnectedComponent(g, True)
    n = g_connect.numberOfNodes()
    m = g_connect.numberOfEdges()
    return (g_connect, algo_name, n, m)

## Save
We save the file into two different formats. 
1. GML format. This is for easy visualization. Notable examples of dedicated and fully-featured graph visualization tools are Cytoscape, Gephi, Graphviz and, for LaTeX typesetting, PGF/TikZ.
2. NetworkitBinaryGraph format. This file is binary. It is not only much faster than existing formats, it is also compressed.

In [6]:
# Output switches: set a flag to False to skip writing that format.
binary = True     # write the *_BIN file
graphml = False   # write the *_GML file

In [8]:
# Save the largest connected component (`g_connect`), not the raw generated
# graph `g`: the `n` and `m` embedded in the file names are the component's
# node and edge counts, so the written graph has to be the component itself.
algo_base = base_dir.joinpath(algo_name)
algo_base.mkdir(parents=True, exist_ok=True)
if graphml:
    path = str(algo_base.joinpath('{}_{}_{}_GML'.format(algo_name, n, m)))
    nk.writeGraph(g_connect, path, nk.Format.GML)
if binary:
    path = str(algo_base.joinpath('{}_{}_{}_BIN'.format(algo_name, n, m)))
    nk.writeGraph(g_connect, path, nk.Format.GraphToolBinary)

: 

In [4]:
def save_graph(g_info, gml, bin):
    """Write a graph under ``base_dir/<algo_name>/`` in the requested formats.

    Args:
        g_info: Tuple ``(graph, algo_name, n, m)``; ``n`` and ``m`` are
            embedded in the output file names.
        gml: If truthy, write ``<algo_name>_<n>_<m>_GML`` as ``nk.Format.GML``.
        bin: If truthy, write ``<algo_name>_<n>_<m>_BIN`` as
            ``nk.Format.GraphToolBinary``.
    """
    graph, algo_name, n, m = g_info
    out_dir = base_dir / algo_name
    out_dir.mkdir(parents=True, exist_ok=True)

    # (enabled?, file-name template, networkit format), in write order.
    targets = (
        (gml, '{}_{}_{}_GML', nk.Format.GML),
        (bin, '{}_{}_{}_BIN', nk.Format.GraphToolBinary),
    )
    for enabled, template, fmt in targets:
        if not enabled:
            continue
        file_name = template.format(algo_name, n, m)
        nk.writeGraph(graph, str(out_dir / file_name), fmt)

## Batch

In [16]:
import itertools

# Benchmark sweep: for every node count, generate Erdos-Renyi graphs at edge
# probabilities 1/n, 10/n and 50/n, keep the largest connected component of
# each, and save it in binary form only.
algo = 'ErdosRenyi_Bench'
node_counts = range(5_000, 500_000, 100_000)
density_factors = (1, 10, 50)
for n, factor in itertools.product(node_counts, density_factors):
    base_p = 1 / n
    p = base_p * factor
    g = nk.generators.ErdosRenyiGenerator(n, p).generate()
    save_graph(gen_graph(g, algo), False, True)

: 

## Analysis
You can also run analyses on these graphs.