Example of a function for reading .CLU files (PAJEK partitions) within networkx.

Using the style of networkx's 'read_pajek' at https://github.com/networkx/networkx/blob/master/networkx/readwrite/pajek.py

@Author: github.com/joaquincabezas

In [None]:
import networkx as nx
from networkx.utils import open_file
from collections import defaultdict

In [None]:
@open_file(0, mode='rb')
def read_pajek_clu(path, encoding='UTF-8'):
    """Read a Pajek partition (.clu file) from ``path``.

    The file is read as bytes, every line is decoded with ``encoding``
    and the decoded lines are handed to ``parse_pajek_clu``.

    Parameters
    ----------
    path : file or string
       File or filename to read.
       Filenames ending in .gz or .bz2 will be uncompressed.
    encoding : string, optional (default='UTF-8')
       Text encoding used to decode each line of the file.

    Returns
    -------
    communities : list
       List of communities, exactly as returned by ``parse_pajek_clu``.

    Examples
    --------
    >>> communities = read_pajek_clu("simple.clu")  # doctest: +SKIP

    See Also
    --------
    parse_pajek_clu()

    References
    ----------
    See http://vlado.fmf.uni-lj.si/pub/networks/pajek/doc/draweps.htm
    for format information.
    """
    def _decoded():
        # The handle is opened in binary mode, so each raw line is bytes.
        for raw in path:
            yield raw.decode(encoding)

    return parse_pajek_clu(_decoded())

In [None]:
def parse_pajek_clu(lines):
    """Parse Pajek format partition from string or iterable.

    The data is expected to start with a ``*Vertices N`` header followed by
    ``N`` lines, each holding the integer community label of one vertex.
    Vertices are identified by their zero-based position after the header.

    Parameters
    ----------
    lines : string or iterable
       Data in Pajek partition format.

    Returns
    -------
    communities : list
       List of communities. Each community is a list of vertex positions,
       and communities are ordered by the first appearance of their label.
       An empty list is returned when the data contains no ``*Vertices``
       section (for instance, empty input).

    Raises
    ------
    ValueError
       If the ``*Vertices`` header carries no vertex count, if a label is
       not an integer, or if the data ends before all announced vertices
       have been read.

    See Also
    --------
    read_pajek_clu()
    """
    if isinstance(lines, str):
        lines = lines.split('\n')
    # A single shared iterator: the outer loop reads headers while the inner
    # loop pulls the vertex lines belonging to that header.
    lines = (line.rstrip('\n') for line in lines)

    # Bound up front so input without a "*Vertices" section yields [].
    communities = {}
    for header in lines:
        if not header.lower().startswith("*vertices"):
            # Anything else (e.g. a trailing blank line) ends the parse.
            break
        fields = header.split()
        if len(fields) < 2:
            raise ValueError(
                "Pajek partition header is missing the vertex count: %r"
                % header
            )
        nnodes = int(fields[1])
        # A new section replaces whatever an earlier section produced.
        communities = {}
        for vertex in range(nnodes):
            try:
                label = next(lines)
            except StopIteration:
                raise ValueError(
                    "Pajek partition announces %d vertices but only %d "
                    "labels were found" % (nnodes, vertex)
                ) from None
            communities.setdefault(int(label), []).append(vertex)

    return list(communities.values())

In [None]:
# Example: read the partition stored in 'simple.clu' into a list of
# communities. NOTE(review): presumably 'simple.clu' sits in the current
# working directory -- confirm before running.
communities = read_pajek_clu('simple.clu', encoding='UTF-8')

In [None]:
# Bare expression: in a notebook cell this displays the parsed communities.
communities