In [1]:
import json

data_path = 'data1'

graphs = json.load(open('%s/graphs.json' % data_path, 'r'))

In [6]:
import networkx as nx

def get_cycles(edges):
    g = nx.DiGraph(edges)
    return [c for c in nx.simple_cycles(g)]

def get_paths(edges):
    g = nx.DiGraph(edges)
    rs = (v for v, d in g.in_degree() if d == 0)
    ls = [v for v, d in g.out_degree() if d == 0]
    paths = []
    for r in rs:
        p = nx.all_simple_paths(g, r, ls)
        paths.extend(p)
    return paths

def get_edge_set(paths):
    e = set()
    for p in paths:
        for i in range(len(p)):
            for j in range(i + 1, len(p)):
                e.add((p[i], p[j]))
    return e

def get_closure(e, edges):
    closure = set()
    for n in edges:
        if n[0] == e[1]:
            closure.add(n)
            closure.update(get_closure(n, edges))

    return closure

def verify_graph(nodes, edges):
    n_list = list(set([e[0] for e in edges]) | set([e[1] for e in edges]))
    return [n for n in n_list if n not in nodes]

comps_all = set()
comps_non = set()

for i in graphs.keys():
    comps_a = set()
    comps_n = set()
    for j in range(len(graphs[i])):
        nodes, edges, implied = graphs[i][j]
        edges = [(e[0], e[1]) for e in edges]
        
        # validate graph
        missing = verify_graph(nodes, edges)
        if len(missing) > 0:
            print('skipping in %s, missing labels: %s' % (i, missing))
            continue
        cycles = get_cycles(edges)
        if len(cycles) > 0:
            print('skipping in %s, cycles: %s' % (i, cycles))
            continue

        # compute transitive closure
        closure = set()
        if False:
            paths = get_paths(edges)
            closure = get_edge_set(paths)
            for e in closure:
                comps_a.add((nodes[e[0]], nodes[e[1]]))
    
            if len(closure) == 0:
                print('skipping in %s, closure == 0' % i)
                continue
        else:
            comps_a.update([(nodes[e[0]], nodes[e[1]]) for e in edges])

        # compute all edges for complete graph
        c = set()
        nodes_list = list(nodes.keys())
        edges_all = set()
        for m in range(len(nodes_list)):
            for n in range(m + 1, len(nodes_list)):
                edges_all.add((nodes_list[m], nodes_list[n]))
                edges_all.add((nodes_list[n], nodes_list[m]))

        inverse = edges_all - comps_a
        for e in inverse:
            comps_n.add((nodes[e[0]], nodes[e[1]]))

        #print('Found %i declared and %i undeclared edges' % (len(comps_a), len(comps_n)))
        comps_all.update(comps_a)
        comps_non.update(comps_n)

print('Found %i declared and %i undeclared edges' % (len(comps_all), len(comps_non)))

skipping in 5, cycles: [['g7', 'g5']]
skipping in 33, cycles: [['g10', 'g27'], ['g10', 'g3', 'g27']]
skipping in 33, cycles: [['g15', 'g14']]
skipping in 13, missing labels: ['g4']
skipping in 13, cycles: [['g19', 'g20'], ['g19', 'g21']]
skipping in 13, cycles: [['g18', 'g11']]
skipping in 13, cycles: [['g7', 'g21']]
skipping in 14, cycles: [['g18', 'ig3'], ['g10', 'ig2']]
skipping in 14, cycles: [['g11']]
skipping in 30, cycles: [['g2', 'g11']]
skipping in 21, cycles: [['g13', 'g8'], ['g9', 'g5']]
skipping in 27, cycles: [['g3', 'g16']]
Found 1475 declared and 15310 undeclared edges


In [7]:
syms_all = set()
for c in comps_all:
    d = (c[1], c[0])
    if d in comps_all:
        syms_all.add(c)
print('Found %i symmetric refinements' % len(syms_all))

Found 50 symmetric refinements


In [8]:
import random, csv

sample_size = 250
sample_all = random.sample(list(comps_all), sample_size)
sample_non = random.sample(list(comps_non), sample_size)

sym_sample_size = int(2 * (min(50, len(syms_all)) / 2))
sym_split = int(sym_sample_size / 2)
sample_sym = random.sample(list(syms_all), sym_sample_size)

rows = []
for p in sample_all:
    rows.append(['POSITIVE', p[0], p[1]])
for p in sample_sym[:sym_split]:
    rows.append(['SYMMETRIC;', p[0], p[1]])
for p in sample_non:
    rows.append(['NEGATIVE', p[0], p[1]])  
for p in sample_sym[sym_split:]:
    rows.append(['SYMMETRIC', p[1], p[0]])

random.shuffle(rows)
with open('comps_unlabeled.csv', 'w+') as f:
    writer = csv.writer(f)
    writer.writerows(rows)

rows = []
for p in sample_all:
    rows.append([p[0], p[1]])
for p in sample_sym[:sym_split]:
    rows.append([p[0], p[1]])
for p in sample_non:
    rows.append([p[0], p[1]])  
for p in sample_sym[sym_split:]:
    rows.append([p[1], p[0]])
random.shuffle(rows)
with open('comps.csv', 'w+') as f:
    writer = csv.writer(f)
    writer.writerows(rows)