In [1]:
import networkx as nx
import numpy as np
import os
import json

In [2]:
# algorithm inspired by https://github.com/jeroenvldj/bow-tie_detection/blob/master/bow-tie_detection.py

def _reachable(neighbors, sources):
    """Return every node reachable from any node in ``sources``.

    ``neighbors`` is a callable mapping a node to an iterable of adjacent
    nodes (e.g. ``G.successors`` to walk forwards, ``G.predecessors`` to walk
    backwards).  The source nodes themselves are part of the result.
    """
    seen = set(sources)
    stack = list(seen)
    while stack:
        for nxt in neighbors(stack.pop()):
            if nxt not in seen:
                seen.add(nxt)
                stack.append(nxt)
    return seen


def bowtie_analysis(G):
    """Split a directed graph into bow-tie components and report their sizes.

    The core is the largest strongly connected component of ``G``.  Every
    other node is assigned to exactly one of the following groups:

    * ``in``           - can reach the core,
    * ``out``          - can be reached from the core,
    * ``tubes``        - reachable from IN *and* able to reach OUT,
    * ``tendrils``     - reachable from IN *or* able to reach OUT, not both,
    * ``fringe``       - none of the above, but can reach an in-tendril or
      can be reached from an out-tendril,
    * ``disconnected`` - everything that is left.

    Parameters
    ----------
    G : directed networkx graph
        Must provide ``successors`` and ``predecessors`` (``DiGraph`` /
        ``MultiDiGraph``).

    Returns
    -------
    dict
        Maps ``"ssc"``, ``"in"``, ``"out"``, ``"tubes"``, ``"tendrils"``,
        ``"fringe"`` and ``"disconnected"`` to the fraction of nodes in that
        group, rounded to four decimals.  For a graph without nodes every
        fraction is ``0.0``.
    """
    keys = ("ssc", "in", "out", "tubes", "tendrils", "fringe", "disconnected")
    total = len(G)
    if total == 0:
        # No core exists and the fractions below would divide by zero.
        return dict.fromkeys(keys, 0.0)

    # Largest strongly connected component = the core of the bow tie.
    SSC = max(nx.strongly_connected_components(G), key=len)

    # Any core node works as a seed: all of them reach the same nodes.
    seed = next(iter(SSC))
    OUT = _reachable(G.successors, [seed]) - SSC
    IN = _reachable(G.predecessors, [seed]) - SSC
    V_rest = set(G.nodes()) - SSC - OUT - IN

    # One multi-source traversal per question replaces a separate DFS for
    # every leftover node; walking predecessors avoids copying the graph.
    fed_by_in = _reachable(G.successors, IN)      # nodes some IN node reaches
    feeds_out = _reachable(G.predecessors, OUT)   # nodes that reach some OUT node

    TUBES = V_rest & fed_by_in & feeds_out
    INTENDRILS = (V_rest & fed_by_in) - feeds_out
    OUTTENDRILS = (V_rest & feeds_out) - fed_by_in
    OTHER = V_rest - fed_by_in - feeds_out

    # A leftover node is fringe when it reaches an in-tendril or is reached
    # from an out-tendril.
    reaches_intendril = _reachable(G.predecessors, INTENDRILS)
    fed_by_outtendril = _reachable(G.successors, OUTTENDRILS)
    FRINGE = OTHER & (reaches_intendril | fed_by_outtendril)
    DISCONNECTED = OTHER - FRINGE

    TENDRILS = INTENDRILS | OUTTENDRILS

    groups = (SSC, IN, OUT, TUBES, TENDRILS, FRINGE, DISCONNECTED)
    return {key: round(len(group) / total, 4) for key, group in zip(keys, groups)}

In [3]:
def files_walker(directory):
    """Run ``bowtie_analysis`` on every file directly inside ``directory``.

    Parameters
    ----------
    directory : str
        Folder whose regular files are each parsed with ``nx.read_graphml``.
        Sub-directories (such as Jupyter's ``.ipynb_checkpoints``) are
        skipped instead of aborting the walk.

    Returns
    -------
    dict
        Maps the first 10 characters of each file name (presumably a date
        prefix - confirm against the data set) to that file's bow-tie result.
        Files are visited in sorted name order, so the key order is
        reproducible; if two names share a 10-character prefix the later one
        overwrites the earlier.
    """
    d = {}
    # os.listdir() gives no ordering guarantee; sort for deterministic output.
    for name in sorted(os.listdir(directory)):
        path = os.path.join(directory, name)
        if not os.path.isfile(path):
            continue
        # errors='ignore' drops undecodable bytes rather than failing the parse.
        with open(path, 'r', encoding='utf8', errors='ignore') as f:
            G = nx.read_graphml(f)
        # The graph is fully loaded, so analyse it after the file is closed.
        d[name[:10]] = bowtie_analysis(G)
    return d

In [4]:
# Analyse every weekly graph in the data folder; the result maps each file's
# 10-character name prefix to its bow-tie fractions.
result_dict = files_walker("data/NET-btc-heur_0-week")

# Persist the complete mapping as a single JSON document.
with open("result.json", "w") as outfile:
    outfile.write(json.dumps(result_dict))