In [1]:
import os

import networkx as nx
import numpy as np
from neo4j import GraphDatabase

In [2]:
# Connection settings are read from the environment so real credentials never
# have to be committed with the notebook. The fallbacks are the local
# development values this notebook has always used.
driver = GraphDatabase.driver(
    os.environ.get("NEO4J_URI", "bolt://localhost:7687"),
    auth=(
        os.environ.get("NEO4J_USER", "neo4j"),
        os.environ.get("NEO4J_PASSWORD", "test"),
    ),
)

In [3]:
# Cypher queries used throughout the notebook.

# Every relationship together with both of its endpoints.
query_all = "MATCH (n)-[r]->(c) RETURN *"
# Entity nodes and their outgoing relationships. This used to be assigned to
# `query_intermediaries` and was silently overwritten a couple of lines later.
query_entities = "MATCH (n: Entity)-[r]->(c) RETURN n,r"
# Nodes that take part in no relationship at all.
query_singleton = "MATCH (n) WHERE NOT (n)--() RETURN n;"
# Paths made of a single INTERMEDIARY_OF relationship.
query_intermediaries = "MATCH p=()-[r:INTERMEDIARY_OF]->() RETURN p"
# Paths made of a single officer-type relationship.
query_officers = "MATCH p=()-[r:OFFICER_OF|DIRECTOR_OF|BENEFICIARY_OF|SHAREHOLDER_OF]->() RETURN p"
# The query actually executed below: a 25-row sample of `query_all`.
query = "MATCH (n)-[r]->(c) RETURN * LIMIT 25"

In [4]:
# Keep a handle on the session instead of discarding it, so it can be released
# explicitly with `session.close()`. Do that only after the graph has been built
# from `results`: the result is read lazily through this session.
session = driver.session(database="panama")
results = session.run(query)
results

<neo4j.work.result.Result at 0x1cebad9b4c8>

In [5]:
# Build an undirected NetworkX graph from the query result, using only the
# driver's public graph API (no underscore-prefixed attributes).
G = nx.Graph()

# Materialise the result's graph view once and reuse it for nodes and relationships.
result_graph = results.graph()

nodes = list(result_graph.nodes)
for node in nodes:
    G.add_node(node.id, labels=node.labels, properties=dict(node.items()))

rels = list(result_graph.relationships)
for rel in rels:
    # NOTE: on a plain nx.Graph, `key` is stored as an ordinary edge attribute, and
    # several relationships between the same pair of nodes collapse into a single
    # edge whose attributes come from the last one added.
    G.add_edge(
        rel.start_node.id,
        rel.end_node.id,
        key=rel.id,
        type=rel.type,
        properties=dict(rel.items()),
    )

In [6]:
# Peek at a few nodes only; dumping the full list floods the notebook output.
nodes[:5]

[<Node id=320669 labels=frozenset({'Address'}) properties={'sourceID': 'Panama Papers', 'note': '', 'address': 'LOMBARD ODIER DARIER HENTSCH & CIE ATTN MR. LAURENT PICTET RUE DE LA CORRATERIE 11 1204 GENEVE SWITZERLAND', 'valid_until': 'The Panama Papers  data is current through 2015', 'name': '', 'country_codes': 'CHE', 'countries': 'Switzerland', 'node_id': '14100320'}>,
 <Node id=82 labels=frozenset({'Entity'}) properties={'sourceID': 'Panama Papers', 'company_type': '', 'note': '', 'closed_date': '', 'jurisdiction': 'SAM', 'struck_off_date': '', 'service_provider': 'Mossack Fonseca', 'countries': 'Switzerland', 'jurisdiction_description': 'Samoa', 'valid_until': 'The Panama Papers data is current through 2015', 'ibcRUC': 'R28373', 'name': 'JAIPUR HOLDING LIMITED', 'country_codes': 'CHE', 'inactivation_date': '', 'incorporation_date': '12-OCT-2006', 'node_id': '10001087', 'status': 'Active'}>,
 <Node id=320512 labels=frozenset({'Address'}) properties={'sourceID': 'Panama Papers', 'n

In [29]:
# Is the whole graph a single connected component? The information-centrality and
# current-flow-closeness cells further down iterate over the components in `S`
# instead of calling NetworkX on `G` directly.
nx.is_connected(G)

False

In [14]:
# Report the size of the graph: node count first, then edge count.
node_total = len(G)
edge_total = G.number_of_edges()
print("Number of nodes: ", node_total)
print("Number of edges: ", edge_total)

Number of nodes:  43
Number of edges:  25


In [15]:
# Split the graph into its connected components, each stored as an
# independent copy of the corresponding subgraph.
S = []
for component_nodes in nx.connected_components(G):
    S.append(G.subgraph(component_nodes).copy())

In [30]:
def get_stats(stats: dict) -> dict:
    """Summarise the values of a per-node score mapping.

    Args:
        stats: Mapping whose values are numeric scores (for example the
            dictionaries returned by the NetworkX centrality functions used
            in this notebook). The keys are ignored.

    Returns:
        A dict with the keys "min", "max", "mean" and "std". The standard
        deviation is NumPy's default (population, ddof=0).

    Raises:
        ValueError: If ``stats`` contains no values.
    """
    array = list(stats.values())
    if not array:
        # Fail with an explicit message instead of the cryptic
        # "min() arg is an empty sequence" that an empty graph would trigger.
        raise ValueError("get_stats() needs at least one value; got an empty mapping")
    return {
        "min": min(array),
        "max": max(array),
        "mean": np.mean(array),
        "std": np.std(array),
    }

In [31]:
## Degree centrality
# Per-node scores first, then the summary statistics over them.
degree_by_node = nx.degree_centrality(G)
stats_degree_centrality = get_stats(degree_by_node)
print(stats_degree_centrality)

{'min': 0.023809523809523808, 'max': 0.09523809523809523, 'mean': 0.027685492801771867, 'std': 0.01351778030535294}


In [32]:
## Information centrality
# Computed one connected component at a time (the components in `S`), then
# merged into a single node -> score mapping before summarising.
stats_information_centrality_dict = {
    node: score
    for component in S
    for node, score in nx.information_centrality(component).items()
}
stats_information_centrality = get_stats(stats_information_centrality_dict)
print(stats_information_centrality)

{'min': 0.14285714285714285, 'max': 1.0, 'mean': 0.7462347729789589, 'std': 0.353534832443594}


In [33]:
## Closeness centrality
# Per-node scores first, then the summary statistics over them.
closeness_by_node = nx.closeness_centrality(G)
stats_closeness_centrality = get_stats(closeness_by_node)
print(stats_closeness_centrality)

{'min': 0.023809523809523808, 'max': 0.09523809523809523, 'mean': 0.03260032695248641, 'std': 0.015453805178090039}


In [34]:
## Betweenness centrality
# Approximated from a sample of pivot nodes. The sample size is capped at the
# number of nodes (NetworkX cannot sample more pivots than the graph has), and
# the seed is fixed so a re-run draws the same pivots and gives the same result.
betweenness_pivots = min(25, G.number_of_nodes())
stats_betweenness_centrality = get_stats(
    nx.betweenness_centrality(G, k=betweenness_pivots, seed=42)
)
print(stats_betweenness_centrality)

{'min': 0.0, 'max': 0.005993031358885017, 'mean': 0.0003252032520325203, 'std': 0.0012874709835587474}


In [35]:
## Current-flow closeness centrality
# Computed per connected component (the components in `S`) and merged into one
# node -> score mapping.
# NOTE(review): the printed summary is identical to the information-centrality
# cell; NetworkX appears to expose both names for the same measure - confirm
# and consider dropping one of the two cells.
stats_current_flow_closeness_centrality_dict = {
    node: score
    for component in S
    for node, score in nx.current_flow_closeness_centrality(component).items()
}
stats_current_flow_closeness_centrality = get_stats(stats_current_flow_closeness_centrality_dict)
print(stats_current_flow_closeness_centrality)

{'min': 0.14285714285714285, 'max': 1.0, 'mean': 0.7462347729789589, 'std': 0.353534832443594}
