In [1]:
import collections
import os
import random
from collections import Counter

import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
import pylab as p
from neo4j import GraphDatabase
from scipy.optimize import curve_fit
from tabulate import tabulate

In [2]:
# Connection settings can be overridden through environment variables so that
# credentials do not have to be edited into the notebook. The fallbacks are the
# values this cell used before.
NEO4J_URI = os.environ.get("NEO4J_URI", 'bolt://localhost:7687')
NEO4J_USER = os.environ.get("NEO4J_USER", "neo4j")
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD", "test")
NEO4J_DATABASE = os.environ.get("NEO4J_DATABASE", "panama")

driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))
# Every relationship (in either direction) whose two endpoints are both non-Address nodes.
query="MATCH (n)-[r]-(m)WHERE NOT m:Address And NOT n:Address RETURN *"
# NOTE(review): the session is never closed. `results` is still read by the
# following cell through results.graph(), so it presumably cannot be closed here.
results = driver.session(database=NEO4J_DATABASE).run(query)

In [3]:
# Build an undirected NetworkX graph from the Neo4j query result.
G = nx.Graph()

# Fetch the result graph once and read it through its public views instead of
# calling results.graph() twice and reaching into private attributes.
neo4j_graph = results.graph()

nodes = list(neo4j_graph.nodes)
for node in nodes:
    # `kind` is the first label returned for the node; any further labels are ignored.
    G.add_node(node.id, kind=list(node.labels)[0], properties=dict(node.items()))

rels = list(neo4j_graph.relationships)
for rel in rels:
    # G is a plain nx.Graph, so `key` is stored as an ordinary edge attribute and
    # a second relationship between the same two nodes overwrites the first.
    G.add_edge(rel.start_node.id, rel.end_node.id, key=rel.id, type=rel.type, properties=dict(rel.items()))
len(G.nodes)

465786

In [4]:
# Connected components of G, each one materialised as an independent copy.
S = []
for component_nodes in nx.connected_components(G):
    component_graph = G.subgraph(component_nodes).copy()
    S.append(component_graph)
print('The total number of components is: ',len(S))

The total number of components is:  12219


In [5]:
## Group node ids by kind. Output: map[kind] = [node ids]
labels = {}
for node_id, node_attrs in G.nodes(data=True):
    labels.setdefault(node_attrs['kind'], []).append(node_id)

# One report line per kind: absolute count and share of all nodes in G.
node_total = len(G.nodes)
report_rows = (
    ('Entity', 'Number of Entity nodes:'),
    ('Officer', 'Number of Officer nodes:'),
    ('Intermediary', 'Number of Intermediary nodes:'),
)
for kind_name, heading in report_rows:
    kind_count = len(labels[kind_name])
    print(heading, kind_count, ' ====> Proportion(%):', round(kind_count/node_total*100,2))

Number of Entity nodes: 213634  ====> Proportion(%): 45.87
Number of Officer nodes: 238078  ====> Proportion(%): 51.11
Number of Intermediary nodes: 14074  ====> Proportion(%): 3.02


In [6]:
def get_ids_intermediaries(G,tupla,id_intermediary):
    """Collect alternative intermediaries reachable from ``tupla[1]`` through officers.

    Starting at the node ``tupla[1]``, the walk goes to each neighbour whose
    ``kind`` is ``'Officer'``, then to each neighbour of that officer, and for
    each of those nodes looks at the first neighbour whose ``kind`` is
    ``'Intermediary'``.

    Parameters
    ----------
    G : graph exposing ``G.edges(node)`` (2-tuples ``(node, neighbour)``) and
        ``G.nodes[node]['kind']``.
    tupla : edge tuple; only its second element is used, as the start node.
    id_intermediary : id of the intermediary to exclude from the result.

    Returns
    -------
    list
        Distinct intermediary ids, in first-seen order, never containing
        ``id_intermediary``.
    """
    ids_intermediaries = []
    # Mirror of ids_intermediaries for O(1) membership tests; the list keeps the order.
    seen_ids = set()
    for start_edge in G.edges(tupla[1]):
        id_officer = start_edge[1]
        # Only officers attached to the start node are followed.
        if G.nodes[id_officer]['kind'] != 'Officer':
            continue
        # Every neighbour of that officer ...
        for officer_edge in G.edges(id_officer):
            # ... and every neighbour of that neighbour.
            for candidate_edge in G.edges(officer_edge[1]):
                candidate = candidate_edge[1]
                if G.nodes[candidate]['kind'] != 'Intermediary':
                    continue
                # Keep it unless it is the excluded intermediary or already collected.
                if candidate != id_intermediary and candidate not in seen_ids:
                    seen_ids.add(candidate)
                    ids_intermediaries.append(candidate)
                # Only the first intermediary neighbour is considered, kept or not.
                break
    return ids_intermediaries

def recompute_edges(G,id_intermediary):
    """Rewire the neighbours of ``id_intermediary`` to the best-connected alternative.

    For every edge of ``id_intermediary``, the candidate replacements are looked
    up with ``get_ids_intermediaries``. When there is at least one, an
    ``'INTERMEDIARY_OF'`` edge is added between the neighbour and the candidate
    with the highest degree (the first one wins on ties). The node
    ``id_intermediary`` itself is not removed here.

    Parameters
    ----------
    G : networkx graph, modified in place.
    id_intermediary : id of the intermediary whose neighbours are rewired.

    Side effects
    ------------
    Prints the number of edges of ``id_intermediary`` that were visited.
    """
    vueltas = 0
    # Snapshot the edges first: new edges are added to G inside the loop.
    for i in list(G.edges(id_intermediary)):
        neighbor_id = i[1]
        new_id_intermediaries = get_ids_intermediaries(G,i,id_intermediary)
        if new_id_intermediaries:
            # G.degree(node) gives the degree of one node, so no temporary dicts are needed.
            max_degree_id = max(new_id_intermediaries, key=G.degree)
            # Carry over the `key` attribute of the old edge, not its whole attribute dict.
            old_key = G[id_intermediary][neighbor_id].get('key')
            G.add_edge(max_degree_id, neighbor_id, key=old_key, type='INTERMEDIARY_OF')
        vueltas = vueltas+1
    print(vueltas)

In [None]:
## Percolation for GCC
# Targeted removal: at every step the intermediary with the highest degree is
# rewired with recompute_edges and then removed, until the largest component
# holds at most 5% of the original nodes.
total_graph = G.number_of_nodes()
Percolation_graph = G.copy()

# Step 0 is measured on the intact graph instead of being assumed to be one
# single component covering every node.
G_sorted = sorted(nx.connected_components(Percolation_graph), key=len, reverse=True)
current_length_gcc = len(G_sorted[0]) if G_sorted else 0
current_length_slcc = len(G_sorted[1]) if len(G_sorted) > 1 else 0
scatter_gcc = [[0,current_length_gcc/total_graph]]
scatter_slcc = [[0,current_length_slcc/total_graph]]
scatter_cc = [[0,len(G_sorted)]]
i=1
while current_length_gcc/total_graph > 0.05:
    # Degrees of the intermediaries still present (removed ids are skipped by networkx).
    intermediary_degrees = dict(Percolation_graph.degree(labels['Intermediary']))
    if not intermediary_degrees:
        # No intermediary left to remove: stop instead of failing in max().
        break
    max_degree_id = max(intermediary_degrees, key=intermediary_degrees.get)
    recompute_edges(Percolation_graph,max_degree_id)
    Percolation_graph.remove_node(max_degree_id)
    G_sorted = sorted(nx.connected_components(Percolation_graph), key=len, reverse=True)
    scatter_cc.append([i,len(G_sorted)])
    # Fall back to an empty node set when fewer than two components remain.
    gcc_nodes = G_sorted[0] if G_sorted else set()
    slcc_nodes = G_sorted[1] if len(G_sorted) > 1 else set()
    Gcc = Percolation_graph.subgraph(gcc_nodes)
    Slcc = Percolation_graph.subgraph(slcc_nodes)
    current_length_gcc = len(gcc_nodes)
    current_length_slcc = len(slcc_nodes)
    # NOTE(review): the values printed below are node counts, not component counts.
    print('The total number of components in Gcc is: ',current_length_gcc)
    print('The total number of components in Slcc is: ',current_length_slcc)
    print('Proportion gcc',current_length_gcc/total_graph)
    print('Proportion slcc',current_length_slcc/total_graph)
    scatter_gcc.append([i,current_length_gcc/total_graph])
    scatter_slcc.append([i,current_length_slcc/total_graph])
    i=i+1
    print(i)

7016
The total number of components in Gcc is:  299011
The total number of components in Slcc is:  3639
Proportion gcc 0.6419493071925734
Proportion slcc 0.007812600636343729
2
6311
The total number of components in Gcc is:  295504
The total number of components in Slcc is:  3639
Proportion gcc 0.6344200985001696
Proportion slcc 0.007812600636343729
3
6434
The total number of components in Gcc is:  284376
The total number of components in Slcc is:  3639
Proportion gcc 0.6105292988625678
Proportion slcc 0.007812600636343729
4
6267
The total number of components in Gcc is:  276893
The total number of components in Slcc is:  3639
Proportion gcc 0.594463981313307
Proportion slcc 0.007812600636343729
5
6419
The total number of components in Gcc is:  268332
The total number of components in Slcc is:  3639
Proportion gcc 0.5760842962218702
Proportion slcc 0.007812600636343729
6
4765
The total number of components in Gcc is:  263912
The total number of components in Slcc is:  3639
Proportion g

The total number of components in Gcc is:  205443
The total number of components in Slcc is:  3639
Proportion gcc 0.44106735711249373
Proportion slcc 0.007812600636343729
48
3473
The total number of components in Gcc is:  203795
The total number of components in Slcc is:  3639
Proportion gcc 0.43752925163057715
Proportion slcc 0.007812600636343729
49
3048
The total number of components in Gcc is:  202619
The total number of components in Slcc is:  3639
Proportion gcc 0.4350044870391124
Proportion slcc 0.007812600636343729
50
3414
The total number of components in Gcc is:  201965
The total number of components in Slcc is:  3639
Proportion gcc 0.43360040877141004
Proportion slcc 0.007812600636343729
51
3181
The total number of components in Gcc is:  199855
The total number of components in Slcc is:  3639
Proportion gcc 0.42907043148570373
Proportion slcc 0.007812600636343729
52
3256
The total number of components in Gcc is:  198603
The total number of components in Slcc is:  3639
Proport

3188
The total number of components in Gcc is:  168870
The total number of components in Slcc is:  3639
Proportion gcc 0.3625484664631397
Proportion slcc 0.007812600636343729
94
3568
The total number of components in Gcc is:  168438
The total number of components in Slcc is:  3639
Proportion gcc 0.3616210019193363
Proportion slcc 0.007812600636343729
95
3259
The total number of components in Gcc is:  167863
The total number of components in Slcc is:  3639
Proportion gcc 0.3603865294362647
Proportion slcc 0.007812600636343729
96
3306
The total number of components in Gcc is:  167708
The total number of components in Slcc is:  3639
Proportion gcc 0.3600537585930019
Proportion slcc 0.007812600636343729
97
3184
The total number of components in Gcc is:  167071
The total number of components in Slcc is:  3639
Proportion gcc 0.3586861777726252
Proportion slcc 0.007812600636343729
98
3264
The total number of components in Gcc is:  166975
The total number of components in Slcc is:  3639
Propor

In [None]:
# Disabled export: the whole cell is one bare string literal, so nothing is
# written to disk. The text inside would dump scatter_gcc, scatter_slcc and
# scatter_cc to three text files, one list item per line.
'''
with open('gcc_max_defensaae.txt', 'w') as f:
    for item in scatter_gcc:
        f.write("%s\n" % item)
with open('slcc_max_daaaefense.txt', 'w') as f:
    for item in scatter_slcc:
        f.write("%s\n" % item)
with open('cc_max_defaaaaense.txt', 'w') as f:
    for item in scatter_cc:
        f.write("%s\n" % item)
'''

In [None]:
# Supervised percolation with autodefense plot
# GCC is drawn on the left axis and SLCC on a twin right axis; both are
# fractions of the original node count taken from scatter_gcc / scatter_slcc.
fig, ax1 = plt.subplots(figsize=(15,15),dpi=80)
# NOTE(review): "componenete" in the title looks like a typo for "componente".
ax1.set_title('Evolución componenete principal en percolación supervisada con autodefensa',fontsize=14,pad=20)
# Each scatter entry is [number of removed intermediaries, fraction].
x_val_gcc = [x[0] for x in scatter_gcc]
y_val_gcc = [x[1] for x in scatter_gcc]
x_val_sgcc = [x[0] for x in scatter_slcc]
y_val_sgcc = [x[1] for x in scatter_slcc]

ax2 = ax1.twinx()
lns1 = ax1.plot(x_val_gcc, y_val_gcc, 'g-',label="GCC")
# SLCC is plotted against its own x series rather than the GCC one.
lns2 = ax2.plot(x_val_sgcc, y_val_sgcc, 'b-',label="SLCC")
# Merge the line handles so a single legend covers both axes.
lns = lns1+lns2
ax1.set_xlabel('Intermediarios eliminados',fontsize=14)
ax1.set_ylim([0, 1.02])
ax2.set_ylim([0, 1.02])
ax1.set_ylabel('GCC',fontsize=14)
ax2.set_ylabel('SLCC',fontsize=14)
labs = [l.get_label() for l in lns]
ax1.legend(lns, labs, loc=0,fontsize=14)
plt.show()

In [None]:
def get_ids_intermediaries_random(G,tupla,id_intermediary):
    """Collect alternative intermediaries reachable from ``tupla[1]`` through officers.

    The traversal is the same as in ``get_ids_intermediaries``: from the node
    ``tupla[1]`` to each neighbour of kind ``'Officer'``, to each neighbour of
    that officer, and from there to the first neighbour of kind
    ``'Intermediary'``.

    Parameters
    ----------
    G : graph exposing ``G.edges(node)`` (2-tuples ``(node, neighbour)``) and
        ``G.nodes[node]['kind']``.
    tupla : edge tuple; only its second element is used, as the start node.
    id_intermediary : id of the intermediary to exclude from the result.

    Returns
    -------
    list
        Distinct intermediary ids, in first-seen order, never containing
        ``id_intermediary``.
    """
    ids_intermediaries = []
    # Set used only for O(1) duplicate checks; the list preserves discovery order.
    already_collected = set()
    for entity_neighbors in G.edges(tupla[1]):
        id_officer = entity_neighbors[1]
        # Skip neighbours of the start node that are not officers.
        if G.nodes[id_officer]['kind'] != 'Officer':
            continue
        # Walk the officer's neighbours, then each of their neighbours.
        for officer_neighbor_edge in G.edges(id_officer):
            for adjacency_edge in G.edges(officer_neighbor_edge[1]):
                found_id = adjacency_edge[1]
                if G.nodes[found_id]['kind'] != 'Intermediary':
                    continue
                # Keep it when it is neither the excluded intermediary nor a repeat.
                if found_id != id_intermediary and found_id not in already_collected:
                    already_collected.add(found_id)
                    ids_intermediaries.append(found_id)
                # Stop at the first intermediary neighbour, whether it was kept or not.
                break
    return ids_intermediaries

def recompute_edges_random(G,id_intermediary):
    """Rewire the neighbours of ``id_intermediary`` to a randomly chosen alternative.

    For every edge of ``id_intermediary``, the candidate replacements are looked
    up with ``get_ids_intermediaries_random``. When there is at least one, an
    ``'INTERMEDIARY_OF'`` edge is added between the neighbour and one candidate
    picked with ``random.choice``. The node ``id_intermediary`` itself is not
    removed here.

    Parameters
    ----------
    G : networkx graph, modified in place.
    id_intermediary : id of the intermediary whose neighbours are rewired.

    Side effects
    ------------
    Prints the number of edges of ``id_intermediary`` that were visited and
    advances the global ``random`` generator.
    """
    vueltas = 0
    # Snapshot the edges first: new edges are added to G inside the loop.
    for i in list(G.edges(id_intermediary)):
        neighbor_id = i[1]
        # Use the helper defined next to this function (same traversal as get_ids_intermediaries).
        new_id_intermediaries = get_ids_intermediaries_random(G,i,id_intermediary)
        if new_id_intermediaries:
            chosen_id = random.choice(new_id_intermediaries)
            # Carry over the `key` attribute of the old edge, not its whole attribute dict.
            old_key = G[id_intermediary][neighbor_id].get('key')
            G.add_edge(chosen_id, neighbor_id, key=old_key, type='INTERMEDIARY_OF')
        vueltas = vueltas+1
    print(vueltas)

In [None]:
## Random percolation for GCC
# Random removal: at every step one of the remaining intermediaries is picked
# at random, rewired with recompute_edges and removed, until the largest
# component holds at most 5% of the original nodes.
total_graph = G.number_of_nodes()
scatters_dict = {}
Percolation_graph = G.copy()

# Step 0 is measured on the intact graph instead of being assumed to be one
# single component covering every node.
G_sorted = sorted(nx.connected_components(Percolation_graph), key=len, reverse=True)
current_length_gcc = len(G_sorted[0]) if G_sorted else 0
current_length_slcc = len(G_sorted[1]) if len(G_sorted) > 1 else 0
scatter_random_gcc = [[0,current_length_gcc/total_graph]]
scatter_random_slcc = [[0,current_length_slcc/total_graph]]
scatter__random_cc = [[0,len(G_sorted)]]
j=1

# Seed once, before the loop. Re-seeding on every iteration resets the
# generator, so each step would draw the same first random number.
random.seed(32)
while current_length_gcc/total_graph > 0.05:
    # Intermediaries that are still in the graph, in their original order.
    remaining_intermediaries = [node_id for node_id in labels['Intermediary'] if node_id in Percolation_graph]
    if not remaining_intermediaries:
        # Nothing left to remove: stop instead of failing in random.choice().
        break
    random_id = random.choice(remaining_intermediaries)
    # NOTE(review): this uses recompute_edges (highest-degree replacement), not
    # recompute_edges_random — confirm which one is intended for this run.
    recompute_edges(Percolation_graph,random_id)
    Percolation_graph.remove_node(random_id)
    G_sorted = sorted(nx.connected_components(Percolation_graph), key=len, reverse=True)
    scatter__random_cc.append([j,len(G_sorted)])
    # Fall back to an empty node set when fewer than two components remain.
    gcc_nodes = G_sorted[0] if G_sorted else set()
    slcc_nodes = G_sorted[1] if len(G_sorted) > 1 else set()
    Gcc = Percolation_graph.subgraph(gcc_nodes)
    Slcc = Percolation_graph.subgraph(slcc_nodes)
    current_length_gcc = len(gcc_nodes)
    current_length_slcc = len(slcc_nodes)
    # NOTE(review): the values printed below are node counts, not component counts.
    print('The total number of components in Gcc is: ',current_length_gcc)
    print('The total number of components in Slcc is: ',current_length_slcc)
    print('Proportion gcc',current_length_gcc/total_graph)
    print('Proportion slcc',current_length_slcc/total_graph)
    scatter_random_gcc.append([j,current_length_gcc/total_graph])
    scatter_random_slcc.append([j,current_length_slcc/total_graph])
    j=j+1

In [None]:
# Random percolation with autodefense plot
# GCC is drawn on the left axis and SLCC on a twin right axis; both are
# fractions of the original node count taken from scatter_random_gcc /
# scatter_random_slcc.
fig, ax1 = plt.subplots(figsize=(15,15),dpi=80)
# NOTE(review): "componenete" in the title looks like a typo for "componente".
ax1.set_title('Evolución componenete principal en percolación aleatoria con autodefensa',fontsize=14,pad=20)
# Each scatter entry is [number of removed intermediaries, fraction].
x_val_gcc = [x[0] for x in scatter_random_gcc]
y_val_gcc = [x[1] for x in scatter_random_gcc]
x_val_sgcc = [x[0] for x in scatter_random_slcc]
y_val_sgcc = [x[1] for x in scatter_random_slcc]

ax2 = ax1.twinx()
lns1 = ax1.plot(x_val_gcc, y_val_gcc, 'g-',label="GCC")
# SLCC is plotted against its own x series rather than the GCC one.
lns2 = ax2.plot(x_val_sgcc, y_val_sgcc, 'b-',label="SLCC")
# Merge the line handles so a single legend covers both axes.
lns = lns1+lns2
ax1.set_xlabel('Intermediarios eliminados',fontsize=14)
ax1.set_ylim([0, 1.02])
ax2.set_ylim([0, 1.02])
ax1.set_ylabel('GCC',fontsize=14)
ax2.set_ylabel('SLCC',fontsize=14)
labs = [l.get_label() for l in lns]
ax1.legend(lns, labs, loc=0,fontsize=14)

plt.show()

In [None]:
# Percolations with autodefense plot
def _plot_gcc_slcc_panel(ax, gcc_points, slcc_points, panel_title):
    """Draw one percolation panel: GCC on ``ax`` and SLCC on a twin y-axis.

    Parameters
    ----------
    ax : matplotlib Axes that receives the GCC curve, labels, legend and title.
    gcc_points, slcc_points : sequences of ``[x, y]`` pairs, as accumulated in
        the scatter_* lists of this notebook.
    panel_title : title set on ``ax``.

    Returns
    -------
    The twin Axes that carries the SLCC curve.
    """
    twin_ax = ax.twinx()
    gcc_lines = ax.plot([pt[0] for pt in gcc_points], [pt[1] for pt in gcc_points], 'g-',label="GCC")
    # SLCC is plotted against its own x series rather than the GCC one.
    slcc_lines = twin_ax.plot([pt[0] for pt in slcc_points], [pt[1] for pt in slcc_points], 'b-',label="SLCC")
    # Merge the line handles so a single legend covers both axes.
    all_lines = gcc_lines+slcc_lines
    ax.set_xlabel('Intermediarios eliminados',fontsize=14)
    ax.set_ylim([0, 1.02])
    twin_ax.set_ylim([0, 1.02])
    ax.set_ylabel('GCC',fontsize=14)
    twin_ax.set_ylabel('SLCC',fontsize=14)
    ax.legend(all_lines, [line.get_label() for line in all_lines], loc=0,fontsize=14)
    ax.set_title(panel_title,fontsize=16,pad=20)
    return twin_ax


fig, axs = plt.subplots(2,1,figsize=(20,20),dpi=80)
# NOTE(review): the figure title says "aleatoria" although the two panels show
# the supervised and the random run — confirm the intended wording.
fig.suptitle('Evolución componentes principal en percolación aleatoria con autodefensa',fontsize=18)
fig.subplots_adjust(top=0.92)

# Top panel: targeted removal; bottom panel: random removal.
_plot_gcc_slcc_panel(axs[0], scatter_gcc, scatter_slcc, 'Supervisada')
_plot_gcc_slcc_panel(axs[1], scatter_random_gcc, scatter_random_slcc, 'Aleatoria')
plt.show()