In [5]:
import json, os, sys

import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
from tqdm import trange, tqdm

# from simulator import CovertGenerator, DarkGenerator
from utility import read_gml, getRobustness, HXA

# Dataset index -> GML file name of each empirical network.
# Index 6 is not part of the table.
mapping = dict([
    (1, "911.gml"),
    (2, 'DOMESTICTERRORWEB.gml'),
    (3, 'suicide.gml'),
    (4, 'HAMBURG_TIE_YEAR.gml'),
    (5, 'HEROIN_DEALING.gml'),
    (7, 'MAIL.gml'),
])

## Heuristic based (Empirical)

In [6]:
# Run every heuristic attack (HDA, HBA, HCA, HPRA) on each empirical network
# and save, per network, the solution returned by HXA and the normalised
# cumulative reward of each method as JSON.
node_hist_dir = "../empirical_data/hxa_node_hist"
reward_hist_dir = "../empirical_data/hxa_reward_hist"

# makedirs(exist_ok=True) replaces the racy "check, then mkdir" pattern and
# makes the cell safe to re-run.
for out_dir in (node_hist_dir, reward_hist_dir):
    os.makedirs(out_dir, exist_ok=True)

for idx, file in mapping.items():
    g = nx.read_gml("../empirical_data/" + file)
    # Relabel the nodes to consecutive integers in iteration order.
    node_mapping = {node: i for i, node in enumerate(g.nodes())}
    g = nx.relabel_nodes(g, node_mapping)

    hxa_dct, reward_dct = dict(), dict()
    for method in ['HDA', 'HBA', 'HCA', 'HPRA']:
        # NOTE(review): the same graph object is handed to every method, so
        # this presumes HXA does not modify `g` in place -- confirm in utility.
        sol, reward = HXA(g=g, method=method)
        hxa_dct[method] = sol
        # Running sum of the rewards, divided by the node count of the network.
        reward_dct[method] = (np.cumsum(reward) / g.number_of_nodes()).tolist()

    with open(f"{node_hist_dir}/g_{idx}.json", "w") as json_file:
        json.dump(hxa_dct, json_file)
    with open(f"{reward_hist_dir}/g_{idx}.json", "w") as json_file:
        json.dump(reward_dct, json_file)

## FINDER based (Empirical)

In [10]:
# Convert the node removal history recorded for FINDER into a normalised
# cumulative reward curve for every (model, network) pair and save it as JSON.
finder_reward_dir = "../empirical_data/finder_reward_hist"
# The output folder is created up front so that open(..., "w") cannot fail on
# a fresh checkout (the heuristic cell does the same for its folders).
os.makedirs(finder_reward_dir, exist_ok=True)

for idx, file in mapping.items():
    # Only the "empirical" model is evaluated for network 1.
    if idx == 1:
        methods = ["empirical"]
    else:
        methods = ["ba", "dark", "covert", "empirical"]

    for model in methods:
        g = read_gml(data_dir="../empirical_data/", file_name=file)
        G = g.copy()
        # The history file has no header; column 0 is named "round" and
        # column 1 "node".
        df = pd.read_csv(
            f"../empirical_data/finder_node_hist/{model}_{idx}.txt", header=None
        ).rename(columns={0: "round", 1: "node"})

        # One reward per recorded removal.
        # NOTE(review): presumably getRobustness updates `g` in place while `G`
        # stays the untouched reference copy -- confirm in utility.
        reward_lst = [getRobustness(g, G, int(node)) for node in df["node"]]

        # If fewer steps were recorded than `g` reports nodes, pad the curve
        # with 1 - 1/|GCC| of the current `g`. `g` does not change while
        # padding, so the component size is computed once.
        n_missing = g.number_of_nodes() - len(reward_lst)
        if n_missing > 0:
            gcc_size = len(max(nx.connected_components(g), key=len))
            reward_lst.extend([1 - 1 / gcc_size] * n_missing)

        reward_lst = (np.cumsum(reward_lst) / g.number_of_nodes()).tolist()

        with open(f"../empirical_data/finder_reward_hist/{model}_{idx}.json", "w") as json_file:
            json.dump(reward_lst, json_file)


## Heuristic based (Synthetic)

In [None]:
# Run every heuristic attack (HDA, HBA, HCA, HPRA) on the 100 synthetic graphs
# of type `gtype` and save the solution and the normalised cumulative reward
# of each method as JSON, one file per graph.
gtype = "ba"

# Create the output folders up front so the writes below cannot fail on a
# fresh checkout; exist_ok keeps the cell re-runnable.
for out_dir in (f"./{gtype}/hxa_node_hist", f"./{gtype}/hxa_reward_hist"):
    os.makedirs(out_dir, exist_ok=True)

for i in trange(100):
    hxa_dct, reward_dct = {}, {}
    g = read_gml(data_dir=f"./{gtype}/", file_name=f"g_{i}")
    for method in ['HDA', 'HBA', 'HCA', 'HPRA']:
        # NOTE(review): the same graph object is handed to every method, so
        # this presumes HXA does not modify `g` in place -- confirm in utility.
        sol, reward = HXA(g=g, method=method)
        hxa_dct[method] = sol
        # Running sum of the rewards, divided by the node count of the graph.
        reward_dct[method] = (np.cumsum(reward) / g.number_of_nodes()).tolist()

    with open(f"./{gtype}/hxa_node_hist/g_{i}.json", "w") as json_file:
        json.dump(hxa_dct, json_file)
    with open(f"./{gtype}/hxa_reward_hist/g_{i}.json", "w") as json_file:
        json.dump(reward_dct, json_file)

: 

## FINDER based (Synthetic)

In [None]:
# Convert the node removal history recorded for FINDER on the synthetic BA
# graphs into normalised cumulative reward curves, one JSON file per graph.
# The output folder is created up front so the writes below cannot fail.
os.makedirs("./ba/finder_reward_hist", exist_ok=True)

# NOTE(review): only the first 10 graphs are processed here although the other
# synthetic cells iterate over 100 -- confirm this limit is intended.
for i in trange(10):
    g = read_gml(data_dir="./ba/", file_name=f"g_{i}")
    G = g.copy()
    # The history file has no header; column 0 is named "round" and column 1
    # "node".
    df = pd.read_csv(
        f"./ba/finder_node_hist/g_{i}.txt", header=None
    ).rename(columns={0: "round", 1: "node"})

    # One reward per recorded removal.
    # NOTE(review): presumably getRobustness updates `g` in place while `G`
    # stays the untouched reference copy -- confirm in utility.
    reward_lst = [getRobustness(g, G, int(node)) for node in df["node"]]

    # If fewer steps were recorded than `g` reports nodes, pad the curve with
    # 1 - 1/|GCC| of the current `g`. `g` does not change while padding, so
    # the component size is computed once.
    n_missing = g.number_of_nodes() - len(reward_lst)
    if n_missing > 0:
        gcc_size = len(max(nx.connected_components(g), key=len))
        reward_lst.extend([1 - 1 / gcc_size] * n_missing)

    reward_lst = (np.cumsum(reward_lst) / g.number_of_nodes()).tolist()

    with open(f"./ba/finder_reward_hist/g_{i}.json", "w") as json_file:
        json.dump(reward_lst, json_file)


: 

## FINDER actions ranked against the HXA centrality measures

In [None]:
from scipy.stats import percentileofscore

# Centrality measure used to score nodes for each heuristic label.
centrality_by_method = {
    "HDA": nx.degree_centrality,
    "HBA": nx.betweenness_centrality,
    "HCA": nx.closeness_centrality,
    "HPRA": nx.pagerank,
}

# rank_dct[method] collects one list per graph. Each list holds, for every
# usable FINDER step, the percentile of the chosen node's score among the
# scores of all nodes still in the graph.
rank_dct = {
    "HDA": [], "HBA": [],
    "HCA": [], "HPRA": []
}

for i in range(100):
    G = read_gml(data_dir="./ba/", file_name=f"g_{i}")
    df = pd.read_csv(f"./ba/finder_node_hist/g_{i}.txt", header=None).rename(columns={0: "round", 1: "node"})

    graph_ranks = {method: [] for method in rank_dct}
    for node in df["node"]:
        node = int(node)
        # A node picked by FINDER may already be gone because it was removed
        # as a neighbour of an earlier pick; such steps are skipped.
        if node not in G:
            continue

        try:
            step_ranks = {}
            for method, centrality in centrality_by_method.items():
                scores = centrality(G)
                step_ranks[method] = percentileofscore(list(scores.values()), scores[node])
        except nx.NetworkXException:
            # Best effort: skip a step whose centrality cannot be computed
            # (e.g. PageRank not converging) instead of aborting the run.
            continue

        # Record the step only once all four ranks exist, so the per-method
        # lists stay aligned step by step.
        for method, rank in step_ranks.items():
            graph_ranks[method].append(rank)

        # Remove the chosen node together with all of its neighbours.
        G.remove_nodes_from([*G.neighbors(node), node])

    # File every method's ranks under its own key.
    for method, ranks in graph_ranks.items():
        rank_dct[method].append(ranks)

: 

: 

In [None]:
# Plot, for each heuristic, a histogram of the percentile ranks of FINDER's
# picks pooled over all graphs and all rounds, and save it as a PNG.
for method in ['HDA', 'HBA', 'HCA', 'HPRA']:
    # Pool the ranks of every graph into one flat list.
    all_ranks = [rank for ranks in rank_dct[method] for rank in ranks]

    # A fresh figure per method: drawing on the implicit current figure would
    # stack each histogram on top of the previous methods' bars.
    fig, ax = plt.subplots()
    ax.hist(all_ranks, bins=20, color="blue")
    ax.set_xlabel('Percentile')
    ax.set_ylabel('Frequency')
    ax.set_title(f"BA: {method} percentile (all_round)")

    fig.savefig(f"./ba/{method}_all_round.png")
    plt.show()
    # Release the figure so repeated runs do not accumulate open figures.
    plt.close(fig)
    

: 