In [1]:
import numpy as np
import pandas as pd
import re, os

In [2]:
# Load the raw edge-list text file into memory, one string per line.
file_path = "../../../graph_data/com-dblp.ungraph.txt"
with open(file_path, 'r') as f:
    data_list = f.readlines()

In [3]:
def modify_pair(line, prob):
    """Rewrite one raw edge line as "<src> <dst> <prob>" plus a newline.

    The first two runs of digits found in ``line`` are used as the edge
    endpoints (kept as strings) and ``prob`` is appended via ``str``.
    Raises ``IndexError`` when ``line`` holds fewer than two digit runs.
    """
    numbers = re.findall(r'\d+', line)
    source = numbers[0]
    target = numbers[1]
    return "{} {} {}\n".format(source, target, str(prob))

In [4]:
# NOTE(review): OUTPUT_FILE is not referenced again in the visible cells.
OUTPUT_FILE = "./data/DBLP.txt"
# Skip the first four lines (presumably the header of the raw file - confirm)
# and attach a fixed probability of 0.1 to every remaining edge line.
cleaned_list = [modify_pair(line, 0.1) for line in data_list[4:]]

In [5]:
# Take the first two integers on the third line of the raw file; they are used
# below as the node count and the edge count (assumes that line lists the
# number of nodes first and the number of edges second - TODO confirm).
pattern = r'\d+'
matches = re.findall(pattern, data_list[2])
DBLP_n, DBLP_m = matches[0], matches[1]
num_nodes = int(DBLP_n)
# Two header lines (node count, edge count) followed by the rewritten edges.
res_list = [DBLP_n + '\n', DBLP_m + '\n'] + cleaned_list

In [9]:
import re

# Read in edge list file
# Every entry of cleaned_list is parsed back into (u, v, w). Endpoints that do
# not fit into range(num_nodes) are collected so they can be remapped below.
edges = []
# NOTE(review): node_set is filled below but never read again in this cell.
node_set = set()
out_nodes = set()
occupied_idx = [False for i in range(num_nodes)]
for line in cleaned_list:
    # The pattern matches integers as well as decimals, so the probability
    # at the end of the line is captured as the third match.
    match = re.findall(r'\d+\.?\d*', line)
    u = int(match[0])
    v = int(match[1])
    w = float(match[2])
    node_set.add(u)
    node_set.add(v)
    edges.append((u, v, w))

    # Record which indices inside [0, num_nodes) are in use and which
    # endpoints fall outside that range.
    if u >= num_nodes:
        out_nodes.add(u)
    else:
        occupied_idx[u] = True
    if v >= num_nodes:
        out_nodes.add(v)
    else:
        occupied_idx[v] = True

# Determine the maximum node index
# NOTE(review): max_node_index is not used later in this cell.
max_node_index = max(max(edge[:2]) for edge in edges)

# Create a mapping from old node indices to new ones
mapping = {}
# NOTE(review): new_index is never used afterwards in this cell.
new_index = 0
# Indices inside [0, num_nodes) that no edge endpoint uses.
avai_idx = [i for i in range(num_nodes) if occupied_idx[i] == False]
# In-range nodes keep their index.
for i in range(num_nodes):
    if occupied_idx[i]:
        mapping[i] = i

# Sanity checks: the number of in-range nodes equals num_nodes minus the number
# of out-of-range nodes, and there is exactly one free index per out-of-range node.
assert len(mapping.keys()) == num_nodes - len(out_nodes)
assert len(avai_idx) == len(out_nodes)
# Each out-of-range node takes one of the free indices (in set iteration order).
for i, node in enumerate(out_nodes):
    mapping[node] = avai_idx[i]

# Re-index the edges
new_edges = [(mapping[u], mapping[v], w) for u, v, w in edges]

In [10]:
# Write out the re-indexed edges to a new file
# Layout: node count on the first line, edge count on the second line, then one
# tab-separated "u v w" line per re-indexed edge.
new_filename = "./data/DBLP_1.txt"

with open(new_filename, "w") as f:
    f.write(DBLP_n + '\n')
    f.write(DBLP_m + '\n')
    for u, v, w in new_edges:
        f.write(f"{u}\t{v}\t{w}\n")

In [2]:
def check_line_legal(line):
    """Tell whether ``line`` looks like an edge line.

    A legal line starts with a run of digits, followed by at least one
    non-digit separator and a second run of digits. This guarantees that
    ``get_edge_from_str`` finds two integers in every accepted line; the
    previous pattern also accepted a lone multi-digit token such as "12",
    and rejected a final line that had no trailing newline.

    Returns:
        A truthy ``re.Match`` object for legal lines, ``None`` otherwise
        (comment/header lines, blank lines, lines with a single number).
    """
    # re.match anchors at the start of the string, so no explicit "^" is needed.
    return re.match(r'\d+\D+\d+', line)

def get_edge_from_str(edge_str):
    """Parse the first two integers of ``edge_str`` into an ``(u, v)`` tuple.

    Raises ``IndexError`` when the string holds fewer than two digit runs.
    """
    tokens = re.findall(r'\d+', edge_str)
    source = int(tokens[0])
    target = int(tokens[1])
    return (source, target)

def get_triple_from_str(edge_str):
    """Parse ``edge_str`` into an ``(u, v, w)`` triple.

    The first two numbers found are converted with ``int`` and the third with
    ``float``. Numbers may be plain integers, decimals, or either of those
    followed by an exponent ("1e-05", "2.5E-3"), so that small probabilities
    written in scientific notation are read back correctly instead of being
    split into separate digit runs.

    Raises:
        IndexError: if fewer than three numbers are present.
        ValueError: if one of the first two numbers is not an integer literal.
    """
    # Non-capturing groups keep re.findall returning the full matched text.
    pattern = r"\d+(?:\.\d+)?(?:[eE][+-]?\d+)?"
    matches = re.findall(pattern, edge_str)
    return (int(matches[0]), int(matches[1]), float(matches[2]))

def read_file(file_name = "./data/Epinions/soc-Epinions1.txt"):
    """Read an edge-list file and return its edges as ``(u, v)`` int tuples.

    Lines rejected by ``check_line_legal`` (e.g. comment headers) are skipped;
    every accepted line is parsed with ``get_edge_from_str``.
    """
    with open(file_name, 'r') as handle:
        raw_lines = handle.readlines()
    return [get_edge_from_str(raw) for raw in raw_lines if check_line_legal(raw)]

def read_cleaned_prob_file(file_name = "./data/DBLP/edgelist_ic.txt"):
    """Read an already-cleaned file and return ``(u, v, w)`` triples.

    Every line is passed to ``get_triple_from_str`` without filtering, so a
    line with fewer than three numbers makes that helper raise.
    """
    with open(file_name, 'r') as handle:
        rows = handle.readlines()
    return [get_triple_from_str(row) for row in rows]

def re_index(edge_list):
    """Compact node indices so that they fit into ``range(number of nodes)``.

    Nodes whose index is already below the number of distinct nodes keep it;
    every other node is moved to one of the unused indices in that range.

    Returns:
        A new list of ``(u, v)`` tuples with remapped endpoints, or ``None``
        when the largest index is already below the number of distinct nodes
        (nothing to do).
    """
    seen_nodes = set()
    largest_index = 0
    for edge in edge_list:
        seen_nodes.update((edge[0], edge[1]))
        largest_index = max(edge[0], edge[1], largest_index)

    num_nodes = len(seen_nodes)
    print("The largest index is:", largest_index)
    print("The total number of nodes is:", num_nodes)
    if num_nodes - 1 >= largest_index:
        print("NOT NEED REINDEXING")
        return None

    # Split the nodes into those that already sit inside the target range
    # (marking their slot as taken) and those that have to be moved.
    slot_taken = [False] * num_nodes
    overflow_nodes = []
    for node in seen_nodes:
        if node < num_nodes:
            slot_taken[node] = True
        else:
            overflow_nodes.append(node)

    free_slots = [idx for idx, taken in enumerate(slot_taken) if not taken]
    assert(len(free_slots) == len(overflow_nodes))
    # Pair each out-of-range node with a free slot, in iteration order.
    remap = dict(zip(overflow_nodes, free_slots))

    return [
        (u if u < num_nodes else remap[u], v if v < num_nodes else remap[v])
        for (u, v) in edge_list
    ]

def data_preprocessing(filepath="./data/Epinions/soc-Epinions1.txt", p_setting=0.1):
    """Turn a raw edge list into ``edgelist_ic.txt`` next to the input file.

    The input is read with ``read_file``, node indices are compacted with
    ``re_index`` when necessary, and every edge is written as a tab-separated
    ``u v p_setting`` line.

    Args:
        filepath: path of the raw edge-list file.
        p_setting: probability written for every edge.
    """
    cur_dir = os.path.dirname(filepath)
    edge_list = read_file(filepath)
    new_edges = re_index(edge_list)
    if new_edges is None:
        # re_index returns None when the indices are already compact; write
        # the original edges instead of iterating over None (TypeError).
        print("Dataset is good.")
        new_edges = edge_list
    with open(os.path.join(cur_dir, 'edgelist_ic.txt'), 'w') as f:
        for (u, v) in new_edges:
            f.write(f"{u}\t{v}\t{p_setting}\n")
    return

def modify_prob(filepath="./data/Epinions/soc-Epinions1.txt", p_setting=0.1):
    """Rewrite a cleaned edge file with ``p_setting`` as every edge's probability.

    The triples are read from ``filepath`` first; the result is then written
    to ``edgelist_ic.txt`` in the same directory. The probability stored in
    the input is ignored.
    """
    triples = read_cleaned_prob_file(filepath)
    target_path = os.path.join(os.path.dirname(filepath), 'edgelist_ic.txt')
    with open(target_path, 'w') as out:
        out.writelines(f"{src}\t{dst}\t{p_setting}\n" for (src, dst, _old_prob) in triples)
    print("Finish modifying the probability to", p_setting)

In [9]:
# Generate edgelist_ic.txt for the orkut edge list with the default probability.
data_preprocessing("./data/orkut/edgelist.txt")

The largest index is: 3072440
The total number of nodes is: 3072441
NOT NEED REINDEXING
Dataset is good.


In [1]:
# Read the generated orkut edge list back in to inspect it.
with open("./data/orkut/edgelist_ic.txt", 'r') as f:
    lines = f.readlines()

In [2]:
# Show the first ten lines of the file read above.
lines[:10]

['0\t1\t0.1\n',
 '0\t2\t0.1\n',
 '0\t3\t0.1\n',
 '0\t4\t0.1\n',
 '0\t5\t0.1\n',
 '0\t6\t0.1\n',
 '0\t7\t0.1\n',
 '0\t8\t0.1\n',
 '0\t9\t0.1\n',
 '0\t10\t0.1\n']

In [5]:
def get_inf_from_csv(filepath):
    """Return the last value of the "expected spread" column of a CSV file."""
    spread_column = pd.read_csv(filepath)["expected spread"]
    final_value = spread_column.iloc[-1]
    return float(final_value)
# For every random seed, collect the final expected spread stored in each
# k_inf_spread_MCGreedy_<step>.csv file (step = 1000, 2000, ..., 10000).
# The loop no longer uses enumerate: its index was unused and was shadowed by
# the comprehension variable of the same name.
MC_res = []
for rand_seed in [2020, 2021, 2022, 2023, 2024]:
    folder_path = f"./data/GRQC/params/50_10000_0.500000_0.001000_{rand_seed}_RANDseed_WC/"
    csv_filenames = [os.path.join(folder_path, f"k_inf_spread_MCGreedy_{step}.csv") for step in range(1000, 10001, 1000)]
    MC_res.append([get_inf_from_csv(filepath) for filepath in csv_filenames])

In [6]:
# Average the per-seed rows of MC_res (axis 0) and print the first ten averaged
# values as "(index, value)" pairs, with the index starting at 1.
res = np.array(MC_res).mean(axis=0)
for i in range(10):
    print(f"({i+1}, {res[i]})")

(1, 446.26219999999995)
(2, 470.24080000000004)
(3, 502.73699999999997)
(4, 502.4652)
(5, 505.77220000000005)
(6, 520.9777999999999)
(7, 519.826)
(8, 524.028)
(9, 522.3344)
(10, 522.3902)


In [17]:
# Spot check: load the "expected spread" column of one nethept MCGreedy CSV
# and display its last entry.
test = pd.read_csv(os.path.join(f"./data/nethept/params/50_10000_0.500000_0.001000_2023_RANDseed_WC/", "k_inf_spread_MCGreedy_1000.csv"))["expected spread"]
test.iloc[-1]

# Read data from files

In [15]:
# Method names; each one selects a log_<name>.txt file in a result folder.
BASELINE_NAMES = ["RAND", "OUTDEG", "PROB", "SINF", "UINF", "AIS-U" , "IMA"]
# Random seeds embedded in the result folder names.
RAND_SEEDS = [2021, 2022, 2023, 2024, 2025]
# Values used as the leading number of the result folder names (k_edges).
K_LIST = [5, 10, 20, 30, 40, 50]
# Kept as strings because they are pasted into folder names as "<eps>00000".
EPS_LIST = ["0.1", "0.2", "0.3", "0.4", "0.5"]
# beta == 1 maps to the folder without a "_beta_<beta>" suffix.
BETA_LIST = [1, 2, 4, 8, 16, 32, 64]
# Seed list passed instead of RAND_SEEDS in most nethept calls below.
rand_seeds_nethept = [42, 2020, 2022, 2023, 2024]

def get_res_from_file(file_path, mode="result")->float:
    """Extract a number from a log file.

    With ``mode="result"`` lines starting with "Result by" are considered,
    with ``mode="origin"`` lines starting with "Original Inf:". The first
    number on the last such line is returned as a float. When nothing usable
    is found a message is printed and ``None`` is returned.
    """
    prefix_by_mode = {"result": 'Result by', "origin": "Original Inf:"}
    wanted_prefix = prefix_by_mode.get(mode)
    found = None
    with open(file_path, 'r') as file:
        for line in file:
            # Later matching lines override earlier ones.
            if wanted_prefix is not None and line.startswith(wanted_prefix):
                found = re.search(r'\d+(\.\d+)?', line)
    if not found:
        print("RESULT NOT FOUND", file_path)
        return None
    return float(found.group())

def get_RRset_generation_time(file_path):
    """Return the RR set generation time (seconds) recorded in a log file.

    The first occurrence of the "RR set generation" timing entry is used.
    If the entry is missing, a message is printed and ``-1.0`` is returned.
    """
    with open(file_path, "r") as file:
        text = file.read()

    found = re.search(r"->Time used \(sec\) for operation \[RR set generation\]: (\d+\.\d+|\d+)", text)
    if found is None:
        print("RR set generation time not found")
        return -1.0
    return float(found.group(1))

def get_process_time(file_path):
    """Return the first number on the first line containing "process".

    Prints a message and returns ``None`` when no such line exists or when
    that line contains no number.
    """
    with open(file_path, "r") as file:
        text = file.read()

    # "." does not match newlines, so the match is exactly one line.
    line_match = re.search(r".*process.*", text, re.MULTILINE)
    if not line_match:
        print("Line with the keyword 'process' not found")
        return None

    time_match = re.search(r"\d+(\.\d+)?", line_match.group(0))
    if not time_match:
        print("Running time not found in the line with the keyword 'process'")
        return None
    return float(time_match.group(0))

def get_original_inf(dataset_name:str, rand_seeds):
    """Average the "Original Inf:" value of the k=5 IMA logs over ``rand_seeds``."""
    values = [
        get_res_from_file(f"./data/{dataset_name}/params/5_0_0.500000_0.001000_{rand_seed}_WC/log_IMA.txt", "origin")
        for rand_seed in rand_seeds
    ]
    return sum(values) / len(values)

def get_inf_beta_of_one_dataset(dataset_name:str, rand_seeds, baseline_names=["IMA"], k_edges=50):
    """Collect the normalised result for every beta in ``BETA_LIST``.

    For each baseline the "Result by" value is read for every seed and beta,
    averaged over the seeds, and divided by the largest averaged value.

    Returns:
        dict mapping baseline name to an array with one entry per beta.
    """
    res = {}
    for baseline in baseline_names:
        inf_diff_rand_seed_list = []
        for rand_seed in rand_seeds:
            inf_for_one_rand_seed = []
            for beta in BETA_LIST:
                # beta == 1 lives in the folder without a "_beta_" suffix.
                if beta == 1:
                    path = f"./data/{dataset_name}/params/{k_edges}_0_0.500000_0.001000_{rand_seed}_WC/log_{baseline}.txt"
                else:
                    path = f"./data/{dataset_name}/params/{k_edges}_0_0.500000_0.001000_{rand_seed}_WC_beta_{beta}/log_{baseline}.txt"
                try:
                    inf_for_one_rand_seed.append(get_res_from_file(path, "result"))
                except Exception as e:
                    # Report the reason and the path; a bare except would also
                    # swallow KeyboardInterrupt and hide what went wrong.
                    print(e.args)
                    print(path)
            inf_diff_rand_seed_list.append(inf_for_one_rand_seed)

        x_mean = np.mean(np.array(inf_diff_rand_seed_list), axis=0)
        res[baseline] = x_mean / np.max(x_mean)
    return res

def get_time_beta_of_one_dataset(dataset_name:str, rand_seeds, baseline_names=["IMA"], k_edges=50):
    """Average total running time per beta in ``BETA_LIST``.

    The total for one run is the RR set generation time from ``logs.txt`` plus
    the process time from ``log_<baseline>.txt``; totals are averaged over the
    seeds. Failures are printed and the corresponding entry is skipped.
    """
    res = {}
    for baseline in baseline_names:
        per_baseline = []
        for rand_seed in rand_seeds:
            per_seed = []
            for beta in BETA_LIST:
                try:
                    # beta == 1 lives in the folder without a "_beta_" suffix.
                    working_folder = (
                        f"./data/{dataset_name}/params/{k_edges}_0_0.500000_0.001000_{rand_seed}_WC/"
                        if beta == 1
                        else f"./data/{dataset_name}/params/{k_edges}_0_0.500000_0.001000_{rand_seed}_WC_beta_{beta}/"
                    )
                    rr_time = get_RRset_generation_time(os.path.join(working_folder, "logs.txt"))
                    proc_time = get_process_time(os.path.join(working_folder, f"log_{baseline}.txt"))
                    per_seed.append(rr_time + proc_time)
                except Exception as e:
                    print(e.args)
                    print(baseline, rand_seed, k_edges)
                    print(working_folder)
            per_baseline.append(per_seed)

        res[baseline] = np.mean(np.array(per_baseline), axis=0)
    return res

def get_inf_spread_of_one_dataset(dataset_name:str, rand_seeds):
    """Average result per baseline for every k in ``K_LIST``.

    Each per-seed row starts with the seed-averaged original value from
    ``get_original_inf`` followed by the "Result by" value for every k.

    Returns:
        dict mapping baseline name to an array of ``1 + len(K_LIST)`` values.
    """
    res = {}
    origin_inf = get_original_inf(dataset_name, rand_seeds)
    for baseline in BASELINE_NAMES:
        inf_diff_rand_seed_list = []
        for rand_seed in rand_seeds:
            inf_for_one_rand_seed = [origin_inf]
            for k_edges in K_LIST:
                path = f"./data/{dataset_name}/params/{k_edges}_0_0.500000_0.001000_{rand_seed}_WC/log_{baseline}.txt"
                try:
                    inf_for_one_rand_seed.append(get_res_from_file(path, "result"))
                except Exception as e:
                    # Report the reason as well; a bare except would also
                    # swallow KeyboardInterrupt and hide what went wrong.
                    print(e.args)
                    print(baseline, rand_seed, k_edges)
                    print(path)
            inf_diff_rand_seed_list.append(inf_for_one_rand_seed)
        res[baseline] = np.mean(np.array(inf_diff_rand_seed_list), axis=0)
    return res

def get_eps_inf_of_one_dataset(dataset_name:str, rand_seeds, baseline_names=["IMA"], k_edges=50):
    """Collect the normalised result for every epsilon in ``EPS_LIST``.

    For each baseline the "Result by" value is read for every seed and
    epsilon, averaged over the seeds, and divided by the largest averaged
    value.

    Returns:
        dict mapping baseline name to an array with one entry per epsilon.
    """
    res = {}

    for baseline in baseline_names:
        inf_diff_rand_seed_list = []
        for rand_seed in rand_seeds:
            inf_for_one_rand_seed = []
            for eps in EPS_LIST:
                # EPS_LIST holds strings such as "0.1"; the folder name uses
                # six decimals, hence the literal "00000" suffix.
                path = f"./data/{dataset_name}/params/{k_edges}_0_{eps}00000_0.001000_{rand_seed}_WC/log_{baseline}.txt"
                try:
                    inf_for_one_rand_seed.append(get_res_from_file(path, "result"))
                except Exception as e:
                    # Report the reason as well; a bare except would also
                    # swallow KeyboardInterrupt and hide what went wrong.
                    print(e.args)
                    print(path)
            inf_diff_rand_seed_list.append(inf_for_one_rand_seed)
        x_mean = np.mean(np.array(inf_diff_rand_seed_list), axis=0)
        res[baseline] = x_mean / np.max(x_mean)
    return res

def get_eps_time_of_one_dataset(dataset_name:str, rand_seeds, baseline_names=["IMA"], k_edges=50):
    """Average total running time per epsilon in ``EPS_LIST``.

    The total for one run is the RR set generation time from ``logs.txt`` plus
    the process time from ``log_<baseline>.txt``; totals are averaged over the
    seeds. Failures are printed and the corresponding entry is skipped.

    Args:
        dataset_name: folder name below ``./data``.
        rand_seeds: seeds whose result folders are read.
        baseline_names: methods to collect.
        k_edges: leading number of the result folder name; defaults to 50,
            the value that used to be hard-coded, matching the parameter of
            ``get_eps_inf_of_one_dataset``.
    """
    res = {}
    for baseline in baseline_names:
        time_diff_rand_seed_list = []
        for rand_seed in rand_seeds:
            time_for_one_rand_seed = []
            for eps in EPS_LIST:
                try:
                    working_folder = f"./data/{dataset_name}/params/{k_edges}_0_{eps}00000_0.001000_{rand_seed}_WC/"
                    log_path = os.path.join(working_folder, "logs.txt")
                    log_baseline_path = os.path.join(working_folder, f"log_{baseline}.txt")
                    RR_set_gen_time = get_RRset_generation_time(log_path)
                    process_time = get_process_time(log_baseline_path)
                    time_for_one_rand_seed.append(RR_set_gen_time + process_time)
                except Exception as e:
                    print(e.args)
                    print(working_folder)
            time_diff_rand_seed_list.append(time_for_one_rand_seed)
        res[baseline] = np.mean(np.array(time_diff_rand_seed_list), axis=0)
    return res

def get_runtime_of_one_dataset(dataset_name:str, rand_seeds, baseline_names=["SINF", "UINF", "AIS-U" , "IMA"], k_list=K_LIST, seed_mode="IM",
                               num_cand_edges=0):
    """Average running time per baseline for every k in ``k_list``.

    For "SINF", "UINF", "AIS-U" and "IMA" the RR set generation time is added
    to the process time; for any other baseline only the process time counts.
    ``seed_mode == "RAND"`` selects the "_RANDseed_WC" result folders.
    Failures are printed and the corresponding entry is skipped.
    """
    rr_based = ["SINF", "UINF", "AIS-U" , "IMA"]
    res = {}
    for baseline in baseline_names:
        per_baseline = []
        for rand_seed in rand_seeds:
            per_seed = []
            for k_edges in k_list:
                try:
                    if seed_mode == "RAND":
                        working_folder = f"./data/{dataset_name}/params/{k_edges}_{num_cand_edges}_0.500000_0.001000_{rand_seed}_RANDseed_WC/"
                    else:
                        working_folder = f"./data/{dataset_name}/params/{k_edges}_{num_cand_edges}_0.500000_0.001000_{rand_seed}_WC/"
                    rr_time = get_RRset_generation_time(os.path.join(working_folder, "logs.txt"))
                    proc_time = get_process_time(os.path.join(working_folder, f"log_{baseline}.txt"))
                    per_seed.append(rr_time + proc_time if baseline in rr_based else proc_time)
                except Exception as e:
                    print(e.args)
                    print(working_folder)
            per_baseline.append(per_seed)
        res[baseline] = np.mean(np.array(per_baseline), axis=0)
    return res

def get_runtime_k_mcg(dataset_name:str, rand_seeds:list, k_list=K_LIST, seed_mode="RAND", num_cand_edges=10000):
    """Average MCGreedy selection time for every k in ``k_list``.

    For each seed the "[select_<k>]" timings are read from
    ``MCGreedy_10000_time.txt`` in the matching result folder
    (``seed_mode == "RAND"`` selects the "_RANDseed_WC" folder). Failures are
    printed and leave that seed's row incomplete.

    Returns:
        ``{"MCGreedy": array}`` with one averaged time per k, or an empty
        dict when ``rand_seeds`` is empty.
    """
    res = {}

    time_diff_rand_seed_list = []
    for rand_seed in rand_seeds:
        time_for_one_rand_seed = []
        try:
            working_folder = f"./data/{dataset_name}/params/50_{num_cand_edges}_0.500000_0.001000_{rand_seed}_WC/"
            if seed_mode == "RAND":
                working_folder = f"./data/{dataset_name}/params/50_{num_cand_edges}_0.500000_0.001000_{rand_seed}_RANDseed_WC/"
            log_path = os.path.join(working_folder, "MCGreedy_10000_time.txt")
            with open(log_path, 'r') as f:
                contents = f.read()
                for k_edges in k_list:
                    pattern = r"\[select_" + str(k_edges) + r"\]:\s+(\d+(\.\d+)?)"
                    match = re.search(pattern, contents)
                    time_for_one_rand_seed.append(float(match.group(1)))
        except Exception as e:
            print(e.args)
            print(working_folder)
        time_diff_rand_seed_list.append(time_for_one_rand_seed)
    # Average once after all seeds are collected instead of on every iteration;
    # the guard keeps the empty-dict result for an empty seed list.
    if time_diff_rand_seed_list:
        res["MCGreedy"] = np.mean(np.array(time_diff_rand_seed_list), axis=0)
    return res

def get_runtime_r_mcg(dataset_name:str, rand_seeds:list, r_list=[2000, 4000, 6000, 8000, 10000], seed_mode="RAND", num_cand_edges=10000):
    """Average MCGreedy "[select_50]" time for every r in ``r_list``.

    For each seed and each r the timing is read from ``MCGreedy_<r>_time.txt``
    in the matching result folder (``seed_mode == "RAND"`` selects the
    "_RANDseed_WC" folder). Failures are printed and leave that seed's row
    incomplete.

    Returns:
        ``{"MCGreedy": array}`` with one averaged time per r, or an empty
        dict when ``rand_seeds`` is empty.
    """
    res = {}

    time_diff_rand_seed_list = []
    for rand_seed in rand_seeds:
        time_for_one_rand_seed = []
        try:
            working_folder = f"./data/{dataset_name}/params/50_{num_cand_edges}_0.500000_0.001000_{rand_seed}_WC/"
            if seed_mode == "RAND":
                working_folder = f"./data/{dataset_name}/params/50_{num_cand_edges}_0.500000_0.001000_{rand_seed}_RANDseed_WC/"
            for r in r_list:
                log_path = os.path.join(working_folder, f"MCGreedy_{str(r)}_time.txt")
                with open(log_path, 'r') as f:
                    contents = f.read()
                    pattern = r"\[select_50\]:\s+(\d+(\.\d+)?)"
                    match = re.search(pattern, contents)
                    time_for_one_rand_seed.append(float(match.group(1)))
        except Exception as e:
            print(e.args)
            print(working_folder)
            print(r)
        time_diff_rand_seed_list.append(time_for_one_rand_seed)
    # Average once after all seeds are collected instead of on every iteration;
    # the guard keeps the empty-dict result for an empty seed list.
    if time_diff_rand_seed_list:
        res["MCGreedy"] = np.mean(np.array(time_diff_rand_seed_list), axis=0)
    return res

def display_for_tex(method_inf_dict, method="IMA", k_list = [0, 5, 10, 20, 30, 40, 50]):
    """Print the values of one method as "(label,value)" lines.

    The i-th value of ``method_inf_dict[method]`` is rounded to four decimals
    and paired with ``k_list[i]``.
    """
    for position, value in enumerate(method_inf_dict[method]):
        label = k_list[position]
        print(f"({label},{round(value, 4)})")

def display_for_tex_all(method_inf_dict, k_list = [0, 5, 10, 20, 30, 40, 50]):
    """Print every method of ``method_inf_dict``: its name, then its values."""
    for method_name in method_inf_dict:
        print(method_name + ":")
        display_for_tex(method_inf_dict, method_name, k_list)

## Small Datasets with 10000 candidate edges
### Baseline Methods

In [3]:
# IMA running time on GRQC and nethept with 10000 candidate edges, read from
# the seed_mode="IM" folders. The commented-out calls below are the
# seed_mode="RAND" variant of the same two calls.
# runtime_grqc = get_runtime_of_one_dataset("GRQC", RAND_SEEDS, baseline_names=["IMA"], seed_mode="RAND", num_cand_edges=10000)
# runtime_nethept = get_runtime_of_one_dataset("nethept", rand_seeds_nethept, baseline_names=["IMA"], seed_mode="RAND", num_cand_edges=10000)
runtime_grqc = get_runtime_of_one_dataset("GRQC", RAND_SEEDS, baseline_names=["IMA"], seed_mode="IM", num_cand_edges=10000)
runtime_nethept = get_runtime_of_one_dataset("nethept", rand_seeds_nethept, baseline_names=["IMA"], seed_mode="IM", num_cand_edges=10000)


### MCGreedy data

In [27]:
# MCGreedy running times for GRQC and nethept.
# Per-k timings (default k_list and seed_mode="RAND"):
MCG_time_k_grqc = get_runtime_k_mcg("GRQC", RAND_SEEDS)
MCG_time_k_nethept = get_runtime_k_mcg("nethept", RAND_SEEDS)
# Per-r timings with the default r_list (2000 to 10000 in steps of 2000):
runtime_r_GRQC_step2k = get_runtime_r_mcg("GRQC", RAND_SEEDS)
runtime_r_nethept_step2k = get_runtime_r_mcg("nethept", RAND_SEEDS)
# Per-r timings for r = 1000 to 5000, seed_mode="RAND":
runtime_r_GRQC = get_runtime_r_mcg("GRQC", RAND_SEEDS, [i*1000 for i in range(1, 6)])
runtime_r_nethept = get_runtime_r_mcg("nethept", RAND_SEEDS, [i*1000 for i in range(1, 6)])
# Same r values, seed_mode="IM":
# NOTE(review): nethept is read with RAND_SEEDS here, whereas most other cells
# use rand_seeds_nethept for that dataset - confirm this is intended.
runtime_r_GRQC_IM = get_runtime_r_mcg("GRQC", RAND_SEEDS, [1000, 2000, 3000, 4000, 5000], "IM")
runtime_r_nethept_IM = get_runtime_r_mcg("nethept", RAND_SEEDS, [1000, 2000, 3000, 4000, 5000], "IM")

In [28]:
# Print the nethept MCGreedy per-r timings, labelled 1 to 5.
display_for_tex(runtime_r_nethept, "MCGreedy", [i for i in range(1, 6)])

(1,304.4098)
(2,326.0024)
(3,404.653)
(4,434.4754)
(5,640.8142)


In [11]:
# Print the nethept MCGreedy per-r timings (seed_mode="IM"), labelled 1000 to 5000.
display_for_tex(runtime_r_nethept_IM, "MCGreedy", [i*1000 for i in range(1,6)])

(1000,1510.126)
(2000,3024.026)
(3000,4641.218)
(4000,6418.404)
(5000,7780.71)


In [27]:
# Print the nethept IMA running times, labelled with the k values of K_LIST.
display_for_tex(runtime_nethept, "IMA", K_LIST)

(5,0.0957)
(10,0.3037)
(20,1.0251)
(30,2.2648)
(40,4.0737)
(50,6.259)


In [3]:
# Normalised IMA result per beta (default baseline and k_edges) for each dataset.
inf_beta_nethept = get_inf_beta_of_one_dataset("nethept", rand_seeds_nethept)
inf_beta_epinion = get_inf_beta_of_one_dataset("Epinions", RAND_SEEDS)
inf_beta_dblp = get_inf_beta_of_one_dataset("DBLP", RAND_SEEDS)
inf_beta_orkut = get_inf_beta_of_one_dataset("orkut", RAND_SEEDS)
inf_beta_twitter = get_inf_beta_of_one_dataset("twitter", RAND_SEEDS)

In [4]:
# Print the twitter per-beta results; labels are positions in BETA_LIST (0 to 6).
display_for_tex_all(inf_beta_twitter, [i for i in range(len(BETA_LIST))])

IMA:
(0,0.9984)
(1,0.9984)
(2,0.9984)
(3,0.9982)
(4,0.9999)
(5,0.9993)
(6,1.0)


In [5]:
# Averaged IMA running time per beta (default baseline and k_edges) for each dataset.
time_beta_nethept = get_time_beta_of_one_dataset("nethept", rand_seeds_nethept)
time_beta_epinion = get_time_beta_of_one_dataset("Epinions", RAND_SEEDS)
time_beta_dblp = get_time_beta_of_one_dataset("DBLP", RAND_SEEDS)
time_beta_orkut = get_time_beta_of_one_dataset("orkut", RAND_SEEDS)
time_beta_twitter = get_time_beta_of_one_dataset("twitter", RAND_SEEDS)

In [6]:
# Print the twitter per-beta running times; labels are positions in BETA_LIST (0 to 6).
display_for_tex_all(time_beta_twitter, [i for i in range(len(BETA_LIST))])

IMA:
(0,699.4426)
(1,459.037)
(2,339.8896)
(3,284.2249)
(4,243.919)
(5,224.9127)
(6,222.3034)


## EPSILON

In [4]:
# Normalised IMA result per epsilon (default baseline and k_edges) for each dataset.
eps_inf_nethept = get_eps_inf_of_one_dataset("nethept", rand_seeds_nethept)
eps_inf_epin = get_eps_inf_of_one_dataset("Epinions", RAND_SEEDS)
eps_inf_dblp = get_eps_inf_of_one_dataset("DBLP", RAND_SEEDS)
eps_inf_orkut = get_eps_inf_of_one_dataset("orkut", RAND_SEEDS)
# NOTE(review): twitter uses only the first four seeds here, unlike the other
# datasets - confirm whether the fifth run is missing on purpose.
eps_inf_twitter = get_eps_inf_of_one_dataset("twitter", RAND_SEEDS[:4])

In [5]:
# Print the twitter per-epsilon results, labelled with the EPS_LIST strings.
display_for_tex_all(eps_inf_twitter, EPS_LIST)

IMA:
(0.1,1.0)
(0.2,1.0)
(0.3,0.9999)
(0.4,0.9999)
(0.5,0.9998)


In [8]:
# Averaged IMA running time per epsilon (default baseline) for each dataset.
eps_time_nethept = get_eps_time_of_one_dataset("nethept", rand_seeds_nethept)
eps_time_epin = get_eps_time_of_one_dataset("Epinions", RAND_SEEDS)
eps_time_dblp = get_eps_time_of_one_dataset("DBLP", RAND_SEEDS)
eps_time_orkut = get_eps_time_of_one_dataset("orkut", RAND_SEEDS)
eps_time_twitter = get_eps_time_of_one_dataset("twitter", RAND_SEEDS)

In [9]:
# Print the twitter per-epsilon running times, labelled 0.1 to 0.5.
display_for_tex_all(eps_time_twitter, [0.1, 0.2, 0.3, 0.4, 0.5])

IMA:
(0.1,12171.155)
(0.2,3112.0224)
(0.3,1507.3786)
(0.4,941.1138)
(0.5,699.4426)


In [10]:
# Print the per-epsilon results of the remaining datasets, in the order
# nethept, Epinions, DBLP, orkut (each printed block starts with "IMA:").
display_for_tex_all(eps_inf_nethept, [0.1, 0.2, 0.3, 0.4, 0.5])
display_for_tex_all(eps_inf_epin, [0.1, 0.2, 0.3, 0.4, 0.5])
display_for_tex_all(eps_inf_dblp, [0.1, 0.2, 0.3, 0.4, 0.5])
display_for_tex_all(eps_inf_orkut, [0.1, 0.2, 0.3, 0.4, 0.5])

IMA:
(0.1,0.9997)
(0.2,0.9998)
(0.3,1.0)
(0.4,0.9999)
(0.5,0.9992)
IMA:
(0.1,1.0)
(0.2,0.9999)
(0.3,0.9998)
(0.4,0.9998)
(0.5,1.0)
IMA:
(0.1,0.9999)
(0.2,1.0)
(0.3,0.9999)
(0.4,1.0)
(0.5,0.9997)
IMA:
(0.1,0.9999)
(0.2,0.9999)
(0.3,0.9999)
(0.4,1.0)
(0.5,0.9999)


In [14]:
# Averaged running time of every method in BASELINE_NAMES for each dataset
# (default k_list, seed_mode and num_cand_edges).
# NOTE: this rebinds runtime_nethept, which an earlier cell assigned for the
# 10000-candidate-edge IMA run.
runtime_nethept = get_runtime_of_one_dataset("nethept", rand_seeds_nethept, baseline_names=BASELINE_NAMES)
runtime_epin = get_runtime_of_one_dataset("Epinions", RAND_SEEDS, baseline_names=BASELINE_NAMES)
runtime_dblp = get_runtime_of_one_dataset("DBLP", RAND_SEEDS, baseline_names=BASELINE_NAMES)
runtime_orkut = get_runtime_of_one_dataset("orkut", RAND_SEEDS, baseline_names=BASELINE_NAMES)
runtime_twitter = get_runtime_of_one_dataset("twitter", RAND_SEEDS, baseline_names=BASELINE_NAMES)

In [15]:
# Print the twitter running times of every method, labelled with the k values.
display_for_tex_all(runtime_twitter, [5, 10, 20, 30, 40, 50])

RAND:
(5,9.9884)
(10,11.7806)
(20,16.2521)
(30,19.319)
(40,21.4907)
(50,26.7651)
OUTDEG:
(5,29.7318)
(10,29.9397)
(20,31.1671)
(30,29.7717)
(40,30.6306)
(50,32.1122)
PROB:
(5,52.178)
(10,52.243)
(20,53.6903)
(30,52.2959)
(40,53.2776)
(50,52.6929)
SINF:
(5,22.5067)
(10,52.9484)
(20,146.2096)
(30,271.7037)
(40,447.0782)
(50,641.6552)
UINF:
(5,24.3755)
(10,57.2389)
(20,153.8611)
(30,285.171)
(40,460.8796)
(50,663.262)
AIS-U:
(5,29.7778)
(10,68.4991)
(20,177.9643)
(30,318.9664)
(40,504.696)
(50,734.8698)
IMA:
(5,28.0647)
(10,64.1785)
(20,164.5828)
(30,302.3398)
(40,490.6986)
(50,699.4426)


In [18]:
# Print the nethept IMA running times, labelled with the k values of K_LIST.
display_for_tex(runtime_nethept, "IMA", K_LIST)

(5,0.5917)
(10,2.303)
(20,9.3477)
(30,21.0046)
(40,38.5388)
(50,58.8318)


In [12]:
# Show the DBLP running times for the default baselines (SINF, UINF, AIS-U, IMA).
get_runtime_of_one_dataset("DBLP", RAND_SEEDS)

{'SINF': array([ 0.7288724,  1.898564 ,  5.600618 , 11.020008 , 18.549964 ,
        28.136332 ]),
 'UINF': array([ 1.600365 ,  3.6491004,  9.106902 , 16.295694 , 25.62202  ,
        36.99108  ]),
 'AIS-U': array([ 1.801865 ,  4.0509464,  9.903734 , 17.479322 , 27.1619   ,
        38.86684  ]),
 'IMA': array([ 1.818009 ,  4.0850864,  9.97756  , 17.612822 , 27.38072  ,
        39.31004  ])}

In [10]:
# Averaged result of every method in BASELINE_NAMES per k for each dataset;
# the orkut and twitter calls are currently disabled.
res_nethept = get_inf_spread_of_one_dataset("nethept", rand_seeds_nethept)
res_epinions = get_inf_spread_of_one_dataset("Epinions", RAND_SEEDS)
res_dblp = get_inf_spread_of_one_dataset("DBLP", RAND_SEEDS)
# res_orkut = get_inf_spread_of_one_dataset("orkut", RAND_SEEDS)
# res_twitter = get_inf_spread_of_one_dataset("twitter", RAND_SEEDS)

In [11]:
# Print the Epinions results of every method with the default labels
# (0 for the original value, then the k values).
display_for_tex_all(res_epinions)

RAND:
(0,12050.22)
(5,12053.06)
(10,12061.2)
(20,12082.0)
(30,12081.26)
(40,12081.02)
(50,12086.58)
OUTDEG:
(0,12050.22)
(5,12072.62)
(10,12089.4)
(20,12108.36)
(30,12105.86)
(40,12118.3)
(50,12141.26)
PROB:
(0,12050.22)
(5,12059.14)
(10,12068.92)
(20,12096.46)
(30,12102.1)
(40,12105.6)
(50,12110.98)
SINF:
(0,12050.22)
(5,12122.44)
(10,12207.0)
(20,12346.58)
(30,12461.46)
(40,12562.64)
(50,12655.48)
UINF:
(0,12050.22)
(5,12121.82)
(10,12197.16)
(20,12340.28)
(30,12434.8)
(40,12529.92)
(50,12621.9)
AIS-U:
(0,12050.22)
(5,12211.14)
(10,12361.68)
(20,12616.5)
(30,12846.62)
(40,13068.54)
(50,13262.12)
IMA:
(0,12050.22)
(5,12227.24)
(10,12381.96)
(20,12659.82)
(30,12893.82)
(40,13114.1)
(50,13316.74)
