In [None]:
import pandas as pd
import numpy as np
import networkx as nx
import json
import glob
import multiprocessing
import matplotlib.pyplot as plt
import os.path

In [None]:
from mult_func import get_neighbors, sample_network, pick_ego_network

In [None]:
# Every entry directly under ./Data is treated as one data source.
# glob.glob returns matches in arbitrary (filesystem-dependent) order, so the
# list is sorted to keep the processing order -- and therefore the sequence of
# seeded random draws made later in the notebook -- reproducible.
data_sources = sorted(glob.glob("./Data/*"))
data_sources

In [None]:
# Map each data source (keyed by the last component of its path) to the CSV
# files inside it whose name contains "edges".
# The per-source file lists are sorted because glob.glob returns matches in
# arbitrary order; sorting keeps positional lookups such as [0] deterministic.
ds_to_files = {
    os.path.basename(data_src): sorted(glob.glob(os.path.join(data_src, "*edges*.csv")))
    for data_src in data_sources
}
ds_to_files

In [None]:
# Fix the state of NumPy's global random generator so that the np.random.choice
# draws made further down pick the same values on every fresh run.
np.random.seed(seed=123)

In [None]:
def read_network_edges(in_source_file):
    """Load an edge list from CSV and validate its node indexing.

    The first two columns of the file are taken as the edge endpoints and are
    renamed to "Source" and "Target". Any further columns (for example edge
    weights) are kept in the returned frame but ignored by the validation.

    Parameters
    ----------
    in_source_file : str, path object or file-like object
        Anything accepted by ``pandas.read_csv``.

    Returns
    -------
    tuple
        ``(df, num_nodes, num_edges)``: the edge DataFrame, the number of
        distinct values found across both endpoint columns, and the number
        of rows in the file.

    Raises
    ------
    Exception
        If the smallest node id is not 0 or the largest node id is not
        ``num_nodes - 1``.
    """
    raw_df = pd.read_csv(in_source_file)
    # Non-inplace rename: the frame read from disk is left untouched.
    df = raw_df.rename(columns={raw_df.columns[0]: "Source", raw_df.columns[1]: "Target"})

    num_edges = df.shape[0]
    num_nodes = len(set(df["Source"].unique()).union(df["Target"].unique()))

    # Only the endpoint columns carry node ids; including extra columns in the
    # min/max would make the check depend on unrelated data.
    endpoints = df[["Source", "Target"]]
    smallest_id = endpoints.min().min()
    largest_id = endpoints.max().max()

    # For an empty file both values are NaN, every comparison is False, and
    # the function raises as it does for any other incompatible indexing.
    if smallest_id == 0 and largest_id == num_nodes - 1:
        print(f"Nodes indexed 0 to {num_nodes - 1}")
    else:
        print("ERROR: Node index is incompatible")
        raise Exception("ERROR: Node index is incompatible")
    return df, num_nodes, num_edges

In [None]:
def generate_samples(in_edges_df, in_num_nodes, in_num_networks, in_sampled_net_size, in_data_file_name=None):
    """Sample several sub-networks from one edge list and summarise them.

    Distinct root ids are drawn with ``np.random.choice`` from
    ``range(in_num_nodes)`` and ``sample_network(root, in_edges_df,
    in_sampled_net_size)`` is run for each root in a process pool. The
    results are collected into a DataFrame, a CSV summary and a histogram of
    the "cc_mean" column are written under ``./outputs``, and the histogram
    is shown.

    Parameters
    ----------
    in_edges_df : pandas.DataFrame
        Edge list handed unchanged to ``sample_network``.
    in_num_nodes : int
        Upper bound (exclusive) of the root ids to draw from.
    in_num_networks : int
        Number of roots, and therefore samples, to draw. Must not exceed
        ``in_num_nodes`` because roots are drawn without replacement.
    in_sampled_net_size : int
        Size argument handed to ``sample_network``; also used in the output
        file names and the plot title.
    in_data_file_name : str, optional
        Label used in the output file names. When omitted, the notebook-level
        variable ``data_file_name`` is used, which keeps existing
        four-argument calls working.

    Returns
    -------
    pandas.DataFrame
        One row per sample with columns "root", "cc_mean", "cc_std", "nodes"
        and "G", indexed by "idx".
        NOTE(review): the meaning of each column is defined by
        ``sample_network`` in ``mult_func`` -- confirm there.
    """
    if in_data_file_name is None:
        # Backward-compatible fallback to the notebook global set by the caller.
        in_data_file_name = data_file_name

    # The CSV and PNG writes below fail if the directory is missing.
    os.makedirs("./outputs", exist_ok=True)

    roots = np.random.choice(in_num_nodes, size=in_num_networks, replace=False)
    param_list = ((r, in_edges_df, in_sampled_net_size) for r in roots)

    # Leave one core free, but never ask for zero workers: Pool(0) raises
    # ValueError on single-core machines.
    num_workers = max(1, multiprocessing.cpu_count() - 1)
    with multiprocessing.Pool(num_workers) as P:
        net_list = P.starmap(sample_network, param_list)

    net_list = pd.DataFrame(net_list, columns=["root","cc_mean","cc_std", "nodes", "G"])
    net_list.index.name = "idx"
    # File names are built from this function's own arguments rather than from
    # whatever `net_size` happens to be in the notebook namespace.
    net_list[["root","cc_mean","cc_std"]].to_csv(f"./outputs/info_{in_data_file_name}_{in_sampled_net_size}.csv")

    net_list["cc_mean"].hist()
    plt.title("#nodes:{} #samples:{} , cc_mean ~ N({}, {})".format(in_sampled_net_size, in_num_networks, net_list["cc_mean"].mean(), net_list["cc_mean"].std()))
    plt.xlabel("Mean Clustering Coefficient")
    plt.ylabel("Count")
    plt.savefig(f"./outputs/cchist_{in_data_file_name}_{in_sampled_net_size}.png")
    plt.show()

    return net_list

In [None]:
def check_isomorphism(in_net_list):
    """Find pairs of sampled graphs that might be isomorphic.

    Every unordered pair of rows in ``in_net_list`` is run through three
    networkx heuristics in turn: ``faster_could_be_isomorphic``,
    ``fast_could_be_isomorphic`` and ``could_be_isomorphic``. A pair is
    dropped at the first heuristic that rejects it, and a progress line is
    printed for each heuristic it passes.

    Parameters
    ----------
    in_net_list : pandas.DataFrame
        Frame with a "G" column holding the graphs to compare.

    Returns
    -------
    list of [int, int]
        Positional row indices ``[idxA, idxB]`` (with ``idxA < idxB``) of the
        pairs that passed all three heuristics.
    """
    candidate_pairs = []
    total = in_net_list.shape[0]

    for first in range(total):
        for second in range(first + 1, total):
            left = in_net_list.iloc[first]["G"]
            right = in_net_list.iloc[second]["G"]

            if not nx.faster_could_be_isomorphic(left, right):
                continue
            print("Degree Sequences are equal: ", first, second)

            if not nx.fast_could_be_isomorphic(left, right):
                continue
            print("Triangle Sequences are equal: ", first, second)

            if not nx.could_be_isomorphic(left, right):
                continue
            print("Clique Sequences are equal: ", first, second)

            # No exact nx.is_isomorphic test is run here; pairs that survive
            # the three heuristics are reported as candidates only.
            candidate_pairs.append([first, second])

    return candidate_pairs

In [None]:
# Number of sub-networks sampled for every (data file, network size) pair.
num_networks_per_type = 2

# All artefacts are written below ./outputs; create it up front so a fresh
# checkout does not fail on the first write.
os.makedirs("./outputs", exist_ok=True)

for data_src in ds_to_files:
    for data_file in ds_to_files[data_src]:
        print(data_file)
        # File name without its extension; used as a label in output names.
        # Kept as a notebook-level variable because generate_samples may read
        # it when building its own output file names.
        data_file_name = os.path.splitext(os.path.basename(data_file))[0]
        # Read the file currently being iterated over, so that each output is
        # derived from the file it is named after.
        df, num_nodes, num_edges = read_network_edges(data_file)
        for net_size in [250, 500, 1000]:
            net_list = generate_samples(df, num_nodes, num_networks_per_type, net_size)
            # Store each sampled graph as a plain "source,target" edge list.
            for idx in range(net_list.shape[0]):
                nx.write_edgelist(net_list.iloc[idx]["G"], f"./outputs/edges_{data_file_name}_{net_size}_{idx}.csv", delimiter=",", data=False)