In [None]:
import glob
import pandas as pd
from tqdm.notebook import tqdm
datadir = "./MAG_L2_Similarity/"
# Collect the per-network result tables. A single file name can satisfy both
# glob patterns, so the union is de-duplicated to avoid loading the same rows
# twice, and sorted so the load order does not depend on directory order.
l2_nmi_files = sorted(
    set(glob.glob(datadir + "*_BC_CC*.tsv")) | set(glob.glob(datadir + "*DC*.tsv"))
)
print(l2_nmi_files)
# ignore_index gives the combined frame one unique 0..n-1 index instead of
# repeating every file's own row numbers.
df_l2_nmi = pd.concat(
    (pd.read_csv(f, sep="\t") for f in l2_nmi_files),
    ignore_index=True,
)


In [None]:
# Display the combined table as the cell output for a quick visual check.
df_l2_nmi

In [None]:
def lst_elem_to_float(lst):
    """Return a new list containing ``float(x)`` for every element of ``lst``."""
    return list(map(float, lst))

def cm2inch(value):
    """Convert a length given in centimetres to inches (divides by 2.54)."""
    centimetres_per_inch = 2.54
    return value / centimetres_per_inch

def calc_granularity_clusters(cluster_list):
    """Return the granularity of a clustering.

    Granularity is computed as the total number of members across all clusters
    divided by the sum of the squared cluster sizes.

    Args:
        cluster_list: Iterable of clusters, where each cluster is a sized
            collection of node indexes (position in the iterable = cluster index).

    Returns:
        float: ``sum(n_i) / sum(n_i ** 2)`` over the cluster sizes ``n_i``.

    Raises:
        ZeroDivisionError: If there are no clusters or every cluster is empty.
    """
    # Materialise the sizes once so both sums use the same single pass; this
    # also lets callers hand in a generator instead of a list.
    sizes = [len(members) for members in cluster_list]
    return sum(sizes) / sum(size ** 2 for size in sizes)

def calc_new_gran(row):
    """Recompute the granularity of the clustering that a result row refers to.

    The row's ``NetworkType``, ``L0FOS``, ``LayerRatio`` and ``ResParm`` values
    are combined into the path of a ``.clu`` file under ``./Cluster_out/``. That
    file is read as tab-separated ``PaperId``/``ClusterId`` pairs, grouped into
    one member list per cluster, and passed to ``calc_granularity_clusters``.

    Args:
        row: Mapping-like record (for example a DataFrame row). ``LayerRatio``
            may be the string form of a sequence (parsed with
            ``ast.literal_eval``) or an already-parsed sequence of numbers.

    Returns:
        The granularity value, or ``-1`` when the cluster file cannot be read or
        processed; in that case the row's identifying values and the error are
        printed.
    """
    import ast  # local import so the function does not depend on cell order

    header = row["NetworkType"]
    raw_layers = row["LayerRatio"]
    # Only strings need parsing; a sequence is used as-is so the function keeps
    # working after the LayerRatio column has been converted to lists.
    r_layers = ast.literal_eval(raw_layers) if isinstance(raw_layers, str) else raw_layers

    fosname = row["L0FOS"]
    res = row["ResParm"]
    try:
        # A real list (not a one-shot generator) so the values are still
        # printable in the diagnostics below.
        r_layers = [float(x) for x in r_layers]
        ratio_part = "_".join(format(x, ".2f") for x in r_layers)
        infile_fname = (
            "./Cluster_out/" + header + "_" + fosname + "_r_" + ratio_part
            + "_" + format(res, ".2f") + ".clu"
        )
        df_cluster = pd.read_csv(
            infile_fname, sep="\t", header=None, names=["PaperId", "ClusterId"]
        )

        cluster_list = df_cluster.groupby("ClusterId")["PaperId"].apply(list).tolist()
        return calc_granularity_clusters(cluster_list)

    except Exception as err:
        # Best effort: report the failing row and fall back to the -1 sentinel.
        # `Exception` (not a bare except) lets KeyboardInterrupt/SystemExit through.
        print(header, fosname,
              r_layers, res, type(r_layers), type(row["LayerRatio"]), repr(err))
        return -1

In [None]:
import ast
# Register tqdm with pandas so that DataFrame.progress_apply is available.
tqdm.pandas()

# Row-wise pass: calc_new_gran receives each row and yields its new_gran value.
df_l2_nmi["new_gran"] = df_l2_nmi.progress_apply(calc_new_gran, axis=1)

In [None]:
# Write the current table as TSV, leaving out the index column.
df_l2_nmi.to_csv(
    "./MAG_L2_Similarity/20250602_NEW_Granularity.tsv",
    sep="\t",
    index=False,
)

# Distinct L0FOS values; the plotting cells draw one panel per entry.
DF_FOS_LIST = df_l2_nmi["L0FOS"].unique()

In [None]:
# Shell out to `head` to print the first lines of the exported TSV.
!head ./MAG_L2_Similarity/20250602_NEW_Granularity.tsv

In [None]:
# Show only the rows whose NetworkType is the BC/CC hybrid.
df_l2_nmi.loc[df_l2_nmi["NetworkType"] == "NS-BC-CC-Hybrid"]

In [None]:
# Convert LayerRatio into a list of floats per row. String values are parsed
# with ast.literal_eval; values that are already sequences are only re-cast,
# so running this cell a second time does not fail on its own output.
df_l2_nmi["LayerRatio"] = df_l2_nmi["LayerRatio"].map(
    lambda x: [float(i) for i in (ast.literal_eval(x) if isinstance(x, str) else x)]
)

In [None]:
from matplotlib import pyplot as plt


# Figure 2: L2NMI plotted against new_gran, one panel per entry of DF_FOS_LIST.
# Each curve is described as (NetworkType, LayerRatio as tuple, legend label);
# curves are drawn in this order, which also fixes their colours.
fig2_curves = [
    ("NS-BC-CC-Hybrid", (0, 0.5, 0.5), "BC-CC-Hybrid (Balanced)"),
    ("NS-BC-CC-Hybrid", (0, 0.1, 0.9), "BC-CC-Hybrid (BC Dominant)"),
    ("NS-BC-CC-Hybrid", (0, 0.9, 0.1), "BC-CC-Hybrid (CC Dominant)"),
    ("NS-CC", (0, 1, 0), "Co-citation"),
    ("NS-BC", (0, 0, 1), "Bibliographic Coupling"),
]
# Hand-set y-range for each panel, in panel order.
fig2_ylims = [(0.1, 0.4), (0.15, 0.42), (0.05, 0.4), (0.1, 0.38), (0.15, 0.45), (0.14, 0.4)]
font_size = 14

fig, axes = plt.subplots(2, 3, figsize=(cm2inch(19) * 2, cm2inch(11) * 2))
axes = axes.flatten()

# Tuples are hashable, which the isin() membership test below relies on.
ratio_tuples = df_l2_nmi["LayerRatio"].map(tuple)

for idx, FOS in enumerate(DF_FOS_LIST):
    now_ax = axes[idx]
    now_ax.set_xscale("log")
    in_field = df_l2_nmi["L0FOS"] == FOS
    for net_type, ratio, legend_label in fig2_curves:
        chosen = (df_l2_nmi["NetworkType"] == net_type) & ratio_tuples.isin([ratio]) & in_field
        curve = df_l2_nmi[chosen].set_index("new_gran")["L2NMI"]
        curve.plot(label=legend_label, ax=now_ax)
    now_ax.legend(frameon=False)
    now_ax.set_title(FOS, size=font_size)
    now_ax.set_ylabel("NMI", size=font_size)
    now_ax.set_xlabel("Granularity", size=font_size)

# The first panel gets its legend pinned to the upper-left corner.
axes[0].legend(loc="upper left", frameon=False)

# Panel tags "(a)" .. "(p)"; only as many as there are axes get used.
labellist = [f"({letter})" for letter in "abcdefghijklmnop"]
tick_positions = [0.1 * i for i in range(10)]
for index, ax in enumerate(fig.get_axes()):
    for axis_label in (ax.yaxis.label, ax.xaxis.label):
        axis_label.set_fontsize(font_size)
    for which in ("x", "y"):
        ax.tick_params(axis=which, labelsize=font_size)
    # Tag is placed in axes coordinates, just outside the top-left corner.
    ax.text(-0.2, 1.05, labellist[index], fontsize=font_size, weight='bold', transform=ax.transAxes)
    ax.set_yticks(tick_positions)

for ax, (y_low, y_high) in zip(axes, fig2_ylims):
    ax.set_ylim(y_low, y_high)

plt.tight_layout()
plt.savefig("Figure2_regran_test_20250602.png")
# plt.savefig("Figure2.pdf")  # PDF export is currently disabled
plt.show()
plt.close()

In [None]:
# Figure 4: L2NMI plotted against new_gran (linear axes), one panel per entry
# of DF_FOS_LIST. Each curve is described as
# (NetworkType, LayerRatio as tuple, legend label, apply sort_values first);
# curves are drawn in this order, which also fixes their colours.
fig4_curves = [
    ("NS-BC-CC-Hybrid", (0, 0.5, 0.5), "BC-CC (Balanced)", False),
    ("NS-BC-CC-Hybrid", (0, 0.1, 0.9), "BC-CC (BC Dom.)", False),
    ("NS-BC-CC-Hybrid", (0, 0.9, 0.1), "BC-CC (CC Dom.)", False),
    ("NS-BC-CC-DC-Hybrid-T1", (1/3, 1/3, 1/3), "BC-CC-DC T1 (Balanced)", True),
    ("NS-BC-CC-DC-Hybrid-T1", (1/10, 9/20, 9/20), "BC-CC-DC T1 (BC-CC Dom.)", True),
    ("NS-BC-CC-DC-Hybrid-T2", (0, 1/3, 1/3, 1/3), "BC-CC-DC T2 (Balanced)", True),
]
# Hand-set y-range for each panel, in panel order.
fig4_ylims = [(0.06, 0.5), (0.15, 0.42), (0.05, 0.4), (0.1, 0.38), (0.15, 0.45), (0.14, 0.4)]
font_size = 14

fig, axes = plt.subplots(2, 3, figsize=(cm2inch(19) * 2, cm2inch(11) * 2))
axes = axes.flatten()

# Tuples are hashable, which the isin() membership test below relies on.
ratio_tuples = df_l2_nmi["LayerRatio"].map(tuple)

for idx, FOS in enumerate(DF_FOS_LIST):
    now_ax = axes[idx]
    in_field = df_l2_nmi["L0FOS"] == FOS
    for net_type, ratio, legend_label, sort_first in fig4_curves:
        chosen = (df_l2_nmi["NetworkType"] == net_type) & ratio_tuples.isin([ratio]) & in_field
        curve = df_l2_nmi[chosen].set_index("new_gran")["L2NMI"]
        if sort_first:
            # NOTE(review): sort_values() orders points by L2NMI, not by the
            # new_gran index -- confirm sort_index() was not intended.
            curve = curve.sort_values()
        curve.plot(label=legend_label, ax=now_ax)
    now_ax.legend(frameon=False)
    now_ax.set_title(FOS, size=font_size)
    now_ax.set_ylabel("NMI", size=font_size)
    now_ax.set_xlabel("Granularity", size=font_size)

# Panel tags "(a)" .. "(p)"; only as many as there are axes get used.
labellist = [f"({letter})" for letter in "abcdefghijklmnop"]
tick_positions = [0.1 * i for i in range(10)]
for index, ax in enumerate(fig.get_axes()):
    for axis_label in (ax.yaxis.label, ax.xaxis.label):
        axis_label.set_fontsize(font_size)
    for which in ("x", "y"):
        ax.tick_params(axis=which, labelsize=font_size)
    # Tag is placed in axes coordinates, just outside the top-left corner.
    ax.text(-0.2, 1.05, labellist[index], fontsize=font_size, weight='bold', transform=ax.transAxes)
    ax.set_yticks(tick_positions)

for ax, (y_low, y_high) in zip(axes, fig4_ylims):
    ax.set_ylim(y_low, y_high)

plt.tight_layout()
plt.savefig("Figure4_regran_test_20250602.png")
# plt.savefig("Figure4.pdf")  # PDF export is currently disabled
plt.show()
plt.close()

In [None]:
# Figure 6: L2NMI plotted against new_gran on log-log axes, one panel per entry
# of DF_FOS_LIST. Each curve is described as
# (NetworkType, LayerRatio as tuple, legend label, apply sort_values first);
# curves are drawn in this order, which also fixes their colours.
fig6_curves = [
    ("DC-T1", (1, 0, 0), "DC-T1", False),
    ("DC-T2", (0, 0, 0, 1), "DC-T2", False),
    ("NS-BC-CC-DC-Hybrid-T1", (1/3, 1/3, 1/3), "Hybrid T1 (Balanced)", True),
    ("NS-BC-CC-DC-Hybrid-T1", (1/10, 9/20, 9/20), "Hybrid T1 (BC-CC Dom.)", True),
    ("NS-BC-CC-DC-Hybrid-T2", (0, 1/3, 1/3, 1/3), "Hybrid T2 (Balanced)", True),
]
# Hand-set y-range for each panel, in panel order.
# NOTE(review): the third range starts at 0.0 while the y-axis is log-scaled;
# confirm how matplotlib treats that lower limit.
fig6_ylims = [(0.06, 0.5), (0.06, 0.48), (0.0, 0.43), (0.05, 0.4), (0.08, 0.45), (0.08, 0.42)]
font_size = 14

fig, axes = plt.subplots(2, 3, figsize=(cm2inch(19) * 2, cm2inch(11) * 2))
axes = axes.flatten()

# Tuples are hashable, which the isin() membership test below relies on.
ratio_tuples = df_l2_nmi["LayerRatio"].map(tuple)

for idx, FOS in enumerate(DF_FOS_LIST):
    now_ax = axes[idx]
    now_ax.set_xscale("log")
    now_ax.set_yscale("log")
    in_field = df_l2_nmi["L0FOS"] == FOS
    for net_type, ratio, legend_label, sort_first in fig6_curves:
        chosen = (df_l2_nmi["NetworkType"] == net_type) & ratio_tuples.isin([ratio]) & in_field
        curve = df_l2_nmi[chosen].set_index("new_gran")["L2NMI"]
        if sort_first:
            # NOTE(review): sort_values() orders points by L2NMI, not by the
            # new_gran index -- confirm sort_index() was not intended.
            curve = curve.sort_values()
        curve.plot(label=legend_label, ax=now_ax)
    now_ax.legend(frameon=False)
    now_ax.set_title(FOS, size=font_size)
    now_ax.set_ylabel("NMI", size=font_size)
    now_ax.set_xlabel("Granularity", size=font_size)

# The first panel gets its legend pinned to the upper-left corner.
axes[0].legend(loc="upper left", frameon=False)

# Panel tags "(a)" .. "(p)"; only as many as there are axes get used.
labellist = [f"({letter})" for letter in "abcdefghijklmnop"]
# NOTE(review): the tick list includes 0.0, which sits on a log-scaled axis here.
tick_positions = [0.1 * i for i in range(10)]
for index, ax in enumerate(fig.get_axes()):
    for axis_label in (ax.yaxis.label, ax.xaxis.label):
        axis_label.set_fontsize(font_size)
    for which in ("x", "y"):
        ax.tick_params(axis=which, labelsize=font_size)
    # Tag is placed in axes coordinates, just outside the top-left corner.
    ax.text(-0.2, 1.05, labellist[index], fontsize=font_size, weight='bold', transform=ax.transAxes)
    ax.set_yticks(tick_positions)

plt.tight_layout()

for ax, (y_low, y_high) in zip(axes, fig6_ylims):
    ax.set_ylim(y_low, y_high)

# Second layout pass after the y-ranges have been changed.
plt.tight_layout()
plt.savefig("Figure6_regran_test_20250602.png")
# plt.savefig("Figure6.pdf")  # PDF export is currently disabled
plt.show()
plt.close()