In [None]:
import ast
import csv
import glob
import itertools
import math
from collections import Counter
from collections import defaultdict
from multiprocessing import Pool

import clusim.sim as sim
import numpy as np
import pandas as pd
from clusim.clustering import Clustering, print_clustering
from matplotlib import pyplot as plt
from matplotlib.patches import Rectangle
from tqdm.notebook import tqdm

In [None]:
def lst_elem_to_float(lst):
    """Return a new list holding ``float(x)`` for every element ``x`` of ``lst``."""
    return list(map(float, lst))

def cm2inch(value):
    """Convert a length in centimetres to inches (1 inch = 2.54 cm)."""
    centimetres_per_inch = 2.54
    return value / centimetres_per_inch

In [None]:
# Load the tab-separated NMI table that the figure cells below filter and plot.
quantile_df = pd.read_csv(
    "../MAG_L2_Similarity/20201028_NMI_BY_CITATION_INFILED_QUANTILE_RE.tsv",
    sep="\t",
)

In [None]:
# Show the distinct `net_type` values present in the table (rendered as the cell output).
quantile_df["net_type"].unique()

In [None]:
def stup_to_ftup(tup):
    """Parse a stringified tuple of numbers into a tuple of floats.

    Parameters
    ----------
    tup : str
        Text of a Python tuple/list literal of numbers, e.g. ``"(0, 0.33, 0.33, 0.33)"``.

    Returns
    -------
    tuple of float
        The parsed elements, each converted with ``float``.

    Raises
    ------
    ValueError, SyntaxError
        If ``tup`` is not a valid Python literal.
    """
    # literal_eval only accepts literals, so text coming from the data file can
    # never execute code the way a bare eval() would.
    parsed = ast.literal_eval(tup)
    return tuple(float(x) for x in parsed)

In [None]:
# Alternative to the hard-coded list below: take every field of study found in the data.
# DF_FOS_LIST = quantile_df["field_name"].unique()

In [None]:
# Fields of study to plot; the order here is the top-to-bottom row order of each figure.
DF_FOS_LIST = np.array(
    [
        "Sociology",
        "Computer science",
        "Economics",
        "Materials science",
        "Mathematics",
        "Physics",
    ]
)

In [None]:
# Figure7-SUP1: NMI against granularity for the DC-T1 and DC-T2 networks,
# one row of panels per field of study and one column per `pct_lb` value.
fig, axes = plt.subplots(6, 4, figsize=(cm2inch(19) * 2, cm2inch(21) * 2))
pct_lb_list = [0, 0.25, 0.5, 0.75]

# `r_layer` is stored as text; parse the column once here rather than once per
# curve per panel inside the loops.
r_layer = quantile_df["r_layer"].map(stup_to_ftup)

# (legend label, net_type, layer weights) for each curve drawn in every panel.
curve_specs = [
    ("DC-T1", "DC-T1", (1, 0, 0)),
    ("DC-T2", "DC-T2", (0, 0, 0, 1)),
]
# Pre-filter on the panel-independent conditions; only field and pct_lb vary per panel.
curves = [
    (label, quantile_df[(quantile_df["net_type"] == net_type) & r_layer.isin([weights])])
    for label, net_type, weights in curve_specs
]

for row, FOS in enumerate(DF_FOS_LIST):
    for col, lb in enumerate(pct_lb_list):
        now_ax = axes[row, col]
        now_ax.set_xscale("log")
        for label, curve_df in curves:
            panel_df = curve_df[(curve_df["field_name"] == FOS) & (curve_df["pct_lb"] == lb)]
            # Index by granularity and sort so the line is drawn in x order.
            panel_df.set_index("granularity").sort_index()["nmi"].plot(label=label, ax=now_ax)
        now_ax.legend(frameon=False)
        now_ax.set_ylabel("NMI", size=14)
        now_ax.set_xlabel("Granularity", size=14)
        now_ax.tick_params(axis='x', labelsize=14)
        now_ax.tick_params(axis='y', labelsize=14)

# Column headers, positioned in figure coordinates above each column of panels.
for col, header in enumerate(["Q4", "Q3", "Q2", "Q1"]):
    fig.text(0.165 + 0.245 * col, 0.99, header, transform=fig.transFigure, size=18, ha="center", va="center")

# Row headers: the field name, rotated, along the left edge of the figure.
for idx, FOS in enumerate(DF_FOS_LIST):
    fig.text(0.01, 0.92 - idx*2*0.0815 , FOS, transform=fig.transFigure, size=18, ha="center", va="center", rotation=90)

# Bold letter tag at the top-left of the first panel in each row.
labellist = ["(a)", "(b)", "(c)", "(d)", "(e)", "(f)", "(g)", "(h)", "(i)", "(j)", "(k)", "(l)", "(m)", "(n)", "(o)", "(p)"]
for row, FOS in enumerate(DF_FOS_LIST):
    axes[row, 0].text(-0.2, 1.05, labellist[row], fontsize=14, weight='bold', transform=axes[row, 0].transAxes)

# Leave a margin on the left and top for the row and column headers.
fig.tight_layout(rect=[0.01,0,1,0.99])
fig.savefig("Figure7-SUP1.png")
fig.savefig("Figure7-SUP1.pdf")
fig.show()

In [None]:
# Figure7-SUP2: NMI against granularity for the Hybrid-T1 and Hybrid-T2 networks,
# one row of panels per field of study and one column per `pct_lb` value.
fig, axes = plt.subplots(6, 4, figsize=(cm2inch(19) * 2, cm2inch(21) * 2))
pct_lb_list = [0, 0.25, 0.5, 0.75]

# Light grey horizontal bands in figure coordinates, one per row of panels.
for i in range(6):
    fig.patches.extend([plt.Rectangle((0,0.01 + i*0.1628), 1, 0.159,
                                  fill=True, color='grey', alpha=0.1,
                                  transform=fig.transFigure, figure=fig)])

# `r_layer` is stored as text; parse the column once here rather than once per
# curve per panel inside the loops.
r_layer = quantile_df["r_layer"].map(stup_to_ftup)

# (legend label, net_type, layer weights) for each curve drawn in every panel.
curve_specs = [
    ("Hybrid-T1", "NS-BC-CC-DC-Hybrid-T1", (0.33, 0.33, 0.33)),
    ("Hybrid-T2", "NS-BC-CC-DC-Hybrid-T2", (0, 0.33, 0.33, 0.33)),
]
# Pre-filter on the panel-independent conditions; only field and pct_lb vary per panel.
curves = [
    (label, quantile_df[(quantile_df["net_type"] == net_type) & r_layer.isin([weights])])
    for label, net_type, weights in curve_specs
]

for row, FOS in enumerate(DF_FOS_LIST):
    for col, lb in enumerate(pct_lb_list):
        now_ax = axes[row, col]
        now_ax.set_xscale("log")
        for label, curve_df in curves:
            panel_df = curve_df[(curve_df["field_name"] == FOS) & (curve_df["pct_lb"] == lb)]
            # Index by granularity and sort so the line is drawn in x order.
            panel_df.set_index("granularity").sort_index()["nmi"].plot(label=label, ax=now_ax)
        now_ax.legend(frameon=False)
        now_ax.set_ylabel("NMI", size=14)
        now_ax.set_xlabel("Granularity", size=14)
        now_ax.tick_params(axis='x', labelsize=14)
        now_ax.tick_params(axis='y', labelsize=14)

# Column headers, positioned in figure coordinates above each column of panels.
for col, header in enumerate(["Q4", "Q3", "Q2", "Q1"]):
    fig.text(0.165 + 0.245 * col, 0.99, header, transform=fig.transFigure, size=18, ha="center", va="center")

# Row headers: the field name, rotated, along the left edge of the figure.
for idx, FOS in enumerate(DF_FOS_LIST):
    fig.text(0.01, 0.903 - idx*2*0.0815 , FOS, transform=fig.transFigure, size=18, ha="center", va="center", rotation=90)

# Bold letter tag at the top-left of the first panel in each row.
labellist = ["(a)", "(b)", "(c)", "(d)", "(e)", "(f)", "(g)", "(h)", "(i)", "(j)", "(k)", "(l)", "(m)", "(n)", "(o)", "(p)"]
for row, FOS in enumerate(DF_FOS_LIST):
    axes[row, 0].text(-0.2, 1.03, labellist[row], fontsize=14, weight='bold', transform=axes[row, 0].transAxes)

# Leave a margin on the left and top for the row and column headers.
fig.tight_layout(rect=[0.01,0,1,0.99])
fig.savefig("Figure7-SUP2.png")
fig.savefig("Figure7-SUP2.pdf")
fig.show();

In [None]:
# Figure7-SUP2-BIS1: same Hybrid-T1 / Hybrid-T2 curves as Figure7-SUP2, drawn over
# coloured column and row bands.
fig, axes = plt.subplots(6, 4, figsize=(cm2inch(19) * 2, cm2inch(21) * 2))
pct_lb_list = [0, 0.25, 0.5, 0.75]

# Green vertical bands (one per column of panels) with the column header above each.
for i in range(4):
    fig.patches.extend([plt.Rectangle((0.02 + i*0.245, 0), 0.235, 1,
                                  fill=True, color='green', alpha=0.2, zorder=0,
                                  transform=fig.transFigure, figure=fig)])
    fig.text(0.14 + 0.247 * i, 0.99, f"Q{4-i}", transform=fig.transFigure, size=18, ha="center", va="center")

# Purple horizontal bands, one per row of panels.
for i in range(6):
    fig.patches.extend([plt.Rectangle((0,0.009 + i*0.1628), 1, 0.158,
                                  fill=True, color='purple', alpha=0.2, zorder=0,
                                  transform=fig.transFigure, figure=fig)])

# `r_layer` is stored as text; parse the column once here rather than once per
# curve per panel inside the loops.
r_layer = quantile_df["r_layer"].map(stup_to_ftup)

# (legend label, net_type, layer weights) for each curve drawn in every panel.
curve_specs = [
    ("Hybrid-T1", "NS-BC-CC-DC-Hybrid-T1", (0.33, 0.33, 0.33)),
    ("Hybrid-T2", "NS-BC-CC-DC-Hybrid-T2", (0, 0.33, 0.33, 0.33)),
]
# Pre-filter on the panel-independent conditions; only field and pct_lb vary per panel.
curves = [
    (label, quantile_df[(quantile_df["net_type"] == net_type) & r_layer.isin([weights])])
    for label, net_type, weights in curve_specs
]

for row, FOS in enumerate(DF_FOS_LIST):
    for col, lb in enumerate(pct_lb_list):
        now_ax = axes[row, col]
        now_ax.set_xscale("log")
        for label, curve_df in curves:
            panel_df = curve_df[(curve_df["field_name"] == FOS) & (curve_df["pct_lb"] == lb)]
            # Index by granularity and sort so the line is drawn in x order.
            panel_df.set_index("granularity").sort_index()["nmi"].plot(label=label, ax=now_ax)
        now_ax.legend(frameon=False)
        now_ax.set_ylabel("NMI", size=14)
        now_ax.set_xlabel("Granularity", size=14)
        now_ax.tick_params(axis='x', labelsize=14)
        now_ax.tick_params(axis='y', labelsize=14)
        # White panel background, raised above the coloured bands.
        now_ax.set_facecolor("w")
        now_ax.set_zorder(1000)

# Row headers: the field name, rotated, along the left edge of the figure.
for idx, FOS in enumerate(DF_FOS_LIST):
    fig.text(0.01, 0.903 - idx*2*0.0815 , FOS, transform=fig.transFigure, size=18, ha="center", va="center", rotation=90)

# Bold letter tag at the top-left of the first panel in each row.
labellist = ["(a)", "(b)", "(c)", "(d)", "(e)", "(f)", "(g)", "(h)", "(i)", "(j)", "(k)", "(l)", "(m)", "(n)", "(o)", "(p)"]
for row, FOS in enumerate(DF_FOS_LIST):
    axes[row, 0].text(-0.2, 1.05, labellist[row], fontsize=14, weight='bold', transform=axes[row, 0].transAxes)

# Leave a margin on the left and top for the row and column headers.
fig.tight_layout(rect=[0.01,0,1,0.99])
fig.savefig("Figure7-SUP2-BIS1.png")
fig.savefig("Figure7-SUP2-BIS1.pdf")
fig.show();

In [None]:
# Figure7: NMI against granularity for the DC-T1, DC-T2, Hybrid-T1 and Hybrid-T2
# networks, one row of panels per field of study and one column per `pct_lb` value.
fig, axes = plt.subplots(6, 4, figsize=(cm2inch(19) * 2, cm2inch(21) * 2))
pct_lb_list = [0, 0.25, 0.5, 0.75]

# Orange vertical bands (one per column of panels) with the column header above each.
for i in range(4):
    fig.patches.extend([plt.Rectangle((0.02 + i*0.245, 0), 0.235, 1,
                                  fill=True, color='orange', alpha=0.25, zorder=0,
                                  transform=fig.transFigure, figure=fig)])
    fig.text(0.14 + 0.247 * i, 0.99, f"Q{4-i}", transform=fig.transFigure, size=18, ha="center", va="center")

# Blue horizontal bands, one per row of panels, layered above the column bands.
for i in range(6):
    fig.patches.extend([plt.Rectangle((0,0.009 + i*0.1628), 1, 0.158,
                                  fill=True, color='blue', alpha=0.2, zorder=1,
                                  transform=fig.transFigure, figure=fig)])

# `r_layer` is stored as text; parse the column once here rather than once per
# curve per panel inside the loops.
r_layer = quantile_df["r_layer"].map(stup_to_ftup)

# (legend label, net_type, layer weights) for each curve, in plotting order.
curve_specs = [
    ("DC-T1", "DC-T1", (1, 0, 0)),
    ("DC-T2", "DC-T2", (0, 0, 0, 1)),
    ("Hybrid-T1", "NS-BC-CC-DC-Hybrid-T1", (0.33, 0.33, 0.33)),
    ("Hybrid-T2", "NS-BC-CC-DC-Hybrid-T2", (0, 0.33, 0.33, 0.33)),
]
# Pre-filter on the panel-independent conditions; only field and pct_lb vary per panel.
curves = [
    (label, quantile_df[(quantile_df["net_type"] == net_type) & r_layer.isin([weights])])
    for label, net_type, weights in curve_specs
]

for row, FOS in enumerate(DF_FOS_LIST):
    for col, lb in enumerate(pct_lb_list):
        now_ax = axes[row, col]
        now_ax.set_xscale("log")
        for label, curve_df in curves:
            panel_df = curve_df[(curve_df["field_name"] == FOS) & (curve_df["pct_lb"] == lb)]
            # Index by granularity and sort so the line is drawn in x order.
            panel_df.set_index("granularity").sort_index()["nmi"].plot(label=label, ax=now_ax)
        now_ax.legend(frameon=False, fontsize=9)
        now_ax.set_ylabel("NMI", size=14)
        now_ax.set_xlabel("Granularity", size=14)
        now_ax.tick_params(axis='x', labelsize=14)
        now_ax.tick_params(axis='y', labelsize=14)
        # White panel background, raised above the coloured bands.
        now_ax.set_facecolor("w")
        now_ax.set_zorder(1000)

# Row headers: the field name, rotated, along the left edge of the figure.
for idx, FOS in enumerate(DF_FOS_LIST):
    fig.text(0.01, 0.903 - idx*2*0.0815 , FOS, transform=fig.transFigure, size=18, ha="center", va="center", rotation=90)

# Bold letter tag at the top-left of the first panel in each row.
labellist = ["(a)", "(b)", "(c)", "(d)", "(e)", "(f)", "(g)", "(h)", "(i)", "(j)", "(k)", "(l)", "(m)", "(n)", "(o)", "(p)"]
for row, FOS in enumerate(DF_FOS_LIST):
    axes[row, 0].text(-0.2, 1.05, labellist[row], fontsize=14, weight='bold', transform=axes[row, 0].transAxes)

# Leave a margin on the left and top for the row and column headers.
fig.tight_layout(rect=[0.01,0,1,0.99])
fig.savefig("Figure7.png")
fig.savefig("Figure7.pdf")
fig.show();