In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import networkx as nx
from tqdm.notebook import tqdm
import gc
import pickle
import random
import warnings
import matplotlib.patches as patches
from scipy.stats import pearsonr, spearmanr, kendalltau
from scipy import stats
import matplotlib

In [None]:
def load_file(file):
    """Load and return the object stored in a pickle file.

    Parameters
    ----------
    file : str or path-like
        Path of the pickle file to read.

    Returns
    -------
    object
        The unpickled object.

    Notes
    -----
    ``pickle.load`` can execute arbitrary code, so only use this on files
    that come from a trusted source.
    """
    # The context manager closes the handle even when unpickling raises,
    # which the previous open()/close() pair did not guarantee.
    with open(file, 'rb') as handle:
        return pickle.load(handle)

In [None]:
def save_file(x,file):
    """Pickle ``x`` into ``file``, overwriting any existing content.

    Parameters
    ----------
    x : object
        Any picklable object.
    file : str or path-like
        Destination path; opened in binary write mode.
    """
    # Closing the handle (via the context manager) guarantees the data is
    # flushed to disk; the previous version left the file open.
    with open(file, 'wb') as handle:
        pickle.dump(x, handle)

In [None]:
def _degree_frequency_table(degree_values):
    """Tabulate count, frequency and complementary CDF for each distinct degree."""
    counts = degree_values.value_counts().sort_index()
    total = counts.sum()
    # Number of nodes whose degree is strictly smaller than the row's degree.
    below = counts.cumsum() - counts
    # The columns are built explicitly (instead of wrapping value_counts() in a
    # DataFrame) because the name of the counts column differs between pandas
    # versions (1 before 2.0, 'count' from 2.0 on).
    return pd.DataFrame(
        {
            1: counts.to_numpy(),
            'f': counts.index.to_numpy(),
            '频率': counts.to_numpy() / total,
            'ccdf': 1 - below.to_numpy() / total,
        },
        index=pd.Index(counts.index.to_numpy()),
    )


def degree_distribution(F):
    """Compute per-node degrees and the degree distributions of a directed graph.

    Parameters
    ----------
    F : directed graph
        Object exposing ``degree()``, ``out_degree()`` and ``in_degree()``,
        each yielding ``(node, degree)`` pairs (e.g. ``networkx.DiGraph``).

    Returns
    -------
    degree_all : pandas.DataFrame
        Indexed by node, with columns 'Degree', 'Out-degree', 'In-degree'.
    citypdf_sum2, citypdf_sum, citypdf_sum1 : pandas.DataFrame
        Frequency tables for total degree, out-degree and in-degree, in that
        order. Each is sorted by ascending degree and has the columns
        ``1`` (number of nodes), 'f' (the degree value), '频率' (relative
        frequency) and 'ccdf' (share of nodes whose degree is >= 'f').
    """
    # F.degree() is used instead of nx.degree(F) for consistency with the
    # out_degree()/in_degree() calls; both return the same degree view.
    degree = pd.DataFrame(F.degree()).sort_values(by=[1], ascending=False)
    gdo = pd.DataFrame(F.out_degree()).sort_values(by=[1], ascending=False)
    gdi = pd.DataFrame(F.in_degree()).sort_values(by=[1], ascending=False)

    citypdf_sum2 = _degree_frequency_table(degree[1])
    citypdf_sum = _degree_frequency_table(gdo[1])
    citypdf_sum1 = _degree_frequency_table(gdi[1])

    # Align the three degree columns on the node id (column 0).
    degree_all = pd.concat(
        [degree.set_index([0]), gdo.set_index([0]), gdi.set_index([0])],
        axis=1,
    )
    degree_all.columns = ['Degree','Out-degree','In-degree']
    return degree_all, citypdf_sum2, citypdf_sum, citypdf_sum1

In [None]:
def out_in_correlation(FTdegree_all):
    """Summarise in-degree within logarithmic (base-3) out-degree bins.

    Parameters
    ----------
    FTdegree_all : pandas.DataFrame
        Per-node table with at least the columns 'Out-degree' and
        'In-degree'. It is not modified.

    Returns
    -------
    pandas.DataFrame
        One row per bin ``[3**k, 3**(k+1))`` for ``k = 0..9``, indexed by the
        bin's lower edge, with the columns 'mean', 'count' and 'sem' of
        'In-degree'. Empty bins are kept (count 0, NaN mean). Nodes with
        out-degree 0 are excluded; out-degrees of ``3**10`` or more fall
        outside every bin and are ignored.
    """
    bins = [3**k for k in range(11)]
    # .copy() so the new column is written to an independent frame rather
    # than to a slice of the caller's data (avoids SettingWithCopyWarning).
    with_out_links = FTdegree_all[FTdegree_all['Out-degree'] != 0].copy()
    with_out_links['cut-out'] = pd.cut(
        with_out_links['Out-degree'], bins, right=False, labels=bins[:-1]
    )
    # observed=False pins the historical behaviour (all bins reported) and
    # silences the pandas FutureWarning about the changing default.
    return (
        with_out_links
        .groupby('cut-out', observed=False)['In-degree']
        .agg(['mean', 'count', 'sem'])
    )

In [None]:
def network_statistics(F):
    """Print basic size, density, triangle and clustering statistics of ``F``.

    ``F`` is passed to networkx both as given and after conversion to an
    undirected ``nx.Graph``. Nothing is returned; the eight values are
    printed one per line with their (Chinese) labels.
    """
    undirected = nx.Graph(F)
    # Every value is computed before anything is printed, so a failure in
    # any metric produces no partial output.
    report = [
        ('节点数', F.number_of_nodes()),
        ('边数', F.number_of_edges()),
        ('网络密度', nx.density(F)),
        # nx.triangles counts a triangle once per member node, hence the /3.
        ('三角形数', sum(nx.triangles(undirected).values())/3),
        ('全局集聚系数（无向）', nx.transitivity(undirected)),
        ('平均集聚系数（无向）', nx.average_clustering(undirected)),
        ('全局集聚系数（有向）', nx.transitivity(F)),
        ('平均集聚系数（有向）', nx.average_clustering(F)),
    ]

    for label, value in report:
        print(label, value)

In [None]:
def disruption_correlation(FT,FTDisruption):
    """Build a per-citing-node table of disruption vs. citation/reference features.

    Parameters
    ----------
    FT : graph
        Object whose ``edges()`` yields two-element edges; the first element
        is labelled 'Theorem_id' and the second 'Ref_id' below.
    FTDisruption : pandas.DataFrame
        Per-node table indexed by node id. Presumably its columns are, in
        this order, Generation, Disruption, nj, ni, nk, Degree, Out-degree,
        In-degree, Clustering, Citation -- the drops and the positional
        rename below only work for that layout (TODO confirm against the
        pickled inputs).

    Returns
    -------
    list
        ``[table, per_bin_pearson]`` where ``table`` has one row per
        'Theorem_id' with ``Citation > 0`` and ``per_bin_pearson`` holds one
        ``pearsonr(Disruption, Citation)`` result per entry of ``Glistlabel``.

    Notes
    -----
    Reads the notebook-level global ``Glistlabel``; the cell defining it
    must have been run before this function is called.
    """
    # Edge list with one row per distinct (Theorem_id, Ref_id) pair.
    adjacency_list = pd.DataFrame(FT.edges())
    adjacency_list.columns = ['Theorem_id','Ref_id']
    adjacency_list1 = adjacency_list.drop_duplicates(subset=['Theorem_id','Ref_id'], keep='first')
    # Attach the attributes of the edge's first node. Merging on a Series
    # makes pandas add a helper column 'key_0', which is dropped again.
    adjacency_list2 = pd.merge(adjacency_list1, FTDisruption, left_on=adjacency_list1['Theorem_id'], right_index=True)
    adjacency_list2 = adjacency_list2.drop(['key_0','Clustering'],axis=1)
    # Attach the attributes of the edge's second node; overlapping column
    # names get the suffixes _x (Theorem_id side) and _y (Ref_id side).
    adjacency_list3 = pd.merge(adjacency_list2, FTDisruption, left_on=adjacency_list2['Ref_id'], right_index=True)
    adjacency_list3 = adjacency_list3.drop(['key_0','Disruption_y','Clustering'],axis=1)
    # Per edge: generation of the first node minus generation of the second.
    adjacency_list3['Generation_diff'] = adjacency_list3['Generation_x']-adjacency_list3['Generation_y']
    # Per Theorem_id: mean generation gap and mean in-degree over its Ref_ids.
    adjacency_list3_count1 = adjacency_list3.groupby('Theorem_id')['Generation_diff'].agg(np.mean)
    adjacency_list3['Generation_diff_mean'] = adjacency_list3['Theorem_id'].map(adjacency_list3_count1)
    adjacency_list3_count2 = adjacency_list3.groupby('Theorem_id')['In-degree_y'].agg(np.mean)
    adjacency_list3['kin_mean'] = adjacency_list3['Theorem_id'].map(adjacency_list3_count2)
    # Collapse to one row per Theorem_id and discard the per-edge columns.
    adjacency_list4 = adjacency_list3.drop_duplicates(subset=['Theorem_id'], keep='first')
    adjacency_list4 = adjacency_list4.drop(['Ref_id','Generation_y','Degree_y','Out-degree_y','In-degree_y','Generation_diff','Citation_y'],axis=1)
    # Keep only nodes with a positive 'Citation' value.
    adjacency_list5 = adjacency_list4[adjacency_list4['Citation_x']>0]
    adjacency_list5 = adjacency_list5.drop(['nj_x','ni_x','nk_x','nj_y','ni_y','nk_y'],axis=1)
    # Positional rename: requires exactly nine remaining columns in the
    # order implied by the FTDisruption layout described in the docstring.
    adjacency_list5.columns = ['paperid','Generation','Disruption','Degree','Out-degree','In-degree','Citation','Reference age (in generations)','Reference popularity']

    # Rank rows by ascending Citation and cut the ranks into len(Glistlabel)
    # equal-sized bins labelled by Glistlabel.
    adjacency_list6 = adjacency_list5.sort_values(by=['Citation'], ascending=True)
    adjacency_list6['No'] = range(0,len(adjacency_list6),1)
    adjacency_list6['Citation_cut'] = pd.qcut(adjacency_list6['No'], len(Glistlabel), labels=Glistlabel)
    # Pearson correlation between Disruption and Citation inside each bin.
    pearsonr_pearsonr = []
    for i in range(len(Glistlabel)):
        data_q1 = adjacency_list6[adjacency_list6['Citation_cut']==Glistlabel[i]]
        pearsonr_pearsonr.append(pearsonr(data_q1['Disruption'],data_q1['Citation']))
    disruption_correlation__statistics = [adjacency_list5,
                                          pearsonr_pearsonr]
    return disruption_correlation__statistics

In [None]:
# Colour palette shared by the figures. Keys combine a network prefix
# (FT / FM / FAPS) with a shade number 1-3; the '*alpha' entries are RGBA
# tuples. 'NM' is the colour used for the 'Null model' bars. The '*alpha'
# and 'GM' entries are not referenced in the cells visible here.
colorlist = {
        'FT1':'#1A4786',
        'FT2':'#0F284C',
        'FT3':'#586C86',
        'FM1':'#ef852f',
        'FM2':'#864B1A',
        'FM3':'#EFB483',
        'FAPS1':'#cb4042',
        'FAPS2':'#722426',
        'FAPS3':'#CB7D7E',
        'FT1alpha':(26/255,71/255,134/255,0.8),
        'FM1alpha':(239/255,133/255,47/255,1),
        'FAPS1alpha':(203/255,64/255,66/255,1),
        'NM':'#F6F4F2',
        'GM':'#fff1c1',
        }

In [None]:
# Load the three networks analysed in this notebook.
# NOTE(review): the Excel files use absolute Windows paths while the pickles
# are resolved relative to the working directory -- adjust before running
# on another machine. list_all is not referenced in the cells visible here.
list_all = pd.read_excel('F:\\dataset\\mpeuni\\Theorems\\new\\list_all.xlsx', header = 0)
adjacency_listT = pd.read_excel('F:\\dataset\\mpeuni\\Theorems\\new\\adjacency_list_Theorems_and_Axioms_only.xlsx', header = 0)
# Theorem network: one directed edge Theorem_id -> Ref_id per spreadsheet row.
FT = nx.from_pandas_edgelist(adjacency_listT, 'Theorem_id', 'Ref_id', create_using=nx.DiGraph())
# Pre-built graphs for the two paper networks (labelled 'Paper (math)' and
# 'Paper (cit-HepTh)' in the figures). Unpickling runs arbitrary code, so
# these files must come from a trusted source.
FM = load_file('math_scimagotop17_cleaned.pkl')
FAPS = load_file('Cit-HepTh_cleaned.pkl')

In [None]:
# Print the summary statistics of the theorem network.
network_statistics(FT)

In [None]:
# Print the summary statistics of the math paper network.
network_statistics(FM)

In [None]:
# Print the summary statistics of the cit-HepTh paper network.
network_statistics(FAPS)

In [None]:
# Figure 2: degree distributions, out-/in-degree relation and clustering

In [None]:
# For each network: the per-node degree table plus the frequency/CCDF tables
# of total degree, out-degree and in-degree.
FTdegree_all, FTdegreepdf, FToutdegreepdf, FTindegreepdf = degree_distribution(FT)
FMdegree_all, FMdegreepdf, FMoutdegreepdf, FMindegreepdf = degree_distribution(FM)
FAPSdegree_all, FAPSdegreepdf, FAPSoutdegreepdf, FAPSindegreepdf = degree_distribution(FAPS)

In [None]:
# Restrict each degree table to nodes with a non-zero out-degree.
FTdegree_all1 = FTdegree_all[FTdegree_all['Out-degree']!=0]
FMdegree_all1 = FMdegree_all[FMdegree_all['Out-degree']!=0]
FAPSdegree_all1 = FAPSdegree_all[FAPSdegree_all['Out-degree']!=0]

In [None]:
# Spearman rank correlation between out-degree and in-degree (theorem network).
spearmanr(FTdegree_all1['Out-degree'], FTdegree_all1['In-degree'])

In [None]:
# Spearman rank correlation between out-degree and in-degree (math paper network).
spearmanr(FMdegree_all1['Out-degree'], FMdegree_all1['In-degree'])

In [None]:
# Spearman rank correlation between out-degree and in-degree (cit-HepTh network).
spearmanr(FAPSdegree_all1['Out-degree'], FAPSdegree_all1['In-degree'])

In [None]:
# Mean, count and standard error of in-degree within out-degree bins.
FTdegree_all2 = out_in_correlation(FTdegree_all)
FMdegree_all2 = out_in_correlation(FMdegree_all)
FAPSdegree_all2 = out_in_correlation(FAPSdegree_all)

In [None]:
def percentile_bootstrap(data, n_boot=1000, random_state=None):
    """Bootstrap the mean in-degree of the lowest and highest out-degree quintiles.

    Nodes with out-degree 0 are dropped, the rest are ranked by ascending
    out-degree and split into five equal-sized rank groups. The bottom
    group ('group1') and the top group ('group5') are then resampled.

    Parameters
    ----------
    data : pandas.DataFrame
        Table with the columns 'Out-degree' and 'In-degree'. Not modified.
    n_boot : int, default 1000
        Number of bootstrap iterations (previously hard-coded to 1000).
    random_state : int, numpy RandomState/Generator or None, default None
        Seed or generator for reproducible resampling. ``None`` keeps the
        previous behaviour of drawing from numpy's global random state.

    Returns
    -------
    bootstrap_means_q1 : list of float
        Bootstrap means of 'In-degree' in the bottom group.
    bootstrap_means_q4 : list of float
        Bootstrap means of 'In-degree' in the top group.
    p_value_fin : int
        Number (a count, not a proportion) of pooled resamples whose
        bottom-minus-top mean difference exceeds the observed difference.
        Divide by ``n_boot`` to obtain a fraction.
    """
    # An int seed is turned into ONE generator shared by all draws; passing
    # the int to every .sample() call would repeat the same resample.
    if isinstance(random_state, (int, np.integer)):
        rng = np.random.RandomState(random_state)
    else:
        rng = random_state

    # .copy() so the helper columns are added to an independent frame.
    ranked = (
        data[data['Out-degree'] != 0]
        .sort_values(by=['Out-degree'], ascending=True)
        .copy()
    )
    # Cutting the rank (not the raw value) yields equal-sized groups even
    # when many nodes share the same out-degree.
    ranked['no'] = range(1, len(ranked) + 1)
    ranked['group'] = pd.qcut(
        ranked['no'], 5, labels=['group1', 'group2', 'group3', 'group4', 'group5']
    )

    low = ranked.loc[ranked['group'] == 'group1', 'In-degree']
    high = ranked.loc[ranked['group'] == 'group5', 'In-degree']
    pooled = pd.concat([low, high])
    n_low = len(low)
    diff_obs = np.mean(low) - np.mean(high)

    bootstrap_means_q1 = []
    bootstrap_means_q4 = []
    null_diffs = []
    for _ in range(n_boot):
        resampled_low = low.sample(n=n_low, replace=True, random_state=rng)
        bootstrap_means_q1.append(np.mean(resampled_low))
        resampled_high = high.sample(n=len(high), replace=True, random_state=rng)
        bootstrap_means_q4.append(np.mean(resampled_high))
        # Null distribution: resample the pooled values and split them into
        # two pseudo-groups with the original group sizes.
        shuffled = pooled.sample(
            n=len(pooled), replace=True, random_state=rng
        ).to_numpy()
        null_diffs.append(np.mean(shuffled[:n_low]) - np.mean(shuffled[n_low:]))

    p_value_fin = int(np.sum(np.asarray(null_diffs) > diff_obs))
    return bootstrap_means_q1,bootstrap_means_q4, p_value_fin

In [None]:
# Bootstrap the mean in-degree of the bottom and top out-degree quintiles
# for each network (see percentile_bootstrap for the returned triple).
FTbootstrap_means_q1,FTbootstrap_means_q4,FTp_value_fin  = percentile_bootstrap(FTdegree_all)
FMbootstrap_means_q1,FMbootstrap_means_q4,FMp_value_fin = percentile_bootstrap(FMdegree_all)
FAPSbootstrap_means_q1,FAPSbootstrap_means_q4,FAPSp_value_fin = percentile_bootstrap(FAPSdegree_all)

In [None]:
# Count of pooled resamples whose difference exceeds the observed one
# (theorem network); this is a count, not a proportion.
FTp_value_fin

In [None]:
# Same count for the math paper network.
FMp_value_fin

In [None]:
# Same count for the cit-HepTh paper network.
FAPSp_value_fin

In [None]:
# Average clustering of each theorem null-model graph (filled by the next cell).
FTNMclustering = []

In [None]:
# Load ten pickled theorem null-model graphs and record each one's average
# clustering coefficient.
# NOTE(review): the name is built as 'RN0' + str(i+1), so the tenth file is
# 'FTheoremRN010.pkl' -- confirm that matches the file on disk.
# NOTE(review): re-running this cell without resetting FTNMclustering
# appends ten more values.
for i in tqdm(range(10)):
    name2 = 'FTheoremRN0'+str(i+1)+'.pkl'
    FTNM = load_file(name2)
    FTNMclustering.append(nx.average_clustering(FTNM))

In [None]:
# Average clustering of each math-paper null-model graph (filled by the next cell).
FMNMclustering = []

In [None]:
# Load ten pickled math-paper null-model graphs and record each one's
# average clustering coefficient.
# NOTE(review): the tenth file name is 'FMATHRN010.pkl' -- confirm on disk.
for i in tqdm(range(10)):
    name2 = 'FMATHRN0'+str(i+1)+'.pkl'
    FMNM = load_file(name2)
    FMNMclustering.append(nx.average_clustering(FMNM))

In [None]:
# Average clustering of each cit-HepTh null-model graph (filled by the next cell).
FAPSNMclustering = []

In [None]:
# Load ten pickled cit-HepTh null-model graphs and record each one's
# average clustering coefficient.
# NOTE(review): the tenth file name is 'FCit-HepThRN010.pkl' -- confirm on disk.
for i in tqdm(range(10)):
    name2 = 'FCit-HepThRN0'+str(i+1)+'.pkl'
    FAPSNM = load_file(name2)
    FAPSNMclustering.append(nx.average_clustering(FAPSNM))

In [None]:
# Data for the clustering bar chart (panel g of the figure below); tuples
# follow the order of `species`.
# NOTE(review): the 'Real networks' values are hard-coded; presumably they
# are the average clustering values printed by network_statistics -- confirm
# they are still current if the input networks change.
species = ('Theorem', 'Paper (math)', 'Paper (cit-HepTh)')
clustering = {
    'Real networks': (0.04181, 0.10768, 0.15686),
    'Null model': (round(np.mean(FTNMclustering),5), round(np.mean(FMNMclustering),5), round(np.mean(FAPSNMclustering),5)),
}

# Bar colours, keyed like `clustering` so both dicts can be zipped together.
color_clustering = {
    'Real networks': (colorlist['FT1'], colorlist['FM1'], colorlist['FAPS1']),
    'Null model': (colorlist['NM'], colorlist['NM'], colorlist['NM']),
}

In [None]:
# Figure layout: a 2 x 4 grid. Columns 0-2 hold one column per network
# (top row: degree CCDFs, bottom row: binned out-/in-degree relation with a
# bootstrap inset); column 3 holds the clustering bar chart.
fig = plt.figure(figsize=(16, 8))
grid = plt.GridSpec(2, 4, wspace=0.32, hspace=0.4)
plt.rcParams['xtick.direction'] = 'in'
plt.rcParams['ytick.direction'] = 'in'
plt.rcParams['font.sans-serif'] = ['Arial']

# Panel a: CCDFs of degree, out-degree and in-degree (theorem network).
ax1 = fig.add_subplot(grid[0:1, 0:1])
ax1.set_xscale('log')
ax1.set_yscale('log')
ax1.plot(FTdegreepdf['f'], FTdegreepdf['ccdf'], '-o', color=colorlist['FT1'], 
         markersize=4, markerfacecolor='#ffffff', linewidth=1, markeredgewidth=1, label='$k$')
ax1.plot(FToutdegreepdf['f'], FToutdegreepdf['ccdf'], '-^', color=colorlist['FT2'], 
         markersize=4, markerfacecolor='#ffffff', linewidth=1, markeredgewidth=1, label='$k^{out}$')
ax1.plot(FTindegreepdf['f'], FTindegreepdf['ccdf'], '-s', color=colorlist['FT3'], 
         markersize=4, markerfacecolor='#ffffff', linewidth=1, markeredgewidth=1, label='$k^{in}$')
ax1.set_ylabel('Cumulative probability', fontsize=14)
ax1.set_xlabel('Degree', fontsize=14)
ax1.legend(frameon=False, fontsize=8)
ax1.set_title('Theorem', fontsize=16, verticalalignment='bottom')

# Panel b: same CCDFs for the math paper network.
ax2 = fig.add_subplot(grid[0:1, 1:2])
ax2.set_xscale('log')
ax2.set_yscale('log')
ax2.plot(FMdegreepdf['f'], FMdegreepdf['ccdf'], '-o', color=colorlist['FM1'], 
         markersize=4, markerfacecolor='#ffffff', linewidth=1, markeredgewidth=1, label='$k$')
ax2.plot(FMoutdegreepdf['f'], FMoutdegreepdf['ccdf'], '-^', color=colorlist['FM2'], 
         markersize=4, markerfacecolor='#ffffff', linewidth=1, markeredgewidth=1, label='$k^{out}$')
ax2.plot(FMindegreepdf['f'], FMindegreepdf['ccdf'], '-s', color=colorlist['FM3'], 
         markersize=4, markerfacecolor='#ffffff', linewidth=1, markeredgewidth=1, label='$k^{in}$')
ax2.set_ylabel('Cumulative probability', fontsize=14)
ax2.set_xlabel('Degree', fontsize=14)
ax2.legend(frameon=False, fontsize=8)
ax2.set_title('Paper (math)', fontsize=16, verticalalignment='bottom')

# Panel c: same CCDFs for the cit-HepTh paper network.
ax3 = fig.add_subplot(grid[0:1, 2:3])
ax3.set_xscale('log')
ax3.set_yscale('log')
ax3.plot(FAPSdegreepdf['f'], FAPSdegreepdf['ccdf'], '-o', color=colorlist['FAPS1'], 
         markersize=4, markerfacecolor='#ffffff', linewidth=1, markeredgewidth=1, label='$k$')
ax3.plot(FAPSoutdegreepdf['f'], FAPSoutdegreepdf['ccdf'], '-^', color=colorlist['FAPS2'], 
         markersize=4, markerfacecolor='#ffffff', linewidth=1, markeredgewidth=1, label='$k^{out}$')
ax3.plot(FAPSindegreepdf['f'], FAPSindegreepdf['ccdf'], '-s', color=colorlist['FAPS3'], 
         markersize=4, markerfacecolor='#ffffff', linewidth=1, markeredgewidth=1, label='$k^{in}$')
ax3.set_ylabel('Cumulative probability', fontsize=14)
ax3.set_xlabel('Degree', fontsize=14)
ax3.legend(frameon=False, fontsize=8)
ax3.set_title('Paper (cit-HepTh)', fontsize=16, verticalalignment='bottom')

# Panel d: mean in-degree (with SEM error bars) per out-degree bin, theorem network.
ax4 = fig.add_subplot(grid[1:2, 0:1])
ax4.set_xscale('log')
ax4.set_yscale('log')
ax4.errorbar(FTdegree_all2.index, FTdegree_all2['mean'], yerr=FTdegree_all2['sem'], fmt='o-',
             ecolor=colorlist['FT1'], color=colorlist['FT1'], capsize=0,capthick=0, 
             markerfacecolor=colorlist['FT3'], markeredgewidth=1.5, elinewidth=1.5, markersize=6,
             label='Overall')
#ax3.set_ylim(1,100)
ax4.set_xlabel('$k^{out}$', fontsize=14)
ax4.set_ylabel('$<k^{in}>$', fontsize=14)

# Inset of panel d: distribution of the bootstrap means for the top and
# bottom out-degree quintiles. Dividing by 1000 turns histogram counts into
# probabilities and assumes percentile_bootstrap ran 1000 iterations.
ax7 = fig.add_axes([0.19, 0.31, 0.085, 0.12])
hist1, bin_edges1 = np.histogram(FTbootstrap_means_q4, bins=10)
bin_mids1 = (bin_edges1[:-1] + bin_edges1[1:]) / 2
hist1 = hist1 / 1000
ax7.plot(bin_mids1, hist1, '-^', linewidth=1, markerfacecolor='#ffffff', color=colorlist['FT1'], label='Top 20% $k^{out}$')
hist2, bin_edges2 = np.histogram(FTbootstrap_means_q1, bins=10)
bin_mids2 = (bin_edges2[:-1] + bin_edges2[1:]) / 2
hist2 = hist2 / 1000
ax7.plot(bin_mids2, hist2, '-s', linewidth=1, markerfacecolor='#ffffff', color=colorlist['FT1'], label='Last 20% $k^{out}$')
ax7.set_ylim(0,0.3)
ax7.set_xlabel('Bootstrap $<k^{in}>$', fontsize=10, labelpad=0)
ax7.set_ylabel('Probability', fontsize=10)


# One legend on the main panel covering both the panel and its inset.
h1, l1 = ax4.get_legend_handles_labels()
h2, l2 = ax7.get_legend_handles_labels()
ax4.legend(h1+h2, l1+l2, frameon=True, fontsize=8, loc='lower left', 
           facecolor='#ffffff', framealpha=0.4, edgecolor=(255/255,255/255,255/255,0.4))

ax4.set_title('Theorem', fontsize=16, verticalalignment='bottom')

# Panel e: same as panel d for the math paper network.
ax5 = fig.add_subplot(grid[1:2, 1:2])
ax5.set_xscale('log')
ax5.set_yscale('log')
ax5.errorbar(FMdegree_all2.index, FMdegree_all2['mean'], yerr=FMdegree_all2['sem'], fmt='o-',
             ecolor=colorlist['FM1'], color=colorlist['FM1'], capsize=0,capthick=0, 
             markerfacecolor=colorlist['FM3'], markeredgewidth=1.5, elinewidth=1.5, markersize=6,
             label='Overall')
ax5.set_ylim(2.5,20)
# Hide the minor tick labels on the y axis.
ax5.yaxis.set_minor_formatter(matplotlib.ticker.NullFormatter())
ax5.set_xlabel('$k^{out}$', fontsize=14)
ax5.set_ylabel('$<k^{in}>$', fontsize=14)

# Inset of panel e: bootstrap-mean distributions (math paper network).
ax8 = fig.add_axes([0.365, 0.31, 0.085, 0.12])
hist1, bin_edges1 = np.histogram(FMbootstrap_means_q4, bins=10)
bin_mids1 = (bin_edges1[:-1] + bin_edges1[1:]) / 2
hist1 = hist1 / 1000
ax8.plot(bin_mids1, hist1, '-^', linewidth=1, markerfacecolor='#ffffff', color=colorlist['FM1'], label='Top 20% $k^{out}$')
hist2, bin_edges2 = np.histogram(FMbootstrap_means_q1, bins=10)
bin_mids2 = (bin_edges2[:-1] + bin_edges2[1:]) / 2
hist2 = hist2 / 1000
ax8.plot(bin_mids2, hist2, '-s', linewidth=1, markerfacecolor='#ffffff', color=colorlist['FM1'], label='Last 20% $k^{out}$')
ax8.set_ylim(0,0.3)
ax8.set_xlabel('Bootstrap $<k^{in}>$', fontsize=10, labelpad=0)
ax8.set_ylabel('Probability', fontsize=10)

h1, l1 = ax5.get_legend_handles_labels()
h2, l2 = ax8.get_legend_handles_labels()
ax5.legend(h1+h2, l1+l2, frameon=True, fontsize=8, loc='lower right', 
           facecolor='#ffffff', framealpha=0.4, edgecolor=(255/255,255/255,255/255,0.4))

ax5.set_title('Paper (math)', fontsize=16, verticalalignment='bottom')
#ax5.set_ylim(0.85,16)

# Panel f: same as panel d for the cit-HepTh paper network.
ax6 = fig.add_subplot(grid[1:2, 2:3])
ax6.set_xscale('log')
ax6.set_yscale('log')
ax6.errorbar(FAPSdegree_all2.index, FAPSdegree_all2['mean'], yerr=FAPSdegree_all2['sem'], fmt='o-',
             ecolor=colorlist['FAPS1'], color=colorlist['FAPS1'], capsize=0,capthick=0, 
             markerfacecolor=colorlist['FAPS3'], markeredgewidth=1.5, elinewidth=1.5, markersize=6,
             label='Overall')
#ax3.set_ylim(1,100)
ax6.set_xlabel('$k^{out}$', fontsize=14)
ax6.set_ylabel('$<k^{in}>$', fontsize=14)

# Inset of panel f: bootstrap-mean distributions (cit-HepTh network).
ax9 = fig.add_axes([0.57, 0.31, 0.085, 0.12])
hist1, bin_edges1 = np.histogram(FAPSbootstrap_means_q4, bins=10)
bin_mids1 = (bin_edges1[:-1] + bin_edges1[1:]) / 2
hist1 = hist1 / 1000
ax9.plot(bin_mids1, hist1, '-^', linewidth=1, markerfacecolor='#ffffff', color=colorlist['FAPS1'], label='Top 20% $k^{out}$')
hist2, bin_edges2 = np.histogram(FAPSbootstrap_means_q1, bins=10)
bin_mids2 = (bin_edges2[:-1] + bin_edges2[1:]) / 2
hist2 = hist2 / 1000
ax9.plot(bin_mids2, hist2, '-s', linewidth=1, markerfacecolor='#ffffff', color=colorlist['FAPS1'], label='Last 20% $k^{out}$')
ax9.set_ylim(0,0.3)
ax9.set_xlabel('Bootstrap $<k^{in}>$', fontsize=10, labelpad=0)
ax9.set_ylabel('Probability', fontsize=10)

h1, l1 = ax6.get_legend_handles_labels()
h2, l2 = ax9.get_legend_handles_labels()

ax6.legend(h1+h2, l1+l2, frameon=True, fontsize=8, loc='lower right', 
           facecolor='#ffffff', framealpha=0.4, edgecolor=(255/255,255/255,255/255,0.4))

ax6.set_title('Paper (cit-HepTh)', fontsize=16, verticalalignment='bottom')

# Panel g: horizontal grouped bars of average clustering, real vs. null model.
ax10 = fig.add_subplot(grid[0:2, 3:4])

x = np.arange(len(species))
width = 0.25
multiplier = 0.5

# Lightly shaded strip covering the x < 0 part of the axes.
rect1 = patches.Rectangle((-0.02, -0.3), 0.02, 2.925, fill=True,
                         edgecolor='none', facecolor='#c9e2a8',alpha=0.2)
ax10.add_patch(rect1)

# One bar group per dict entry; `multiplier` shifts each group by one bar width.
for (attribute1, color_c),(attribute2, measurement) in zip(color_clustering.items(),clustering.items()):
    offset = width * multiplier
    rects = ax10.barh(x + offset, measurement, width, label=attribute2, color=color_c)
    ax10.bar_label(rects, padding=2)
    multiplier += 1
#ax10.axhline(y=0, xmin=0, xmax=1, color='#252525', linestyle='--')

ax10.set_xlabel('Average clustering coefficient', fontsize=14, labelpad=4)
ax10.set_ylabel('Network', fontsize=14, labelpad=4)
ax10.set_yticks(x + width)
ax10.set_yticklabels(species, rotation='vertical', ha='right', va='center')
ax10.set_ylim(-0.3,2.625)
ax10.set_xlim(-0.02,0.22)
ax10.set_xticks([0,0.04,0.08,0.12,0.16, 0.20])
ax10.tick_params(axis='both', which='major', pad=7.5)
ax10.legend(frameon=False, fontsize=10, loc='upper right')
#ax10.set_xticklabels([0,0.02,0.04,0.06,0.08,0.10,0.12,0.14,0.16])
#ax10.set_yticklabels(ax10.get_yticks(), rotation=90)
# Flip the y axis so the first entry of `species` is drawn at the top.
ax10.invert_yaxis()

# Panel letters, positioned in axes coordinates.
ax1.text(-0.28, 1.09, 'a', size=14, weight='bold', transform=ax1.transAxes)
ax2.text(-0.28, 1.09, 'b', size=14, weight='bold', transform=ax2.transAxes)
ax3.text(-0.28, 1.09, 'c', size=14, weight='bold', transform=ax3.transAxes)
ax4.text(-0.28, 1.09, 'd', size=14, weight='bold', transform=ax4.transAxes)
ax5.text(-0.28, 1.09, 'e', size=14, weight='bold', transform=ax5.transAxes)
ax6.text(-0.28, 1.09, 'f', size=14, weight='bold', transform=ax6.transAxes)
ax10.text(-0.22, 1.04, 'g', size=14, weight='bold', transform=ax10.transAxes)

In [None]:
# Figure 3: disruption statistics of the real networks vs. their null models

In [None]:
# Per-node disruption tables for the three real networks, consumed by
# disruption_correlation below. Presumably indexed by node id with columns
# such as 'Generation', 'Disruption' and 'Citation' -- TODO confirm against
# the pickled files.
FTDisruption = load_file('Disruptiondata定理D10.pkl')
FMDisruption = load_file('Disruptiondata数学D10.pkl')
FAPSDisruption = load_file('Disruptiondata-cit10.pkl')

In [None]:
# Labels of the ten citation-rank bins. disruption_correlation reads this as
# a global, so this cell must be run before that function is called.
Glistlabel = ['0-10%', '10-20%', '20-30%', '30-40%', '40-50%', '50-60%', '60-70%', '70-80%', '80-90%', '90-100%']

In [None]:
# For each network: [per-node table, list of per-bin pearsonr results].
FTdisruption_correlation = disruption_correlation(FT,FTDisruption)
FMdisruption_correlation = disruption_correlation(FM,FMDisruption)
FAPSdisruption_correlation = disruption_correlation(FAPS,FAPSDisruption)

In [None]:
# Mean disruption over the rows returned for the theorem network.
FTdisruption_correlation[0]['Disruption'].mean()

In [None]:
# Mean disruption over the rows returned for the math paper network.
FMdisruption_correlation[0]['Disruption'].mean()

In [None]:
# Mean disruption over the rows returned for the cit-HepTh paper network.
FAPSdisruption_correlation[0]['Disruption'].mean()

In [None]:
# Pearson correlation between disruption and citation count (theorem network).
pearsonr(FTdisruption_correlation[0]['Disruption'],FTdisruption_correlation[0]['Citation'])

In [None]:
# Pearson correlation between disruption and citation count (math paper network).
pearsonr(FMdisruption_correlation[0]['Disruption'],FMdisruption_correlation[0]['Citation'])

In [None]:
# Pearson correlation between disruption and citation count (cit-HepTh network).
pearsonr(FAPSdisruption_correlation[0]['Disruption'],FAPSdisruption_correlation[0]['Citation'])

In [None]:
def min_max_scaling(data):
    """Linearly rescale ``data`` so its minimum maps to 0 and its maximum to 1.

    Parameters
    ----------
    data : numpy.ndarray or pandas.Series
        Numeric values to rescale.

    Returns
    -------
    Same kind of object as ``data - min(data)``
        The rescaled values. When every value is identical the result is all
        zeros instead of the NaNs a division by zero would give.
    """
    min_val = np.min(data)
    max_val = np.max(data)
    span = max_val - min_val
    if span == 0:
        # Constant input: there is no range to scale by. Multiplying by 0.0
        # keeps the input's shape and container type.
        return (data - min_val) * 0.0
    return (data - min_val) / span

In [None]:
def gini_coefficient(FTDisruption):
    """Gini coefficient of the min-max scaled 'Disruption' column.

    Parameters
    ----------
    FTDisruption : pandas.DataFrame
        Table with a numeric 'Disruption' column. Not modified.

    Returns
    -------
    float
        ``sum_{i<j} |x_i - x_j| / (n**2 * mean(x))`` where ``x`` are the
        values after ``min_max_scaling``. NaN when the scaled mean is 0 or
        the input contains NaN.
    """
    values = np.sort(np.asarray(FTDisruption['Disruption'], dtype=float))
    x = min_max_scaling(values)
    n = len(x)
    # For ascending x the pairwise sum collapses to a weighted sum:
    # x[k] is added once for each of the k smaller values and subtracted
    # once for each of the n-1-k larger ones. This replaces the previous
    # O(n**2) loop over all pairs.
    weights = 2 * np.arange(n) - n + 1
    diffsum = np.dot(weights, x)
    gini = diffsum / (n**2 * np.mean(x))
    return gini

In [None]:
# Gini coefficient of the scaled disruption values for each real network.
FTgini = gini_coefficient(FTdisruption_correlation[0])
FMgini = gini_coefficient(FMdisruption_correlation[0])
FAPSgini = gini_coefficient(FAPSdisruption_correlation[0])

In [None]:
# Pearson r (element [0] of the pearsonr result) between disruption and
# citations within the last citation-rank bin, i.e. Glistlabel[-1] ('90-100%').
FTpearsonr_pearsonrc2 = FTdisruption_correlation[1][len(Glistlabel)-1][0]
FMpearsonr_pearsonrc2 = FMdisruption_correlation[1][len(Glistlabel)-1][0]
FAPSpearsonr_pearsonrc2 = FAPSdisruption_correlation[1][len(Glistlabel)-1][0]

In [None]:
# Per null-model graph: Pearson r between disruption and citations
# (filled by the null-model loops below).
FTNMDisruption = []
FMNMDisruption = []
FAPSNMDisruption = []

In [None]:
# Per null-model graph: mean disruption (filled by the null-model loops below).
FTNMDisruption_mean = []
FMNMDisruption_mean = []
FAPSNMDisruption_mean = []

In [None]:
# Per null-model graph: Gini coefficient of disruption (filled by the
# null-model loops below).
FTNMgini = []
FMNMgini = []
FAPSNMgini = []

In [None]:
# Per null-model graph: Pearson r within the last citation-rank bin
# (filled by the null-model loops below).
FTNMpearsonr_pearsonrc2 = []
FMNMpearsonr_pearsonrc2 = []
FAPSNMpearsonr_pearsonrc2 = []

In [None]:
# Repeat the disruption analysis on the ten theorem null-model graphs.
# NOTE(review): the tenth file names end in 'RN010.pkl' -- confirm on disk.
# NOTE(review): re-running this cell without resetting the accumulator
# lists appends ten more values to each.
for i in tqdm(range(10)):
    name1 = 'Disruptiondata10FTheoremRN0'+str(i+1)+'.pkl'
    name2 = 'FTheoremRN0'+str(i+1)+'.pkl'
    FTNMDisruption1 = load_file(name1)
    FTNM1 = load_file(name2)
    # Join the null model's degree table onto its disruption table by node id.
    FTNMdegree_all1, FTNMdegreepdf1, FTNMoutdegreepdf1, FTNMindegreepdf1 = degree_distribution(FTNM1)
    FTNMDisruption1 = pd.merge(FTNMDisruption1, FTNMdegree_all1, left_index=True, right_index=True)
    # Placeholder column so the frame has the layout disruption_correlation drops from.
    FTNMDisruption1['Clustering'] = None
    FTNMDisruption1['Citation'] = FTNMDisruption1['ni'] + FTNMDisruption1['nj']
    # Positional rename: assumes the pickled table had exactly five columns
    # in the order Generation, Disruption, nj, ni, nk -- TODO confirm.
    FTNMDisruption1.columns = ['Generation','Disruption','nj','ni','nk','Degree','Out-degree','In-degree','Clustering','Citation']
    FTNMdisruption_correlation1 = disruption_correlation(FTNM1,FTNMDisruption1)
    FTNMDisruption.append(pearsonr(FTNMdisruption_correlation1[0]['Disruption'],FTNMdisruption_correlation1[0]['Citation'])[0])
    FTNMDisruption_mean.append(np.mean(FTNMdisruption_correlation1[0]['Disruption']))
    FTNMgini.append(gini_coefficient(FTNMdisruption_correlation1[0]))
    FTNMpearsonr_pearsonrc2.append(FTNMdisruption_correlation1[1][len(Glistlabel)-1][0])

In [None]:
# Repeat the disruption analysis on the ten math-paper null-model graphs.
# The temporaries keep their 'FTNM...' names from the theorem loop, but the
# results are appended to the FMNM* lists.
# NOTE(review): the tenth file names end in 'RN010.pkl' -- confirm on disk.
for i in tqdm(range(10)):
    name1 = 'Disruptiondata10FMATHRN0'+str(i+1)+'.pkl'
    name2 = 'FMATHRN0'+str(i+1)+'.pkl'
    FTNMDisruption1 = load_file(name1)
    FTNM1 = load_file(name2)
    # Join the null model's degree table onto its disruption table by node id.
    FTNMdegree_all1, FTNMdegreepdf1, FTNMoutdegreepdf1, FTNMindegreepdf1 = degree_distribution(FTNM1)
    FTNMDisruption1 = pd.merge(FTNMDisruption1, FTNMdegree_all1, left_index=True, right_index=True)
    # Placeholder column so the frame has the layout disruption_correlation drops from.
    FTNMDisruption1['Clustering'] = None
    FTNMDisruption1['Citation'] = FTNMDisruption1['ni'] + FTNMDisruption1['nj']
    # Positional rename: assumes the pickled table had exactly five columns
    # in the order Generation, Disruption, nj, ni, nk -- TODO confirm.
    FTNMDisruption1.columns = ['Generation','Disruption','nj','ni','nk','Degree','Out-degree','In-degree','Clustering','Citation']
    FTNMdisruption_correlation1 = disruption_correlation(FTNM1,FTNMDisruption1)
    FMNMDisruption.append(pearsonr(FTNMdisruption_correlation1[0]['Disruption'],FTNMdisruption_correlation1[0]['Citation'])[0])
    FMNMDisruption_mean.append(np.mean(FTNMdisruption_correlation1[0]['Disruption']))
    FMNMgini.append(gini_coefficient(FTNMdisruption_correlation1[0]))
    FMNMpearsonr_pearsonrc2.append(FTNMdisruption_correlation1[1][len(Glistlabel)-1][0])

In [None]:
# Repeat the disruption analysis on the ten cit-HepTh null-model graphs.
# The temporaries keep their 'FTNM...' names from the theorem loop, but the
# results are appended to the FAPSNM* lists.
# NOTE(review): the tenth file names end in 'RN010.pkl' -- confirm on disk.
for i in tqdm(range(10)):
    name1 = 'Disruptiondata10FCit-HepThRN0'+str(i+1)+'.pkl'
    name2 = 'FCit-HepThRN0'+str(i+1)+'.pkl'
    FTNMDisruption1 = load_file(name1)
    FTNM1 = load_file(name2)
    # Join the null model's degree table onto its disruption table by node id.
    FTNMdegree_all1, FTNMdegreepdf1, FTNMoutdegreepdf1, FTNMindegreepdf1 = degree_distribution(FTNM1)
    FTNMDisruption1 = pd.merge(FTNMDisruption1, FTNMdegree_all1, left_index=True, right_index=True)
    # Placeholder column so the frame has the layout disruption_correlation drops from.
    FTNMDisruption1['Clustering'] = None
    FTNMDisruption1['Citation'] = FTNMDisruption1['ni'] + FTNMDisruption1['nj']
    # Positional rename: assumes the pickled table had exactly five columns
    # in the order Generation, Disruption, nj, ni, nk -- TODO confirm.
    FTNMDisruption1.columns = ['Generation','Disruption','nj','ni','nk','Degree','Out-degree','In-degree','Clustering','Citation']
    FTNMdisruption_correlation1 = disruption_correlation(FTNM1,FTNMDisruption1)
    FAPSNMDisruption.append(pearsonr(FTNMdisruption_correlation1[0]['Disruption'],FTNMdisruption_correlation1[0]['Citation'])[0])
    FAPSNMDisruption_mean.append(np.mean(FTNMdisruption_correlation1[0]['Disruption']))
    FAPSNMgini.append(gini_coefficient(FTNMdisruption_correlation1[0]))
    FAPSNMpearsonr_pearsonrc2.append(FTNMdisruption_correlation1[1][len(Glistlabel)-1][0])

In [None]:
# Mean disruption averaged over the theorem null-model graphs.
np.mean(FTNMDisruption_mean)

In [None]:
# Mean disruption averaged over the math-paper null-model graphs.
np.mean(FMNMDisruption_mean)

In [None]:
# Mean disruption averaged over the cit-HepTh null-model graphs.
np.mean(FAPSNMDisruption_mean)

In [None]:
# Disruption-citation Pearson r averaged over the theorem null-model graphs.
np.mean(FTNMDisruption)

In [None]:
# Disruption-citation Pearson r averaged over the math-paper null-model graphs.
np.mean(FMNMDisruption)

In [None]:
# Disruption-citation Pearson r averaged over the cit-HepTh null-model graphs.
np.mean(FAPSNMDisruption)

In [None]:
# z-score of a fixed value against the theorem null-model correlations
# (np.std defaults to the population standard deviation, ddof=0).
# NOTE(review): 0.1931... is hard-coded; presumably it is the Pearson r of
# the real theorem network computed above -- confirm it is still current.
(0.19310264578128114-np.mean(FTNMDisruption))/np.std(FTNMDisruption)

In [None]:
# z-score of a fixed value against the math-paper null-model correlations.
# NOTE(review): 0.0307... is hard-coded; presumably it is the Pearson r of
# the real math paper network computed above -- confirm it is still current.
(0.030700363339169895-np.mean(FMNMDisruption))/np.std(FMNMDisruption)

In [None]:
# z-score of a fixed value against the cit-HepTh null-model correlations.
# NOTE(review): -0.0399... is hard-coded; presumably it is the Pearson r of
# the real cit-HepTh network computed above -- confirm it is still current.
(-0.03999610460183501-np.mean(FAPSNMDisruption))/np.std(FAPSNMDisruption)

In [None]:
def percentile_bootstrap1(data, n_boot=1000, random_state=None):
    """Bootstrap the mean disruption of the least- and most-cited quintiles.

    Rows are ranked by ascending 'Citation' and split into five equal-sized
    rank groups. The bottom group ('group1') and the top group ('group5')
    are then resampled.

    Parameters
    ----------
    data : pandas.DataFrame
        Table with the columns 'Citation' and 'Disruption'. Not modified.
    n_boot : int, default 1000
        Number of bootstrap iterations (previously hard-coded to 1000).
    random_state : int, numpy RandomState/Generator or None, default None
        Seed or generator for reproducible resampling. ``None`` keeps the
        previous behaviour of drawing from numpy's global random state.

    Returns
    -------
    bootstrap_means_q1 : list of float
        Bootstrap means of 'Disruption' in the bottom group.
    bootstrap_means_q4 : list of float
        Bootstrap means of 'Disruption' in the top group.
    p_value_fin : int
        Number (a count, not a proportion) of pooled resamples whose
        bottom-minus-top mean difference exceeds the observed difference.
        Divide by ``n_boot`` to obtain a fraction.
    """
    # An int seed is turned into ONE generator shared by all draws; passing
    # the int to every .sample() call would repeat the same resample.
    if isinstance(random_state, (int, np.integer)):
        rng = np.random.RandomState(random_state)
    else:
        rng = random_state

    ranked = data.sort_values(by=['Citation'], ascending=True).copy()
    # Cutting the rank (not the raw value) yields equal-sized groups even
    # when many rows share the same citation count.
    ranked['no'] = range(1, len(ranked) + 1)
    ranked['group'] = pd.qcut(
        ranked['no'], 5, labels=['group1', 'group2', 'group3', 'group4', 'group5']
    )

    low = ranked.loc[ranked['group'] == 'group1', 'Disruption']
    high = ranked.loc[ranked['group'] == 'group5', 'Disruption']
    pooled = pd.concat([low, high])
    n_low = len(low)
    diff_obs = np.mean(low) - np.mean(high)

    bootstrap_means_q1 = []
    bootstrap_means_q4 = []
    null_diffs = []
    for _ in range(n_boot):
        resampled_low = low.sample(n=n_low, replace=True, random_state=rng)
        bootstrap_means_q1.append(np.mean(resampled_low))
        resampled_high = high.sample(n=len(high), replace=True, random_state=rng)
        bootstrap_means_q4.append(np.mean(resampled_high))
        # Null distribution: resample the pooled values and split them into
        # two pseudo-groups with the original group sizes.
        shuffled = pooled.sample(
            n=len(pooled), replace=True, random_state=rng
        ).to_numpy()
        null_diffs.append(np.mean(shuffled[:n_low]) - np.mean(shuffled[n_low:]))

    p_value_fin = int(np.sum(np.asarray(null_diffs) > diff_obs))
    return bootstrap_means_q1,bootstrap_means_q4, p_value_fin

In [None]:
# Bootstrap the mean disruption of the least- and most-cited quintiles for
# each network (see percentile_bootstrap1 for the returned triple).
FTDCbootstrap_means_q1,FTDCbootstrap_means_q4,FTDCp_value_fin  = percentile_bootstrap1(FTdisruption_correlation[0])
FMDCbootstrap_means_q1,FMDCbootstrap_means_q4,FMDCp_value_fin = percentile_bootstrap1(FMdisruption_correlation[0])
FAPSDCbootstrap_means_q1,FAPSDCbootstrap_means_q4,FAPSDCp_value_fin = percentile_bootstrap1(FAPSdisruption_correlation[0])

In [None]:
# Bar-chart data for the next figure. Every dict maps a series name to one
# value per network, in the order of `species`.
species = ('Theorem', 'Paper (math)', 'Paper (cit-HepTh)')
# Mean disruption. NOTE: this rebinds the name `clustering` that held the
# clustering coefficients for the previous figure.
clustering = {
    'Real networks': (round(FTdisruption_correlation[0]['Disruption'].mean(),5), round(FMdisruption_correlation[0]['Disruption'].mean(),5),
                     round(FAPSdisruption_correlation[0]['Disruption'].mean(),5)),
    'Null model': (round(np.mean(FTNMDisruption_mean),5), round(np.mean(FMNMDisruption_mean),5),
                     round(np.mean(FAPSNMDisruption_mean),5)),
}

# Gini coefficient of disruption.
Gini = {
    'Real networks': (round(FTgini,5), 
                      round(FMgini,5), 
                      round(FAPSgini,5)),
    'Null model': (round(np.mean(FTNMgini),5), round(np.mean(FMNMgini),5),
                     round(np.mean(FAPSNMgini),5)),
}
    
# Pearson r between disruption and citations over all rows.
# NOTE(review): the 'Real networks' values are hard-coded; presumably they
# are rounded results of the pearsonr cells above -- confirm they are current.
DC_correlation = {
    'Real networks': (0.19310, 0.03070, -0.04000),
    'Null model': (round(np.mean(FTNMDisruption),5), round(np.mean(FMNMDisruption),5),
                     round(np.mean(FAPSNMDisruption),5)),
}

# Pearson r between disruption and citations within the last citation-rank bin.
DC_correlation1 = {
    'Real networks': (round(FTpearsonr_pearsonrc2,5), 
                      round(FMpearsonr_pearsonrc2,5), 
                      round(FAPSpearsonr_pearsonrc2,5)),
    'Null model': (round(np.mean(FTNMpearsonr_pearsonrc2),5), round(np.mean(FMNMpearsonr_pearsonrc2),5),
                     round(np.mean(FAPSNMpearsonr_pearsonrc2),5)),
}

# Bar colours, keyed like the data dicts so they can be zipped together.
color_clustering = {
    'Real networks': (colorlist['FT1'], colorlist['FM1'], colorlist['FAPS1']),
    'Null model': (colorlist['NM'], colorlist['NM'], colorlist['NM']),
}

In [None]:
def _draw_null_model_bars(ax, positions, bar_width, tick_labels, colors_by_group,
                          values_by_group, *, shade_depth, ylabel, ylim,
                          ylabel_fontsize=14, label_padding=3):
    """Draw one grouped-bar panel (one bar group per entry of ``values_by_group``).

    Parameters
    ----------
    ax : matplotlib Axes to draw on.
    positions : array of left-most x positions, one per network.
    bar_width : width of a single bar.
    tick_labels : x tick labels, one per network.
    colors_by_group : dict whose values are per-bar colour tuples; paired with
        ``values_by_group`` by insertion order (the keys are not used).
    values_by_group : dict mapping legend label -> tuple of bar heights.
    shade_depth : the shaded band spans y in [-shade_depth, 0].
    ylabel, ylim, ylabel_fontsize : y-axis label, limits and label font size.
    label_padding : gap (in points) between a bar and its value label.
    """
    # The band is added before the bars so that it is drawn underneath them.
    ax.add_patch(patches.Rectangle((-0.125, -shade_depth), 2.825, shade_depth, fill=True,
                                   edgecolor='none', facecolor='#c9e2a8', alpha=0.2))

    paired_groups = zip(colors_by_group.values(), values_by_group.items())
    for slot, (bar_colors, (group_name, heights)) in enumerate(paired_groups):
        # Groups sit side by side: centre offsets are 0.5, 1.5, ... bar widths.
        bars = ax.bar(positions + bar_width * (slot + 0.5), heights, bar_width,
                      label=group_name, color=bar_colors)
        ax.bar_label(bars, padding=label_padding)

    ax.set_ylabel(ylabel, fontsize=ylabel_fontsize, labelpad=4)
    ax.set_xlabel('Network', fontsize=14, labelpad=4)
    ax.set_xticks(positions + bar_width)
    ax.set_xticklabels(tick_labels)
    ax.set_xlim(-0.125, 2.625)
    ax.set_ylim(*ylim)
    ax.tick_params(axis='both', which='major', pad=7.5)
    ax.legend(frameon=False, fontsize=12, loc='upper right', ncol=2)


def _draw_bootstrap_hist(ax, top_means, bottom_means, color, title):
    """Plot the 10-bin empirical distributions of two sets of bootstrap means.

    ``top_means`` is drawn with triangle markers and ``bottom_means`` with
    square markers, both in ``color``.
    """
    series = (
        (top_means, '-^', 'Top 20% Citations'),
        (bottom_means, '-s', 'Last 20% Citations'),
    )
    for means, fmt, label in series:
        counts, edges = np.histogram(means, bins=10)
        centers = (edges[:-1] + edges[1:]) / 2
        # Normalise by the actual number of samples instead of a hard-coded 1000,
        # so the curve is a probability whatever the bootstrap size is
        # (max(..., 1) keeps an empty input at zero instead of dividing by zero).
        probabilities = counts / max(counts.sum(), 1)
        ax.plot(centers, probabilities, fmt, linewidth=1, markerfacecolor='#ffffff',
                color=color, label=label)

    ax.set_ylim(0, 0.4)
    ax.set_xlabel('Bootstrap $<Disruption>$', fontsize=14, labelpad=4)
    ax.set_ylabel('Probability', fontsize=14)
    ax.legend(frameon=False, fontsize=10, loc='upper right', ncol=1)
    ax.tick_params(axis='both', which='major', pad=7.5)
    ax.set_title(title, fontsize=16, verticalalignment='bottom')


# Multi-panel figure: real networks vs. null model (bar panels a-d) and
# bootstrap distributions of mean disruption per network (panels e-g).
fig = plt.figure(figsize=(16, 12))
grid = plt.GridSpec(6, 8)
plt.rcParams['xtick.direction'] = 'in'
plt.rcParams['ytick.direction'] = 'in'
plt.rcParams['font.sans-serif'] = ['Arial']

x = np.arange(len(species))
width = 0.25

# (a) NOTE(review): `clustering` is plotted under the 'Average disruption' label;
# confirm it holds mean disruption values at this point of the notebook.
ax1 = fig.add_subplot(grid[0:3, 0:3])
_draw_null_model_bars(ax1, x, width, species, color_clustering, clustering,
                      shade_depth=0.13, ylabel='Average disruption',
                      ylim=(-0.12, 0.3), label_padding=2)

# (b)
ax2 = fig.add_subplot(grid[0:3, 3:6])
_draw_null_model_bars(ax2, x, width, species, color_clustering, Gini,
                      shade_depth=0.2275, ylabel='Gini coefficient of disruption',
                      ylim=(-0.1, 0.25))

# (c)
ax3 = fig.add_subplot(grid[3:6, 0:3])
_draw_null_model_bars(ax3, x, width, species, color_clustering, DC_correlation,
                      shade_depth=0.2275, ylabel='Correlation (Disruption, Citations)',
                      ylim=(-0.08, 0.6))

# (d) The longer axis label uses a smaller font than the other bar panels.
ax4 = fig.add_subplot(grid[3:6, 3:6])
_draw_null_model_bars(ax4, x, width, species, color_clustering, DC_correlation1,
                      shade_depth=0.2275,
                      ylabel='Correlation (Disruption, Citations) (Top 10% Citations)',
                      ylim=(-0.08, 0.6), ylabel_fontsize=12)

# (e)-(g) bootstrap distributions, one network per panel.
ax5 = fig.add_subplot(grid[0:2, 6:8])
_draw_bootstrap_hist(ax5, FTDCbootstrap_means_q4, FTDCbootstrap_means_q1,
                     colorlist['FT1'], 'Theorem')

ax6 = fig.add_subplot(grid[2:4, 6:8])
_draw_bootstrap_hist(ax6, FMDCbootstrap_means_q4, FMDCbootstrap_means_q1,
                     colorlist['FM1'], 'Paper (math)')

ax7 = fig.add_subplot(grid[4:6, 6:8])
_draw_bootstrap_hist(ax7, FAPSDCbootstrap_means_q4, FAPSDCbootstrap_means_q1,
                     colorlist['FAPS1'], 'Paper (cit-HepTh)')

plt.subplots_adjust(wspace=1.3, hspace=1.4)

# Bold panel letters, positioned in axes coordinates just above the top-left corner.
for panel_ax, letter in zip((ax1, ax2, ax3, ax4), 'abcd'):
    panel_ax.text(-0.18, 1.06, letter, size=14, weight='bold', transform=panel_ax.transAxes)
for panel_ax, letter in zip((ax5, ax6, ax7), 'efg'):
    panel_ax.text(-0.28, 1.115, letter, size=14, weight='bold', transform=panel_ax.transAxes)

In [None]:
# Figure 5: generative-model (GM) networks compared with the real networks

In [None]:
# Load the three pickled generative-model (GM) graphs, in the order
# Theorem (FT), Paper (math) (FM), Paper (cit-HepTh) (FAPS).
# load_file unpickles its argument, so only point it at trusted files.
FTGM, FMGM, FAPSGM = (
    load_file(gm_path)
    for gm_path in (
        '生成模型FTGMnew-04.pkl',
        '生成模型FMGMtop17-2.pkl',
        '生成模型FcitGM12-1.pkl',
    )
)

In [None]:
# Summary statistics for the generative-model counterpart of the Theorem network.
# NOTE(review): network_statistics is defined outside this section; what it reports is not visible here.
network_statistics(FTGM)

In [None]:
# Summary statistics for the generative-model counterpart of the Paper (math) network.
# NOTE(review): network_statistics is defined outside this section; what it reports is not visible here.
network_statistics(FMGM)

In [None]:
# Summary statistics for the generative-model counterpart of the Paper (cit-HepTh) network.
# NOTE(review): network_statistics is defined outside this section; what it reports is not visible here.
network_statistics(FAPSGM)

In [None]:
# Degree tables for each generative-model graph. degree_distribution returns four
# objects; the three *pdf tables are plotted later via their 'f' and 'ccdf' columns
# (total, out- and in-degree respectively).
(FTGMdegree_all, FTGMdegreepdf,
 FTGMoutdegreepdf, FTGMindegreepdf) = degree_distribution(FTGM)

(FMGMdegree_all, FMGMdegreepdf,
 FMGMoutdegreepdf, FMGMindegreepdf) = degree_distribution(FMGM)

(FAPSGMdegree_all, FAPSGMdegreepdf,
 FAPSGMoutdegreepdf, FAPSGMindegreepdf) = degree_distribution(FAPSGM)

In [None]:
# Load the pickled disruption data that accompanies each generative-model graph
# (same FT / FM / FAPS order as the graphs themselves).
# load_file unpickles its argument, so only point it at trusted files.
FTGMDisruption, FMGMDisruption, FAPSGMDisruption = (
    load_file(disruption_path)
    for disruption_path in (
        'Disruptiondata10生成模型FTGMnew-04.pkl',
        'Disruptiondata10生成模型FMGMtop17-2.pkl',
        'Disruptiondata10生成模型FcitGM12-1.pkl',
    )
)

In [None]:
# Pair every generative-model graph with its disruption data. Element [0] of each
# result is indexed by 'Disruption' and 'Citation' in the cells that follow.
(FTGMdisruption_correlation,
 FMGMdisruption_correlation,
 FAPSGMdisruption_correlation) = (
    disruption_correlation(gm_graph, gm_disruption)
    for gm_graph, gm_disruption in (
        (FTGM, FTGMDisruption),
        (FMGM, FMGMDisruption),
        (FAPSGM, FAPSGMDisruption),
    )
)

In [None]:
# Pearson correlation between disruption and citations, Theorem generative model.
pearsonr(
    x=FTGMdisruption_correlation[0]['Disruption'],
    y=FTGMdisruption_correlation[0]['Citation'],
)

In [None]:
# Pearson correlation between disruption and citations, Paper (math) generative model.
pearsonr(
    x=FMGMdisruption_correlation[0]['Disruption'],
    y=FMGMdisruption_correlation[0]['Citation'],
)

In [None]:
# Pearson correlation between disruption and citations, Paper (cit-HepTh) generative model.
pearsonr(
    x=FAPSGMdisruption_correlation[0]['Disruption'],
    y=FAPSGMdisruption_correlation[0]['Citation'],
)

In [None]:
# Network names, in the order used by every value tuple below.
species = ('Theorem', 'Paper (math)', 'Paper (cit-HepTh)')

# Hard-coded values shown in the "Average clustering coefficient" bar panel.
clustering_GM = dict([
    ('Real networks', (0.04181, 0.10768, 0.15686)),
    ('Generative model', (0.04469, 0.08422, 0.11778)),
])

# Hard-coded values shown in the "Correlation (Disruption, Citations)" bar panel.
DC_correlation_GM = dict([
    ('Real networks', (0.19310, 0.03070, -0.04000)),
    ('Generative model', (0.10685, -0.00823, -0.03745)),
])

# Bar colours per group. The plotting cell pairs this dict with the value dicts by
# insertion order and takes legend labels from the value dicts, so the
# 'Real network' key (singular, unlike 'Real networks' above) is never displayed.
color_clustering_GM = dict([
    ('Real network', tuple(colorlist[key] for key in ('FT1', 'FM1', 'FAPS1'))),
    ('Generative model', (colorlist['GM'],) * 3),
])

In [None]:
def _draw_ccdf_panel(ax, total_pdf, out_pdf, in_pdf, colors, parameter_notes, title):
    """Plot the 'ccdf' column against the 'f' column of three degree tables on log-log axes.

    Parameters
    ----------
    ax : matplotlib Axes to draw on.
    total_pdf, out_pdf, in_pdf : tables with 'f' and 'ccdf' columns, drawn with
        circle, triangle and square markers and labelled k, k^out and k^in.
    colors : three line colours, in the same order as the tables.
    parameter_notes : four strings written one below the other at x = 1.
    title : panel title.
    """
    ax.set_xscale('log')
    ax.set_yscale('log')

    curves = (
        (total_pdf, '-o', '$k$'),
        (out_pdf, '-^', '$k^{out}$'),
        (in_pdf, '-s', '$k^{in}$'),
    )
    for (table, fmt, label), line_color in zip(curves, colors):
        ax.plot(table['f'], table['ccdf'], fmt, color=line_color,
                markersize=6, markerfacecolor='#ffffff', linewidth=1,
                markeredgewidth=1, label=label)

    # Annotation rows are placed at fixed data coordinates on the log y axis.
    for y_pos, note in zip((0.001, 0.00057, 0.00035, 0.0002), parameter_notes):
        ax.text(1, y_pos, note, fontsize=12)

    ax.set_ylabel('Cumulative probability', fontsize=14)
    ax.set_xlabel('Degree', fontsize=14)
    ax.legend(frameon=False, fontsize=12)
    ax.set_title(title, fontsize=16, verticalalignment='bottom')


def _draw_gm_bars(ax, positions, bar_width, tick_labels, colors_by_group,
                  values_by_group, *, ylabel, ylim, legend_loc=None):
    """Draw one grouped-bar panel (one bar group per entry of ``values_by_group``).

    ``colors_by_group`` and ``values_by_group`` are paired by insertion order;
    legend labels come from the keys of ``values_by_group``. ``legend_loc`` is
    forwarded to ``ax.legend`` only when given, otherwise matplotlib's default
    placement is used.
    """
    # Shaded band covering y in [-0.08, 0]; added first so the bars are drawn on top.
    ax.add_patch(patches.Rectangle((-0.775, -0.08), 4, 0.08, fill=True,
                                   edgecolor='none', facecolor='#c9e2a8', alpha=0.2))

    paired_groups = zip(colors_by_group.values(), values_by_group.items())
    for slot, (bar_colors, (group_name, heights)) in enumerate(paired_groups):
        # Groups sit side by side: centre offsets are 0.5, 1.5, ... bar widths.
        bars = ax.bar(positions + bar_width * (slot + 0.5), heights, bar_width,
                      label=group_name, color=bar_colors)
        ax.bar_label(bars, padding=3)

    ax.set_ylabel(ylabel, fontsize=14, labelpad=4)
    ax.set_xlabel('Network', fontsize=14, labelpad=4)
    ax.set_xticks(positions + bar_width)
    ax.set_xticklabels(tick_labels)
    ax.set_xlim(-0.2, 3)
    ax.set_ylim(*ylim)
    ax.tick_params(axis='both', which='major', pad=7.5)
    legend_options = {} if legend_loc is None else {'loc': legend_loc}
    ax.legend(frameon=False, fontsize=11, ncol=2, **legend_options)


# Six-panel figure for the generative model (GM): degree CCDFs (a-c),
# <k^in> versus k^out (d) and real-vs-GM bar comparisons (e-f).
fig = plt.figure(figsize=(18, 12))
grid = plt.GridSpec(2, 3)
plt.rcParams['xtick.direction'] = 'in'
plt.rcParams['ytick.direction'] = 'in'

# (a)-(c) cumulative degree distributions, one network per panel.
ax1 = fig.add_subplot(grid[0:1, 0:1])
_draw_ccdf_panel(ax1, FTGMdegreepdf, FTGMoutdegreepdf, FTGMindegreepdf,
                 (colorlist['FT1'], colorlist['FT2'], colorlist['FT3']),
                 (r'$p = 0.05$', r'$a = 2$', r'$w = 0.5$', r'$q = 0$'),
                 'Theorem (GM)')

ax2 = fig.add_subplot(grid[0:1, 1:2])
_draw_ccdf_panel(ax2, FMGMdegreepdf, FMGMoutdegreepdf, FMGMindegreepdf,
                 (colorlist['FM1'], colorlist['FM2'], colorlist['FM3']),
                 (r'$p = 0.37$', r'$a = 2$', r'$w = 0.85$', r'$q = 0.028$'),
                 'Paper (math, GM)')

ax3 = fig.add_subplot(grid[0:1, 2:3])
_draw_ccdf_panel(ax3, FAPSGMdegreepdf, FAPSGMoutdegreepdf, FAPSGMindegreepdf,
                 (colorlist['FAPS1'], colorlist['FAPS2'], colorlist['FAPS3']),
                 (r'$p = 0.65$', r'$a = 1$', r'$w = 0.8$', r'$q = 0.125$'),
                 'Paper (cit-HepTh, GM)')

# (d) 'mean' with 'sem' error bars, plotted against the table index.
# NOTE(review): the *GMdegree_all2 tables are not created in the cells shown in this
# section; confirm they are built (with 'mean' and 'sem' columns) before this cell
# when the notebook is run top to bottom on a fresh kernel.
ax4 = fig.add_subplot(grid[1:2, 0:1])
ax4.set_xscale('log')
ax4.set_yscale('log')
for table, prefix, label in (
    (FTGMdegree_all2, 'FT', 'Theorem (GM)'),
    (FMGMdegree_all2, 'FM', 'Paper (math, GM)'),
    (FAPSGMdegree_all2, 'FAPS', 'Paper (cit-HepTh, GM)'),
):
    ax4.errorbar(table.index, table['mean'], yerr=table['sem'], fmt='o-',
                 ecolor=colorlist[prefix + '1alpha'], color=colorlist[prefix + '1'],
                 markerfacecolor=colorlist[prefix + '3'], capsize=0, capthick=0,
                 markeredgewidth=1.5, elinewidth=1.5, markersize=6, label=label)
ax4.set_ylim(1.5, 200)
ax4.set_xlabel('$k^{out}$', fontsize=14)
ax4.set_ylabel('$<k^{in}>$', fontsize=14)
ax4.legend(frameon=False, fontsize=12, loc='upper right')

x = np.arange(len(species))
width = 0.4

# (e) legend location left to matplotlib's default, as before.
ax5 = fig.add_subplot(grid[1:2, 1:2])
_draw_gm_bars(ax5, x, width, species, color_clustering_GM, clustering_GM,
              ylabel='Average clustering coefficient', ylim=(-0.048, 0.24))

# (f)
ax6 = fig.add_subplot(grid[1:2, 2:3])
_draw_gm_bars(ax6, x, width, species, color_clustering_GM, DC_correlation_GM,
              ylabel='Correlation (Disruption, Citations)', ylim=(-0.06, 0.3),
              legend_loc='upper right')

plt.subplots_adjust(wspace=0.25, hspace=0.25)

# Bold panel letters in axes coordinates; the top row sits slightly higher
# because those panels also carry a title.
panel_axes = (ax1, ax2, ax3, ax4, ax5, ax6)
letter_heights = (1.07, 1.07, 1.07, 1.03, 1.03, 1.03)
for panel_ax, letter, y_pos in zip(panel_axes, 'abcdef', letter_heights):
    panel_ax.text(-0.18, y_pos, letter, size=14, weight='bold', transform=panel_ax.transAxes)