In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
import igraph as ig

In [None]:
def getDF(network='./network.dat', community='./community.dat'):
    """Summarise every community of a benchmark graph in one table.

    The graph is read as an undirected NCOL edge list whose vertex names are
    1-based integers.  Community labels come from the second column of a
    headerless, tab-separated file; row ``k`` (0-based) of that file is taken
    to describe the vertex named ``k+1``.  Vertex ids and labels are both
    shifted to 0-based, and multi-edges / self-loops are removed before any
    statistic is computed.

    Parameters
    ----------
    network : str
        Path of the NCOL edge list (defaults to the original hard-coded file).
    community : str
        Path of the community file (defaults to the original hard-coded file).
        NOTE(review): the first column of this file is ignored; rows are
        assumed to be ordered by vertex id -- confirm for new inputs.

    Returns
    -------
    pandas.DataFrame
        One row per non-empty community, indexed by its 0-based label and
        sorted by ``nodes``, with columns

        * ``nodes``   -- number of vertices in the community
        * ``edges``   -- number of edges with both endpoints in the community
        * ``avg_deg`` / ``std_deg`` -- mean / (population) standard deviation
          of the degrees inside the induced subgraph
        * ``sum_deg`` -- sum of the members' degrees in the whole graph
        * ``mu``      -- ``1 - 2*edges/sum_deg``, i.e. the share of the
          members' edge endpoints that leave the community
    """
    g = ig.Graph.Read_Ncol(network, directed=False)
    labels = pd.read_csv(community, sep='\t', header=None)[1].tolist()

    # Vertex called `name` owns row `name-1` of the community file; the label
    # itself is shifted to 0-based as well.
    g.vs['comm'] = [labels[int(name) - 1] - 1 for name in g.vs['name']]
    g.simplify()

    # Degree in the full graph; the attribute is carried over to subgraphs.
    g.vs['deg'] = g.degree()

    # Group the vertex indices by label in a single pass instead of scanning
    # every vertex once per community.
    members = {}
    for vertex, label in enumerate(g.vs['comm']):
        members.setdefault(label, []).append(vertex)

    # Only labels that actually occur are visited, so gaps in the labelling
    # no longer yield empty subgraphs (and NaN rows).
    community_ids = sorted(members)
    rows = []
    for label in community_ids:
        sg = g.subgraph(members[label])
        internal_deg = sg.degree()
        rows.append([sg.vcount(), sg.ecount(), np.mean(internal_deg),
                     np.std(internal_deg), sum(sg.vs['deg'])])

    D = pd.DataFrame(data=rows, index=community_ids,
                     columns=['nodes', 'edges', 'avg_deg', 'std_deg', 'sum_deg'])
    D = D.sort_values(by='nodes')
    D['mu'] = 1 - 2 * D['edges'] / D['sum_deg']
    return D

In [None]:
def plotReg(D, title='', ref_mu=.2):
    """Scatter-plot ``mu`` against community size and print a linear fit.

    Draws on the current matplotlib axes: one dot per row of ``D``, axis
    labels, ``title`` and a dashed horizontal line at ``ref_mu`` spanning the
    observed size range.  A least-squares line ``mu ~ nodes`` is then fitted
    and four numbers are printed: intercept, slope, and the fitted value at
    the smallest and at the largest community size.  Nothing is returned.

    Parameters
    ----------
    D : pandas.DataFrame
        Needs the columns ``'nodes'`` and ``'mu'``.
    title : str
        Plot title.
    ref_mu : float
        Height of the dashed reference line (previously hard-coded to 0.2).
        NOTE(review): presumably the mixing parameter the benchmark was
        generated with -- confirm.
    """
    # Rows with a missing size or mu cannot enter the regression (the fit
    # raises on NaN) and would make the size range ill-defined; drop them on a
    # local view so the caller's frame is untouched.
    data = D.dropna(subset=['nodes', 'mu'])
    sizes = data['nodes']
    lo, hi = sizes.min(), sizes.max()

    plt.scatter(sizes, data['mu'], marker='.')
    plt.xlabel('Community size')
    plt.ylabel(r'Average empirical $\mu$ value')
    plt.title(title)
    plt.hlines(ref_mu, lo, hi, linestyles='dashed')

    # Column vectors on both sides, hence the 2-D coef_ / 1-D intercept_ below.
    reg = LinearRegression().fit(sizes.values.reshape(-1, 1),
                                 data['mu'].values.reshape(-1, 1))
    # One predict call for both ends of the size range.
    fit_lo, fit_hi = reg.predict(np.array([[lo], [hi]]))[:, 0]
    print(reg.intercept_[0], reg.coef_[0][0], fit_lo, fit_hi)
    

In [None]:
# Build the per-community table from the default input files and draw the
# community-size vs. mu scatter plot (the linear-fit summary is printed too).
plotReg(getDF())