In [6]:
#import of packages
import networkx as nx
import matplotlib.pyplot as plt
import random  
import numpy as np
import sys
from  fractions import Fraction
import timeit
from time import sleep
from collections import defaultdict
import heapq as heap
from tqdm import tqdm
from community import community_louvain
import collections 
import matplotlib.cm as cm
import statistics

from IPython.display import HTML, display
from itertools import chain, combinations
from itertools import islice
from collections import Counter, defaultdict
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", category=FutureWarning)
from joblib import Parallel, delayed
import csv
from pprint import pprint
import math
from operator import itemgetter
import itertools
import scipy
from scipy import stats
from IPython.core.magic import register_line_magic
from IPython.display import HTML, display
import json
import powerlaw
from decimal import Decimal

In [8]:
#The HICH-BA model uses the following parameters:
#(i) n, i.e., the desired number of nodes,
#(ii) p_N, i.e., the probability of adding a node to the graph; with probability 1 − p_N an edge is added instead,
#(iii) r, i.e., a list where each entry r_i is the probability of a new node belonging to community i,
#(iv) h, i.e., the homophily factor, which is the probability of a node establishing an intra-community connection,
#(v) p_T, i.e., the probability of forming a triad-closing connection,
#(vi) p_PA, i.e., the probability with which a new edge is established using preferential attachment (PA).

def _pick_node(candidates, weight_of, p_PA):
    """Draw a single node from the non-empty list ``candidates``.

    With probability ``1 - p_PA`` the draw is uniform; otherwise every
    candidate is weighted by ``weight_of[node]`` (preferential attachment).
    """
    if random.uniform(0, 1) <= (1 - p_PA):
        return random.choice(candidates)
    return random.choices(candidates, weights=[weight_of[x] for x in candidates], k=1)[0]


def hichba(n,r,h,p_PA,p_N,p_T):
    """Generate a synthetic graph with the HICH-BA model.

    The graph starts with one isolated seed node per community (node ``i``
    belongs to community ``i``). Each step then either adds a node (probability
    ``p_N``) or tries to add an edge between existing nodes, until the graph
    holds ``n`` nodes.

    Parameters
    ----------
    n : int
        Desired number of nodes. If ``n <= len(r)`` only the seed nodes exist.
    r : sequence of float
        ``r[i]`` is the weight with which a new node joins community ``i``.
    h : float
        Homophily factor: probability that a randomly placed edge is searched
        for inside the community of the chosen node.
    p_PA : float
        Probability that a node is drawn by preferential attachment instead of
        uniformly at random.
    p_N : float
        Probability that a step adds a node; otherwise an edge is attempted.
    p_T : float
        Probability that an edge step closes a triad; otherwise a random edge
        is attempted.

    Returns
    -------
    networkx.Graph
        Undirected graph without self-loops whose nodes carry a ``community``
        attribute.

    Raises
    ------
    ValueError
        If nodes still have to be added but ``p_N <= 0`` (the generation loop
        could never terminate).
    """
    num_com = len(r)
    if n > num_com and p_N <= 0:
        raise ValueError("p_N must be positive, otherwise no node is ever added")

    G = nx.Graph()
    members = {c: [] for c in range(num_com)}  # community -> nodes in it
    attach_weight = {}                         # node -> 1 + number of incident edges

    # One seed node per community; seed i is labelled i and lives in community i.
    for c in range(num_com):
        G.add_node(c, community=c)
        members[c].append(c)
        attach_weight[c] = 1
    nodes = num_com  # current node count, also the label of the next new node

    def connect(a, b):
        # Add the edge and keep the preferential-attachment weights in sync.
        G.add_edge(a, b)
        attach_weight[a] += 1
        attach_weight[b] += 1

    pbar = tqdm(total=n, position=0, leave=True)
    pbar.update(nodes)
    try:
        while nodes < n:
            if random.uniform(0, 1) <= p_N:
                # --- add a node and attach it inside its own community ---
                # The label is `nodes` (not `nodes-1`), so an existing seed
                # node is never overwritten or moved to another community.
                source = nodes
                nodes += 1
                c = random.choices(range(num_com), weights=r, k=1)[0]
                G.add_node(source, community=c)
                attach_weight[source] = 1
                pbar.update(1)

                # `source` is appended only after the draw, so it cannot pick itself.
                if members[c]:
                    connect(source, _pick_node(members[c], attach_weight, p_PA))
                members[c].append(source)

            elif random.uniform(0, 1) <= p_T:
                # --- triad closure: link two neighbours of a node v ---
                eligible = [x for x in G.nodes() if G.degree(x) >= 2]
                if not eligible:
                    continue
                v = _pick_node(eligible, attach_weight, p_PA)

                neighbours = list(G.neighbors(v))
                target1 = random.choice(neighbours)
                # Exclude target1 itself, otherwise a self-loop could be created.
                options = [y for y in neighbours
                           if y != target1 and not G.has_edge(target1, y)]
                if not options:
                    continue
                # NOTE(review): the previous code also built a homophily-filtered
                # candidate list here but never used it; target2 was always drawn
                # from all open-triad options. That sampling is kept unchanged --
                # confirm whether `h` is meant to apply to triad closure.
                target2 = _pick_node(options, attach_weight, p_PA)
                connect(target1, target2)

            else:
                # --- random edge, intra- or inter-community according to h ---
                v = _pick_node(list(G.nodes()), attach_weight, p_PA)
                v_com = G.nodes[v]['community']

                excluded = set(G.neighbors(v))
                excluded.add(v)  # never offer v as its own partner (no self-loops)

                intra = random.uniform(0, 1) <= h
                candidates = [x for x in G.nodes()
                              if x not in excluded
                              and (G.nodes[x]['community'] == v_com) == intra]
                if not candidates:
                    continue
                target = _pick_node(candidates, attach_weight, p_PA)

                # Intra-community edges are always accepted. Inter-community
                # edges are accepted with probability min(1, r[target]/r[v]);
                # written as a product so that r[v] == 0 cannot divide by zero.
                if intra or random.uniform(0, 1) * r[v_com] <= r[G.nodes[target]['community']]:
                    connect(v, target)
    finally:
        pbar.close()

    return G

In [None]:
# Example run: builds the synthetic network referred to as SG5 in the paper
# and prints / plots a few summary statistics for it.
n=10000  # first argument of hichba (desired number of nodes)
r=[0.54,0.3,0.15,0.005,0.005]
h=0.2
p_N=1/(20)
p_T=0.9
p_PA=0.9

# Generate the undirected graph, then work on its directed (symmetric) copy.
Homo_BA=hichba(n,r,h,p_PA,p_N,p_T).to_directed()

print(nx.attribute_assortativity_coefficient(Homo_BA, "community"))
print(f"nodes:{Homo_BA.number_of_nodes()} edges:{Homo_BA.number_of_edges()}")

# Out-degree of every node, looked up once and reused below.
out_degrees = dict(Homo_BA.out_degree())
print(f"Average degree :{sum(out_degrees.values())/len(out_degrees)}")
# Ten largest (out-degree, node) pairs, in descending order.
print(sorted(((deg, node) for node, deg in out_degrees.items()), reverse=True)[:10])

# Split the directed edges into intra- and inter-community ones.
community_of = nx.get_node_attributes(Homo_BA, "community")
intra_edges = sum(1 for u, v in Homo_BA.edges() if community_of[u] == community_of[v])
inter_edges = Homo_BA.number_of_edges() - intra_edges
print(inter_edges,intra_edges)

print(f"clustering coefficient: {nx.average_clustering(Homo_BA, nodes=Homo_BA.nodes())}")

# Degree distribution of the undirected view on log-log axes.
degree_freq = nx.degree_histogram(Homo_BA.to_undirected())
degrees = range(len(degree_freq))
plt.figure(figsize=(8, 6))
plt.loglog(degrees, degree_freq, 'bo')
plt.xlabel("Degree", fontsize=14)
plt.ylabel("Frequency", fontsize=14)
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)

 19%|██████████████▌                                                              | 1898/10000 [01:16<10:33, 12.79it/s]