In [1]:
import numpy as np 
import networkx as nx
from scipy import stats
import pickle
import random 
import json
import datetime
import math
from tqdm import tqdm
from fa2 import ForceAtlas2
import matplotlib.pyplot as plt
import random

In [2]:
def get_motif_abs(graph):
    """Return a list of motif absolute occurences
    
    graph: the graph
    """
    # get all triads
    triads = nx.triadic_census(graph)
    
    # remove first three triads 
    triads_to_remove = ('003', '012', '102')
    for k in triads_to_remove:
        triads.pop(k, None)
    
    return list(triads.values())

def get_motif_sig_profile(graph, rand_num):
    """Returns a the graph motif significance profile
    
    graph: the graph
    rand_num: the number of random graphs to compute 
    """    
    triads = get_motif_abs(graph)
    
    # compute rand_num significance profiles of random graphs
    rand_triads = [] 
    
    din = list(d for n, d in graph.in_degree())
    dout = list(d for n, d in graph.out_degree())
    
    for i in range(rand_num):
        # generate random configuration model
        rand_graph = nx.directed_configuration_model(din, dout, create_using=nx.DiGraph())
        rand_triads.append(get_motif_abs(rand_graph))
        
    rand_triads = np.array(rand_triads)
        
    rand_mean = np.round(rand_triads.mean(axis=0), 0)
    rand_std = rand_triads.std(axis=0)
        
    # divide and check for zeros 
    a = (triads - rand_mean)
    b = rand_std
    z_scores = np.divide(a, b, out=np.zeros_like(a), where=b!=0)
    
    # normalize Z scores 
    a = z_scores
    b = np.sqrt(np.sum(z_scores**2))
    z_scores = np.divide(a, b, out=np.zeros_like(a), where=b!=0)
        
    return z_scores.tolist()

In [3]:
# din = list(d for n, d in G.in_degree())
# dout = list(d for n, d in G.out_degree())
# rand_graph = nx.directed_configuration_model(din, dout, create_using=nx.DiGraph(), seed=random.seed())
# 
# get_motif_sig_profile(rand_graph, 1000)

In [4]:
# din = list(d for n, d in G.in_degree())
# dout = list(d for n, d in G.out_degree())
# rand_graph = nx.directed_configuration_model(din, dout, create_using=nx.DiGraph(), seed=0)
# get_motif_sig_profile(rand_graph, 1000)

In [5]:
graphs = []

In [6]:
for i in range(0,10):
    G = nx.gnr_graph(50+i, 0.8)
    graphs.append(G)
    
print(len(graphs))

10


In [7]:
for i in range(0,10):
    n = i % 10
    e = 3 + i%5
    G = nx.ring_of_cliques(3+n, e).to_directed()   
    graphs.append(G)
    
print(len(graphs))

20


In [8]:
for i in range(0,10):
    n = 50 + i
    e = 10 + 2*i
    G = nx.gnm_random_graph(50 + i , e, directed=True)
    graphs.append(G)
    
print(len(graphs))

30


In [9]:
for i in range(0,10):
    n = i % 10
    e = 3 + i%3
    G = nx.connected_caveman_graph(i, e).to_directed()   
    graphs.append(G)
    
print(len(graphs))

40


In [10]:
for i in range(0,10):
    G = nx.gnr_graph(50+i, 0.7)
    graphs.append(G)
    
print(len(graphs))

50


In [11]:
for i in range(0,10):
    G = nx.fast_gnp_random_graph(50+i, 0.1, directed=True)
    graphs.append(G)
    
print(len(graphs))

60


In [12]:
# for i in range(0,20):
#     D = nx.erdos_renyi_graph(20, 0.25, directed=True)
#     din = list(d for n, d in D.in_degree())
#     dout = list(d for n, d in D.out_degree())
#     rand_graph = nx.directed_configuration_model(din, dout, create_using=nx.DiGraph())
#     graphs.append(rand_graph)
# 
# print(len(graphs))

In [13]:
# add time attribute to synthetic data 
for index, G in enumerate(graphs):
    date = datetime.date.today() + datetime.timedelta(days=math.floor(index/24))
    hour = index%24
    G.graph['time'] = (date, hour)

In [14]:
# Compute the graph layout
G = nx.Graph()
for graph in tqdm(graphs):
    G.add_nodes_from(graph.nodes(data=True))
    G.add_edges_from(graph.edges(data=True))

forceatlas2 = ForceAtlas2(verbose=False)
coordinates = forceatlas2.forceatlas2_networkx_layout(G, pos=None, iterations=1000)

# modify positions 
for graph in tqdm(graphs):
    nx.set_node_attributes(graph, coordinates, 'coord')

100%|██████████| 60/60 [00:00<00:00, 1766.73it/s]
100%|██████████| 60/60 [00:00<00:00, 12679.91it/s]


In [15]:
# random.shuffle(graphs)

In [16]:
motif_sig_list = []

In [17]:
for index, G in tqdm(enumerate(graphs)): 
    motif_sig_list.append(get_motif_sig_profile(G, 100))

60it [00:51,  1.15it/s]


In [18]:
print(len(motif_sig_list))

60


In [19]:
triads = list(nx.algorithms.triads.TRIAD_NAMES)[3:]
print(triads)

['021D', '021U', '021C', '111D', '111U', '030T', '030C', '201', '120D', '120U', '120C', '210', '300']


In [20]:
result = {'graphs' : graphs,
         'motif_sp' : motif_sig_list,
         'motifs' : triads}

filename = 'network_structures.pkl'
pickle.dump( result, open( filename, 'wb' ))

In [21]:
print('150 Nodes') 

edges = 0 
for graph in tqdm(graphs):
    edges = edges + len(graph.edges)
print('Number of edges: ', edges)

100%|██████████| 60/60 [00:00<00:00, 46655.22it/s]

150 Nodes
Number of edges:  6831



