In [1]:
import numpy as np 
import networkx as nx
import pickle
import random 
import json
from tqdm import tqdm

In [2]:
def get_motif_abs(graph):
    """Return the absolute triad counts of a graph as a list.

    The counts come from ``nx.triadic_census`` and keep the order in which
    that census reports them; the entries for the triad types '003', '012'
    and '102' are left out.

    graph: the graph
    """
    # triad types that are not reported
    skipped = {'003', '012', '102'}

    census = nx.triadic_census(graph)

    return [count for name, count in census.items() if name not in skipped]

def get_motif_sig_profile(graph, rand_num, seed=None):
    """Return the normalized motif significance profile of a directed graph.

    For every triad type reported by ``get_motif_abs`` a Z-score is computed
    against an ensemble of random graphs:

        Z_i = (N_i - mean(N_rand_i)) / std(N_rand_i)

    The random graphs are generated with ``nx.directed_configuration_model``
    from the in- and out-degree sequences of ``graph``. The Z-score vector is
    then divided by its Euclidean norm.

    graph: the directed graph (must provide ``in_degree()`` / ``out_degree()``)
    rand_num: the number of random graphs to compute (must be >= 1)
    seed: optional int or ``random.Random`` used to generate the random
        graphs reproducibly; ``None`` (default) leaves the choice of random
        source to networkx, as before

    Returns a list of floats. Entries whose random-ensemble standard
    deviation is zero are reported as 0.0, and an all-zero Z-score vector is
    returned unchanged.

    Raises ValueError if ``rand_num`` is smaller than 1 (mean and standard
    deviation of an empty ensemble are undefined and would yield NaNs).
    """
    if rand_num < 1:
        raise ValueError("rand_num must be at least 1, got %r" % (rand_num,))

    observed = np.asarray(get_motif_abs(graph), dtype=float)

    # the degree sequences are identical for every random graph: compute once
    din = [d for _, d in graph.in_degree()]
    dout = [d for _, d in graph.out_degree()]

    # A single generator shared by all iterations: every random graph is
    # different, yet the whole ensemble is reproducible for a given seed.
    if seed is None or isinstance(seed, random.Random):
        rng = seed
    else:
        rng = random.Random(seed)

    # absolute triad counts of rand_num random configuration-model graphs
    rand_triads = []
    for _ in range(rand_num):
        rand_graph = nx.directed_configuration_model(
            din, dout, create_using=nx.DiGraph(), seed=rng
        )
        rand_triads.append(get_motif_abs(rand_graph))

    rand_triads = np.array(rand_triads, dtype=float)

    rand_mean = rand_triads.mean(axis=0)
    rand_std = rand_triads.std(axis=0)

    # Z-scores; where the ensemble has zero spread the score is set to 0
    diff = observed - rand_mean
    z_scores = np.divide(diff, rand_std, out=np.zeros_like(diff), where=rand_std != 0)

    # normalize the Z-scores to unit length (skipped for an all-zero vector)
    norm = np.sqrt(np.sum(z_scores ** 2))
    if norm != 0:
        z_scores = z_scores / norm

    return z_scores.tolist()

In [3]:
# Load the pickled list of graphs.
# NOTE: unpickling can execute arbitrary code, so only load files from a trusted source.
reddit_filename = '../data/reddit/graphs-redditHyperlinks-body_directed.pkl'
with open(reddit_filename, mode='rb') as graph_file:
    graphs = pickle.load(graph_file)

In [4]:
# Number of graphs loaded from the pickle file
len(graphs)

1217

In [5]:
# Optional: uncomment the lines below to restrict the analysis to the first 300 graphs
# graphs = graphs [0:300]
# len(graphs)

In [6]:
# Collects one motif significance profile (a list of floats) per graph
motif_sig_list = []

In [7]:
# Start from an empty list so that re-running this cell never appends
# duplicate profiles to results left over from a previous run.
motif_sig_list = []

# Iterate over the list itself (not enumerate) so tqdm can read its length
# and display a real progress bar with an ETA; the index was unused anyway.
# Each profile is computed against 100 random graphs.
for G in tqdm(graphs, desc='motif significance profiles'):
    motif_sig_list.append(get_motif_sig_profile(G, 100))

1217it [17:24,  1.17it/s]


In [8]:
# Sanity check: number of computed profiles (expected to equal len(graphs))
print(len(motif_sig_list))

1217


In [9]:
# Triad labels for the profile entries: all networkx triad names except the
# first three, mirroring the three triad types dropped in get_motif_abs.
triads = [*nx.algorithms.triads.TRIAD_NAMES][3:]
print(triads)

['021D', '021U', '021C', '111D', '111U', '030T', '030C', '201', '120D', '120U', '120C', '210', '300']


In [10]:
# Bundle the graphs with their motif significance profiles and the triad
# labels that name each profile entry.
result = {
    'graphs': graphs,
    'motif_sp': motif_sig_list,
    'motifs': triads,
}

filename = 'reddit.pkl'
# Use a context manager so the file is flushed and closed deterministically
# instead of leaving the handle open until garbage collection.
with open(filename, 'wb') as out_file:
    pickle.dump(result, out_file)