In [None]:
"""
Purpose: To create all the options of null models
to run through the statistics analysis

Reason for doing that:
1) Figure out what the edge count will be for different parameters
2) Be able to pick clusters of null models with about the
same number of nodes and edges that can be used for comparisons
(independent of their parameters)
"""

In [None]:
import numpy as np

In [4]:
import datajoint as dj
import numpy as np
# Name of the database schema used both for the virtual module and for
# declaring new tables in this notebook.
SCHEMA_NAME = "microns_minnie65_01"

# Display setting applied to DataJoint table previews.
dj.config["display.limit"] = 20

# `m65` gives attribute access to the schema's tables; `schema` is the
# decorator applied to table classes declared further down.
m65 = dj.create_virtual_module("m65", SCHEMA_NAME)
schema = dj.schema(SCHEMA_NAME)

Connecting celiib@10.28.0.34:3306


In [None]:
from importlib import reload
import graph_generating_functions_library as g_gen
g_gen= reload(g_gen)

In [1]:
from graph_generating_functions_library import *

# Names of every null-model generator considered in this notebook.
total_graphs = [
    "erdos_renyi_random_location",
    "watts_strogatz_graph_smallworld_random_location",
    "random_tree_random_location",
    "random_uniform",
    "random_power_law",
    "linear_preferencial_attachment_random",
    "linear_preferncial_attachment_wheel",
    "vertex_duplication",
    "vertex_duplication_with_mutation",
    "vertex_duplication_with_complement",
]

# Candidate values for each generator keyword argument.
n_options = np.arange(40, 1000, 40).astype("int")
m_options = np.arange(5, 20, 5).astype("int")
p_options = np.round(np.arange(0.1, 0.9, 0.2), 2)
p2_options = np.round(np.arange(0.1, 0.9, 0.2), 2)
alpha_options = np.round(np.arange(1.5, 3.5, 0.1), 2)

# Generator function -> {keyword: candidate values} grid to sweep.
# The keyword order inside each grid is deliberate and must not be shuffled:
# get_graph_hash concatenates the parameters in dict order, so reordering
# would change the hash assigned to an otherwise identical combination.
graph_parameters_to_iterate = {
    erdos_renyi_random_location: {
        "n": n_options,
        "p": p_options,
    },
    watts_strogatz_graph_smallworld_random_location: {
        "n": n_options,
        "p": p_options,
        "m": m_options,
    },
    random_tree_random_location: {
        "n": n_options,
    },
    random_uniform: {
        "n": n_options,
        "m": m_options,
    },
    random_power_law: {
        "n": n_options,
        "alpha": alpha_options,
    },
    linear_preferencial_attachment_random: {
        "n": n_options,
        "m": m_options,
        "p": p_options,
    },
    linear_preferncial_attachment_wheel: {
        "n": n_options,
        "m": m_options,
    },
    vertex_duplication: {
        "n": n_options,
        "p": p_options,
    },
    vertex_duplication_with_mutation: {
        "n": n_options,
        "p": p_options,
        "p2": p2_options,
    },
    vertex_duplication_with_complement: {
        "n": n_options,
        "p": p_options,
        "p2": p2_options,
    },
}



In [2]:
import hashlib

def hash_string(s):
    """Return a deterministic integer in [0, 10**8) derived from the SHA-256 of `s`.

    The string is UTF-8 encoded, hashed, and the digest (read as one big
    integer) is reduced modulo 10**8.
    """
    digest = hashlib.sha256(s.encode("utf-8")).digest()
    # Big-endian interpretation of the raw digest equals int(hexdigest, 16).
    return int.from_bytes(digest, "big") % 10 ** 8

def get_graph_hash(func_name,param_dict):
    """Hash a generator name together with its parameters.

    Builds the string ``<func_name>_<key>_<value>...`` by appending every
    (key, value) pair of `param_dict` in the dict's iteration order, then
    passes it to `hash_string`. Because iteration order is used, the same
    parameters supplied in a different key order give a different hash.
    """
    parameter_suffix = "".join(
        f"_{key}_{value}" for key, value in param_dict.items()
    )
    return hash_string(func_name + parameter_suffix)

# Sanity check: hash an arbitrary name together with a toy parameter dict.
play_dict = {"n": 10, "p": 5}
get_graph_hash("Brendan", play_dict)

80771626

In [None]:
# Manual DataJoint table holding one row per (graph type, parameter combination).
# Rows are keyed by graph_hash (the value returned by get_graph_hash, always
# below 10**8) and record the mean and deviation of the edge count measured
# over repeated generations of that graph.
@schema
class NullModelParameters(dj.Manual):
    # The generator parameters (n, p, m, alpha, p2) all default to NULL, so a row
    # only needs to supply the parameters its graph type actually takes.
    definition="""
    graph_hash: int unsigned # the hash of the type of graph
    ---
    graph_name : char(60)              # the type of graph used to generate
    n =NULL:          int unsigned      # number of nodes
    p=NULL :          double            # probability of connection ro duplication or rearranging an edge
    m=NULL :          int unsigned      # number of desired edges
    alpha=NULL:       double            # the exponent for the power sequence
    p2=NULL:          double            # another probability that is used for generation
    edge_average  :    double #the average number of edges
    edge_deviation :   double #the deviation of the edge sizes
    """


In [None]:
NullModelParameters()

In [None]:
n_iterations = 20
import itertools

# Dry run: report how many parameter combinations each generator's grid
# expands to, and the grand total across all generators.
total_combin = 0
for g_func, g_param_dict in graph_parameters_to_iterate.items():
    print(f"\nWorking on graph:{g_func.__name__}")

    # Cartesian product over the candidate values of every keyword,
    # taken in the grid's own key order.
    parameter_names = list(g_param_dict.keys())
    parameter_combinations = list(itertools.product(*g_param_dict.values()))

    n_combinations = len(parameter_combinations)
    print("len(parameter_combinations) = " + str(n_combinations))
    total_combin += n_combinations

print("Total Datapoints = " + str(total_combin))
        
    
          

# Running the Erdos Renyi Graphs

In [None]:
# The models I want to run
# Each entry is compared against a generator's `__name__` in the run loop;
# generators whose `__name__` is not listed here are skipped.
# NOTE(review): graph_parameters_to_iterate references the generators by long
# names (e.g. linear_preferencial_attachment_random, vertex_duplication).
# Confirm that those functions' `__name__` really are the short names below,
# otherwise every generator is skipped and nothing is inserted.
graph_models_to_run = [
    "LPA_random",
    "VD_basic",
    "VD_mutation",
    "VD_complement"
]

In [None]:
n_iterations = 20
from tqdm import tqdm
import itertools

# For every selected generator: expand its parameter grid, generate
# n_iterations graphs per combination, and store the mean / deviation of the
# edge count in NullModelParameters (one row per combination).
for g_func,g_param_dict in graph_parameters_to_iterate.items():
    print(f"\nWorking on graph:{g_func.__name__}")

    # Only generators explicitly listed in graph_models_to_run are swept.
    if g_func.__name__ not in graph_models_to_run:
        print("Not in list to run so skipping")
        continue

    # Cartesian product of the candidate values, re-keyed by parameter name.
    # Key order follows the grid so the hash string stays stable.
    parameter_names = list(g_param_dict.keys())
    parameter_combinations = list(itertools.product(*g_param_dict.values()))
    print("len(parameter_combinations) = " + str(len(parameter_combinations)))
    param_dict_combinations = [
        dict(zip(parameter_names, combination))
        for combination in parameter_combinations
    ]

    dicts_to_insert = []
    for param_dict in tqdm(param_dict_combinations):
        edge_list = []
        for _ in range(n_iterations):
            try:
                G = g_func(**param_dict)
                edge_list.append(len(G.edges()))
            except Exception as err:
                # Catch Exception instead of using a bare `except:` so that a
                # kernel interrupt (KeyboardInterrupt) or SystemExit still
                # stops the sweep, and report why the generation failed.
                print(f"Could not generate this graph with param = {param_dict} so moving to next combination")
                print(f"    reason: {type(err).__name__}: {err}")
                break

        # Skip this combination unless every one of the n_iterations graphs
        # was generated; partial samples are not stored.
        if len(edge_list) != n_iterations:
            continue

        graph_hash = get_graph_hash(g_func.__name__,param_dict)

        # Row = the generator's own parameters plus the key, the generator
        # name and the edge-count statistics. Parameters this generator does
        # not take are simply absent and fall back to the table's NULL default.
        insert_dict = dict(param_dict,
                           graph_hash=graph_hash,
                           graph_name=g_func.__name__,
                           edge_average=np.mean(edge_list),
                           edge_deviation=np.std(edge_list)
                          )
        dicts_to_insert.append(insert_dict)

    # One batched insert per generator. skip_duplicates=True lets the cell be
    # re-run; NOTE(review): it presumably also drops, without error, any row
    # whose graph_hash collides with one already stored — confirm if that matters.
    NullModelParameters.insert(dicts_to_insert,skip_duplicates=True)
        
    
          

In [5]:
m65.NullModelParameters()

graph_hash  the hash of the type of graph,graph_name  the type of graph used to generate,n  number of nodes,p  probability of connection ro duplication or rearranging an edge,m  number of desired edges,alpha  the exponent for the power sequence,p2  another probability that is used for generation,edge_average  the average number of edges,edge_deviation  the deviation of the edge sizes
,,,,,,,,


In [None]:
dicts_to_insert[0]