In [None]:
"""
Purpose: To create a table that has all of the stats run for graphs
with about the same number of average edges

Pseudocode:
1) Identify Suitable node number and edge range 
To get a lot of graphs from different graph types
- use it to restrict our parameter table
2) For each row in the restricted table
a. Pull down parameters
b. Run statistics machine on them
c. Push the average statistics

3) Can run these jobs in parallel and then 
pull them down as dataframe to analyze them

"""

In [1]:
import datajoint as dj
import numpy as np
m65 = dj.create_virtual_module('m65', 'microns_minnie65_01')
schema = dj.schema("microns_minnie65_01")
dj.config["display.limit"] = 100

Connecting celiib@10.28.0.34:3306


In [2]:
from importlib import reload
import graph_generating_functions_library as g_gen
import graph_statistics_and_simulations as g_stat
g_gen= reload(g_gen)
g_stat= reload(g_stat)

In [3]:
search_dict = [
    "n=280",
    "edge_average>900",
    "edge_average<1100"
]

total_search = " AND ".join(search_dict)
print(total_search)

current_table = (m65.NullModelParameters() & total_search)
current_table

n=280 AND edge_average>900 AND edge_average<1100


graph_hash  the hash of the type of graph,graph_name  the type of graph used to generate,n  number of nodes,p  probability of connection ro duplication or rearranging an edge,m  number of desired edges,alpha  the exponent for the power sequence,p2  another probability that is used for generation,edge_average  the average number of edges,edge_deviation  the deviation of the edge sizes
30308334,power_law,280,,,2.4,,1072.9,77.43829801848696
43532995,power_law,280,,,2.6,,988.5,55.6205897128033
47052275,power_law,280,,,2.7,,924.9,41.38707527719252
84965756,power_law,280,,,2.5,,1036.3,82.65839340321104


In [4]:
important_models = set(["VD_mutation",
                   "VD_complement",
                   "VD_basic",
                   "LPA_random",
                   "LPA_wheel",
                   "small_world",
                   "power_law",
                   "erdos_renyi"])

In [5]:
def check_models_match(graph_names,important_models):
    graph_names = set(graph_names)
    important_models = set(important_models)
    subgroup_models = important_models.intersection(set(dict(Counter(graph_names)).keys()))
    if len(important_models) > len(subgroup_models):
        return False
    else:
        return True

In [6]:
from collections import Counter

graph_names = (current_table).fetch("graph_name")
check_models_match(graph_names,important_models)

False

In [7]:
"""
Computes the viable n that is valid for all model
"""
from collections import Counter

n_options = m65.NullModelParameters().fetch("n")
unique_n = np.unique(n_options)
# get the most viable node option
all_model_names = np.unique(m65.NullModelParameters().fetch("graph_name"))
viable_n = []

for n in unique_n:
    models_for_n = np.unique((m65.NullModelParameters & "n=" + str(n)).fetch("graph_name"))
    if check_models_match(models_for_n,all_model_names):
        viable_n.append(n)
    

In [8]:
viable_n

[40, 80, 120, 160, 200, 240, 280, 320, 360, 400, 440, 480, 520]

In [None]:
np.arange(200,1000,50)

In [None]:
"""
Pseduocode for coming up with testable windows
For each viable n
1) Find the max_edge_numb and min_edge_numb for each of the viable n's
2) Start loop that looks for all graphs: Start the window from min_edge_numb to min_edge_numb + window_size
- Check the graph types in that window size
- if have all of the important graphs then save in list (n, n_edge_min, n_edge_max, Counter of types) and print out
- go onto next window until reach the max_edge_number
Go to next viable n

"""

important_models = set([
    "VD_mutation",
                   #"VD_complement",
                   "VD_basic",
                   "LPA_random",
                   "LPA_wheel",
                   "small_world",
                   "power_law",
                   #"erdos_renyi"
                       ])


total_window_sizes = np.arange(50,1001,50)

window_size_successes = dict([(n,[]) for n in viable_n])
for window_size in total_window_sizes:
    print(f"\n\n ---- Working on window_size = {window_size}----------")

    window_jump = 50

    saved_node_edge_windows = []

    
    for n in viable_n:
        longest_graph_names = []
        local_success_counter = 0
        edge_numbers = models_for_n = np.unique((m65.NullModelParameters & "n=" + str(n)).fetch("edge_average"))
        min_edge_numb,max_edge_numb = np.min(edge_numbers),np.max(edge_numbers)
        print(f"For {n} nodes: min_edge_numb = {min_edge_numb}, max_edge_len = {max_edge_numb} ")

        current_window = np.array([min_edge_numb, min_edge_numb + window_size])

        break_window_iterator = True
        while break_window_iterator:
            #pull down the graph types associated with that edge number
            search_dict = [
                f"n={n}",
                f"edge_average>={current_window[0]}",
                f"edge_average<={current_window[1]}"
            ]

            total_search = " AND ".join(search_dict)
            current_table = (m65.NullModelParameters() & total_search)



            graph_names = (current_table).fetch("graph_name")
            unique_graph_names = np.unique(graph_names)
            
            if len(longest_graph_names) < len(unique_graph_names):
                longest_graph_names = unique_graph_names
            if check_models_match(graph_names,important_models):
                active_node_edge_window = dict(
                                        n_nodes=n,
                                        edge_min=current_window[0],
                                        edge_max=current_window[1],
                                        n_graph_types = len(dict(Counter(graph_names)).keys()),
                                        graph_types = Counter(graph_names)
                                        )
                saved_node_edge_windows.append(active_node_edge_window)
                print(f"-->Yay!! Found viable parameter_windw = {active_node_edge_window}")

                window_size_successes[n] = window_size_successes[n] + [window_size]

            if current_window[1] >= max_edge_numb:
                #print("               Top of window has exceeded max_edge_number")
                break_window_iterator = False
                break
            else:
                current_window += current_window + window_jump
        print(f"                          Longest graph names list = {longest_graph_names}")
        print(f"                          Missing graph = {set(important_models).difference(set(longest_graph_names))}")

In [None]:
viable_n

In [None]:
min_window_success = dict([(n,np.min(window_size_successes[n])) for n in window_size_successes.keys()])
min_window_success

# Getting the actual variable ones to test over

In [9]:
min_window_success = {40: 50,
 80: 150,
 120: 50,
 160: 100,
 200: 150,
 240: 150,
 280: 250,
 320: 200,
 360: 200,
 400: 200,
 440: 250,
 480: 250,
 520: 300}

In [17]:
important_models = set([
    "VD_mutation",
                   #"VD_complement",
                   "VD_basic",
                   "LPA_random",
                   "LPA_wheel",
                   "small_world",
                   "power_law",
                   "erdos_renyi"
                       ])

saved_node_edge_windows = []
for n,window_size in min_window_success.items():
    print(f"\n\n ---- Working on n_nodes = {n}, window_size = {window_size}----------")

    window_jump = 50

    longest_graph_names = []
    local_success_counter = 0
    edge_numbers = models_for_n = np.unique((m65.NullModelParameters & "n=" + str(n)).fetch("edge_average"))
    min_edge_numb,max_edge_numb = np.min(edge_numbers),np.max(edge_numbers)
    print(f"For {n} nodes: min_edge_numb = {min_edge_numb}, max_edge_len = {max_edge_numb} ")

    current_window = np.array([min_edge_numb, min_edge_numb + window_size])

    break_window_iterator = True
    while break_window_iterator:
        #pull down the graph types associated with that edge number
        search_dict = [
            f"n={n}",
            f"edge_average>={current_window[0]}",
            f"edge_average<={current_window[1]}"
        ]

        total_search = " AND ".join(search_dict)
        current_table = (m65.NullModelParameters() & total_search)



        graph_names = (current_table).fetch("graph_name")
        unique_graph_names = np.unique(graph_names)

        if len(longest_graph_names) < len(unique_graph_names):
            longest_graph_names = unique_graph_names
        if check_models_match(graph_names,important_models):
            active_node_edge_window = dict(
                                    n_nodes=n,
                                    edge_min=current_window[0],
                                    edge_max=current_window[1],
                                    n_graph_types = len(dict(Counter(graph_names)).keys()),
                                    graph_types = Counter(graph_names)
                                    )
            saved_node_edge_windows.append(active_node_edge_window)
            print(f"-->Yay!! Found viable parameter_windw = {active_node_edge_window}")

        if current_window[1] >= max_edge_numb:
            #print("               Top of window has exceeded max_edge_number")
            break_window_iterator = False
            break
        else:
            current_window += current_window + window_jump
    



 ---- Working on n_nodes = 40, window_size = 50----------




For 40 nodes: min_edge_numb = 26.8, max_edge_len = 600.0 
-->Yay!! Found viable parameter_windw = {'n_nodes': 40, 'edge_min': 103.6, 'edge_max': 203.6, 'n_graph_types': 9, 'graph_types': Counter({'power_law': 10, 'VD_mutation': 6, 'small_world': 4, 'VD_complement': 2, 'random_uniform': 1, 'LPA_random': 1, 'erdos_renyi': 1, 'VD_basic': 1, 'LPA_wheel': 1})}


 ---- Working on n_nodes = 80, window_size = 150----------
For 80 nodes: min_edge_numb = 52.6, max_edge_len = 2212.0 
-->Yay!! Found viable parameter_windw = {'n_nodes': 80, 'edge_min': 155.2, 'edge_max': 455.2, 'n_graph_types': 9, 'graph_types': Counter({'power_law': 14, 'VD_mutation': 8, 'VD_complement': 6, 'small_world': 4, 'VD_basic': 2, 'LPA_random': 2, 'erdos_renyi': 2, 'LPA_wheel': 1, 'random_uniform': 1})}


 ---- Working on n_nodes = 120, window_size = 50----------
For 120 nodes: min_edge_numb = 80.9, max_edge_len = 4998.0 
-->Yay!! Found viable parameter_windw = {'n_nodes': 120, 'edge_min': 473.6, 'edge_max': 673.6, 'n_gra

In [14]:
# node_edge_pair = [(k['n_nodes'],np.mean([k['edge_min'],k['edge_max']])) for k in saved_node_edge_windows]
# node_edge_pair

[(40, 153.6),
 (80, 305.2),
 (120, 573.6),
 (160, 770.8),
 (200, 980.4000000000001),
 (240, 1091.6),
 (280, 1390.8),
 (320, 1391.2),
 (360, 1516.4),
 (400, 1618.0),
 (440, 1814.8000000000002),
 (480, 1913.6),
 (520, 2132.0)]

# The Missing Erdos Renyi Models (which required really low p) were accomplished in NullModelParameters_Known

In [18]:
np.savez("Viable_node_edge_restrictions.npz",saved_node_edge_windows=saved_node_edge_windows)