In [2]:
import os
import numpy as np
import pandas as pd
from pathlib import Path
from typing import Iterable, Any
from plotly import graph_objects as go, subplots as sp
from concurrent.futures import ProcessPoolExecutor as Exe
import networkx as nx

from local.caching import load, save, save_exists
from local.figures import layout, xaxis_desc, yaxis_desc
from txyl_common.biocyc_facade.pgdb import Pgdb, Dat, Traceable

In [3]:
# Assemble the per-sample graph properties: on the first run the 21 cached
# batches are concatenated and saved under one name; later runs load that
# merged cache directly.
save_name = "properties"
props = []
if not save_exists(save_name):
    for i in range(21):
        print(f"\r{i+1}", end="")
        props += load(f"properties_{i+1}", silent=True)
    save(save_name, props)
else:
    props = load(save_name)

# Sample names, one per line. Only the line terminator is stripped: slicing
# off the last character unconditionally would truncate a final line that
# has no trailing newline.
with open("./cache/samples.lst") as f:
    all_samples = {line.rstrip("\n") for line in f}

# Flatten the batches of (index, name) pairs, keeping the listed samples in
# batch order.
sections = load("batches")
samples = [name for group in sections for _, name in group if name in all_samples]

# Position of the last sample whose name contains "metacyc"; that row is
# removed from props below.
# NOTE(review): this relies on props[i] describing samples[i] -- confirm.
# NOTE(review): when no name matches, the default of 0 still drops row 0.
METACYC_INDEX = 0
for i, name in enumerate(samples):
    if "metacyc" in name:
        METACYC_INDEX = i
props = [x for i, x in enumerate(props) if i != METACYC_INDEX]
props = np.array(props)
METACYC_INDEX

recovering & decompressing cached data from [{WORKSPACE}/main/explore/cache/properties.pkl.gz]
recovering & decompressing cached data from [{WORKSPACE}/main/explore/cache/batches.pkl.gz]


14682

In [4]:
# Cache key for the reference-graph statistics.
save_name = "refs"

if not save_exists(save_name):
    # Nothing cached yet: one statistics tuple per graph size is collected in
    # these lists for the scale-free and the "modular" reference graphs.
    sf_props, mod_props = [], []

    def estimate_av_path_len(G: nx.Graph, n_trials: int = 100, n_samples: int = 10):
        """Estimate the average shortest-path length of ``G`` by sampling targets.

        Each trial draws ``n_samples`` target nodes uniformly at random, with
        replacement, and averages the mean of the path lengths networkx
        reports for each target. The value returned is the mean over all
        trials.

        Args:
            G: graph to measure. Targets are drawn with
                ``np.random.randint(0, len(G))``, so the nodes are assumed to
                be the integers ``0 .. len(G) - 1`` -- TODO confirm for any
                graph not built by the generators in this cell.
            n_trials: number of trials to average (default 100).
            n_samples: number of random targets per trial (default 10).

        Returns:
            Mean over trials of the per-trial mean path length.
        """
        # NOTE(review): the per-target mean covers every entry networkx
        # returns for the target -- confirm whether the target's own entry
        # should be excluded.
        trial_means = []
        for _ in range(n_trials):
            target_means = []
            for _ in range(n_samples):
                target = np.random.randint(0, len(G))
                # dict(...) accepts both return types of this function: the
                # (node, length) iterator of older networkx releases and the
                # dict of newer ones.
                lengths = dict(nx.single_target_shortest_path_length(G, target))
                target_means.append(np.mean(list(lengths.values())))
            trial_means.append(np.mean(target_means))
        return np.mean(trial_means)

    # stand-in for a modular network
    def _circulant(n):
        """Return an ``n``-node circulant graph using the offsets 1 through 6."""
        offsets = list(range(1, 7))
        ring = nx.circulant_graph(n, offsets)
        return nx.Graph(ring)

    def _scale_free(n):
        """Return an ``n``-node Barabasi-Albert graph built with parameter 400."""
        attach_count = 400
        ba_graph = nx.barabasi_albert_graph(n, attach_count)
        return nx.Graph(ba_graph)

    def _stats(G: nx.Graph):
        """Summarise ``G`` as a 5-tuple.

        Returns:
            ``(node count, edge count, mean degree, average clustering,
            estimated average path length)``.
        """
        node_count = len(G)
        edge_count = len(G.edges)
        mean_degree = np.sum([deg for _, deg in G.degree]) / len(G)
        clustering = nx.cluster.average_clustering(G)
        # Evaluated last, as in the tuple order, since it draws random numbers.
        av_path_len = estimate_av_path_len(G)
        return (node_count, edge_count, mean_degree, clustering, av_path_len)

    def _job(n):
        """Worker task: return ``(n, scale-free stats, circulant stats)`` for size ``n``."""
        # The scale-free graph is built and measured before the circulant one.
        scale_free_stats = _stats(_scale_free(n))
        circulant_stats = _stats(_circulant(n))
        return (n, scale_free_stats, circulant_stats)
    steps = 500
    with Exe(max_workers=8) as exe:
        for n, sf, mod in exe.map(_job, range(steps, 3001, steps)):
            print(n, end="\r")
            sf_props.append(sf)
            mod_props.append(mod)

    save(save_name, (sf_props, mod_props))
else:
    sf_props, mod_props = load(save_name)

recovering & decompressing cached data from [{WORKSPACE}/main/explore/cache/refs.pkl.gz]


In [5]:

# 2x2 grid of panels; x axes are shared, y axes are independent.
fig = sp.make_subplots(
    rows=2,
    cols=2,
    shared_xaxes=True,
    shared_yaxes=False,
    horizontal_spacing=0.05,
    vertical_spacing=0.02,
)

def _add_trace(xi, yi, row, col, legend=True, props=(props, sf_props, mod_props), log_y=False):
    """Add one scatter trace per property series to panel ``(row, col)`` of ``fig``.

    Args:
        xi: index into each property tuple used for the x value.
        yi: index into each property tuple used for the y value.
        row, col: panel position forwarded to ``fig.add_trace``.
        legend: whether the added traces get legend entries.
        props: property series to draw. They are paired in order with the
            "Biocyc", "scale-free" and "modular" styles; styles beyond the
            number of series given are not used. The default tuple is bound
            once, when this function is defined.
        log_y: plot ``np.log`` of the y values instead of the raw values.
    """
    # (marker colour, scatter mode, legend name) for each series position.
    series_styles = [
        ("rgba(0, 156, 255, 0.2)", "markers", "Biocyc"),
        ("#F76E21", "lines+markers", "scale-free"),
        ("#32CD32", "lines+markers", "modular"),
    ]
    for series, (color, mode, label) in zip(props, series_styles):
        x_values = [record[xi] for record in series]
        y_values = [record[yi] for record in series]
        if log_y:
            y_values = np.log(y_values)
        scatter = go.Scatter(
            x=x_values,
            y=y_values,
            mode=mode,
            marker=dict(size=5, symbol="circle-open", color=color),
            line=dict(width=2),
            showlegend=legend and label != "",
            name=label,
        )
        fig.add_trace(scatter, row=row, col=col)

# Positions of the statistics inside a property tuple.
# NOTE(review): this matches the tuple built for the reference graphs; the
# Biocyc tuples in props are presumed to use the same order -- confirm.
SIZE, EDGES, DEGREE, CC, AV_PATH = range(5)

# Left column: all three series (default props).
_add_trace(SIZE, CC,        1, 1)
_add_trace(SIZE, AV_PATH,   2, 1, legend=False)

# Right column: only the first series, drawn without legend entries.
_add_trace(SIZE, CC,        1, 2, legend=False, props=(props,))
_add_trace(SIZE, AV_PATH,   2, 2, legend=False, props=(props,))

fig.update_annotations(font_size=24)
_layout:Any = layout.copy()
_layout.update(dict(
    height=700,
    yaxis=dict(title="Clustering Coefficient", **yaxis_desc),
    yaxis2=dict(**yaxis_desc),
    yaxis3=dict(title="Est. Av. Min. Path Length", **yaxis_desc),
    yaxis4=dict(**yaxis_desc),

    # The x axes take the x-axis style; they previously reused yaxis_desc.
    # NOTE(review): assumes xaxis_desc is the intended style for x axes -- confirm.
    # The titles are merged after the style so a "title" key inside
    # xaxis_desc cannot raise a duplicate-keyword error.
    xaxis2=dict(xaxis_desc),
    xaxis3={**xaxis_desc, "title": "Graph Size"},
    xaxis4={**xaxis_desc, "title": "Graph Size"},
))
fig.update_layout(go.Layout(_layout))

fig.show()