In [2]:
import os
import numpy as np
import pandas as pd
from pathlib import Path
from typing import Iterable, Any
from plotly import graph_objects as go, subplots as sp
from concurrent.futures import ProcessPoolExecutor as Exe
import networkx as nx
import numba

from local.caching import load, save, save_exists
from local.figures import layout, xaxis_desc, yaxis_desc
from txyl_common.biocyc_facade.pgdb import Pgdb, Dat, Traceable

In [3]:
def _circulant(n, k):
    return nx.Graph(nx.circulant_graph(n, [i+3 for i in range(k)]))

def _scale_free(n, k, seed:int=0, g=None):
    return nx.Graph(nx.barabasi_albert_graph(n, k, seed=seed, initial_graph=g))

# G = _circulant(100, 2)
# G = _scale_free(n, 99, _circulant(n, 1))
# ns, k = [15, 45, 45], 4
ns, k = [15, 75], 4
nlinks = 1
seed = 125
np.random.seed(seed)
G = nx.Graph()
for i, n in enumerate(ns):
    module = _scale_free(n, k, seed=seed+len(G))
    s = len(G)
    if s != 0:
        for _ in range(nlinks):
            a = np.random.randint(0, s)
            b = np.random.randint(s, s+len(module))
            G.add_edge(a, b)

    G.add_edges_from([(s+a, s+b) for a, b in module.edges])
    
pos = nx.kamada_kawai_layout(G)
# pos = nx.spring_layout(G)

In [34]:
# change in Q for each v in cluster if moved to other
def DeltaQ(G: nx.Graph, cluster, other):
    degrees_within = [G.degree[u] for u in cluster]
    degrees_out = [G.degree[u] for u in other]
    sum_in = np.sum(degrees_within)
    sum_out = np.sum(degrees_out)

    for v in cluster:
        neighbours = [u for u in G.neighbors(v)]
        edges_within = [u for u in neighbours if u in cluster]
        edges_out = [u for u in neighbours if u not in cluster]

        # vsum_in =  (sum_in - G.degree[v]) / (len(cluster))
        # vsum_out = (sum_out - G.degree[v]) / (len(G.nodes) - len(cluster))
        vsum_in = (sum_in - G.degree[v])
        vsum_out = (sum_out - G.degree[v])
        Q = (len(edges_out) - len(edges_within)) / len(G.edges)
        # Q -= G.degree[v] * (vsum_out - vsum_in) * len(G) / (2 * len(G.edges)**2)
        Q -= G.degree[v] * (vsum_out - vsum_in) / (2 * len(G.edges)**2)
        
        # if Q > 0:
        #     yield Q, v
        yield Q, v

def ToSwap(G: nx.Graph, cluster: set[int], other: set[int]):
    to_give = DeltaQ(G, cluster, other) if len(cluster)>1 else []
    to_get = DeltaQ(G, other, cluster) if len(other)>1 else []

    to_give = [(q, v) for q, v in sorted(to_give, key=lambda t: t[0], reverse=True) if q > 0]
    to_get = [(q, v) for q, v in sorted(to_get, key=lambda t: t[0], reverse=True) if q > 0]

    if len(to_get) > 0 and len(to_give) > 0:
        rand_give: int = np.random.randint(0, len(cluster))
        to_give.append(rand_give+)
        rand_get = np.random.randint(0, len(G))

    # t = 0.0025
    return [v for q, v in to_give][:5], [v for q, v in to_get][:5]
    # return [v for q, v in to_give][:5], [v for q, v in to_get][:5]
    # return to_give[:5], to_get[:5]

# a = set([77, 1, 55])
a = set([77])
# a = set(range(15))
b = set(x for x in range(0, len(G)) if x not in a)

iterations = [(a, b)]
for i in range(100):
    give, get = (set(x) for x in ToSwap(G, a, b))
    newa = a.difference(give).union(get).copy()
    newb = b.difference(get).union(give).copy()
    a = newa
    b = newb
    iterations.append((a, b))
    # break
# print([x for x in give])
# print([x for x in get])
# print(a)
# print(b)

In [31]:
aa, bb = iterations[-1]

In [36]:
# a = set(range(7))
# b = set(x for x in range(0, len(G)) if x not in a)

# for q, v in ClusterScores(G, aa, bb):
for q, v in DeltaQ(G, aa, bb):
    print(q, v)
    # pass

-0.024741087018782162 15
0.019068559972653616 16
-0.01299877126042812 17
0.008379449561626371 18
-0.03627091397899132 21
-0.022643914967526168 22
-0.016564887611903067 24
-0.02809471457211223 25
-0.024741087018782162 26
-0.009016915956061012 27
-0.005663288402730943 28
-0.015095943311684111 30
-0.00629151615376798 33
-0.013626999011465156 35
0.002512911004148151 36
-0.01299877126042812 39
-0.01572417106272115 41
-0.02599754252085624 42
-0.009016915956061012 44
-0.009645143707098049 48
-0.0014689443002189558 49
-0.010901599209172125 50
-0.015095943311684111 52
-0.0014689443002189558 54
0.0012564555020740755 55
-0.007547971655842056 56
-0.004822571853549024 58
-0.016352398813758186 62
-0.004194344102511988 63
-0.0014689443002189558 64
-0.010901599209172125 65
-0.010901599209172125 66
-0.010901599209172125 67
-0.016352398813758186 68
-0.010901599209172125 72
-0.004822571853549024 73
-0.010901599209172125 74
-0.013626999011465156 76
-0.010901599209172125 77
-0.004822571853549024 80
-0.0109

In [28]:
_layout: Any = layout.copy()
no_col = 'rgba(0, 0, 0, 0)'
axis_desc: dict = dict(
    linecolor=no_col, gridcolor=no_col, zerolinecolor=no_col, zerolinewidth=0,
    visible=False, range=(-1.1, 1.1), autorange=False,
)

_layout.update(dict(
    width=700, height=700,
    xaxis=axis_desc, yaxis=axis_desc,
))

def _plot(G: nx.Graph, pos: dict, groups: Iterable[set]):
    colors = "#0093CD, #FF9447, #90EA85".split(", ")
    glookup = {}
    for i, g in enumerate(groups):
        for v in g:
            glookup[v] = i
    cs = [colors[glookup[v]] for v in G.nodes]
    n_plot = go.Scatter(
            x=[x for i, (x, y) in enumerate(pos.values())],
            y=[y for i, (x, y) in enumerate(pos.values())],
            mode="markers", showlegend=False,
            marker=dict(
                color=cs,
            )
        )

    edges = []
    for a, b in G.edges:
        edges += [(pos[a]), (pos[b]), (None, None)]
    e_plot = go.Scatter(
        x=[x for x, y in edges], y=[y for x, y in edges],
        mode="lines", showlegend=False,
        line=dict(width=1, color="rgba(0, 0, 0, 0.2)"),
    )
    plots = (e_plot, n_plot)
    return plots

frames = []
sliders_dict = {
    "active": 0,
    "yanchor": "top",
    "xanchor": "left",
    "currentvalue": {
        "font": {"size": 20},
        "prefix": "Iteration:",
        "visible": True,
        "xanchor": "right"
    },
    "transition": {"duration": 0, "easing": "cubic-in-out"},
    "pad": {"b": 10, "t": 50},
    "len": 0.9,
    "x": 0.1,
    "y": 0,
    "steps": []
}
for i, clusters in enumerate(iterations):
    slider_step = {
        "args": [
            [i],
            {"frame": {"duration": 100, "redraw": False},
            "mode": "immediate",
            "transition": {"duration": 0}}
        ],
        "label": i,
        "method": "animate"
    }
    sliders_dict["steps"].append(slider_step)
    frames.append(go.Frame(data=_plot(G, pos, clusters), name=str(i)))

fig = go.Figure(
    data=_plot(G, pos, iterations[0]),
    layout=go.Layout(
        # xaxis=dict(range=[0, 5], autorange=False),
        # yaxis=dict(range=[0, 5], autorange=False),
        # title="Start Title",
        updatemenus=[
            {
                "buttons": [
                    {
                        "args": [None, {"frame": {"duration": 100, "redraw": False},
                                        "fromcurrent": True, "transition": {"duration": 100,
                                                                            "easing": "quadratic-in-out"}}],
                        "label": "Play",
                        "method": "animate"
                    },
                    {
                        "args": [[None], {"frame": {"duration": 0, "redraw": False},
                                        "mode": "immediate",
                                        "transition": {"duration": 0}}],
                        "label": "Pause",
                        "method": "animate"
                    }
                ],
                "direction": "left",
                "pad": {"r": 10, "t": 87},
                "showactive": False,
                "type": "buttons",
                "x": 0.1,
                "xanchor": "right",
                "y": 0,
                "yanchor": "top"
            }
        ],
        sliders = [sliders_dict],
        **_layout
    ),
    frames=frames,
)

fig.show()

In [49]:
pgdb_dir = Path("../../data/txyl_local/biocyc/pgdbs")
np.random.seed(84)
pgdb_files = list(os.listdir(pgdb_dir))
sample_file = pgdb_files[np.random.randint(0, len(pgdb_files))]
pgdb = Pgdb(pgdb_dir.joinpath(sample_file))
_info = pgdb.GetInfo()
for k in "PGDB-NAME, ORGANISM, NCBI-TAXONOMY-ID, PGDB-TIER, biocyc_facade_ver, Total_entries".split(", "):
    v = _info[k].replace('"', "")
    print(f"{k}:{' '*(25-len(k))}{v}")

PGDB-NAME:                gcf_001675515cyc
ORGANISM:                 B. thuringiensis servovar coreanensis str. ST7
NCBI-TAXONOMY-ID:         SAMN05232858
PGDB-TIER:                3
biocyc_facade_ver:        1.3
Total_entries:            29811


In [50]:
class MNetwork:
    def __init__(self, reactions: dict) -> None:
        ccode = {}
        def encode(c):
            if c not in ccode: 
                ccode[c] = len(ccode)
            return ccode[c]

        clookup = {}
        rlookup = {}
        for k, v in reactions.items():
            lefts = v.get("LEFT", [])
            rights = v.get("RIGHT", [])

            cpds = set(lefts).union(rights)
            rlookup[k] = {encode(c) for c in cpds}
            for c in cpds:
                ci = encode(c)
                ref = clookup.get(ci, set())
                ref.add(k)
                clookup[ci] = ref

        G = nx.Graph()
        for v, rxns in clookup.items():
            for rxn in rxns:
                for u in rlookup[rxn]:
                    G.add_edge(u, v)

        self.G = G
        self._ccode = ccode
        revcode = ['']*len(ccode)
        for k, i in ccode.items():
            revcode[i] = k
        self._revcode = revcode

    def Decode(self, index: int):
        return self._revcode[index] if index < len(self._revcode) else None
    
    def Encode(self, cpd: str):
        return self._ccode.get(cpd)
    
net = MNetwork(pgdb.GetDataTable(Dat.REACTIONS))

In [51]:
len(net.G.edges), len(net.G)

(11729, 1969)

In [52]:
nx.cluster.average_clustering(net.G)

0.7814277252129778

In [60]:
ds = sorted(dict(net.G.degree).items(), key=lambda t: t[1], reverse=True)
[(net.Decode(i), d) for i, d in ds][:5]

[('PROTON', 1202), ('WATER', 910), ('ATP', 431), ('PPI', 329), ('CO-A', 294)]

In [27]:
from networkx import community as nx_com

def BinaryModularity(G: nx.Graph, cluster: Iterable[int]):
    cluster_s = set(cluster)
    other_s = {n for n in G.nodes if n not in cluster_s}

    ak_c = np.sum([G.degree[n] for n in cluster]) / (2*len(G.edges))
    ak_o = np.sum([G.degree[n] for n in other_s]) / (2*len(G.edges))

    def _edges_within(group):
        edges_within = 0
        for a, b in G.edges:
            if a in group and b in group: edges_within+=1
        return edges_within

    e_c = _edges_within(cluster_s)/len(G.edges)
    e_o = _edges_within(other_s)/len(G.edges)

    Q = (e_c - ak_c**2) + (e_o - ak_o**2)
    return Q

size = 1000
ea = [(0, i+1) for i in range(size-1)]
eb = [(size, size+i+1) for i in range(size-1)]
testG = nx.Graph()
testG.add_edges_from(ea)
testG.add_edges_from(eb)
testG.add_edge(0, size)

BinaryModularity(testG, range(0, 2*size, 2))
BinaryModularity(testG, range(size-1))

0.49899937462478106

In [38]:
size = 10
ea = [(0, i+1) for i in range(size-1)]
ea += [(1, i+1) for i in range(size-1)]
eb = [(size, size+i+1) for i in range(size-1)]
eb += [(size+1, size+i+1) for i in range(size-1)]
testG = nx.Graph()
testG.add_edges_from(ea)
testG.add_edges_from(eb)
testG.add_edge(0, size)

G = testG
# cluster = range(size-1)
cluster = range(0, 2*size, 2)

cluster = set(cluster)
other_s = {n for n in G.nodes if n not in cluster}

def _to_swap(cluster):
    for v in cluster:
        neighbours = [u for u in G.neighbors(v)]
        edges_within = len([u for u in neighbours if u in cluster])
        if edges_within < len(neighbours)/2:
            yield v

to_give = _to_swap(cluster)
to_get = _to_swap(other_s)

list(to_give)

[]