In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import lib.assembly_graph

from collections import defaultdict
from statistics import mean, stdev
from math import sqrt, exp, prod, log, log1p
from pprint import pprint

import matplotlib.pyplot as plt

In [None]:
# Toy seed graph: seven 5-mers forming a single cycle. Each k-mer's last four
# bases are the first four bases of its successor, and the last k-mer links
# back to the first.
cycle_kmers = ['ACCCG', 'CCCGG', 'CCGGT', 'CGGTA', 'GGTAC', 'GTACC', 'TACCC']
seed = {
    kmer: [successor]
    for kmer, successor in zip(cycle_kmers, cycle_kmers[1:] + cycle_kmers[:1])
}
# NOTE(review): presumably expands the seed with reverse-complement nodes and
# returns (downstream, upstream) adjacency mappings -- confirm in lib.assembly_graph.
downstream, upstream = lib.assembly_graph.build_full_from_seed_graph(seed)

# Observed depth per seed k-mer: 1 everywhere except the two k-mers listed in
# `elevated_kmers`, which are set to 10.
elevated_kmers = {'GGTAC', 'GTACC'}
forward_depth = {
    kmer: (10 if kmer in elevated_kmers else 1)
    for kmer in cycle_kmers
}
observed_depth = lib.assembly_graph.add_reverse_complement_depth(forward_depth)

# Sanity check on the upstream mapping before using it; then display both
# structures as the cell output.
assert lib.assembly_graph.mapping_all_upstream(upstream)
upstream, observed_depth

# Attempt to do this with matrix operations

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns

# Observed depths as a float64 Series. Its index is reused below as the row and
# column labels of every node-by-node matrix.
depth0 = pd.Series(observed_depth).astype("float64")
# Number of nodes.
n = depth0.size
depth0

In [None]:
# Adjacency matrix of the "directed" graph: entry [row, col] is 1 when `col` is
# listed as downstream of `row`, and 0 otherwise.
# The graph with every node pointing upstream instead is just the transpose,
# dgraph_5p_to_3p.T.
# Author's note: after the *first* step (each node's depth distributed equally
# upstream and downstream) two separate contingency tables are needed, because
# the upstream-flow table is no longer the transpose of the downstream-flow table.

linked_5p_to_3p = downstream

nodes = depth0.index
dgraph_5p_to_3p = pd.DataFrame(0.0, index=nodes, columns=nodes)
for node in nodes:
    # Mark every node listed as downstream of `node` in that node's row.
    dgraph_5p_to_3p.loc[node, linked_5p_to_3p[node]] = 1

In [None]:
# Two independent copies of the adjacency matrix: one as built, one transposed.
# They seed the two tables used by the initialization cell.
l_contingency = dgraph_5p_to_3p.copy(deep=True)
r_contingency = dgraph_5p_to_3p.transpose().copy(deep=True)

# Visual check of the untransposed table.
sns.heatmap(l_contingency)

In [None]:
# Initialization

# Start from the 0/1 contingency tables.
send_to_r = r_contingency
send_to_l = l_contingency

# At this point the tables are not actually flows: they hold 0/1 links and are
# not yet proportional to depth.

depth = depth0

# Per-column totals of each table (summing down the rows).
total_from_l = send_to_r.sum(axis=0)
total_from_r = send_to_l.sum(axis=0)

# Outside of initialization, depth is updated at this point (see the step cell).

# Normalise every column by its total, scale column j by depth[j], then
# transpose. Each new table is built from the *other* table.
send_to_r_next = send_to_l.div(total_from_r, axis=1).mul(depth, axis=1).transpose()
send_to_l_next = send_to_r.div(total_from_l, axis=1).mul(depth, axis=1).transpose()

# Initialization done: both new tables were computed from the old ones above,
# so it is safe to overwrite them now.
send_to_r = send_to_r_next
send_to_l = send_to_l_next

sns.heatmap(send_to_r)

# Optional per-node summary, left disabled:
# table = pd.DataFrame(dict(depth=depth, in_degree_r=total_from_r, in_degree_l=total_from_l, out_degree_r=send_to_r.sum(1), out_degree_l=send_to_l.sum(1)))
# table

In [None]:
# One Step
# This cell overwrites depth, send_to_r and send_to_l, so every re-run advances
# the iteration by one more step.

# Per-column totals of the current tables (summing down the rows).
total_from_l = send_to_r.sum(axis=0)
total_from_r = send_to_l.sum(axis=0)

# Weight of the original depth0 in the update below; 0 gives depth0 zero weight.
gamma = 0

# Update depth: weighted average of depth0 (weight gamma), the previous depth,
# and the two column totals (weight 1 each).
depth = (depth0 * gamma + depth + total_from_r + total_from_l) / (3 + gamma)
# Alternative update rule, left disabled. It uses `sp` (scipy), which is not
# imported in the cells visible here.
# from_r_log1p = sp.special.log1p(total_from_r)
# from_l_log1p = sp.special.log1p(total_from_l)
# disagreement = np.abs(from_r_log1p - from_l_log1p)
# suggestion = np.exp((from_r_log1p + from_l_log1p) / 2) - 1
# weight = 1 / (2 + disagreement**2)
# depth = (depth + suggestion * weight) / (1 + weight)

# Normalise every column by its total, scale column j by the updated depth[j],
# then transpose. Each new table is built from the *other* table.
send_to_r_next = send_to_l.div(total_from_r, axis=1).mul(depth, axis=1).transpose()
send_to_l_next = send_to_r.div(total_from_l, axis=1).mul(depth, axis=1).transpose()

# Both new tables are computed before either old one is replaced.
send_to_r = send_to_r_next
send_to_l = send_to_l_next

# Step done

sns.heatmap(send_to_l)
# Optional depth plot, left disabled:
# plt.plot(depth)
# plt.ylim(0)

# Optional per-node summary, left disabled:
# table = pd.DataFrame(dict(depth=depth, in_degree_r=total_from_r, in_degree_l=total_from_l, out_degree_r=send_to_r.sum(1), out_degree_l=send_to_l.sum(1)))
# table