In [None]:
from collections import defaultdict
from statistics import mean, stdev
from math import sqrt, exp, prod, log, log1p
from pprint import pprint

In [None]:
# Watson-Crick pairing for the four DNA bases (upper-case only).
COMPLEMENT = dict(zip('ACGT', 'TGCA'))


def complement(a):
    """Return the complementary base of the single nucleotide ``a``.

    Raises:
        KeyError: if ``a`` is not one of 'A', 'C', 'G', 'T'.
    """
    paired_base = COMPLEMENT[a]
    return paired_base

def reverse_complement(kmer):
    """Return the reverse complement of ``kmer`` as a string.

    The bases are visited from last to first and each one is replaced by its
    complement, so any base that ``complement`` rejects raises ``KeyError``.
    """
    paired_bases = [complement(base) for base in reversed(kmer)]
    return ''.join(paired_bases)

def canonical(kmer):
    """Return whichever of ``kmer`` and its reverse complement sorts first.

    When the two compare equal, ``kmer`` itself is returned.
    """
    rc_kmer = reverse_complement(kmer)
    return rc_kmer if rc_kmer < kmer else kmer

def build_full_from_seed_graph(downstream_):
    """Expand a single-strand seed graph into a graph covering both strands.

    For every seed edge ``kmer -> kmer_downstream`` two edges are recorded:
    the edge itself and its reverse-complement twin
    ``rc(kmer_downstream) -> rc(kmer)``.

    Args:
        downstream_: mapping from a k-mer to an iterable of its downstream
            k-mers.

    Returns:
        ``(downstream, upstream)``: two ``defaultdict(list)`` adjacency maps.
        ``downstream[x]`` lists the k-mers that follow ``x`` and
        ``upstream[x]`` lists the k-mers that precede ``x``.
    """
    upstream = defaultdict(list)
    downstream = defaultdict(list)

    def add_edge(source, target):
        # An edge can be produced twice: when it is its own reverse-complement
        # twin (e.g. GGTAC -> GTACC) or when the seed lists both strands.
        # Store it once so that per-neighbour sums do not double count it.
        if target not in downstream[source]:
            downstream[source].append(target)
            upstream[target].append(source)

    for kmer, successors in downstream_.items():
        rc_kmer = reverse_complement(kmer)
        for kmer_downstream in successors:
            # Forward edge, kept explicit for explainability.
            add_edge(kmer, kmer_downstream)
            # Twin edge on the opposite strand; its direction is flipped.
            add_edge(reverse_complement(kmer_downstream), rc_kmer)
    return downstream, upstream

def add_reverse_complement_depth(depth_):
    """Copy ``depth_`` and mirror every entry onto its reverse complement.

    Returns a ``defaultdict`` whose missing keys read as 0. Each k-mer in
    ``depth_`` and its reverse complement are both assigned the k-mer's depth.
    """
    depth = defaultdict(lambda: 0)
    for kmer in depth_:
        observed = depth_[kmer]
        for strand in (kmer, reverse_complement(kmer)):
            depth[strand] = observed
    return depth

def is_ordered(upstream, downstream):
    """Tell whether ``downstream`` can directly follow ``upstream``.

    True when ``upstream`` without its first element equals ``downstream``
    without its last element.
    """
    upstream_suffix = upstream[1:]
    downstream_prefix = downstream[:-1]
    return upstream_suffix == downstream_prefix
    
def mapping_all_upstream(graph):
    """Check that every neighbour listed in ``graph`` is a valid predecessor.

    ``graph`` maps a node to the nodes recorded as upstream of it. Returns
    True only if ``is_ordered(neighbour, node)`` holds for every such pair,
    and True for a graph with no edges.
    """
    return all(
        is_ordered(neighbour, node)
        for node in graph
        for neighbour in graph[node]
    )

In [None]:
# Toy 4-mer graph (one strand only): TACG branches into ACGC and ACGG,
# while ACGG and TCGG both lead into CGGT.
seed = dict(
    TACG=['ACGC', 'ACGG'],
    ACGG=['CGGT'],
    TCGG=['CGGT'],
)
downstream, upstream = build_full_from_seed_graph(seed)

# Observed depth per k-mer, mirrored onto the reverse-complement strand.
observed_depth = add_reverse_complement_depth(
    dict(ACGC=4, TACG=9, ACGG=5, CGGT=7, TCGG=2)
)

In [None]:
# Sanity check: every k-mer recorded as upstream of a node must overlap it
# (its tail equals the node's head, as tested by is_ordered).
assert mapping_all_upstream(upstream)
# Show the upstream adjacency map and the strand-mirrored depths for inspection.
upstream, observed_depth

In [None]:
def node_action(current_depth_estimate, messages_from_upstream, messages_from_downstream, graph_upstream, graph_downstream):
    """Sketch of a per-node update step (unfinished: only the upstream total is computed).

    Design notes:
      * Find out whether there is a discrepancy between what upstream wants
        to send, what downstream wants to accept, and the node's own depth
        estimate.
      * The node needs sum(accept) = sum(send) = self-depth.
      * So it will accept and send the same amount as its self-depth, but it
        first updates its own depth based on the suggestions it receives, and
        then sends/accepts depth to/from its local neighbours in proportion
        to what it received.

    Args:
        current_depth_estimate: the node's current depth estimate (not used yet).
        messages_from_upstream: mapping indexed by upstream neighbour.
        messages_from_downstream: not used yet.
        graph_upstream: iterable of keys into ``messages_from_upstream``.
            NOTE(review): presumably this node's upstream neighbours; confirm.
        graph_downstream: not used yet.

    Returns:
        None. TODO: the remainder of the update has not been written.
    """
    # Use the ``graph_upstream`` argument rather than the notebook-level
    # ``upstream`` variable, so the function does not depend on global state.
    total_from_upstream = sum(messages_from_upstream[k] for k in graph_upstream)

def update_all(upstream, downstream, current_depth, verbose=True):
    """Run one message-passing pass over every node and return new depths.

    Every node emits two messages: the log1p of its own depth minus the log1p
    of the summed depth of its upstream neighbours ("to upstream"), and the
    same against its downstream neighbours ("to downstream"). A node then
    sums the "to downstream" messages of its upstream neighbours and the
    "to upstream" messages of its downstream neighbours, averages the two
    sums, damps the average by ``2 + disagreement**2`` and scales its depth
    by ``exp`` of the result.

    Args:
        upstream: mapping node -> list of upstream neighbours.
        downstream: mapping node -> list of downstream neighbours.
        current_depth: mapping node -> current depth estimate. Nodes without
            an entry are treated as depth 0.
        verbose: when True (the default), print one trace line per node.

    Returns:
        ``defaultdict`` (missing keys read as 0) with the updated depth of
        every node present in ``current_depth``. None of the inputs is
        modified.
    """
    def neighbours(graph, node):
        # .get() rather than graph[node]: indexing a defaultdict inserts the
        # missing key and would silently grow the caller's graph.
        return graph.get(node, ())

    def depth_of(node):
        # Same reason: indexing a defaultdict of depths while iterating over
        # it raises "dictionary changed size during iteration".
        return current_depth.get(node, 0)

    def message(cache, graph, node):
        # Log-ratio of the node's depth to the total depth of its neighbours
        # in ``graph``; computed once per node and direction.
        if node not in cache:
            neighbour_total = sum([depth_of(x) for x in neighbours(graph, node)])
            cache[node] = log1p(depth_of(node)) - log1p(neighbour_total)
        return cache[node]

    message_to_upstream = {}
    message_to_downstream = {}

    new_depth = defaultdict(lambda: 0)
    for k in current_depth:
        messages_from_upstream = [
            message(message_to_downstream, downstream, x)
            for x in neighbours(upstream, k)
        ]
        messages_from_downstream = [
            message(message_to_upstream, upstream, x)
            for x in neighbours(downstream, k)
        ]

        sum_upstream = sum(messages_from_upstream)
        sum_downstream = sum(messages_from_downstream)
        mean_suggestion = mean([sum_upstream, sum_downstream])
        disagreement = sum_upstream - sum_downstream
        # The more the two sides disagree, the smaller the step that is taken.
        update = mean_suggestion / (2 + disagreement**2)
        new_depth[k] = current_depth[k] * exp(update)
        if verbose:
            print(f'{k} {current_depth[k]:+0.2f} {mean_suggestion:+0.2f} {disagreement:+0.2f} {update:+0.2f} {new_depth[k]:+0.2f}')

    return new_depth

In [None]:


# Start from the observed depths and apply the message-passing update.
# `update_all` (defined above) is called here because no `one_pass_update`
# exists in this notebook, so that name raises NameError on a fresh kernel.
current_depth = observed_depth.copy()
for _ in range(1):  # number of passes; increase to apply the update repeatedly
    current_depth = update_all(upstream, downstream, current_depth)
pprint(current_depth)

In [None]:
# Second toy graph: seven 5-mers chained into a single cycle
# (ACCCG -> CCCGG -> ... -> TACCC -> ACCCG).
seed = dict(
    ACCCG=['CCCGG'],
    CCCGG=['CCGGT'],
    CCGGT=['CGGTA'],
    CGGTA=['GGTAC'],
    GGTAC=['GTACC'],
    GTACC=['TACCC'],
    TACCC=['ACCCG'],
)
downstream, upstream = build_full_from_seed_graph(seed)

# Every seed k-mer is observed at depth 1 (mirrored onto the other strand).
observed_depth = add_reverse_complement_depth(dict.fromkeys(seed, 1))

downstream

In [None]:
# Start from the observed depths and apply the message-passing update.
# `update_all` (defined above) is called here because no `one_pass_update`
# exists in this notebook, so that name raises NameError on a fresh kernel.
current_depth = observed_depth.copy()
for _ in range(1):  # number of passes; increase to apply the update repeatedly
    current_depth = update_all(upstream, downstream, current_depth)
pprint(current_depth)

In [None]:
import pandas as pd
import numpy as np

In [None]:
# Turn the depth mapping into a pandas Series (index: k-mer, value: depth).
depth0 = pd.Series(observed_depth)
n = len(depth0)  # number of entries in the depth mapping
depth0

In [None]:
# Scratch: builds a zero-filled array with shape (), i.e. a 0-dimensional array.
# NOTE(review): looks like the start of an unfinished experiment; confirm or remove.
np.zeros(())