In [None]:
%load_ext autoreload
%load_ext line_profiler

In [None]:
import os as _os
# Work from the project root so the relative 'examples/...' and 'nb/fig/...'
# paths used throughout this notebook resolve. The root is read from the
# PROJECT_ROOT environment variable; a KeyError is raised if it is not set.
_os.chdir(_os.environ['PROJECT_ROOT'])

In [None]:
import strainzip as sz
import graph_tool as gt
import graph_tool.draw
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import seaborn as sns
import pandas as pd
from contextlib import contextmanager
import xarray as xr
from itertools import product
from tqdm import tqdm

In [None]:
# Switch for the graph layout / graph drawing steps guarded by `if draw_graphs:`.
draw_graphs = True

# Log-spaced bin edges (51 edges each) for the length-vs-depth 2D histograms.
length_bins = np.logspace(start=0, stop=6.5, num=51)
depth_bins = np.logspace(start=-1, stop=4, num=51)

In [None]:
# Load depth data
# The resulting DataArray is later indexed by its 'unitig' coordinate and its
# 'sample' dimension size is used as the number of samples.
depth_table = xr.load_dataarray('examples/xjin_test4/r.proc.kmtricks-k111-m3-r2.ggcat.unitig_depth.nc')
depth_table.sizes

In [None]:
# Read the unitig sequences from the linked FASTA. The graph returned by the
# loader is discarded (the graph is loaded separately from the .gt file);
# `seqs` is later indexed by the unitig id string (label minus its last character).
with open('examples/xjin_test4/r.proc.kmtricks-k111-m3-r2.ggcat.fn') as f:
    _, seqs = sz.io.load_graph_and_sequences_from_linked_fasta(f, k=111, header_tokenizer=sz.io.ggcat_header_tokenizer)

In [None]:
# Number of samples; used below as the length of every per-vertex depth vector.
nsample = depth_table.sizes['sample']

In [None]:
# Read the pre-built assembly graph from disk.
graph = sz.io.load_graph('examples/xjin_test4/r.proc.kmtricks-k111-m3-r2.ggcat.gt')

# Attach per-sample depths as a vector-valued vertex property.
# Each vertex 'sequence' label is a unitig id followed by one trailing
# character; that character is stripped to get the integer id used to select
# the matching depth_table entries, in vertex order.
vertex_unitig_order = [int(label[:-1]) for label in graph.vp['sequence']]
graph.vp['depth'] = graph.new_vertex_property('vector<float>')
_vertex_depths = depth_table.sel(unitig=vertex_unitig_order).T.values
graph.vp['depth'].set_2d_array(_vertex_depths)

In [None]:
# Peel off connected components one at a time, largest first, so the ordering
# of component_graphs is deterministic.

component_graphs = []

# Vertices not yet assigned to an extracted component.
graph_remaining = graph.new_vertex_property('bool', val=True)

while True:
    _remaining_view = gt.GraphView(graph, vfilt=graph_remaining)
    this_component = gt.topology.label_largest_component(_remaining_view, directed=False)
    component_graphs.append(gt.GraphView(graph, vfilt=this_component))
    # Remove the vertices just extracted from the remaining set.
    graph_remaining = graph.new_vertex_property('bool', vals=graph_remaining.a - this_component.a)
    last_graph_size = this_component.a.sum()
    # Stop after the first component with 1000 or fewer vertices (it is still kept).
    if not last_graph_size > 1000:
        break

len(component_graphs)

In [None]:
# The largest component has a huge fraction of the unitigs
component_graphs[0], component_graphs[1]

In [None]:
c = 7  # Index into component_graphs of the component assembled below
# component = c
component = 9  # Only the label for plotting

# Every figure below is written under this directory. Neither the directory
# nor its parents are created anywhere else in the notebook, and savefig does
# not create missing directories, so make sure it exists up front.
_os.makedirs(f'nb/fig/component-{component}', exist_ok=True)

# Copy the selected component into a standalone, pruned graph and activate
# its own 'filter' vertex property as the vertex filter.
graph2 = gt.Graph(component_graphs[c], prune=True)
graph2.set_vertex_filter(graph2.vp['filter'])

# Seed both the NumPy and the graph-tool random number generators.
np.random.seed(1)
gt.seed_rng(1)

In [None]:
# When drawing, compute initial layout positions weighted by total bases
# (length times depth summed over samples) before building the manager.
if draw_graphs:
    total_bases = graph2.new_vertex_property('float', vals=graph2.vp.length.fa * graph2.vp.depth.get_2d_array(pos=range(nsample)).sum(0))
    sz.draw.update_xypositions(graph2, vweight=total_bases)

# Property handlers that are always needed: length, sequence and vector depth.
_unzippers = [
    sz.graph_manager.LengthUnzipper(),
    sz.graph_manager.SequenceUnzipper(),
    sz.graph_manager.VectorDepthUnzipper(),
]
_pressers = [
    sz.graph_manager.LengthPresser(),
    sz.graph_manager.SequencePresser(sep=","),
    sz.graph_manager.VectorDepthPresser(),
]

# Position handlers are only registered when graphs are drawn; they go last,
# as in the explicit lists this replaces.
if draw_graphs:
    _unzippers.append(sz.graph_manager.PositionUnzipper(offset=(0.1, 0.1)))
    _pressers.append(sz.graph_manager.PositionPresser())

gm = sz.graph_manager.GraphManager(unzippers=_unzippers, pressers=_pressers)
gm.validate(graph2)

In [None]:
# Snapshot of the component before any trimming/pressing/unzipping; used near
# the end of the notebook to map assembled sequences back to original vertices.
graph3 = graph2.copy()  # Save for later plotting
sz.stats.degree_stats(graph3)

In [None]:
# Calculate Flows
# Flow is estimated one sample at a time from that sample's scalar depth, then
# the per-sample results are regrouped into one vector-valued edge property.
_sample_flows = []
for sample_id in range(nsample):
    _sample_depth = gt.ungroup_vector_property(graph2.vp['depth'], pos=[sample_id])[0]
    one_flow, _, _ = sz.flow.estimate_flow(graph2, _sample_depth, graph2.vp['length'])
    _sample_flows.append(one_flow)
flow = gt.group_vector_property(_sample_flows, pos=range(nsample))

In [None]:
assembly_stage = 0

# Per-vertex depth summed over samples, for the graph as loaded.
_stage_depth = graph2.vp['depth'].get_2d_array(range(nsample)).sum(0)

# 2D histogram of vertex length (x) against total depth (y), log axes and
# log color scale.
plt.hist2d(
    graph2.vp['length'].fa,
    _stage_depth,
    bins=(length_bins, depth_bins),
    norm=mpl.colors.LogNorm(vmin=1, vmax=1e3),
)
plt.colorbar()
plt.xscale('log')
plt.yscale('log')
plt.savefig(f'nb/fig/component-{component}/hist_stage{assembly_stage}.pdf')

if draw_graphs:
    # Refresh the layout, weighting vertices by total bases (length * depth).
    total_bases = graph2.new_vertex_property('float', vals=graph2.vp.length.fa * _stage_depth)
    sz.draw.update_xypositions(graph2, vweight=total_bases, max_iter=100, init_step=1)
    # Square-root scaling for vertex color (depth) and edge width (flow).
    _color = graph2.new_vertex_property('float', vals=_stage_depth ** 0.5)
    _stage_flow = flow.get_2d_array(range(nsample)).sum(0)
    _width = graph2.new_edge_property('float', vals=_stage_flow ** 0.5 / 2)
    sz.draw.draw_graph(
        graph2,
        vertex_text=graph2.vp['length'],
        vertex_fill_color=_color,
        # edge_color=flow,
        edge_pen_width=_width,
        output=f'nb/fig/component-{component}/graph_stage{assembly_stage}.pdf',
        vcmap=mpl.cm.magma,
    )

In [None]:
# Trim tips
# The extra mask limits candidates to vertices whose length is below 111
# (presumably k, matching the k=111 used when loading sequences).
_short_vertex = graph2.vp['length'].a < 111
tips = sz.assembly.find_tips(graph2, also_required=_short_vertex)
print(len(tips))
gm.batch_trim(graph2, tips)

# Merge every maximal unitig path left behind by trimming into a single vertex.
_unitig_paths = [(path, {}) for path in sz.assembly.iter_maximal_unitig_paths(graph2)]
_new_tigs = gm.batch_press(graph2, *_unitig_paths)
len(_new_tigs)

In [None]:
# Second round of tip trimming
# Same procedure as the first round: find short tips, trim them, then press
# the resulting maximal unitig paths.
_short_vertex = graph2.vp['length'].a < 111
tips = sz.assembly.find_tips(graph2, also_required=_short_vertex)
print(len(tips))
gm.batch_trim(graph2, tips)
_unitig_paths = [(path, {}) for path in sz.assembly.iter_maximal_unitig_paths(graph2)]
_new_tigs = gm.batch_press(graph2, *_unitig_paths)
print(len(_new_tigs))

In [None]:
# Degree statistics after the two rounds of tip trimming.
sz.stats.degree_stats(graph2)

In [None]:
# Calculate Flows
# Re-estimate per-sample flows on the trimmed graph and regroup them into one
# vector-valued edge property.
_sample_flows = []
for sample_id in range(nsample):
    _sample_depth = gt.ungroup_vector_property(graph2.vp['depth'], pos=[sample_id])[0]
    one_flow, _, _ = sz.flow.estimate_flow(graph2, _sample_depth, graph2.vp['length'])
    _sample_flows.append(one_flow)
flow = gt.group_vector_property(_sample_flows, pos=range(nsample))

In [None]:
assembly_stage = 1

# Per-vertex depth summed over samples, after tip trimming.
_stage_depth = graph2.vp['depth'].get_2d_array(range(nsample)).sum(0)

# 2D histogram of vertex length (x) against total depth (y).
plt.hist2d(
    graph2.vp['length'].fa,
    _stage_depth,
    bins=(length_bins, depth_bins),
    norm=mpl.colors.LogNorm(vmin=1, vmax=1e3),
)
plt.colorbar()
plt.xscale('log')
plt.yscale('log')
plt.savefig(f'nb/fig/component-{component}/hist_stage{assembly_stage}.pdf')

if draw_graphs:
    # Refresh the layout, weighting vertices by total bases (length * depth).
    total_bases = graph2.new_vertex_property('float', vals=graph2.vp.length.fa * _stage_depth)
    sz.draw.update_xypositions(graph2, vweight=total_bases, max_iter=100, init_step=1)

    # Square-root scaling for vertex color (depth) and edge width (flow).
    _color = graph2.new_vertex_property('float', vals=_stage_depth ** 0.5)
    _stage_flow = flow.get_2d_array(range(nsample)).sum(0)
    _width = graph2.new_edge_property('float', vals=_stage_flow ** 0.5 / 2)
    sz.draw.draw_graph(
        graph2,
        vertex_text=graph2.vp['length'],
        vertex_fill_color=_color,
        # edge_color=flow,
        edge_pen_width=_width,
        output=f'nb/fig/component-{component}/graph_stage{assembly_stage}.pdf',
        vcmap=mpl.cm.magma,
    )

In [None]:
# Depth Smoothing
# Each sample's depth is smoothed independently; the per-sample change value
# returned by smooth_depth is printed as it is produced.

_per_sample_smoothed = []
for i in range(nsample):
    one_depth = gt.ungroup_vector_property(graph2.vp.depth, pos=[i])[0]
    smoothed, _change = sz.flow.smooth_depth(
        graph2,
        one_depth,
        graph2.vp.length,
        inertia=0.5,
        num_iter=50,
    )
    print(_change)
    _per_sample_smoothed.append(smoothed)

# Regroup into a single vector-valued vertex property.
smoothed_depths = gt.group_vector_property(_per_sample_smoothed)

In [None]:
# Calculate Flows
# Same per-sample estimate as before, but driven by the smoothed depths rather
# than the depths stored on the graph.
_sample_flows = []
for sample_id in range(nsample):
    _sample_depth = gt.ungroup_vector_property(smoothed_depths, pos=[sample_id])[0]
    one_flow, _, _ = sz.flow.estimate_flow(graph2, _sample_depth, graph2.vp['length'])
    _sample_flows.append(one_flow)
flow = gt.group_vector_property(_sample_flows, pos=range(nsample))

In [None]:
assembly_stage = 2

# Per-vertex smoothed depth summed over samples (not yet stored on the graph).
_stage_depth = smoothed_depths.get_2d_array(range(nsample)).sum(0)

# 2D histogram of vertex length (x) against total smoothed depth (y).
plt.hist2d(
    graph2.vp['length'].fa,
    _stage_depth,
    bins=(length_bins, depth_bins),
    norm=mpl.colors.LogNorm(vmin=1, vmax=1e3),
)
plt.colorbar()
plt.xscale('log')
plt.yscale('log')
plt.savefig(f'nb/fig/component-{component}/hist_stage{assembly_stage}.pdf')

if draw_graphs:
    # Refresh the layout, weighting vertices by total bases (length * depth).
    total_bases = graph2.new_vertex_property('float', vals=graph2.vp.length.fa * _stage_depth)
    sz.draw.update_xypositions(graph2, vweight=total_bases, max_iter=100, init_step=1)

    # Square-root scaling for vertex color (depth) and edge width (flow).
    _color = graph2.new_vertex_property('float', vals=_stage_depth ** 0.5)
    _stage_flow = flow.get_2d_array(range(nsample)).sum(0)
    _width = graph2.new_edge_property('float', vals=_stage_flow ** 0.5 / 2)
    sz.draw.draw_graph(
        graph2,
        vertex_text=graph2.vp['length'],
        vertex_fill_color=_color,
        # edge_color=flow,
        edge_pen_width=_width,
        output=f'nb/fig/component-{component}/graph_stage{assembly_stage}.pdf',
        vcmap=mpl.cm.magma,
    )

In [None]:
# Actually push smoothed depths to the graph
# From here on graph2.vp['depth'] holds the smoothed values, so every later
# flow estimate and unzip/press step operates on smoothed depths.
graph2.vp['depth'] = smoothed_depths  # TODO: Experiment with and without this.

# FIXME: Long tips lose too much depth?

In [None]:
junctions = sz.assembly.find_junctions(graph2)
print(len(junctions))

# Deconvolve every junction and queue the result for unzipping. No junction is
# filtered out here (delta_aic is computed but not used as a threshold).
# NOTE(review): unlike the later junction loops in this notebook, the flow
# matrices are not transposed here -- confirm deconvolve_junction expects
# this layout.
batch = []
pbar = tqdm(ncols=2, disable=True)
for j in junctions:
    _in_edges = graph2.get_in_edges(j)
    _out_edges = graph2.get_out_edges(j)
    in_edge_vertices = [edge[0] for edge in _in_edges]
    out_edge_vertices = [edge[1] for edge in _out_edges]
    in_edge_flows = np.stack([flow[edge] for edge in _in_edges])
    out_edge_flows = np.stack([flow[edge] for edge in _out_edges])
    n, m = len(in_edge_vertices), len(out_edge_vertices)
    pbar.set_postfix({'NxM': f"{n}x{m}"})
    inv_beta_hessian, paths, weights, delta_aic = sz.deconvolution.deconvolve_junction(
        in_edge_vertices,
        in_edge_flows,
        out_edge_vertices,
        out_edge_flows,
        model=sz.depth_model,
        forward_stop=0.2,
        backward_stop=0.01,
        alpha=1e-5,
    )
    # NOTE(review): these entries are 4-tuples, whereas the later batches in
    # this notebook pass 3-tuples to gm.batch_unzip -- confirm the extra
    # element is accepted.
    _entry = (j, paths, {"path_depths": weights}, inv_beta_hessian)
    batch.append(_entry)
    pbar.update(1)

# Fraction of junctions queued for unzipping.
print(len(batch) / len(junctions))

In [None]:
# Inspect one queued entry (raises IndexError if fewer than two were queued).
batch[1]

In [None]:
# Look at a single, hard-coded junction vertex in detail.
j = 23
_in_edges = graph2.get_in_edges(j)
_out_edges = graph2.get_out_edges(j)
in_edge_vertices = [edge[0] for edge in _in_edges]
out_edge_vertices = [edge[1] for edge in _out_edges]
in_edge_flows = np.stack([flow[edge] for edge in _in_edges])
out_edge_flows = np.stack([flow[edge] for edge in _out_edges])
n, m = len(in_edge_vertices), len(out_edge_vertices)

# Heatmap of in-edge flows stacked on top of out-edge flows.
_stacked_flows = np.concatenate([in_edge_flows, out_edge_flows], axis=0)
sns.heatmap(_stacked_flows, norm=mpl.colors.SymLogNorm(0.1, vmin=-1e3, vmax=1e3), cmap='coolwarm')
in_edge_vertices, out_edge_vertices

In [None]:
# Re-run the deconvolution for the single junction prepared in the previous
# cell, with the same settings as the batch loop plus verbose=2.
inv_beta_hessian, paths, weights, delta_aic = sz.deconvolution.deconvolve_junction(
    in_edge_vertices,
    in_edge_flows,
    out_edge_vertices,
    out_edge_flows,
    model=sz.depth_model,
    forward_stop=0.2,
    backward_stop=0.01,
    verbose=2,
    alpha=1e-5,
)
paths, delta_aic

In [None]:
# Heatmap of the path weights returned by the single-junction deconvolution.
sns.heatmap(weights, norm=mpl.colors.SymLogNorm(0.1, vmin=-1e3, vmax=1e3), cmap='coolwarm')


In [None]:
# Heatmap of the inv_beta_hessian matrix returned by the same deconvolution.
sns.heatmap(inv_beta_hessian, norm=mpl.colors.SymLogNorm(1e-1, vmin=-300, vmax=300), cmap='coolwarm')

In [None]:
# Apply every queued junction split.
_new_tigs = gm.batch_unzip(graph2, *batch)
print(len(_new_tigs))

# Then merge each maximal unitig path created by the splits into one vertex.
_unitig_paths = [(path, {}) for path in sz.assembly.iter_maximal_unitig_paths(graph2)]
_new_tigs = gm.batch_press(graph2, *_unitig_paths)
len(_new_tigs)

In [None]:
# Degree statistics after the first unzip/press round.
sz.stats.degree_stats(graph2)

In [None]:
# Calculate Flows
# Per-sample flows on the graph after the first unzip/press round, regrouped
# into one vector-valued edge property.
_sample_flows = []
for sample_id in range(nsample):
    _sample_depth = gt.ungroup_vector_property(graph2.vp['depth'], pos=[sample_id])[0]
    one_flow, _, _ = sz.flow.estimate_flow(graph2, _sample_depth, graph2.vp['length'])
    _sample_flows.append(one_flow)
flow = gt.group_vector_property(_sample_flows, pos=range(nsample))

In [None]:
assembly_stage = 3

# Per-vertex depth summed over samples, after the first unzip/press round.
_stage_depth = graph2.vp['depth'].get_2d_array(range(nsample)).sum(0)

# 2D histogram of vertex length (x) against total depth (y).
plt.hist2d(
    graph2.vp['length'].fa,
    _stage_depth,
    bins=(length_bins, depth_bins),
    norm=mpl.colors.LogNorm(vmin=1, vmax=1e3),
)
plt.colorbar()
plt.xscale('log')
plt.yscale('log')
plt.savefig(f'nb/fig/component-{component}/hist_stage{assembly_stage}.pdf')

if draw_graphs:
    # Refresh the layout, weighting vertices by total bases (length * depth).
    total_bases = graph2.new_vertex_property('float', vals=graph2.vp.length.fa * _stage_depth)
    sz.draw.update_xypositions(graph2, vweight=total_bases, max_iter=100, init_step=1)

    # Square-root scaling for vertex color (depth) and edge width (flow).
    _color = graph2.new_vertex_property('float', vals=_stage_depth ** 0.5)
    _stage_flow = flow.get_2d_array(range(nsample)).sum(0)
    _width = graph2.new_edge_property('float', vals=_stage_flow ** 0.5 / 2)
    sz.draw.draw_graph(
        graph2,
        vertex_text=graph2.vp['length'],
        vertex_fill_color=_color,
        # edge_color=flow,
        edge_pen_width=_width,
        output=f'nb/fig/component-{component}/graph_stage{assembly_stage}.pdf',
        vcmap=mpl.cm.magma,
    )

In [None]:
junctions = sz.assembly.find_junctions(graph2)
print(len(junctions))

# Only small junctions (in-degree * out-degree <= 6) are evaluated. A junction
# is queued for unzipping when the score improves by more than 3 and the
# selected paths number fewer than n + m.
batch = []
pbar = tqdm(ncols=2, disable=True)
for j in junctions:
    _in_edges = graph2.get_in_edges(j)
    _out_edges = graph2.get_out_edges(j)
    in_edge_vertices = [edge[0] for edge in _in_edges]
    out_edge_vertices = [edge[1] for edge in _out_edges]
    in_edge_flows = np.stack([flow[edge] for edge in _in_edges]).T
    out_edge_flows = np.stack([flow[edge] for edge in _out_edges]).T
    n, m = len(in_edge_vertices), len(out_edge_vertices)
    pbar.set_postfix({'NxM': f"{n}x{m}"})
    if n * m > 6:
        # Too many candidate paths; skip this junction.
        pbar.update(1)
        continue
    delta_score, paths, weights = sz.deconvolution.estimate_path_weights(
        in_edge_vertices,
        in_edge_flows,
        out_edge_vertices,
        out_edge_flows,
        score_func=sz.deconvolution.aic_score,
        # score_kwargs=dict(penalty=1),
    )
    if (delta_score < -3) and (len(paths) < n + m):
        batch.append((j, paths, {"path_depths": weights}))
    pbar.update(1)

# Fraction of junctions queued for unzipping.
print(len(batch) / len(junctions))

In [None]:
# Apply the junction splits selected in the second round.
_new_tigs = gm.batch_unzip(graph2, *batch)
print(len(_new_tigs))

# Merge each resulting maximal unitig path into one vertex.
_unitig_paths = [(path, {}) for path in sz.assembly.iter_maximal_unitig_paths(graph2)]
_new_tigs = gm.batch_press(graph2, *_unitig_paths)
len(_new_tigs)

In [None]:
# Degree statistics after the second unzip/press round.
sz.stats.degree_stats(graph2)

In [None]:
# Calculate Flows
# Per-sample flows on the graph after the second unzip/press round, regrouped
# into one vector-valued edge property.
_sample_flows = []
for sample_id in range(nsample):
    _sample_depth = gt.ungroup_vector_property(graph2.vp['depth'], pos=[sample_id])[0]
    one_flow, _, _ = sz.flow.estimate_flow(graph2, _sample_depth, graph2.vp['length'])
    _sample_flows.append(one_flow)
flow = gt.group_vector_property(_sample_flows, pos=range(nsample))

In [None]:
assembly_stage = 4

# Per-vertex depth summed over samples, after the second unzip/press round.
_stage_depth = graph2.vp['depth'].get_2d_array(range(nsample)).sum(0)

# 2D histogram of vertex length (x) against total depth (y).
plt.hist2d(
    graph2.vp['length'].fa,
    _stage_depth,
    bins=(length_bins, depth_bins),
    norm=mpl.colors.LogNorm(vmin=1, vmax=1e3),
)
plt.colorbar()
plt.xscale('log')
plt.yscale('log')
plt.savefig(f'nb/fig/component-{component}/hist_stage{assembly_stage}.pdf')

if draw_graphs:
    # Refresh the layout, weighting vertices by total bases (length * depth).
    total_bases = graph2.new_vertex_property('float', vals=graph2.vp.length.fa * _stage_depth)
    sz.draw.update_xypositions(graph2, vweight=total_bases, max_iter=100, init_step=1)

    # Square-root scaling for vertex color (depth) and edge width (flow).
    _color = graph2.new_vertex_property('float', vals=_stage_depth ** 0.5)
    _stage_flow = flow.get_2d_array(range(nsample)).sum(0)
    _width = graph2.new_edge_property('float', vals=_stage_flow ** 0.5 / 2)
    sz.draw.draw_graph(
        graph2,
        vertex_text=graph2.vp['length'],
        vertex_fill_color=_color,
        # edge_color=flow,
        edge_pen_width=_width,
        output=f'nb/fig/component-{component}/graph_stage{assembly_stage}.pdf',
        vcmap=mpl.cm.magma,
    )

In [None]:
junctions = sz.assembly.find_junctions(graph2)
print(len(junctions))

# Third selection pass, with the same criteria as the previous one: junctions
# with in-degree * out-degree <= 6 are scored, and queued when the score drops
# by more than 3 with fewer than n + m selected paths.
batch = []
pbar = tqdm(ncols=2, disable=True)
for j in junctions:
    _in_edges = graph2.get_in_edges(j)
    _out_edges = graph2.get_out_edges(j)
    in_edge_vertices = [edge[0] for edge in _in_edges]
    out_edge_vertices = [edge[1] for edge in _out_edges]
    in_edge_flows = np.stack([flow[edge] for edge in _in_edges]).T
    out_edge_flows = np.stack([flow[edge] for edge in _out_edges]).T
    n, m = len(in_edge_vertices), len(out_edge_vertices)
    pbar.set_postfix({'NxM': f"{n}x{m}"})
    if n * m > 6:
        # Too many candidate paths; skip this junction.
        pbar.update(1)
        continue
    delta_score, paths, weights = sz.deconvolution.estimate_path_weights(
        in_edge_vertices,
        in_edge_flows,
        out_edge_vertices,
        out_edge_flows,
        score_func=sz.deconvolution.aic_score,
        # score_kwargs=dict(penalty=1),
    )
    if (delta_score < -3) and (len(paths) < n + m):
        batch.append((j, paths, {"path_depths": weights}))
    pbar.update(1)

# Fraction of junctions queued for unzipping.
print(len(batch) / len(junctions))

In [None]:
# Apply the junction splits selected in the third round.
_new_tigs = gm.batch_unzip(graph2, *batch)
print(len(_new_tigs))

# Merge each resulting maximal unitig path into one vertex.
_unitig_paths = [(path, {}) for path in sz.assembly.iter_maximal_unitig_paths(graph2)]
_new_tigs = gm.batch_press(graph2, *_unitig_paths)
len(_new_tigs)

In [None]:
# Degree statistics after the third unzip/press round.
sz.stats.degree_stats(graph2)

In [None]:
# Calculate Flows
# Per-sample flows on the graph after the third unzip/press round, regrouped
# into one vector-valued edge property.
_sample_flows = []
for sample_id in range(nsample):
    _sample_depth = gt.ungroup_vector_property(graph2.vp['depth'], pos=[sample_id])[0]
    one_flow, _, _ = sz.flow.estimate_flow(graph2, _sample_depth, graph2.vp['length'])
    _sample_flows.append(one_flow)
flow = gt.group_vector_property(_sample_flows, pos=range(nsample))

In [None]:
assembly_stage = 5

# Per-vertex depth summed over samples, after the third unzip/press round.
_stage_depth = graph2.vp['depth'].get_2d_array(range(nsample)).sum(0)

# 2D histogram of vertex length (x) against total depth (y).
plt.hist2d(
    graph2.vp['length'].fa,
    _stage_depth,
    bins=(length_bins, depth_bins),
    norm=mpl.colors.LogNorm(vmin=1, vmax=1e3),
)
plt.colorbar()
plt.xscale('log')
plt.yscale('log')
plt.savefig(f'nb/fig/component-{component}/hist_stage{assembly_stage}.pdf')

if draw_graphs:
    # Refresh the layout, weighting vertices by total bases (length * depth).
    total_bases = graph2.new_vertex_property('float', vals=graph2.vp.length.fa * _stage_depth)
    sz.draw.update_xypositions(graph2, vweight=total_bases, max_iter=100, init_step=1)

    # Square-root scaling for vertex color (depth) and edge width (flow).
    _color = graph2.new_vertex_property('float', vals=_stage_depth ** 0.5)
    _stage_flow = flow.get_2d_array(range(nsample)).sum(0)
    _width = graph2.new_edge_property('float', vals=_stage_flow ** 0.5 / 2)
    sz.draw.draw_graph(
        graph2,
        vertex_text=graph2.vp['length'],
        vertex_fill_color=_color,
        # edge_color=flow,
        edge_pen_width=_width,
        output=f'nb/fig/component-{component}/graph_stage{assembly_stage}.pdf',
        vcmap=mpl.cm.magma,
    )

In [None]:
# Sequence labels of the assembled graph (comma-joined segment lists) and of
# the pre-assembly snapshot (one segment per vertex).
graph2_sequences = list(graph2.vp.sequence)
graph3_sequences = list(graph3.vp.sequence)

# Count, for every segment label, how many assembled sequences contain it.
# Membership is tested on whole comma-separated segments. A plain substring
# test (`s in t`) would wrongly match e.g. '5+' inside '15+,20-', and would
# also cost one full scan of graph2_sequences per original segment.
_assembled_count = {}
for t in graph2_sequences:
    # set(): a segment repeated inside one assembled sequence counts once.
    for segment in set(t.split(',')):
        _assembled_count[segment] = _assembled_count.get(segment, 0) + 1

sequence_multiplicity = {}
sequence_length = {}
for s in graph3_sequences:
    # Segments that were trimmed away appear in no assembled sequence -> 0.
    sequence_multiplicity[s] = _assembled_count.get(s, 0)
    # The unitig id is the label minus its trailing one-character suffix.
    sequence_length[s] = len(seqs[s[:-1]])

# Root-mean-square multiplicity over the segments of each assembled sequence.
mean_component_multiplicity = {}
for s in graph2_sequences:
    mean_component_multiplicity[s] = np.sqrt(np.mean([sequence_multiplicity[t]**2 for t in s.split(',')]))

In [None]:
# Number of comma-separated segments in each assembled sequence.
num_segments = graph2.new_vertex_property('int')
gt.map_property_values(graph2.vp.sequence, num_segments, lambda x: len(x.split(',')))
length = graph2.vp.length  # not used further in this cell

# Tuples of (segment count, length, RMS multiplicity, vertex index), sorted
# ascending; the last 200 entries (largest segment counts) are displayed.
list(sorted(zip(
    num_segments.a[graph2.get_vertices()],
    graph2.vp.length.a[graph2.get_vertices()],
    [mean_component_multiplicity[graph2.vp.sequence[v]] for v in graph2.get_vertices()],
    graph2.get_vertices(),
)))[-200:]

In [None]:
# Hard-coded vertex of the assembled graph to inspect.
v = 217708  # [ 855 1047 1052 1053]

print(v)
print(graph2.vp.length[v])
print(graph2.vp.depth[v])
print(graph2.vp.sequence[v])

# Original (unsmoothed) depth_table entries for each segment of vertex v.
sns.heatmap(depth_table.sel(unitig=[int(s[:-1]) for s in graph2.vp.sequence[v].split(',')]).to_pandas().T, norm=mpl.colors.SymLogNorm(1))

In [None]:
k = 111  # not used in this cell

# For each segment of vertex v: label, multiplicity and unitig length.
for segment in graph2.vp.sequence[v].split(','):
    print(segment, sequence_multiplicity[segment], sequence_length[segment])

In [None]:
assembly_stage = '_final'

# Mark the vertices of the pre-assembly snapshot (graph3) whose sequence label
# is one of the segments making up assembled vertex v.
_segments_of_v = graph2.vp.sequence[v].split(',')
in_seq = graph3.new_vertex_property('bool', val=False)
gt.map_property_values(graph3.vp.sequence, in_seq, lambda x: x in _segments_of_v)
print(in_seq.a.sum())

# Mean depth over samples, the flow estimated from it, and sqrt-depth colors.
_mean_depth = graph3.vp['depth'].get_2d_array(pos=range(nsample)).mean(0)
one_depth = graph3.new_vertex_property('float', _mean_depth)
one_flow, _, _ = sz.flow.estimate_flow(graph3, one_depth, graph3.vp['length'])
_color = graph3.new_vertex_property('float', vals=np.sqrt(one_depth.a))

if draw_graphs:
    outpath = f'nb/fig/component-{component}/graph_stage{assembly_stage}_seq{v}_id.pdf'
    print(outpath)
    # Edge widths use a fifth-root scaling of the flow.
    _edge_width = graph3.new_edge_property('float', vals=one_flow.a ** 0.2)
    sz.draw.draw_graph(
        graph3,
        vertex_text=graph3.vp['sequence'],
        vertex_halo=in_seq,
        # vertex_text=in_seq,
        vertex_font_size=1,
        vertex_fill_color=_color,
        edge_pen_width=_edge_width,
        output=outpath,
        vcmap=(mpl.cm.magma, 1),
    )

# sz.draw.draw_graph(
#     graph3,
#     vertex_text=graph3.vp['length'],
#     vertex_halo=in_seq,
#     # vertex_text=in_seq,
#     vertex_fill_color=_color,
#     edge_pen_width=graph3.new_edge_property('float', vals=one_flow.a ** (1/5)),
#     output=f'nb/fig/component-{component}/assembly{assembly_stage}_seq{v}_length.pdf',
#     vcmap=(mpl.cm.magma, 1),
# )

In [None]:
assembly_stage = '_final'

# Flag assembled sequences that contain unitig u (either strand: the trailing
# strand character of each segment is dropped before comparing).
u = "32505"
has_specific_component = graph2.new_vertex_property('bool')
gt.map_property_values(
    graph2.vp.sequence,
    has_specific_component,
    lambda x: u in [seg[:-1] for seg in x.split(',')],
)

# Depth summed over samples, the flow estimated from it, and sqrt-depth colors.
_total_depth = graph2.vp['depth'].get_2d_array(pos=range(nsample)).sum(0)
one_depth = graph2.new_vertex_property('float', _total_depth)
one_flow, _, _ = sz.flow.estimate_flow(graph2, one_depth, graph2.vp['length'])
_color = graph2.new_vertex_property('float', vals=np.sqrt(one_depth.a))

if draw_graphs:
    outpath = f'nb/fig/component-{component}/graph_stage{assembly_stage}_node{u}.pdf'
    print(outpath)
    # Edge widths use a fifth-root scaling of the flow.
    _edge_width = graph2.new_edge_property('float', vals=one_flow.a ** 0.2)
    sz.draw.draw_graph(
        graph2,
        vertex_text=graph2.vp['length'],
        vertex_halo=has_specific_component,
        # vertex_text=in_seq,
        vertex_fill_color=_color,
        edge_pen_width=_edge_width,
        output=outpath,
        vcmap=(mpl.cm.magma, 1),
    )

# Vertex indices of the flagged sequences.
print(np.where(has_specific_component.a)[0])

In [None]:
k = 111

# Write one FASTA record per assembled sequence that contains unitig u.
path = f'nb/fig/component-{component}/seqs_stage{assembly_stage}_node{u}.fn'
print(path)
with open(path, 'w') as f:
    for seq in np.where(has_specific_component.a)[0]:
        segment_str = graph2.vp.sequence[seq]
        accum = ""
        for segment in segment_str.split(','):
            # A segment label is a unitig id followed by a strand character.
            seqidx, strand = segment[:-1], segment[-1:]
            forward_segment = seqs[seqidx]
            if strand == '+':
                _oriented = forward_segment
            else:
                _oriented = sz.sequence.reverse_complement(forward_segment)
            # Drop the last k - 1 bases accumulated so far before appending
            # (presumably the overlap between adjacent unitigs -- confirm).
            accum = accum[:-(k - 1)] + _oriented
        print(seq, segment_str, len(accum), sep='\t')
        f.write(f">{seq}|{segment_str}\n{accum}\n")

In [None]:
# Per-sample depths of the flagged sequences (rows: samples, columns: vertices).
# Columns are picked by position in the filtered array (.fa) but labelled with
# indices from the unfiltered array (.a).
# NOTE(review): this assumes get_2d_array returns one column per unfiltered-out
# (visible) vertex -- confirm.
d = pd.DataFrame(graph2.vp.depth.get_2d_array(pos=range(nsample))[:,np.where(has_specific_component.fa)[0]], columns=np.where(has_specific_component.a)[0])
sns.heatmap(d, norm=mpl.colors.SymLogNorm(1))

In [None]:
# View of graph2 with the vertex filter skipped, then list
# (index, length, total depth, sequence) for every entry whose sequence
# contains the segment '32505+' exactly.
graph4 = gt.GraphView(graph2, skip_vfilt=True)
[(v, l, d, s) for v, (s, l, d) in enumerate(zip(graph4.vp['sequence'], graph4.vp['length'], graph4.vp.depth.get_2d_array(pos=range(nsample)).sum(0))) if '32505+' in s.split(',')]

In [None]:
# Deliberate failure: halts "Run All" here. The cells below are scratch work.
assert False

In [None]:
# Scratch drawing of graph2 labelled with vertex indices. Reuses `_color` and
# `one_flow` left over from the earlier final-stage cell; edge widths use a
# cube-root scaling of the flow.
sz.draw.draw_graph(
    graph2,
    vertex_text=graph2.vertex_index,
    vertex_fill_color=_color,
    # edge_color=flow,
    edge_pen_width=graph2.new_edge_property('float', vals=one_flow.a ** (1/3)),
    output=f'nb/fig/test.pdf',
    vcmap=(mpl.cm.magma),
)

In [None]:
# Debug: estimate path weights for one junction and print all inputs/outputs.
junctions = sz.assembly.find_junctions(graph2)
j = junctions[1]

_in_edges = graph2.get_in_edges(j)
_out_edges = graph2.get_out_edges(j)
in_edge_vertices = [edge[0] for edge in _in_edges]
out_edge_vertices = [edge[1] for edge in _out_edges]
in_edge_flows = np.stack([flow[edge] for edge in _in_edges]).T
out_edge_flows = np.stack([flow[edge] for edge in _out_edges]).T

delta_aic, paths, weights = sz.deconvolution.estimate_path_weights(
    in_edge_vertices,
    in_edge_flows,
    out_edge_vertices,
    out_edge_flows,
)
for _value in (in_edge_flows, out_edge_flows, delta_aic, paths, weights):
    print(_value)

In [None]:
# Display the flow matrices built for the junction in the previous cell.
in_edge_flows, out_edge_flows

In [None]:
# Flow vector on one hard-coded edge, given as a (source, target) pair.
flow[(1426, 0)]

In [None]:
# Pair each in-edge's source vertex with its flow vector for junction `j`.
# `in_edges` was not defined in any earlier cell, so this raised a NameError;
# it is taken from the graph the same way the junction cells above do.
in_edges = graph2.get_in_edges(j)
in_edge_flows = [(edge[0], flow[edge]) for edge in in_edges]
in_edge_flows

In [None]:
# Stop Here
# Deliberate failure so that nothing below runs during "Run All".
assert False

In [None]:
# Histogram of depth summed over the 'sample' dimension, on log-spaced bins.
# This cell previously referenced `depth`, which is not defined in any earlier
# cell; `depth_table` (loaded near the top) has the 'sample' dimension it needs.
plt.hist(depth_table.sum("sample"), bins=np.logspace(0, 7, num=101))
plt.xscale('log')
# plt.yscale('log')

In [None]:
# Extract the vertices labelled 46 into a standalone, pruned graph.
# NOTE(review): `graph_component_vp` is not defined in any earlier cell --
# presumably a per-vertex component-label property; confirm where it comes from.
graph_component = gt.Graph(gt.GraphView(graph, vfilt=graph.new_vertex_property('bool', vals=graph_component_vp.a == 46)), prune=True)
graph_component.vp

In [None]:
# Vertex indices of the 10 longest unitigs, longest first.
# The previous slice, [-10::-1], started at the 10th-longest entry and walked
# down to the shortest, i.e. it returned everything except the 9 longest.
np.argsort(graph.vp.length.a)[:-11:-1]

In [None]:
# Get graph of the largest component and name it by the longest unitig.
# The second line displays the sequence label of the longest vertex.
graph_component = gt.topology.extract_largest_component(graph, directed=False, prune=True)
graph_component.vp.sequence[np.argmax(graph_component.vp.length.a)]

In [None]:
# Write the extracted component to disk.
# NOTE(review): the output path says xjin_test3 while the inputs loaded above
# come from xjin_test4 -- confirm the intended destination.
sz.io.dump_graph(graph_component, "examples/xjin_test3.kmtricks-k111-m3-r2.ggcat.component-root-1291352.gt")

In [None]:
# Unique unitig ids in the component, then write their depths to NetCDF.
# NOTE(review): `depth` is not defined in any earlier cell -- presumably this
# should be `depth_table`; confirm before running, since this writes a file.
vertex_sequence_set = list({int(s[:-1]) for s in graph_component.vp.sequence})
depth.sel(unitig=vertex_sequence_set).to_netcdf('examples/xjin_test3.kmtricks-k111-m3-r2.ggcat.component-root-1291352.unitig_depth.nc')

In [None]:
# Sequence label of vertex 25701, the root used in the next cell.
graph.vp['sequence'][25701]

In [None]:
# Select the part of the graph around a root vertex and draw it.
radius = 10_000_000
root = 25701

# Per-vertex distance from `root`, computed with vertex lengths as weights
# (exact semantics are defined by sz.assembly.get_shortest_length).
dist = sz.assembly.get_shortest_length(graph, root, radius, graph.vp['length'])
within_radius = graph.new_vertex_property('bool', vals=dist.a < radius)

# Filtered view of the selected vertices, carrying the distance as a property.
g2 = gt.GraphView(graph, vfilt=within_radius)
dist2 = g2.own_property(dist)
g2.vp['dist_from_root'] = dist2

# draw_graph returns the layout positions, which are stored for reuse.
pos = sz.draw.draw_graph(g2, vertex_aspect=1, vertex_fill_color=g2.vp['dist_from_root'], vertex_text=g2.vp['dist_from_root'], output='nb/fig/test.pdf')
g2.vp['xyposition'] = pos

In [None]:
# Copy the filtered view into a standalone graph, dropping filtered-out vertices.
g3 = gt.Graph(g2, prune=True)

In [None]:
# Redraw the pruned copy using the positions stored on it.
sz.draw.draw_graph(g3, vertex_aspect=1, vertex_fill_color=g3.vp['dist_from_root'], vertex_text=g3.vp['dist_from_root'], output='nb/fig/test.pdf', pos=g3.vp['xyposition'])

In [None]:
# Write the pruned region graph to disk.
# NOTE(review): the file name says root-352895 while the root used above is
# vertex 25701 -- confirm the name is the intended one.
sz.io.dump_graph(g3, "examples/xjin_test3.kmtricks-k111-m3-r2.ggcat.component-root-352895.gt")

In [None]:
# Display the graph summary.
g3

In [None]:
# Integer unitig id of every vertex in g3 (label minus its trailing character).
vertex_sequence_order = [int(s[:-1]) for s in g3.vp['sequence']]

In [None]:
# Number of g3 unitig ids that have no entry in the depth data (0 expected).
# NOTE(review): `depth` is not defined in any earlier cell -- presumably
# `depth_table`; confirm.
len(set(vertex_sequence_order) - set(depth.unitig.values))

In [None]:
# Write the depths of the g3 unitigs to NetCDF.
# NOTE(review): `depth` is not defined in any earlier cell -- presumably
# `depth_table`; confirm before running, since this writes a file.
depth.sel(unitig=vertex_sequence_order).to_netcdf('examples/xjin_test3.kmtricks-k111-m3-r2.ggcat.component-root-352895.unitig_depth.nc')

In [None]:
# NOTE(review): late import; consider moving it to the import cell at the top.
import graph_tool.search

# Toy example: a directed 6-vertex path with unit vertex weights, used to try
# out sz.assembly.select_local_region from vertex 2 with a radius argument of 2.
g = gt.Graph([(0, 1), (1, 2), (2, 3), (3, 4), (4, 5)], directed=True)
weights = g.new_vertex_property('int', val=1)

dist = sz.assembly.select_local_region(g, 2, 2, weights)

In [None]:
# Raw array of the values returned for the toy graph.
g.own_property(dist).a

In [None]:
@contextmanager
def unfiltered(graph):
    """Temporarily disable the vertex filter on ``graph``.

    The current filter, as returned by ``graph.get_vertex_filter()``, is
    saved, the filter is cleared for the duration of the ``with`` body, and
    the saved filter is put back on exit.

    The restore runs in a ``finally`` clause, so the filter is reinstated even
    when the body raises (for example a failing ``assert`` inside the block).
    Without it, an exception would leave the graph permanently unfiltered.
    """
    filt = graph.get_vertex_filter()
    graph.set_vertex_filter(None)
    try:
        yield
    finally:
        graph.set_vertex_filter(*filt)

In [None]:
# Toy test case: an 8-vertex directed path with all managed properties set up.
_graph = gt.Graph()
_graph.add_edge_list([(0, 1), (1, 2), (2, 3), (3, 4), (4, 5), (5, 6), (6, 7)])
num_vertices = _graph.num_vertices(ignore_filter=True)

_length = _graph.new_vertex_property("int", val=1)
_sequence = _graph.new_vertex_property("string")
_depth = _graph.new_vertex_property("float")
_xyposition = _graph.new_vertex_property("vector<float>")  # replaced a few lines below
_filter = _graph.new_vertex_property("bool", val=1)

# Initialize position info
# Vertices are laid out on a horizontal line: x = vertex index, y = 0.
offset_scale = 0.1  # not used in this cell
xyposition = np.empty((2, num_vertices))
xyposition[0, :] = np.arange(num_vertices)
xyposition[1, :] = 0
_xyposition = _graph.new_vertex_property("vector<float>")
_xyposition.set_2d_array(xyposition, pos=[0, 1])

# Register the properties under the names used elsewhere in this notebook.
_graph.vp["depth"] = _depth
_graph.vp["length"] = _length
_graph.vp["sequence"] = _sequence
_graph.vp["xyposition"] = _xyposition
_graph.vp["filter"] = _filter

_graph.set_vertex_filter(_graph.vp["filter"])

# Scalar-depth handlers here (the real data above uses the vector-depth ones).
gm = sz.graph_manager.GraphManager(
    unzippers=[
        sz.graph_manager.LengthUnzipper(),
        sz.graph_manager.SequenceUnzipper(),
        sz.graph_manager.ScalarDepthUnzipper(),
        sz.graph_manager.PositionUnzipper(offset=(0.1, 0.1)),
    ],
    pressers=[
        sz.graph_manager.LengthPresser(),
        sz.graph_manager.SequencePresser(sep=","),
        sz.graph_manager.ScalarDepthPresser(),
        sz.graph_manager.PositionPresser(),
    ],
)
gm.validate(_graph)
sz.draw.draw_graph(_graph)

# Unzip three consecutive vertices in one batch; each entry is
# (vertex, list of (in-neighbor, out-neighbor) paths, per-path parameters).
gm.batch_unzip(
        _graph,
        (3, [(2, 4), (2, 4)], {"path_depths": [0, 0]}),
        (4, [(3, 5), (3, 5)], {"path_depths": [0, 0]}),
        (5, [(4, 6), (4, 6)], {"path_depths": [0, 0]}),
)
sz.draw.draw_graph(_graph)
# # gm.unzip(_graph, 3, [(2, 4)], path_depths=[0])
# # gm.unzip(_graph, 3, [(2, 4)], path_depths=[0]) # Should be equivalent to the above
# # gt.draw.graph_draw(gt.GraphView(_graph, vfilt=_graph.vp['filter']), pos=_graph.vp['xyposition'], ink_scale=0.35, vertex_text=_graph.vertex_index)
# # print(_graph.vp['xyposition'].get_2d_array(pos=[0, 1]))
# with unfiltered(_graph):
#     assert np.array_equal(
#         _graph.vp["xyposition"].get_2d_array(pos=[0, 1]),
#         np.array(
#             [[ 0. ,  1. ,  2. ,  3. ,  4. ,  5. ,  6. ,  7. ,  2.9,  3.1,  4.9,
#      5.1],
#    [ 0. ,  0. ,  0. ,  0. ,  0. ,  0. ,  0. ,  0. , -0.1,  0.1, -0.1,
#      0.1]]
#         ),
#     )

# gm.batch_press(
#     _graph,
#     ([0, 1, 2], {}),
#     ([6, 7], {}),
# )
# sz.draw.draw_graph(_graph)
# # gm.press(_graph, [0, 1, 2])
# # gm.press(_graph, [6, 7]) # Should be equivalent to the above
# # gt.draw.graph_draw(gt.GraphView(_graph, vfilt=_graph.vp['filter']), pos=_graph.vp['xyposition'], ink_scale=0.35, vertex_text=_graph.vertex_index)
# # print(repr(_graph.vp['xyposition'].get_2d_array(pos=[0, 1])))
# with unfiltered(_graph):
#     assert np.array_equal(
#         _graph.vp["xyposition"].get_2d_array(pos=[0, 1]),
#         np.array(
#             [[ 0. ,  1. ,  2. ,  3. ,  4. ,  5. ,  6. ,  7. ,  2.9,  3.1,  4.9,
#      5.1,  1. ,  6.5],
#    [ 0. ,  0. ,  0. ,  0. ,  0. ,  0. ,  0. ,  0. , -0.1,  0.1, -0.1,
#      0.1,  0. ,  0. ]]
#         ),
#     )


In [None]:
# Toy test case: unzip two adjacent junction vertices (1 and 2) in one batch
# and check the resulting degree statistics.
_graph = gt.Graph()
_graph.add_edge_list([(0, 1), (1, 2), (3, 1), (2, 4), (2, 5)])
# gt.draw.graph_draw(gt.GraphView(_graph), ink_scale=0.35, vertex_text=_graph.vertex_index)

_graph.vp["filter"] = _graph.new_vertex_property("bool", val=True)
_graph.vp['length'] = _graph.new_vertex_property("int", val=1)
_graph.set_vertex_filter(_graph.vp["filter"])

# The first draw provides the layout that is stored as the position property.
pos = sz.draw.draw_graph(_graph)
_graph.vp['xyposition'] = pos
gm = sz.graph_manager.GraphManager(
    unzippers=[
        sz.graph_manager.PositionUnzipper(),
        sz.graph_manager.LengthUnzipper(),
    ],
    pressers=[
        sz.graph_manager.PositionPresser(),
        sz.graph_manager.LengthPresser(),
    ],
)
gm.validate(_graph)

_unzip_requests = [
    (2, [(1, 5), (1, 4)], {}),
    (1, [(0, 2), (3, 2)], {}),
]
gm.batch_unzip(_graph, *_unzip_requests)

sz.draw.draw_graph(_graph)

# # sz.draw.draw_graph(_graph)
# # print(repr(sz.stats.degree_stats(_graph).reset_index().values))
_observed_degrees = sz.stats.degree_stats(_graph).reset_index().values
print(repr(_observed_degrees))
_expected_degrees = [
    [0., 1., 2.],
    [1., 0., 2.],
    [1., 2., 2.],
    [2., 1., 2.],
]
assert np.array_equal(_observed_degrees, _expected_degrees)

In [None]:
    # Path graph 0 -> 1 -> ... -> 7, used to check how batch_unzip / batch_press
    # update the per-vertex xy-positions.
    _graph = gt.Graph()
    _graph.add_edge_list([(0, 1), (1, 2), (2, 3), (3, 4), (4, 5), (5, 6), (6, 7)])
    num_vertices = _graph.num_vertices(ignore_filter=True)

    _length = _graph.new_vertex_property("int", val=1)
    _sequence = _graph.new_vertex_property("string")
    _depth = _graph.new_vertex_property("float")
    _filter = _graph.new_vertex_property("bool", val=1)

    # Initialize position info: vertex i starts at (i, 0).
    # `offset_scale` is the displacement handed to PositionUnzipper below.
    offset_scale = 0.1
    xyposition = np.empty((2, num_vertices))
    xyposition[0, :] = np.arange(num_vertices)
    xyposition[1, :] = 0
    _xyposition = _graph.new_vertex_property("vector<float>")
    _xyposition.set_2d_array(xyposition, pos=[0, 1])

    _graph.vp["depth"] = _depth
    _graph.vp["length"] = _length
    _graph.vp["sequence"] = _sequence
    _graph.vp["xyposition"] = _xyposition
    _graph.vp["filter"] = _filter

    _graph.set_vertex_filter(_graph.vp["filter"])

    gm = sz.graph_manager.GraphManager(
        unzippers=[
            sz.graph_manager.LengthUnzipper(),
            sz.graph_manager.SequenceUnzipper(),
            sz.graph_manager.ScalarDepthUnzipper(),
            sz.graph_manager.PositionUnzipper(offset=(offset_scale, offset_scale)),
        ],
        pressers=[
            sz.graph_manager.LengthPresser(),
            sz.graph_manager.SequencePresser(sep=","),
            sz.graph_manager.ScalarDepthPresser(),
            sz.graph_manager.PositionPresser(),
        ],
    )
    gm.validate(_graph)
    sz.draw.draw_graph(_graph)

    # Unzip vertices 3, 4 and 5 into two copies each, all in one batch.
    gm.batch_unzip(
        _graph,
        (3, [(2, 4), (2, 4)], {"path_depths": [0, 0]}),
        (4, [(3, 5), (3, 5)], {"path_depths": [0, 0]}),
        (5, [(4, 6), (4, 6)], {"path_depths": [0, 0]}),
    )
    sz.draw.draw_graph(_graph)

    # The expected array lists the six new vertices (indices 8-13) at
    # x = parent -/+ 0.1 and y = -/+ 0.1, with the original eight unchanged.
    # NOTE(review): `unfiltered` is not defined in this cell; presumably a
    # context manager from an earlier cell that lifts the vertex filter - confirm.
    with unfiltered(_graph):
        assert np.array_equal(
            _graph.vp["xyposition"].get_2d_array(pos=[0, 1]),
            np.array(
                [
                    [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 2.9, 3.1, 3.9, 4.1, 4.9, 5.1],
                    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.1, 0.1, -0.1, 0.1, -0.1, 0.1],
                ]
            ),
        )

    # Press the two remaining linear stretches, again as one batch.
    # Should be equivalent to gm.press(_graph, [0, 1, 2]) followed by
    # gm.press(_graph, [6, 7]).
    gm.batch_press(
        _graph,
        ([0, 1, 2], {}),
        ([6, 7], {}),
    )
    sz.draw.draw_graph(_graph)

    # Two more vertices (indices 14 and 15) are expected, at (1, 0) and (6.5, 0).
    with unfiltered(_graph):
        assert np.array_equal(
            _graph.vp["xyposition"].get_2d_array(pos=[0, 1]),
            np.array(
                [
                    [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 2.9, 3.1, 3.9, 4.1, 4.9, 5.1, 1.0, 6.5],
                    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.1, 0.1, -0.1, 0.1, -0.1, 0.1, 0.0, 0.0],
                ]
            ),
        )


In [None]:
# Toy junction: vertices 0 and 1 both feed vertex 2, which feeds 3 and 4.
# NOTE(review): this rebinds the notebook-level name `graph`.
graph = gt.Graph()
graph.add_edge_list([(0, 2), (1, 2), (2, 3), (2, 4)])
num_vertices = graph.num_vertices(ignore_filter=True)

_length = graph.new_vertex_property("int", val=1)
_sequence = graph.new_vertex_property("string")
_depth = graph.new_vertex_property("float", val=1)
_filter = graph.new_vertex_property("bool", val=1)

# Set depth observations (every other vertex keeps the default depth of 1):
_depth.a[0] = 10
_depth.a[2] = 5
_depth.a[3] = 2

# Initialize position info: sources at x=0, the junction at x=1, sinks at x=2.
xyposition = np.empty((2, num_vertices))
xyposition[0, :] = [0, 0, 1, 2, 2]
xyposition[1, :] = [0, 1, 0.5, 0, 1]
_xyposition = graph.new_vertex_property("vector<float>")
_xyposition.set_2d_array(xyposition, pos=[0, 1])

graph.vp["depth"] = _depth
graph.vp["length"] = _length
graph.vp["sequence"] = _sequence
graph.vp["xyposition"] = _xyposition
graph.vp["filter"] = _filter

graph.set_vertex_filter(graph.vp["filter"])

gm = sz.graph_manager.GraphManager(
    unzippers=[
        sz.graph_manager.LengthUnzipper(),
        sz.graph_manager.SequenceUnzipper(),
        sz.graph_manager.ScalarDepthUnzipper(),
        sz.graph_manager.PositionUnzipper(offset=(0.1, 0.1)),
    ],
    pressers=[
        sz.graph_manager.LengthPresser(),
        sz.graph_manager.SequencePresser(sep=","),
        sz.graph_manager.ScalarDepthPresser(),
        sz.graph_manager.PositionPresser(),
    ],
)
gm.validate(graph)


# Flow algo.
weight = graph.vp['length']
depth = graph.vp['depth']

# "Before" picture: a constant placeholder flow of 1, just for annotating the plot.
flow = graph.new_edge_property('float', val=1)
sz.draw.draw_graph(graph, edge_text=flow, vertex_text=depth)

# "After" picture: edges annotated with the estimated flow.
flow = sz.flow.estimate_flow(graph, depth=depth, weight=weight, eps=0.001, maxiter=1000)
sz.draw.draw_graph(graph, edge_text=flow, vertex_text=depth)

In [None]:
# Four-vertex cycle 0 -> 1 -> 2 -> 3 -> 0 with one extra edge 0 -> 4.
# NOTE(review): this rebinds the notebook-level name `graph`.
graph = gt.Graph()
graph.add_edge_list([(0, 1), (1, 2), (2, 3), (3, 0), (0, 4)])
# Named `vertex_filter` rather than `filter` so the Python builtin is not
# shadowed for the rest of the kernel session.
vertex_filter = graph.new_vertex_property('bool', val=1)
graph.set_vertex_filter(vertex_filter)

length = graph.new_vertex_property("int", val=1)

depth = graph.new_vertex_property("float", val=1)
depth.a[4] = 1  # Same as the default above; edit to try other depths on vertex 4.

# Flow estimate with vertex 4 still present.
pos = sz.draw.draw_graph(graph, vertex_text=depth)
flow = sz.flow.estimate_flow(
    graph, depth=depth, weight=length, eps=0.001, maxiter=1000
)
sz.draw.draw_graph(graph, pos=pos, vertex_text=depth, edge_text=flow)


# Mask vertex 4 out through the active filter and estimate again.
vertex_filter[4] = 0
flow = sz.flow.estimate_flow(
    graph, depth=depth, weight=length, eps=0.001, maxiter=1000
)
sz.draw.draw_graph(graph, pos=pos, vertex_text=depth, edge_text=flow)
print(flow[(0, 1)] == 1)

In [None]:
def estimate_flow(graph, depth, weight, eps=0.001, maxiter=1000, use_weights=True):
    """Iteratively estimate a per-edge flow that matches per-vertex depths.

    Every edge starts with a flow of 1. On each iteration the gap between a
    vertex's depth and its total in-flow (and, separately, its total out-flow)
    is shared among the incident edges in proportion to each edge's current
    share of that total. An edge's update is the average of the error assigned
    at its source and at its target, weighted by the two endpoint weights.

    Parameters
    ----------
    graph
        graph_tool graph to operate on.
    depth
        Vertex property map of depths (read through ``.a``).
    weight
        Vertex property map of endpoint weights.
    eps
        Stop once the relative improvement of the squared-error loss between
        two consecutive iterations drops below this value.
    maxiter
        Upper bound on the number of iterations.
    use_weights
        Accepted for signature compatibility; not used in this implementation.

    Returns
    -------
    Edge property map of type ``'float'`` holding the estimated flow.
    """
    target_vertex_weight = gt.edge_endpoint_property(graph, weight, 'target')
    source_vertex_weight = gt.edge_endpoint_property(graph, weight, 'source')
    flow = graph.new_edge_property('float', val=1)
    flow.a[:] = 1
    # Seed the history with the largest float so the first relative
    # improvement is always well-defined.
    loss_hist = [np.finfo('float').max]
    for _ in range(maxiter):
        # Error on the incoming side, distributed by each edge's share of in-flow.
        total_in_flow = gt.incident_edges_op(graph, 'in', 'sum', flow)
        in_flow_error = graph.new_vertex_property('float', vals=depth.a - total_in_flow.a)
        target_vertex_total_inflow = gt.edge_endpoint_property(graph, total_in_flow, 'target')
        target_vertex_error = gt.edge_endpoint_property(graph, in_flow_error, 'target')
        with np.errstate(divide='ignore', over='ignore', invalid='ignore'):
            # x/0 -> share of 1, 0/0 -> share of 0.
            target_vertex_alloc = np.nan_to_num(flow.a / target_vertex_total_inflow.a, posinf=1, nan=0)
        target_vertex_alloc_error = target_vertex_alloc * target_vertex_error.a

        # Same computation on the outgoing side.
        total_out_flow = gt.incident_edges_op(graph, 'out', 'sum', flow)
        out_flow_error = graph.new_vertex_property('float', vals=depth.a - total_out_flow.a)
        source_vertex_total_outflow = gt.edge_endpoint_property(graph, total_out_flow, 'source')
        source_vertex_error = gt.edge_endpoint_property(graph, out_flow_error, 'source')
        with np.errstate(divide='ignore', over='ignore', invalid='ignore'):
            source_vertex_alloc = np.nan_to_num(flow.a / source_vertex_total_outflow.a, posinf=1, nan=0)
        source_vertex_alloc_error = source_vertex_alloc * source_vertex_error.a

        loss_hist.append(np.square(in_flow_error.a).sum() + np.square(out_flow_error.a).sum())
        if loss_hist[-1] == 0:
            break  # This should only happen if d is all 0's.
        loss_ratio = (loss_hist[-2] - loss_hist[-1]) / loss_hist[-2]
        if loss_ratio < eps:
            break

        with np.errstate(divide='ignore', over='ignore', invalid='ignore'):
            # NOTE: Some values of (source_vertex_weight.a + target_vertex_weight.a)
            # are 0 because these two edge_properties include edge indices
            # for non-existent edges.
            # TODO: Consider running gt.reindex_edges to get rid of these.
            mean_flow_error = graph.new_edge_property(
                'float',
                vals=(
                    (source_vertex_alloc_error * source_vertex_weight.a)
                    +
                    (target_vertex_alloc_error * target_vertex_weight.a)
                )
                / (source_vertex_weight.a + target_vertex_weight.a)
            )
        flow = graph.new_edge_property('float', vals=flow.a + mean_flow_error.a)
    # The original returned the undefined name `all_flows` (NameError on every
    # call); the value computed by the loop is `flow`.
    return flow

In [None]:
%autoreload

In [None]:
%%time

np.random.seed(1)
gt.seed_rng(1)
sequence = sz.sequence.random_sequence(25)

graph = sz.build.annotated_dbg(sequence, k=5, circularize=True, include_rc=True)
sz.draw.update_xypositions(graph)

graph.set_vertex_filter(graph.vp['filter'])

sz.draw.draw_graph(graph, vertex_text=graph.vp['length'], vertex_fill_color=graph.vp['depth'], vertex_aspect=1)

gm = sz.graph_manager.GraphManager(
    unzippers=[
        sz.graph_manager.FilterUnzipper(),
        sz.graph_manager.LengthUnzipper(),
        sz.graph_manager.SequenceUnzipper(),
        sz.graph_manager.ScalarDepthUnzipper(),
        sz.graph_manager.PositionUnzipper(offset=(0.1, 0.1)),
    ],
    pressers=[
        sz.graph_manager.FilterPresser(),
        sz.graph_manager.LengthPresser(),
        sz.graph_manager.SequencePresser(sep=","),
        sz.graph_manager.ScalarDepthPresser(),
        sz.graph_manager.PositionPresser(),
    ],
)
gm.validate(graph)

print(len([(path, {}) for path in sz.assembly.iter_maximal_unitig_paths(graph)]))
gm.batch_press(graph, *[(path, {}) for path in sz.assembly.iter_maximal_unitig_paths(graph)])
print(len([(path, {}) for path in sz.assembly.iter_maximal_unitig_paths(graph)]))  # No length >1 unitigs remain.

sz.draw.update_xypositions(graph, init_step=1)
sz.draw.draw_graph(graph, vertex_text=graph.vp['length'], vertex_fill_color=graph.vp['depth'], vertex_aspect=1)

sz.stats.degree_stats(graph)

In [None]:
%%time

# Same 25 bp example as the scalar-depth cell, but with a vector-valued depth
# property and the matching VectorDepth unzipper/presser.
np.random.seed(1)
gt.seed_rng(1)
sequence = sz.sequence.random_sequence(25)

graph = sz.build.annotated_dbg(sequence, k=5, circularize=True, include_rc=True)
graph.vp['depth'] = gt.group_vector_property([graph.vp['depth']]*2)  # Make it a vector depth.
sz.draw.update_xypositions(graph)

graph.set_vertex_filter(graph.vp['filter'])

# Vertices are coloured by length here (not depth) - presumably because depth
# is no longer a scalar; TODO confirm.
sz.draw.draw_graph(graph, vertex_text=graph.vp['length'], vertex_fill_color=graph.vp['length'], vertex_aspect=1)

gm = sz.graph_manager.GraphManager(
    unzippers=[
        sz.graph_manager.FilterUnzipper(),
        sz.graph_manager.LengthUnzipper(),
        sz.graph_manager.SequenceUnzipper(),
        sz.graph_manager.VectorDepthUnzipper(),
        sz.graph_manager.PositionUnzipper(offset=(0.1, 0.1)),
    ],
    pressers=[
        sz.graph_manager.FilterPresser(),
        sz.graph_manager.LengthPresser(),
        sz.graph_manager.SequencePresser(sep=","),
        sz.graph_manager.VectorDepthPresser(),
        sz.graph_manager.PositionPresser(),
    ],
)
gm.validate(graph)

# Time each stage separately: find unitig paths, press them, then search again
# and print how many paths are found after pressing.
%time unitigs = list(sz.assembly.iter_maximal_unitig_paths(graph))
print(len(unitigs))
%time gm.batch_press(graph, *[(u, {}) for u in unitigs])
%time unitigs = list(sz.assembly.iter_maximal_unitig_paths(graph))
print(len(unitigs))

sz.draw.update_xypositions(graph, init_step=1)
sz.draw.draw_graph(graph, vertex_text=graph.vp['length'], vertex_fill_color=graph.vp['length'], vertex_aspect=1)

sz.stats.degree_stats(graph)

In [None]:
%%time

# Medium-sized run: random 1000 bp sequence, k=9, scalar depth.
np.random.seed(1)
gt.seed_rng(1)
sequence = sz.sequence.random_sequence(1000)

graph = sz.build.annotated_dbg(sequence, k=9, circularize=True, include_rc=True)
sz.draw.update_xypositions(graph)

graph.set_vertex_filter(graph.vp['filter'])

sz.draw.draw_graph(graph, vertex_text=graph.vp['length'], vertex_fill_color=graph.vp['depth'], vertex_aspect=1)

gm = sz.graph_manager.GraphManager(
    unzippers=[
        sz.graph_manager.FilterUnzipper(),
        sz.graph_manager.LengthUnzipper(),
        sz.graph_manager.SequenceUnzipper(),
        sz.graph_manager.ScalarDepthUnzipper(),
        sz.graph_manager.PositionUnzipper(offset=(0.1, 0.1)),
    ],
    pressers=[
        sz.graph_manager.FilterPresser(),
        sz.graph_manager.LengthPresser(),
        sz.graph_manager.SequencePresser(sep=","),
        sz.graph_manager.ScalarDepthPresser(),
        sz.graph_manager.PositionPresser(),
    ],
)
gm.validate(graph)

# Time each stage separately: find unitig paths, press them, then search again
# and print how many paths are found after pressing.
%time unitigs = list(sz.assembly.iter_maximal_unitig_paths(graph))
print(len(unitigs))
%time gm.batch_press(graph, *[(u, {}) for u in unitigs])
%time unitigs = list(sz.assembly.iter_maximal_unitig_paths(graph))
print(len(unitigs))

# The layout is not recomputed after pressing (the update call is disabled),
# so this plot uses whatever positions the pressers produced.
# sz.draw.update_xypositions(graph, init_step=1)
sz.draw.draw_graph(graph, vertex_text=graph.vp['length'], vertex_fill_color=graph.vp['depth'], vertex_aspect=1)

sz.stats.degree_stats(graph)

In [None]:
# Large run: random 100 kbp sequence, k=11. Only graph construction and
# GraphManager validation are active; drawing and pressing are commented out.
# NOTE(review): both seed calls are disabled, so `sequence` (and therefore
# `graph`) differs between runs unless an earlier cell seeded the RNG.
# np.random.seed(1)
# gt.seed_rng(1)
sequence = sz.sequence.random_sequence(100_000)

graph = sz.build.annotated_dbg(sequence, k=11, circularize=True, include_rc=True)
# sz.draw.update_xypositions(graph)

graph.set_vertex_filter(graph.vp['filter'])

# sz.draw.draw_graph(graph, vertex_text=graph.vp['length'], vertex_fill_color=graph.vp['depth'], vertex_aspect=1)

# Filter and position handlers are left out of this manager (commented below).
gm = sz.graph_manager.GraphManager(
    unzippers=[
        # sz.graph_manager.FilterUnzipper(),
        sz.graph_manager.LengthUnzipper(),
        sz.graph_manager.SequenceUnzipper(),
        sz.graph_manager.ScalarDepthUnzipper(),
        # sz.graph_manager.PositionUnzipper(offset=(0.1, 0.1)),
    ],
    pressers=[
        # sz.graph_manager.FilterPresser(),
        sz.graph_manager.LengthPresser(),
        sz.graph_manager.SequencePresser(sep=","),
        sz.graph_manager.ScalarDepthPresser(),
        # sz.graph_manager.PositionPresser(),
    ],
)
gm.validate(graph)

# sz.draw.draw_graph(graph, vertex_aspect=1, vertex_text=graph.vp.length, vertex_color=graph.vp.depth)

# unitigs = list(sz.assembly.iter_maximal_unitig_paths(graph))
# print(len(unitigs))
# gm.batch_press(graph, *[(u, {}) for u in unitigs])
# unitigs = list(sz.assembly.iter_maximal_unitig_paths(graph))
# print(len(unitigs))

# # sz.draw.update_xypositions(graph, init_step=1)
# # sz.draw.draw_graph(graph, vertex_text=graph.vp['length'], vertex_fill_color=graph.vp['depth'], vertex_aspect=1)

# # sz.draw.draw_graph(graph, vertex_aspect=1, vertex_text=graph.vp.depth, vertex_color=graph.vp.depth)

In [None]:
# Large seeded run: random 100 kbp sequence, k=11, built with
# circularize=False. Unlike the smaller cells, no vertex filter is set on the
# graph here and no drawing is done.
np.random.seed(1)
gt.seed_rng(1)
sequence = sz.sequence.random_sequence(100_000)

%time graph = sz.build.annotated_dbg(sequence, k=11, circularize=False, include_rc=True)
# sz.draw.update_xypositions(graph)

# sz.draw.draw_graph(graph, vertex_text=graph.vp['length'], vertex_fill_color=graph.vp['depth'], vertex_aspect=1)

# Filter and position handlers are left out of this manager (commented below).
gm = sz.graph_manager.GraphManager(
    unzippers=[
        # sz.graph_manager.FilterUnzipper(),
        sz.graph_manager.LengthUnzipper(),
        sz.graph_manager.SequenceUnzipper(),
        sz.graph_manager.ScalarDepthUnzipper(),
        # sz.graph_manager.PositionUnzipper(offset=(0.1, 0.1)),
    ],
    pressers=[
        # sz.graph_manager.FilterPresser(),
        sz.graph_manager.LengthPresser(),
        sz.graph_manager.SequencePresser(sep=","),
        sz.graph_manager.ScalarDepthPresser(),
        # sz.graph_manager.PositionPresser(),
    ],
)
gm.validate(graph)

# Time each stage separately: find unitig paths, press them, then search again
# and print how many paths are found after pressing.
%time unitigs = list(sz.assembly.iter_maximal_unitig_paths(graph))
print(len(unitigs))
%time gm.batch_press(graph, *[(u, {}) for u in unitigs])
%time unitigs = list(sz.assembly.iter_maximal_unitig_paths(graph))
print(len(unitigs))

# sz.draw.update_xypositions(graph, init_step=1)
# sz.draw.draw_graph(graph, vertex_text=graph.vp['length'], vertex_fill_color=graph.vp['depth'], vertex_aspect=1)

sz.stats.degree_stats(graph)

In [None]:
%autoreload

In [None]:
graph

In [None]:
# Probe sz.assembly.find_paths from vertex 94 with distance=3, passing the raw
# length array as vertex weights.
# NOTE(review): 94 looks like an arbitrary vertex of whichever `graph` the
# kernel currently holds - confirm it exists after the preceding cell.
sz.assembly.find_paths(graph, graph.vertex(94), distance=3, vertex_weights=graph.vp['length'].a)

In [None]:
# NOTE(review): this cell held a bare `debug`, i.e. the automagic form of
# `%debug` (post-mortem debugger). It is leftover interactive debugging: with
# automagic disabled it is a NameError, and with it enabled it opens a
# debugger prompt. Run `%debug` by hand right after a failure instead, and
# delete this cell.

In [None]:
# Extract the neighbourhood of vertex 94 with radius=10, using vertex lengths
# as weights and ignoring edge direction (directed=False). `dist` is the second
# value returned alongside the sub-graph - presumably per-vertex distance from
# vertex 94; TODO confirm.
local_graph, dist = sz.assembly.select_local_region(graph, 94, radius=10, weights=graph.vp['length'], directed=False)

In [None]:
sz.draw.draw_graph(local_graph, vertex_text=dist)

In [None]:
# Benchmark the library flow estimator on the current `graph`, capped at 50
# iterations. eps=0. presumably disables early stopping on small improvements -
# TODO confirm against sz.flow.estimate_flow.
%timeit _ = sz.flow.estimate_flow(graph, graph.vp['depth'], graph.vp['length'], eps=0., maxiter=50)
# The commented lines below refer to `loss_hist`, `resid` and `flow`, none of
# which the call above binds; they cannot be re-enabled as written.
# print(len(loss_hist))
# resid = graph.new_vertex_property('float', vals=resid)
# sz.draw.draw_graph(graph, vertex_aspect=1, vertex_text=graph.vp.depth, vertex_color=resid, edge_text=flow)

# plt.plot(loss_hist[1:])
# plt.yscale('log')

In [None]:
# NOTE(review): this cell held only the unfinished call `np.divide(`, which is
# a SyntaxError and stops Restart & Run All at this point. It looks like a
# leftover from interactive exploration; delete the cell.

In [None]:
g2 = sz.io.load_graph("test.gt")

In [None]:
graph

In [None]:
g2

In [None]:
# Experiment: what happens when an edge is added that points at a vertex
# hidden by the active vertex filter?
graph = gt.Graph(
    [
        (0, 4), (1, 4),
        (4, 2), (4, 3),
        (2, 7), (3, 7),
        (7, 5), (7, 6),
        (8, 5),
        (5, 9), (5, 10),
    ]
)
graph.vp['filter'] = graph.new_vertex_property('bool', val=True)
graph.set_vertex_filter(graph.vp['filter'])

# Lay the graph out once and reuse the positions for every later drawing.
pos = gt.draw.graph_draw(graph)

# Hide vertex 5 through the filter property, then redraw.
graph.vp['filter'].a[5] = False
sz.draw.draw_graph(graph, pos=pos)

# Add an edge from a new vertex id (11) to the hidden vertex 5, then redraw.
graph.add_edge_list([(11, 5)])
sz.draw.draw_graph(graph, pos=pos)

In [None]:
# Linear path 0 -> 1 -> ... -> 6, managed with only the filter property, used
# to check degree statistics after successive unzips.
_graph = gt.Graph()
_graph.add_edge_list([(0, 1), (1, 2), (2, 3), (3, 4), (4, 5), (5, 6)])

_graph.vp['filter'] = _graph.new_vertex_property('bool', val=True)
_graph.set_vertex_filter(_graph.vp['filter'])

gm = sz.graph_manager.GraphManager(
    unzippers=[sz.graph_manager.FilterUnzipper()],
    pressers=[sz.graph_manager.FilterPresser()],
)
gm.validate(_graph)

# Step 1: unzip vertices 3 and 5 into two copies each.
gm.unzip(_graph, 3, [(2, 4), (2, 4)])
gm.unzip(_graph, 5, [(4, 6), (4, 6)])

sz.draw.draw_graph(_graph)
print(repr(sz.stats.degree_stats(_graph).reset_index().values))
# Rows are presumably (in-degree, out-degree, vertex count) - TODO confirm
# against sz.stats.degree_stats.
assert np.array_equal(
    sz.stats.degree_stats(_graph).reset_index().values,
    [
        [1., 1., 5.],
        [0., 1., 1.],
        [1., 2., 1.],
        [2., 0., 1.],
        [2., 2., 1.],
    ],
)

# Step 2: unzip vertex 4 across every pairing of the copies made in step 1
# (referred to here by ids 7, 8 and 9, 10).
gm.unzip(_graph, 4, [(7, 9), (7, 10), (8, 9), (8, 10)])

sz.draw.draw_graph(_graph)
print(repr(sz.stats.degree_stats(_graph).reset_index().values))
assert np.array_equal(
    sz.stats.degree_stats(_graph).reset_index().values,
    [
        [1., 1., 5.],
        [1., 2., 3.],
        [2., 1., 2.],
        [0., 1., 1.],
        [2., 0., 1.],
    ],
)

In [None]:
_graph.degree_property_map('in').a

In [None]:
_graph.degree_property_map('out').a