In [1]:
cd /home/olga/pureScratch/olgabot-maca/facs/

/mnt/pureScratch/olga/olgabot-maca/facs


In [107]:
import glob
import itertools
import math

import numpy as np
import scipy.sparse
from umap.umap_ import smooth_knn_dist, compute_membership_strengths

from umap.spectral import spectral_layout

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [3]:
import sourmash

In [4]:
sourmash.__file__

'/home/olga/code/sourmash/sourmash/__init__.py'

In [5]:
# from sourmash.

In [6]:
# Defaults from 'sourmash index'

ksize = 21
moltype = 'DNA'

bf_size = 1e5
n_children = 2
scaled = False

from sourmash import signature as sig
from sourmash.sbt import Leaf
from sourmash.sbtmh import SigLeaf, create_sbt_index
from sourmash import sourmash_args
from sourmash.logging import notify

In [7]:

folder = '/home/olga/pureScratch/olgabot-maca/facs/sourmash/'

signatures = glob.glob('/home/olga/pureScratch/olgabot-maca/facs/sourmash/A10-D*')
print(f"len(signatures): {len(signatures)}")

def build_sbt(signatures, ksize, moltype, scaled, bf_size=1e5, n_children=2):
    tree = create_sbt_index(bf_size, n_children=n_children)

    inp_files = sourmash_args.traverse_find_sigs(signatures)

    n = 0
    ksizes = set()
    moltypes = set()
    nums = set()
    scaleds = set()

    for filename in inp_files:
        notify('loading {}', filename, end='\r')
        siglist = sig.load_signatures(filename,
                                     ksize=ksize,
                                     select_moltype=moltype)
        siglist = list(siglist)
        if not siglist:
            notify('\nwarning: no signatures loaded at given ksize/molecule type from {}', filename)

        # load all matching signatures in this file
        ss = None
        for ss in siglist:
            ksizes.add(ss.minhash.ksize)
            moltypes.add(sourmash_args.get_moltype(ss))
            nums.add(ss.minhash.num)

            if scaled:
                ss.minhash = ss.minhash.downsample_scaled(args.scaled)
            scaleds.add(ss.minhash.scaled)

            leaf = SigLeaf(ss.md5sum(), ss)
            tree.add_node(leaf)
            n += 1

        if not ss:
            continue

        # check to make sure we aren't loading incompatible signatures
        if len(ksizes) > 1 or len(moltypes) > 1:
            error('multiple k-mer sizes or molecule types present; fail.')
            error('specify --dna/--protein and --ksize as necessary')
            error('ksizes: {}; moltypes: {}',
                  ", ".join(map(str, ksizes)), ", ".join(moltypes))
            sys.exit(-1)

        if nums == { 0 } and len(scaleds) == 1:
            pass # good
        elif scaleds == { 0 } and len(nums) == 1:
            pass # also good
        else:
            error('trying to build an SBT with incompatible signatures.')
            error('nums = {}; scaleds = {}', repr(nums), repr(scaleds))
            sys.exit(-1)
            
    return tree

len(signatures): 16


In [8]:
%time tree = build_sbt([folder], ksize=21, moltype="DNA", scaled=False)

[Kloading /home/olga/pureScratch/olgabot-maca/facs/sourmash/P9-MAA001884-3_38_F-1-1_S151.siggloading /home/olga/pureScratch/olgabot-maca/facs/sourmash/P10-D045058-3_39_F-1-1_S49.sig

CPU times: user 18min 19s, sys: 12.9 s, total: 18min 32s
Wall time: 21min 21s


[Kloading /home/olga/pureScratch/olgabot-maca/facs/sourmash/P9-MAA001869-3_38_F-1-1_S207.sig

In [13]:
%time tree.save("/home/olga/pureScratch/olgabot-maca/facs/tabula-muris-k21.sbt.json")

[K100623 of 100623 nodes saved
Finished saving nodes, now saving SBT json file.


CPU times: user 2min 7s, sys: 39.9 s, total: 2min 47s
Wall time: 7min 27s


'/home/olga/pureScratch/olgabot-maca/facs/tabula-muris-k21.sbt.json'

In [None]:
%%time

# initialize search queue with top node of tree
n_neighbors = 5
n_parent_levels = math.log2(n_neighbors) + 1
adjacencies = []
matches = []
visited, queue = set(), [0]

ignore_abundance = False
downsample = False

def get_leaves_under(tree, node_position):
    visited, queue = set(), [node_position]
    leaves = []
    
    while queue:
        position = queue.pop(0)
        node = tree.nodes.get(position, None)
        
        if isinstance(node, Leaf):
            leaves.append(node)
        else:
            queue.extend(c.pos for c in tree.children(position))
    return leaves

# while the queue is not empty, load each node and apply search
# function.
while queue:
    position = queue.pop(0)
    node = tree.nodes.get(position, None)
    
    # repair while searching.
    if node is None:
#         print("repairing...")
        if position in tree.missing_nodes:
            tree._rebuild_node(node)
            node = tree.nodes[position]
        else:
            continue
            
    # if we have not visited this node before,
    if position not in visited:
        visited.add(position)
        
    # Add 
    if isinstance(node, SigLeaf):
#         print(node.data)
        n = 1
        upper_internal_node = tree.parent(position)
        while n < n_parent_levels:
            upper_internal_node = tree.parent(upper_internal_node.pos)
            n += 1
#         print("upper_internal_node:", upper_internal_node)
        leaves = get_leaves_under(tree, upper_internal_node.pos)

        
        similarities = []
        for leaf in leaves:
            # Ignore self-simililarity
            if leaf == node:
                continue
#             print(leaf.data)
            similarity = node.data.similarity(leaf.data, 
                                              ignore_abundance=ignore_abundance, 
                                              downsample=downsample)
            similarities.append([node.data.name(), leaf.data.name(), similarity])
        adjacent = sorted(similarities, key=lambda x: x[1])[-n_neighbors:]
#         print(adjacent)
        adjacencies.extend(adjacent)
            
#         break
        
    else:
        queue.extend(c.pos for c in tree.children(position))
    
#     print(node)
#     print(node.data)
#     print()
    visited.add(node)

print(len(adjacencies))

In [None]:
# %%time

# adjacencies = tree.nearest_neighbor_adjacencies(n_neighbors=5, ignore_abundance=True, 
#                                                 downsample=False)

# print("len(adjacencies)", len(adjacencies))
# adjacencies[:10]

In [21]:
# list(tree.leaves_under(37482))

In [31]:
len(tree.nodes)

100623

In [33]:

from sourmash.sbt import Leaf

In [78]:
def leaves(tree):
    for i, node in tree.nodes.items():
        if isinstance(node, SigLeaf) or instance(node, Leaf):
            yield node
        
sum(1 for _ in leaves(tree))

50312

In [79]:
leaf_to_index = dict((node.data.name(), i) for i, node in enumerate(leaves(tree)))
index_to_leaf = dict(zip(leaf_to_index.values(), leaf_to_index.keys()))

len(leaf_to_index)

50312

In [80]:
max(leaf_to_index.values())

50311

In [43]:
list(leaf_to_index.items())[:10]

[('A8-B002777-3_39_F-1-1_S59', 50311),
 ('I15-B001750-3_38_F-1-1_S220', 50312),
 ('D8-MAA000871-3_11_M-1-1_S63', 50313),
 ('I15-MAA000586-3_8_M-1-1_S260', 50314),
 ('B7-MAA001857-3_38_F-1-1_S124', 50315),
 ('I15-MAA000590-3_9_M-1-1_S203', 50316),
 ('D9-B000826-3_39_F-1-1_S242', 50317),
 ('I15-MAA000439-3_10_M-1-1_S146', 50318),
 ('A19-D041904-3_10_M-1-1_S157', 50319),
 ('I15-MAA000409-3_10_M-1-1_S282', 50320)]

## Build UMAP on adjacencies

- [umap's nearest neighbors](https://github.com/lmcinnes/umap/blob/834184f9c0455f26db13ab148c0abd2d3767d968//umap/umap_.py#L159:5)
- [UMAP's transform](https://github.com/lmcinnes/umap/blob/master/umap/umap_.py#L1427)

How [UMAP does it internally when the metric is precomputed](https://github.com/lmcinnes/umap/blob/master//umap/umap_.py#L200):

```python
    if metric == "precomputed":
        # Note that this does not support sparse distance matrices yet ...
        # Compute indices of n nearest neighbors
        knn_indices = np.argsort(X)[:, :n_neighbors]
        # Compute the nearest neighbor distances
        #   (equivalent to np.sort(X)[:,:n_neighbors])
        knn_dists = X[np.arange(X.shape[0])[:, None], knn_indices].copy()
```

In [81]:
knn_indices = []
knn_dists = []

for u, items in itertools.groupby(adjacencies[:10], key=lambda x: x[0]):
    knn_indices_line = []
    knn_dists_line = []
    for u, v, similarity in items:
        knn_indices_line.append(leaf_to_index[v])
        knn_dists_line.append(1-similarity)
    knn_indices.append(knn_indices_line)
    knn_dists.append(knn_dists_line)
print(f"knn_indices: {knn_indices}")
print(f"knn_dists: {knn_dists}")

knn_indices: [[50307, 50305, 50297, 50301, 50303], [50307, 50305, 50297, 50301, 50303]]
knn_dists: [[0.8980427004179393, 0.9599218185483611, 0.9839423609219159, 0.9990180328504167, 0.979380403003595], [0.9628889870576949, 0.4797842985273033, 0.48082970657557933, 0.9834741500789255, 0.5998726152045757]]


In [83]:
knn_indices = []
knn_dists = []

for u, items in itertools.groupby(adjacencies, key=lambda x: x[0]):
    knn_indices_line = []
    knn_dists_line = []
    for u, v, similarity in items:
        knn_indices_line.append(leaf_to_index[v])
        knn_dists_line.append(1-similarity)
    knn_indices.append(knn_indices_line)
    knn_dists.append(knn_dists_line)
print(f"knn_indices: {knn_indices[-10:]}")
print(f"knn_dists: {knn_dists[-10:]}")

knn_indices: [[50307, 50305, 50297, 50301, 50303], [50309, 50307, 50305, 50297, 50301], [50307, 50305, 50297, 50301, 50303], [50309, 50307, 50297, 50301, 50303], [50307, 50305, 50297, 50301, 50303], [50309, 50305, 50297, 50301, 50303], [50307, 50305, 50297, 50301, 50303], [50307, 50305, 50297, 50301, 50303], [50307, 50305, 50297, 50301, 50303], [50307, 50305, 50297, 50301, 50303]]
knn_dists: [[0.9778139394407598, 0.5865537452356665, 0.5916232897670664, 0.98692031043282, 0.682226850530286], [0.5300652108409426, 0.9920402425804716, 0.6772386593777987, 0.660234167482528, 0.9887831152269003], [0.9781602437008845, 0.5478392578407241, 0.5322415135173466, 0.985198052660397, 0.6666717421966621], [0.6641045046439364, 0.9789997887267935, 0.4498235089138267, 0.9848128448550353, 0.6772386593777987], [0.9723907241140146, 0.7921952958910957, 0.7887829664318484, 0.9927943425894896, 0.8339212803609773], [0.9999661326169812, 0.9789997887267935, 0.9765970894122245, 0.9993956385880218, 0.9920402425804716

In [84]:
knn_dists_np = np.array(knn_dists)
knn_indices_np = np.array(knn_indices)

### Fuzzy Simplicial Set



Called within [`UMAP.fit`](https://github.com/lmcinnes/umap/blob/master//umap/umap_.py#L1437):

```python
graph_ = fuzzy_simplicial_set(
                X,
                self.n_neighbors,
                random_state,
                self.metric,
                self._metric_kwds,
                self._knn_indices,
                self._knn_dists,
                self.angular_rp_forest,
                self.set_op_mix_ratio,
                self.local_connectivity,
                self.verbose,
            )
```

[`fuzzy_simplicial_set` from `umap-learn`](https://github.com/lmcinnes/umap/blob/master//umap/umap_.py#L474)


```python
    if knn_indices is None or knn_dists is None:
        knn_indices, knn_dists, _ = nearest_neighbors(
            X, n_neighbors, metric, metric_kwds, angular, random_state, verbose=verbose
        )

    sigmas, rhos = smooth_knn_dist(
        knn_dists, n_neighbors, local_connectivity=local_connectivity
    )

    rows, cols, vals = compute_membership_strengths(
        knn_indices, knn_dists, sigmas, rhos
    )

    result = scipy.sparse.coo_matrix(
        (vals, (rows, cols)), shape=(X.shape[0], X.shape[0])
    )
    result.eliminate_zeros()

    transpose = result.transpose()

    prod_matrix = result.multiply(transpose)

    result = (
        set_op_mix_ratio * (result + transpose - prod_matrix)
        + (1.0 - set_op_mix_ratio) * prod_matrix
    )

    result.eliminate_zeros()

    return result
```

In [None]:
knn_dists_np.shape

In [None]:
n_components

In [86]:
knn_dists_np.shape

(50312, 5)

In [87]:
knn_indices_np.shape

(50312, 5)

In [88]:
knn_indices_np.max()

50309

In [99]:
# Default in UMAP
# https://github.com/lmcinnes/umap/blob/master///umap/umap_.py#L1246
local_connectivity = 1
set_op_mix_ratio = 1

sigmas, rhos = smooth_knn_dist(
        knn_dists_np, n_neighbors, local_connectivity=local_connectivity
    )

rows, cols, vals = compute_membership_strengths(
        knn_indices_np, knn_dists_np, sigmas, rhos
    )

result = scipy.sparse.coo_matrix(
        (vals, (rows, cols)), shape=(knn_dists_np.shape[0], knn_dists_np.shape[0])
    )
result.eliminate_zeros()

transpose = result.transpose()

prod_matrix = result.multiply(transpose)

result = (
    set_op_mix_ratio * (result + transpose - prod_matrix)
    + (1.0 - set_op_mix_ratio) * prod_matrix
)

result.eliminate_zeros()


graph_ = result

In [93]:
n_samples = knn_dists_np.shape[0]
n_samples

50312

In [95]:
_search_graph = scipy.sparse.lil_matrix(
    (n_samples, n_samples), dtype=np.int8
)
_search_graph.rows = knn_indices_np
_search_graph.data = (knn_dists_np != 0).astype(np.int8)
_search_graph = _search_graph.maximum(
    _search_graph.transpose()
).tocsr()

#### Don't need any of the below because metric is precomputed

In [96]:
# if callable(self.metric):
#     self._distance_func = self.metric
# elif self.metric in dist.named_distances:
#     self._distance_func = dist.named_distances[self.metric]
# elif self.metric == "precomputed":
#     warn(
#         "Using precomputed metric; transform will be unavailable for new data"
#     )
# else:
#     raise ValueError(
#         "Metric is neither callable, " + "nor a recognised string"
#     )

# if self.metric != "precomputed":
#     self._dist_args = tuple(self._metric_kwds.values())

#     self._random_init, self._tree_init = make_initialisations(
#         self._distance_func, self._dist_args
#     )
#     self._search = make_initialized_nnd_search(
#         self._distance_func, self._dist_args
#     )

## Actually do the embedding

n_epochs = None

```python
embedding_ = simplicial_set_embedding(
    self._raw_data,
    self.graph_,
    self.n_components,
    self._initial_alpha,
    self._a,
    self._b,
    self.repulsion_strength,
    self.negative_sample_rate,
    n_epochs,
    init,
    random_state,
    self.metric,
    self._metric_kwds,
    self.verbose,
)
```


Calls [simplicial_set_embedding](https://github.com/lmcinnes/umap/blob/master//umap/umap_.py#L857)

In [103]:
n_epochs = 0

graph = graph_.tocoo()
graph.sum_duplicates()
n_vertices = graph.shape[1]

if n_epochs <= 0:
    # For smaller datasets we can use more epochs
    if graph.shape[0] <= 10000:
        n_epochs = 500
    else:
        n_epochs = 200
n_epochs

200

In [104]:
graph.data[graph.data < (graph.data.max() / float(n_epochs))] = 0.0
graph.eliminate_zeros()

[Default initialization is "spectral"](https://github.com/lmcinnes/umap/blob/master//umap/umap_.py#L1242)

In [105]:

init = "spectral"

In [108]:
# We add a little noise to avoid local minima for optimization to come
initialisation = spectral_layout(
    data,
    graph,
    n_components,
    random_state,
    metric=metric,
    metric_kwds=metric_kwds,
)

### Witihin [spectral_layout](https://github.com/lmcinnes/umap/blob/master///umap/spectral.py#L199)

In [110]:
n_samples = graph.shape[0]
n_components, labels = scipy.sparse.csgraph.connected_components(graph)
n_components

3144

### if `n_components > 1`: [call `multi_component_layout`](https://github.com/lmcinnes/umap/blob/master////umap/spectral.py#L65)


```python
    if n_components > 1:
        warn(
            "Embedding a total of {} separate connected components using meta-embedding (experimental)".format(
                n_components
            )
        )
        return multi_component_layout(
            data,
            graph,
            n_components,
            labels,
            dim,
            random_state,
            metric=metric,
            metric_kwds=metric_kwds,
        )
```

In [120]:
dim = 2
dim

2

In [None]:
n_components

In [115]:
result = np.empty((graph.shape[0], dim), dtype=np.float32)

### If lots and lots of components...

```python
if n_components > 2 * dim:
    meta_embedding = component_layout(
        data,
        n_components,
        component_labels,
        dim,
        metric=metric,
        metric_kwds=metric_kwds,
    )
```

### [Within `component_layout`](https://github.com/lmcinnes/umap/blob/master/////umap/spectral.py#L11:5)




In [121]:
graph.shape

(50312, 50312)

In [None]:
index_to_leaf = dict(zip(leaf_to_index.values(), leaf_to_index.keys()))

In [None]:
for i, label in zip(range(2), range(n_components)):
    

In [None]:
def component_layout(sbt, n_features, n_components, component_labels, dim):
    component_centroids = np.empty((n_components, data.shape[1]), dtype=np.float64)
    


## Let's just do random embedding for now

In [None]:
embedding = random_state.uniform(
    low=-10.0, high=10.0, size=(graph.shape[0], n_components)
).astype(np.float32)

In [111]:
expansion = 10.0 / np.abs(initialisation).max()
embedding = (initialisation * expansion).astype(
    np.float32
) + random_state.normal(
    scale=0.0001, size=[graph.shape[0], n_components]
).astype(
    np.float32
)


epochs_per_sample = make_epochs_per_sample(graph.data, n_epochs)

head = graph.row
tail = graph.col

rng_state = random_state.randint(INT32_MIN, INT32_MAX, 3).astype(np.int64)
embedding = optimize_layout(
    embedding,
    embedding,
    head,
    tail,
    n_epochs,
    n_vertices,
    epochs_per_sample,
    a,
    b,
    rng_state,
    gamma,
    initial_alpha,
    negative_sample_rate,
    verbose=verbose,
)

NameError: name 'initialisation' is not defined

In [50]:
knn_dists

[[], []]

In [38]:
isinstance(node, SigLeaf)

True

In [22]:
len(adjacencies)

251560

In [23]:
len(tree.nodes)

100623

In [26]:
X = np.random.randn(100).reshape(10, 10)

np.argsort(X)[:, :n_neighbors]

array([[7, 0, 1, 2, 8],
       [4, 9, 6, 0, 1],
       [8, 4, 9, 2, 5],
       [6, 4, 2, 1, 0],
       [1, 0, 8, 4, 2],
       [8, 9, 0, 5, 2],
       [1, 2, 9, 0, 3],
       [4, 0, 6, 3, 2],
       [5, 9, 0, 7, 6],
       [8, 0, 2, 4, 1]])

In [None]:
knn

In [None]:
knn_indices = np.argsort(X)[:, :n_neighbors]
# Compute the nearest neighbor distances
#   (equivalent to np.sort(X)[:,:n_neighbors])
knn_dists = X[np.arange(X.shape[0])[:, None], knn_indices].copy()

rp_forest = []

In [11]:
len(adjacencies)

NameError: name 'adjacencies' is not defined

In [26]:
adjacencies[:10]

[]

In [20]:
adjacencies[:100]

[['A8-B002777-3_39_F-1-1_S59', 'P9-MAA001884-3_38_F-1-1_S151', 0.04],
 ['A8-B002777-3_39_F-1-1_S59', 'P9-MAA001888-3_39_F-1-1_S244', 0.044],
 ['A8-B002777-3_39_F-1-1_S59', 'P9-MAA001889-3_38_F-1-1_S151', 0.028],
 ['A8-B002777-3_39_F-1-1_S59', 'P9-MAA001892-3_38_F-1-1_S225', 0.034],
 ['A8-B002777-3_39_F-1-1_S59', 'P9-MAA001894-3_39_F-1-1_S35', 0.034],
 ['I15-B001750-3_38_F-1-1_S220', 'P9-MAA001884-3_38_F-1-1_S151', 0.042],
 ['I15-B001750-3_38_F-1-1_S220', 'P9-MAA001888-3_39_F-1-1_S244', 0.04],
 ['I15-B001750-3_38_F-1-1_S220', 'P9-MAA001889-3_38_F-1-1_S151', 0.042],
 ['I15-B001750-3_38_F-1-1_S220', 'P9-MAA001892-3_38_F-1-1_S225', 0.044],
 ['I15-B001750-3_38_F-1-1_S220', 'P9-MAA001894-3_39_F-1-1_S35', 0.028],
 ['D8-MAA000871-3_11_M-1-1_S63', 'P9-MAA001884-3_38_F-1-1_S151', 0.048],
 ['D8-MAA000871-3_11_M-1-1_S63', 'P9-MAA001888-3_39_F-1-1_S244', 0.036],
 ['D8-MAA000871-3_11_M-1-1_S63', 'P9-MAA001889-3_38_F-1-1_S151', 0.02],
 ['D8-MAA000871-3_11_M-1-1_S63', 'P9-MAA001892-3_38_F-1-1_S225', 0

In [48]:
# adjacencies = []

# n_neighbors = 5

# for i, leaf in zip(range(10), tree.leaves()):
#     print(leaf)
#     print(leaf.data)
#     print()

In [79]:
adjacencies = []

n_neighbors = 3

n_parent_levels = math.ceil(math.log2(n_neighbors)) + 1
n_parent_levels

3

In [93]:
# initialize search queue with top node of tree
matches = []
visited, queue = set(), [0]

ignore_abundance = True
downsample = False

def get_leaves_under(tree, node_position):
    visited, queue = set(), [node_position]
    leaves = []
    
    while queue:
        position = queue.pop(0)
        node = tree.nodes.get(position, None)
        
        if isinstance(node, Leaf):
            leaves.append(node)
        else:
            queue.extend(c.pos for c in tree.children(position))
    return leaves

# while the queue is not empty, load each node and apply search
# function.
while queue:
    position = queue.pop(0)
    node = tree.nodes.get(position, None)
    
    # repair while searching.
    if node is None:
        print("repairing...")
        if position in tree.missing_nodes:
            tree._rebuild_node(node)
            node = tree.nodes[position]
        else:
            continue
            
    # if we have not visited this node before,
    if position not in visited:
        visited.add(position)
        
    # Add 
    if isinstance(node, Leaf):
#         print(node.data)
        n = 1
        upper_internal_node = tree.parent(position)
        while n < n_parent_levels:
            upper_internal_node = tree.parent(upper_internal_node.pos)
            n += 1
        print("upper_internal_node:", upper_internal_node)
        leaves = get_leaves_under(tree, upper_internal_node.pos)

        
        similarities = []
        for leaf in leaves:
            # Ignore self-simililarity
            if leaf == node:
                continue
#             print(leaf.data)
            similarity = node.data.similarity(leaf.data, 
                                              ignore_abundance=ignore_abundance, 
                                              downsample=downsample)
            similarities.append([node.data.name(), leaf.data.name(), similarity])
        adjacent = sorted(similarities, key=lambda x: x[1])[-n_neighbors:]
#         print(adjacent)
        adjacencies.extend(adjacent)
            
#         break
        
    else:
        queue.extend(c.pos for c in tree.children(position))
    
#     print(node)
#     print(node.data)
#     print()
    visited.add(node)

upper_internal_node: NodePos(pos=1, node=<sourmash.sbt.Node object at 0x7f02e3da5ba8>)
upper_internal_node: NodePos(pos=1, node=<sourmash.sbt.Node object at 0x7f02e3da5ba8>)
upper_internal_node: NodePos(pos=1, node=<sourmash.sbt.Node object at 0x7f02e3da5ba8>)
upper_internal_node: NodePos(pos=1, node=<sourmash.sbt.Node object at 0x7f02e3da5ba8>)
upper_internal_node: NodePos(pos=1, node=<sourmash.sbt.Node object at 0x7f02e3da5ba8>)
upper_internal_node: NodePos(pos=1, node=<sourmash.sbt.Node object at 0x7f02e3da5ba8>)
upper_internal_node: NodePos(pos=1, node=<sourmash.sbt.Node object at 0x7f02e3da5ba8>)
upper_internal_node: NodePos(pos=1, node=<sourmash.sbt.Node object at 0x7f02e3da5ba8>)
upper_internal_node: NodePos(pos=2, node=<sourmash.sbt.Node object at 0x7f02e3da5c88>)
upper_internal_node: NodePos(pos=2, node=<sourmash.sbt.Node object at 0x7f02e3da5c88>)
upper_internal_node: NodePos(pos=2, node=<sourmash.sbt.Node object at 0x7f02e3da5c88>)
upper_internal_node: NodePos(pos=2, node=<s

In [25]:
adjacencies[:10]

[]

In [69]:
leaves

[<sourmash.sbtmh.SigLeaf at 0x7f02e3da5e48>,
 <sourmash.sbtmh.SigLeaf at 0x7f02e3da5860>]

In [61]:
tree.children(7)

[NodePos(pos=15, node=<sourmash.sbtmh.SigLeaf object at 0x7f02e3da5e48>),
 NodePos(pos=16, node=<sourmash.sbtmh.SigLeaf object at 0x7f02e3da5860>)]

In [47]:
len(visited)

311

In [36]:
tree.children(pos)

[NodePos(pos=19, node=<sourmash.sbt.Node object at 0x7f0320088dd8>),
 NodePos(pos=20, node=<sourmash.sbt.Node object at 0x7f0320088ba8>)]

In [37]:
is_leaf = False

for child in tree.children(pos):
    print(child)
    print(tree.children(child.pos))

NodePos(pos=19, node=<sourmash.sbt.Node object at 0x7f0320088dd8>)
[NodePos(pos=39, node=<sourmash.sbt.Node object at 0x7f0320039208>), NodePos(pos=40, node=<sourmash.sbt.Node object at 0x7f03200392e8>)]
NodePos(pos=20, node=<sourmash.sbt.Node object at 0x7f0320088ba8>)
[NodePos(pos=41, node=<sourmash.sbt.Node object at 0x7f0320055668>), NodePos(pos=42, node=<sourmash.sbt.Node object at 0x7f0320055940>)]


In [31]:
children.pos

20