In [None]:
import networkx as nx
import pandas as pd
from graphviz import Source
from networkx.drawing.nx_agraph import graphviz_layout, to_agraph
# NOTE(review): networkx.readwrite.gpickle was removed in NetworkX 3.0 and
# read_gpickle is not used in the visible cells — confirm before upgrading.
from networkx.readwrite.gpickle import read_gpickle

In [None]:
def recursive_newick(tree, node, count):
    """Serialize the subtree below ``node`` as a Newick-style string.

    Nodes with exactly one successor are not written out; each of them adds 1
    to the branch length carried down to the next node that is written.

    Args:
        tree: Object exposing ``successors(node)`` (e.g. a directed graph).
        node: Node at which serialization starts.
        count: Branch length accumulated so far for ``node``.

    Returns:
        ``'<node>:<count>'`` for a leaf, or ``'(<child>,...):<count>'`` for a
        node with several successors, where every child starts with length 1.
        No trailing ``';'`` is appended.
    """
    offspring = list(tree.successors(node))

    # Follow a chain of single-child nodes, lengthening the branch as we go.
    while len(offspring) == 1:
        count += 1
        node = offspring[0]
        offspring = list(tree.successors(node))

    if not offspring:
        return f'{node}:{count}'

    subtrees = [recursive_newick(tree, kid, 1) for kid in offspring]
    return '(' + ','.join(subtrees) + f'):{count}'
             

In [None]:
# Path to the tree (Graphviz .gv file) that the following cells read.
tree_filename = '../../mt-SCITE_output/YFV2001/0.0556/0.0556_map0.gv'

In [None]:
# Parse the DOT file into a NetworkX graph.
tree = nx.drawing.nx_pydot.read_dot(tree_filename)

# Convert the graph once with to_agraph and render it inline.
g = to_agraph(tree)
display(Source(g))

In [None]:
# Function to convert adjacency matrix (in the form of a pandas dataframe) to adjacency list
def adjacency_matrix_to_list(adj_matrix):
    """Turn an adjacency-matrix DataFrame into a dict of neighbour lists.

    Args:
        adj_matrix: DataFrame whose index labels are nodes and whose columns
            are candidate neighbours.

    Returns:
        Dict mapping every index label to the list of column labels whose
        entry in that row equals exactly 1.
    """
    return {
        row_label: list(adj_matrix.columns[adj_matrix.loc[row_label] == 1])
        for row_label in adj_matrix.index
    }

# Function to perform DFS and find all root-to-leaf paths
def find_paths(adj_list, node, path, paths):
    """Collect every path from ``node`` down to a node without neighbours.

    Args:
        adj_list: Mapping from a node to the sequence of its neighbours.
        node: Node to visit next.
        path: Working list of nodes visited so far; it is extended during the
            call and restored to its incoming content before returning.
        paths: Output list; a copy of ``path`` is appended for every node
            reached that has no neighbours.
    """
    path.append(node)
    neighbours = adj_list[node]
    if len(neighbours) != 0:
        for following in neighbours:
            find_paths(adj_list, following, path, paths)
    else:
        # Dead end: record a snapshot, since `path` keeps being mutated.
        paths.append(path.copy())
    path.pop()
    
def get_nodes(adj_matrix):
    """Return the index labels whose row sum is greater than 1.

    Args:
        adj_matrix: Adjacency-matrix DataFrame indexed by node.

    Returns:
        List of labels, in index order, whose row total exceeds 1.
    """
    selected = []
    for label in adj_matrix.index:
        if adj_matrix.loc[label].sum() > 1:
            selected.append(label)
    return selected


# Function to create dataframe where leaf nodes are index and internal nodes are columns
def create_leaf_internal_df(adj_matrix):
    """Build an all-zero table with leaves as rows and internal nodes as columns.

    Args:
        adj_matrix: Adjacency-matrix DataFrame indexed by node.

    Returns:
        DataFrame filled with 0 whose index holds the labels with a row sum
        of 0 and whose columns hold the labels with a positive row sum.
    """
    leaves = []
    inner = []
    # Classify every row in a single pass over the index.
    for label in adj_matrix.index:
        row_total = adj_matrix.loc[label].sum()
        if row_total == 0:
            leaves.append(label)
        elif row_total > 0:
            inner.append(label)
    return pd.DataFrame(0, index=leaves, columns=inner)

def set_groups(leaf_internal_df, paths):
    """Flag, for each path, the nodes that precede its final node.

    Args:
        leaf_internal_df: Table with one row per leaf and one column per
            internal node; it is copied, not modified.
        paths: Iterable of node sequences; the last element of each sequence
            selects the row, all earlier elements select columns.

    Returns:
        Copy of ``leaf_internal_df`` with the selected cells set to 1.
    """
    flagged = leaf_internal_df.copy()
    for route in paths:
        terminal = route[-1]
        ancestors = route[:-1]
        for ancestor in ancestors:
            flagged.loc[terminal, ancestor] = 1
    return flagged


# Label of the node the root-to-leaf search starts from (specific to this tree file).
ROOT_NODE = '42'

# Adjacency matrix of the parsed tree as a DataFrame.
adj_matrix_raw = nx.to_pandas_adjacency(tree)
# Drop the first row and column before deriving the tree structure.
# NOTE(review): presumably the first node is not a real tree node (e.g. a DOT
# `node [...]` attribute statement parsed as a node) — confirm against the .gv file.
adj_matrix = adj_matrix_raw.loc[adj_matrix_raw.index[1:], adj_matrix_raw.columns[1:]]

adj_list = adjacency_matrix_to_list(adj_matrix)

# Compute the node selection once; both names stay bound in case other cells
# refer to either of them.
internal_nodes = get_nodes(adj_matrix)
nodes = list(internal_nodes)

# Fail with an explicit message when the configured root is not in this tree.
if ROOT_NODE not in adj_list:
    raise KeyError(f'root node {ROOT_NODE!r} is not present in the adjacency list')

# Enumerate every path from the root to a leaf, then mark each leaf's ancestors.
paths = []
find_paths(adj_list, ROOT_NODE, [], paths)
leaf_internal_df = create_leaf_internal_df(adj_matrix)
cells_path_from_root_df = set_groups(leaf_internal_df, paths)

In [None]:
# Display the table built in the previous cell: one row per leaf, one column
# per internal node, 1 where the node lies on the path from the root to the leaf.
cells_path_from_root_df

In [None]:
# Write the table to CSV; the relative path is resolved against the current
# working directory.
cells_path_from_root_df.to_csv('../../data/mt_path_annotation/yfv2001_mt_path.csv')