In [1]:
from collections import Counter
import os
from glob import glob

from graphviz import Source
from IPython.display import display, Markdown
import matplotlib.pyplot as plt
from matplotlib.ticker import EngFormatter
from networkx.drawing.nx_agraph import to_agraph
from networkx.readwrite.gpickle import read_gpickle
import networkx as nx
import numpy as np
from scipy import stats
import pandas as pd
import seaborn as sns

import numbers


# Notebook-wide seaborn appearance: the 'white' style with the 'notebook' context preset.
sns.set_style('white')
sns.set_context('notebook')

def plot_style(figsize=(12, 6), labelsize=20, titlesize=24, ticklabelsize=14, **kwargs):
    """Build the rc context used for this notebook's figures.

    The result of ``plt.rc_context`` is returned, so the typical use is
    ``with plot_style(...):`` around the plotting code.

    Parameters
    ----------
    figsize : value stored under ``figure.figsize``.
    labelsize : value stored under ``axes.labelsize``.
    titlesize : value stored under ``axes.titlesize``.
    ticklabelsize : value stored under both ``xtick.labelsize`` and
        ``ytick.labelsize``.
    **kwargs : additional rc entries keyed by rcParams name; an entry whose
        key clashes with one of the defaults below replaces that default.
    """
    font_sizes = {
        'axes.labelsize': labelsize,
        'axes.titlesize': titlesize,
        'xtick.labelsize': ticklabelsize,
        'ytick.labelsize': ticklabelsize,
    }
    # Top, right and left spines are switched off; the bottom one is left alone.
    hidden_spines = dict.fromkeys(
        ('axes.spines.top', 'axes.spines.right', 'axes.spines.left'),
        False,
    )
    grid = {'axes.grid': False, 'axes.grid.axis': 'y'}

    # Caller-supplied entries come last so they win on duplicate keys.
    rc_params = {
        'figure.figsize': figsize,
        **font_sizes,
        **hidden_spines,
        **grid,
        **kwargs,
    }
    return plt.rc_context(rc=rc_params)

# Colour value for the xkcd name 'ocean blue', taken from seaborn's xkcd_rgb lookup.
blue = sns.xkcd_rgb['ocean blue']

In [2]:
def adjacency_matrix_to_list(adj_matrix):
    """Convert an adjacency matrix (pandas DataFrame) into an adjacency list.

    Parameters
    ----------
    adj_matrix : DataFrame whose row labels are source nodes and whose column
        labels are target nodes.

    Returns
    -------
    dict mapping every row label to the list of column labels whose entry in
    that row equals 1. Entries with any other value are not treated as edges.
    """
    targets = adj_matrix.columns
    return {
        source: list(targets[adj_matrix.loc[source] == 1])
        for source in adj_matrix.index
    }

def find_paths(adj_list, node, path, paths):
    """Depth-first search that records every path from ``node`` down to a leaf.

    Parameters
    ----------
    adj_list : mapping from node to the list of its children; every node that
        is visited must be present as a key.
    node : node at which this call starts.
    path : list of the nodes already walked to reach ``node``. It is extended
        while the call runs and restored to its incoming content on return.
    paths : output list; a copy of the current path is appended for each node
        reached that has no children.

    There is no cycle detection, so a cyclic ``adj_list`` recurses without end.
    """
    path.append(node)
    children = adj_list[node]
    if len(children) > 0:
        for child in children:
            find_paths(adj_list, child, path, paths)
    else:
        # Childless node: the walk so far is one complete root-to-leaf path.
        paths.append(list(path))
    # Backtrack so the caller sees `path` unchanged.
    path.pop()
    
def get_nodes(adj_matrix):
    """Return the internal nodes of an adjacency matrix, in index order.

    A node counts as internal when the sum of its row is at least 1. Despite
    the function's name, nodes whose row sums to less than 1 are not returned.
    """
    internal_nodes = []
    for label in adj_matrix.index:
        if adj_matrix.loc[label].sum() >= 1:
            internal_nodes.append(label)
    return internal_nodes


def create_leaf_internal_df(adj_matrix):
    """Create an all-zero table with leaf nodes as rows and internal nodes as columns.

    Parameters
    ----------
    adj_matrix : adjacency matrix as a DataFrame (rows are source nodes).

    Returns
    -------
    DataFrame filled with 0. Its index holds the nodes whose row sums to
    exactly 0 and its columns hold the nodes whose row sum is positive, both
    in the order they appear in ``adj_matrix.index``.
    """
    leaf_nodes, internal_nodes = [], []
    for label in adj_matrix.index:
        row_total = adj_matrix.loc[label].sum()
        if row_total == 0:
            leaf_nodes.append(label)
        elif row_total > 0:
            internal_nodes.append(label)
    return pd.DataFrame(0, index=leaf_nodes, columns=internal_nodes)

def set_groups(leaf_internal_df, paths):
    """Mark, for every leaf, the nodes that lie on its path.

    Parameters
    ----------
    leaf_internal_df : table with leaves as rows and internal nodes as
        columns. It is copied first; the argument itself is not modified.
    paths : iterable of node sequences, each ending in a leaf.

    Returns
    -------
    The copy, with a 1 written at (leaf, node) for every node that precedes
    the leaf in one of the paths.
    """
    membership = leaf_internal_df.copy()
    for route in paths:
        leaf = route[-1]
        ancestors = route[:-1]
        for ancestor in ancestors:
            membership.loc[leaf, ancestor] = 1
    return membership

In [3]:
# Donor identifiers; each one is used as a sub-directory name under the
# mt-SCITE output folder when the trees and metadata are loaded.
donors = [
    'A1_06',
    'A1_30',
    'JH',
    'YFV2003',
]


In [4]:
# Load, for every donor, one adjacency matrix and one metadata table per clone.
# Both result dicts are keyed donor -> clone.
graph_dicts_all = {}
meta_data_all = {}
for donor in donors:
    graph_dict, meta_data = {}, {}
    # glob() already returns a list of the matching clone directories.
    for path in glob(f'../../../mt-SCITE_output/{donor}/0.0556*'):
        # The clone id is whatever follows the '0.0556_' prefix in the path.
        clone = path.split('0.0556_')[1]
        # Clone 77 of donor JH is skipped (the reason is not recorded here).
        if (donor, clone) == ('JH', '77'):
            continue
        dot_graph = nx.drawing.nx_pydot.read_dot(f"{path}/0.0556_{clone}_map0.gv")
        # Drop the first row and column of the adjacency matrix -- presumably a
        # placeholder entry that is not part of the tree; TODO confirm.
        adj_matrix_clone = nx.to_pandas_adjacency(dot_graph).iloc[1:, 1:]
        graph_dict[clone] = adj_matrix_clone
        # The metadata CSV is transposed after reading (first CSV column is the index).
        meta_data[clone] = pd.read_csv(
            f"{path}/0.0556_meta_data_{clone}.csv", index_col=0
        ).transpose()
    graph_dicts_all[donor] = graph_dict
    meta_data_all[donor] = meta_data

def _node_order_key(label):
    """Sort key that orders all-digit node labels by their numeric value.

    Node labels are strings, so a plain comparison ranks '9' above '10'.
    Labels made only of decimal digits are compared as integers; any other
    label keeps plain string ordering and sorts after the numeric ones.
    """
    text = str(label)
    if text.isdecimal():
        return (0, int(text), text)
    return (1, 0, text)


# Replace each clone's adjacency matrix by a leaf x internal-node table in which
# a 1 marks every internal node on the path from the root to that leaf, then
# relabel the table with the clone's metadata index and columns.
for donor in donors:
    graph_dict = graph_dicts_all[donor]
    meta_data = meta_data_all[donor]
    for clone in graph_dict.keys():
        nodes = get_nodes(graph_dict[clone])
        # The root is taken to be the highest internal node label. Compare
        # numerically: string max() would pick '9' over '10' and start the
        # search from the wrong node once labels reach two digits.
        root = max(nodes, key=_node_order_key)
        adj_list = adjacency_matrix_to_list(graph_dict[clone])
        paths = []
        path = []
        find_paths(adj_list, root, path, paths)
        leaf_internal_df = create_leaf_internal_df(graph_dict[clone])
        leaf_internal_df = set_groups(leaf_internal_df, paths)
        # Leaf labels carry a one-character prefix; strip it and order rows numerically.
        leaf_internal_df.index = [int(x[1:]) for x in leaf_internal_df.index]
        # Columns use the same numeric-aware ordering as the root choice, so the
        # positional relabelling below lines up and the root column ends up last.
        ordered_columns = sorted(leaf_internal_df.columns, key=_node_order_key)
        leaf_internal_df = leaf_internal_df.sort_index()[ordered_columns]
        # Drop the last column, i.e. the root, which lies on every path.
        graph_dict[clone] = leaf_internal_df.drop(leaf_internal_df.columns[-1], axis=1)
        # Positional relabel: assumes the metadata rows and columns are in the
        # same numeric order as the leaves and internal nodes -- TODO confirm.
        graph_dict[clone].index = meta_data[clone].index
        graph_dict[clone].columns = meta_data[clone].columns
    graph_dicts_all[donor] = graph_dict