In [1]:
import numpy as np
import pandas as pd
from ete3 import Tree
import utils
from collections import deque

# Raise pandas' display limit so frames of up to 500 rows are rendered in full.
pd.set_option('display.max_rows', 500)

In [2]:
# Run configuration: every tunable value and every input/output path lives here.
dataset = 2
nclones = 15
ncells = 100                 # the last `ncells` columns of the observation file are cells
nbranches = 2 * nclones      # same value as 2 * (nclones + 1) - 2
fp = 0.05                    # false-positive rate of the sequencing error model
fn = 0.2                     # false-negative rate of the sequencing error model
rec_rate = 0.2               # prior weight given to scenarios with back mutations
iterations = 1               # repetitions averaged per site
d_dollo = 3                  # for Dollo-d model

# All files of one data set live under ./data/s<dataset>/
file_tree = './data/s{}/tree.nw'.format(dataset)
file_obs = './data/s{}/obs_genotype.csv'.format(dataset)
file_map = './data/s{}/cell_map_clone.csv'.format(dataset)
file_out = './data/s{}/probabilities.csv'.format(dataset)
file_loc = './data/s{}/predict_loc.csv'.format(dataset)
file_result = './data/s{}/result.csv'.format(dataset)

In [3]:
# generate all n-digit binary number combinations
def permute(n):
    """Return all n-digit binary patterns as lists of 0/1 integers.

    Patterns are produced for 0, 1, ..., 2**n - 1 in that order, each one
    left-padded with zeros to n digits. For n == 0 the result is [[0]],
    because the binary representation of 0 always has one digit.
    """
    return [[int(digit) for digit in bin(value)[2:].zfill(n)]
            for value in range(1 << n)]

In [4]:
def fill_missing(ncells, obs_geno_i, cells, cells_map_clone, clone_map_cells):
    """Impute missing genotypes of one site and enumerate what stays ambiguous.

    A cell whose observation equals '-' is resolved by a strict majority vote
    among the cells of its clone: '1' (or '0') is taken when more than
    ``len(clone) // 2`` members of the clone are observed as '1' (or '0');
    otherwise the cell stays '-'.

    Parameters
    ----------
    ncells : number of leading entries of the imputed vector that are
        classified, and the column count of the returned matrix.
    obs_geno_i : observations of one site, indexable by cell name.
    cells : ordered cell names.
    cells_map_clone : frame whose row 0 gives the clone of each cell
        (read with ``.loc[0, cell]``).
    clone_map_cells : mapping clone -> list of its cells.

    Returns
    -------
    dict with
        'obs_geno_i_mat'   : DataFrame (2**k rows, columns = cells), one row per
                             0/1 completion of the k still-ambiguous cells,
        'ambiguity_weight' : equal weights 1 / 2**k, one per row,
        'count_miss'       : (missing before imputation, missing after).
    """
    imputed = []
    old_count_miss = 0
    for cell in cells:
        observed = obs_geno_i[cell]
        if observed == '-':
            members = clone_map_cells[cells_map_clone.loc[0, cell]]
            majority = len(members) // 2
            votes = [str(obs_geno_i[member]) for member in members]
            if votes.count('1') > majority:
                resolved = '1'
            elif votes.count('0') > majority:
                resolved = '0'
            else:
                resolved = '-'
            imputed.append(resolved)
            old_count_miss += 1
        else:
            imputed.append(observed)

    # Sort column positions by state; anything that is neither '0' nor '1'
    # counts as ambiguous.
    normal_index, allele_index, ambiguity_index = [], [], []
    bucket_of = {'0': normal_index, '1': allele_index}
    for j in range(ncells):
        bucket_of.get(str(imputed[j]), ambiguity_index).append(j)

    new_count_miss = len(ambiguity_index)

    # One row per possible completion of the ambiguous cells; every column is
    # overwritten below, so the uninitialised buffer never leaks out.
    obs_geno_i_mat = pd.DataFrame(np.empty((2 ** new_count_miss, ncells)), columns = cells)
    if new_count_miss:
        obs_geno_i_mat.iloc[:, ambiguity_index] = permute(new_count_miss)
    obs_geno_i_mat.iloc[:, normal_index] = 0
    obs_geno_i_mat.iloc[:, allele_index] = 1

    # Every completion is considered equally likely.
    n_completions = obs_geno_i_mat.shape[0]
    ambiguity_weight = np.full(n_completions, 1 / n_completions)

    return {
        'obs_geno_i_mat': obs_geno_i_mat,
        'ambiguity_weight': ambiguity_weight,
        'count_miss': (old_count_miss, new_count_miss),
    }

In [5]:
def back_mutations(branches, i, d_dollo, queue, ancs_branches, desc_branches, output):
    """Enumerate sets of mutually unrelated branches, at most d_dollo per set.

    Starting from position ``i`` of ``branches`` (callers pass the last
    position), the branch at that position is either taken or dropped. After a
    branch is taken, only branches that are neither its ancestors nor its
    descendants (per ``ancs_branches`` / ``desc_branches``) remain eligible and
    the budget ``d_dollo`` shrinks by one.

    Every finished selection is appended to ``output`` as a list in reverse
    order of selection; the empty selection is included. ``queue`` holds the
    current selection and is restored to its initial content on return.
    """
    while True:
        # Budget used up or nothing left to choose from: record the selection.
        if d_dollo == 0 or i < 0:
            output.append(list(queue)[::-1])
            return

        # Take the current branch and continue among the unrelated branches.
        chosen = branches[i]
        queue.append(chosen)
        remains = sorted(
            set(branches)
            - set(desc_branches[chosen])
            - set(ancs_branches[chosen])
            - {chosen}
        )
        back_mutations(remains, len(remains) - 1, d_dollo - 1, queue, ancs_branches, desc_branches, output)

        # Undo the choice, then go on without the last branch of the list.
        queue.pop()
        branches = branches[0:-1]
        i = len(branches) - 1

In [6]:
# loop over all the branches, and find the possible final stage if the event occurs in that branch
def get_true_geno_mat(clones, clone_branch, desc_leaves, desc_branches, ancs_branches, d_dollo):
    """Enumerate candidate mutation scenarios and the clone genotypes they imply.

    Column layout of the returned frame:
        'type'                        'N' = single mutation, 'B' = with back mutation(s)
        'branch1'                     branch carrying the first mutation
        'branch2' .. 'branch<d+1>'    back-mutation branches, '-' where unused
        one column per clone          set to 1 for clones carrying the mutation

    For every branch1 in 1 .. len(desc_leaves) - 1 one 'N' row is emitted with
    all clones in desc_leaves[branch1] marked, followed by one 'B' row per
    non-empty branch set produced by back_mutations() from
    desc_branches[branch1]; in a 'B' row the clones below any back-mutation
    branch are left unmarked.

    Parameters
    ----------
    clones : list of clone names (become the genotype columns).
    clone_branch : not used by this function; kept for interface compatibility.
    desc_leaves : branch -> list of clone names below that branch.
    desc_branches : branch -> list of branches below that branch.
    ancs_branches : branch -> list of branches above that branch.
    d_dollo : maximum number of back mutations per scenario.

    Raises
    ------
    KeyError if a name in desc_leaves is not one of the columns.

    Notes
    -----
    Rows are created by utils.append_row; unmarked clone cells keep whatever
    value that helper initialises them with (the rendered output shows 0 -
    TODO confirm in utils).
    """
    num_branches = len(desc_leaves)

    col_name = ['type'] + ['branch' + str(i) for i in range(1, d_dollo + 2)] + clones
    col_pos = {name: pos for pos, name in enumerate(col_name)}

    def start_row(frame, row, row_type, first_branch):
        # Append a fresh row and fill in its descriptor columns.
        frame = utils.append_row(frame, col_name)
        frame.iloc[row, 0] = row_type
        frame.iloc[row, 1] = first_branch
        for slot in range(2, d_dollo + 2):
            frame.iloc[row, slot] = '-'
        return frame

    def mark_mutated(frame, row, leaves):
        # Single positional write. The former `frame.iloc[row][leaves] = 1`
        # was a chained assignment that only works when `frame.iloc[row]`
        # happens to be a view, and does nothing under pandas Copy-on-Write.
        positions = [col_pos[leaf] for leaf in leaves]
        if positions:
            frame.iloc[row, positions] = 1

    true_geno_mat = pd.DataFrame(columns = col_name)
    case = 0
    for branch1 in range(1, num_branches):
        # Scenario without back mutation.
        true_geno_mat = start_row(true_geno_mat, case, 'N', branch1)
        mark_mutated(true_geno_mat, case, desc_leaves[branch1])
        case += 1

        # Scenarios with 1..d_dollo back mutations below branch1.
        branches = desc_branches[branch1]
        output = []
        back_mutations(branches, len(branches) - 1, d_dollo, deque(), ancs_branches, desc_branches, output)

        for brs_lst in output:
            if not brs_lst:
                continue   # the empty selection is the 'N' row already added
            true_geno_mat = start_row(true_geno_mat, case, 'B', branch1)
            leaves = set(desc_leaves[branch1])
            for slot, back_branch in enumerate(brs_lst):
                true_geno_mat.iloc[case, slot + 2] = back_branch
                leaves -= set(desc_leaves[back_branch])
            mark_mutated(true_geno_mat, case, leaves)
            case += 1

    return true_geno_mat

In [7]:
# for given location, find the probability of mutation on each branch
def get_mutation_prob_Bj(lambda_, branch_time_lst, desc_branches, true_geno_mat, rec_rate, d_dollo):
    """Prior probability of every scenario row of ``true_geno_mat``.

    With e_l = exp(-(lambda_ + 1) * branch_time_lst[l]), each branch l
    contributes one factor per scenario (all divided by 1 + lambda_):

        lambda_ + e_l            default, and branches below a back mutation (0 -> 0)
        1 - e_l                  branch of the first mutation                (0 -> 1)
        1 + lambda_ * e_l        branches below the first mutation           (1 -> 1)
        lambda_ * (1 - e_l)      back-mutation branches                      (1 -> 0)

    The factors of branches 1.. (branch 0 is excluded) are multiplied, the
    product is weighted by ``rec_rate`` for 'B' rows and ``1 - rec_rate`` for
    'N' rows, and the result is normalised to sum to 1.

    Parameters
    ----------
    lambda_ : rate parameter of the factors above.
    branch_time_lst : branch lengths, indexed by branch id.
    desc_branches : branch -> list of branch ids below it.
    true_geno_mat : scenario table; column 0 is the type, column 1 the first
        mutation branch, columns 2 .. d_dollo + 1 the back-mutation branches
        ('-' where unused).
    rec_rate : prior weight of scenarios with back mutations.
    d_dollo : number of back-mutation columns.

    Returns
    -------
    1-D numpy array with one probability per row of ``true_geno_mat``.
    """
    prob_Bj_ind = np.array([np.exp(-(lambda_ + 1) * branch_time_lst[l])
                            for l in range(len(branch_time_lst))])

    num_cases = true_geno_mat.shape[0]
    prob_Bj_mat = np.tile(lambda_ + prob_Bj_ind, (num_cases, 1))

    for i in range(num_cases):
        # First mutation and the branches below it (same for 'N' and 'B').
        first_br = true_geno_mat.iloc[i, 1]
        prob_Bj_mat[i, first_br] = 1 - prob_Bj_ind[first_br]                                    # 0 --> 1
        prob_Bj_mat[i, desc_branches[first_br]] = 1 + lambda_ * prob_Bj_ind[desc_branches[first_br]]  # 1 --> 1

        if true_geno_mat.iloc[i, 0] == 'N':
            continue

        # Collect the back-mutation branches. The bound is tested first so a
        # row with every slot filled never reads past the last slot column.
        back_br = []
        j = 2
        while j < (d_dollo + 2) and true_geno_mat.iloc[i, j] != '-':
            back_br.append(true_geno_mat.iloc[i, j])
            j += 1
        prob_Bj_mat[i, back_br] = lambda_ * (1 - prob_Bj_ind[back_br])                         # 1 --> 0

        for br in back_br:
            prob_Bj_mat[i, desc_branches[br]] = lambda_ + prob_Bj_ind[desc_branches[br]]       # 0 --> 0

    prob_Bj_mat = prob_Bj_mat / (1 + lambda_)

    # Product over branches 1.. gives the probability of the whole scenario.
    prob_Bj_final = np.prod(prob_Bj_mat[:, 1:], axis = 1)

    # Prior for recurrent (back) mutation.
    for i in range(num_cases):
        if true_geno_mat.iloc[i, 0] == 'B':
            prob_Bj_final[i] *= rec_rate
        elif true_geno_mat.iloc[i, 0] == 'N':
            prob_Bj_final[i] *= (1 - rec_rate)

    return prob_Bj_final / np.sum(prob_Bj_final)

In [8]:
def generate_prob_binary(fp, fn, obs_geno_mat_i, ambiguity_weight, true_geno_mat, prob_Bj, d_dollo):
    """Posterior probability of every scenario given the observations of one site.

    Sequencing error model (rows = true state, columns = observed state):

        true 0: observed 0 -> 1 - fp,  observed 1 -> fp   (false positive)
        true 1: observed 0 -> fn,      observed 1 -> 1 - fn (false negative)

    For scenario k and observed completion j the likelihood is the product of
    the per-cell error probabilities. Likelihoods are averaged over the
    completions with ``ambiguity_weight`` and combined with the prior
    ``prob_Bj`` by Bayes' rule.

    Parameters
    ----------
    fp, fn : false-positive / false-negative rates.
    obs_geno_mat_i : DataFrame of 0/1 observations, one row per completion of
        the ambiguous cells, one column per cell.
    ambiguity_weight : weight of each row of ``obs_geno_mat_i``.
    true_geno_mat : scenario table; the first ``d_dollo + 2`` columns describe
        the scenario, the following columns hold the true 0/1 state per cell
        in the same order as the columns of ``obs_geno_mat_i``.
    prob_Bj : prior probability of each scenario row.
    d_dollo : number of back-mutation columns in ``true_geno_mat``.

    Returns
    -------
    1-D numpy array, one posterior probability per scenario row.

    Raises
    ------
    ValueError if ``true_geno_mat`` has fewer state columns than there are cells.
    """
    sequencing_error_model = np.array([[1.0 - fp, fp], [fn, 1.0 - fn]])

    num_rows, num_cols = obs_geno_mat_i.shape
    first_cell_col = d_dollo + 2
    if true_geno_mat.shape[1] < first_cell_col + num_cols:
        raise ValueError('true_geno_mat has fewer genotype columns than obs_geno_mat_i has cells')

    # Integer state matrices: (scenarios x cells) and (completions x cells).
    true_states = true_geno_mat.iloc[:, first_cell_col:first_cell_col + num_cols].to_numpy().astype(int)
    obs_states = obs_geno_mat_i.to_numpy().astype(int)

    # error_prob[k, j] = P(observed completion j | scenario k). Looking up all
    # cells of all scenarios at once replaces the per-element .iloc loops;
    # iterating over j keeps the temporary at (scenarios x cells).
    error_prob = np.empty((true_states.shape[0], num_rows))
    for j in range(num_rows):
        error_prob[:, j] = np.prod(sequencing_error_model[true_states, obs_states[j]], axis = 1)

    # Average over the possible completions of the ambiguous cells.
    weighted_error = np.dot(error_prob, ambiguity_weight)

    # Marginal likelihood of the site, then Bayes' rule per scenario.
    prob_S = np.dot(weighted_error, prob_Bj)
    return (weighted_error / prob_S) * np.asarray(prob_Bj)

In [9]:
# Load the tree, the observed genotypes and the cell -> clone assignment.
tree = Tree(file_tree, format = 3)
obs_geno_mat = pd.read_csv(file_obs, index_col = 0)
cells_map_clone = pd.read_csv(file_map, index_col = 0)

cn_inter = 0
cn_leaf = 0

# Name the root and collect the clone names: the leaves of the root's second
# child, in preorder.
root_node = tree.get_tree_root()
root_node.name = 'R'
childrens = root_node.get_children()
right_subtree = childrens[1]

clones = []
for node in right_subtree.traverse('preorder'):
    if node.is_leaf():
        clones.append(node.name)

# The last `ncells` columns are the per-cell observations; any columns before
# them are kept separately in true_loc. Observed value 2 is treated as 1.
true_loc = obs_geno_mat.iloc[:,0:(obs_geno_mat.shape[1] - ncells)]

obs_geno_mat1 = obs_geno_mat.iloc[:,(obs_geno_mat.shape[1] - ncells):]
obs_geno_mat1 = obs_geno_mat1.replace([2], 1)

# Cell names come from the genotype columns only. Taking them from every
# column of obs_geno_mat would also pick up the leading columns, which have no
# entry in cells_map_clone and do not match the ncells-wide matrices built later.
cells = list(obs_geno_mat1.columns)

# Branch bookkeeping provided by the project helpers.
edges = utils.get_edges(tree)
branch_id = edges['branch_id']
edge_lst = edges['edge_lst']
branch_time_lst = edges['branch_time_lst']
clone_branch = [branch_id.get(clone) for clone in clones]

descendants = utils.get_descendants(tree, branch_id)
desc_leaves = descendants['desc_leaves']
desc_branches = descendants['desc_branches']

ancs_branches = utils.get_ancestors(tree, branch_id)

# Enumerate all candidate mutation scenarios (one row each).
true_geno_mat = get_true_geno_mat(clones, clone_branch, desc_leaves, desc_branches, ancs_branches, d_dollo)

In [10]:
# Show the scenario table: descriptor columns first, then one 0/1 column per clone.
display(true_geno_mat)

Unnamed: 0,type,branch1,branch2,branch3,branch4,CN0,CN1,CN2,CN3,CN4,CN5,CN6,CN7,CN8,CN9,CN10,CN11,CN12,CN13,CN14
0,N,1,-,-,-,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
1,B,1,26,28,29,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0
2,B,1,24,28,29,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0
3,B,1,23,28,29,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0
4,B,1,22,28,29,1,1,1,1,1,1,1,1,1,0,1,1,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2837,B,27,28,29,-,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2838,B,27,29,-,-,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
2839,B,27,28,-,-,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
2840,N,28,-,-,-,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0


In [11]:
# Expand the clone-level scenario table to cell level: every cell receives the
# genotype column of the clone it belongs to.
cell_clones = [cells_map_clone.loc[0, cell] for cell in cells]

# Number of cells assigned to each clone (clones without cells stay at 0).
count_clone_cells = dict.fromkeys(clones, 0)
for clone_of_cell in cell_clones:
    count_clone_cells[clone_of_cell] += 1

# Select descriptor columns plus one clone column per cell in a single step,
# then rename the clone columns to the cell names. This avoids inserting
# `ncells` columns one at a time, which is slow and fragments the frame.
descriptor_cols = list(true_geno_mat.columns[0:(d_dollo + 2)])
true_geno_mat_cells = true_geno_mat[descriptor_cols + cell_clones].copy()
true_geno_mat_cells.columns = descriptor_cols + list(cells)

display(true_geno_mat_cells)

Unnamed: 0,type,branch1,branch2,branch3,branch4,C0,C1,C2,C3,C4,...,C90,C91,C92,C93,C94,C95,C96,C97,C98,C99
0,N,1,-,-,-,1,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1
1,B,1,26,28,29,1,1,1,1,1,...,0,0,0,0,0,0,0,0,0,0
2,B,1,24,28,29,1,1,1,1,1,...,0,0,0,0,0,0,0,0,0,0
3,B,1,23,28,29,1,1,1,1,1,...,0,0,0,0,0,0,0,0,0,0
4,B,1,22,28,29,1,1,1,1,1,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2837,B,27,28,29,-,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2838,B,27,29,-,-,0,0,0,0,0,...,1,1,0,0,0,0,0,0,0,0
2839,B,27,28,-,-,0,0,0,0,0,...,0,0,1,1,1,1,1,1,1,1
2840,N,28,-,-,-,0,0,0,0,0,...,1,1,0,0,0,0,0,0,0,0


In [12]:
# Invert the cell -> clone assignment into clone -> [cells], keeping the cells
# of each clone in the order they appear in `cells`.
clone_map_cells = {}

for cell in cells:
    clone = cells_map_clone.loc[0, cell]
    clone_map_cells.setdefault(clone, []).append(cell)

In [13]:
# Rate parameter handed to get_mutation_prob_Bj.
lambda_ = 0.1

# Prior probability of each scenario row of true_geno_mat (normalised to sum to 1).
prob_Bj = get_mutation_prob_Bj(lambda_, branch_time_lst, desc_branches, true_geno_mat, rec_rate, d_dollo)
# print(prob_Bj)

In [14]:
# Observed genotypes used for inference: one row per site, one column per cell.
obs_geno_mat1

Unnamed: 0,C0,C1,C2,C3,C4,C5,C6,C7,C8,C9,...,C90,C91,C92,C93,C94,C95,C96,C97,C98,C99
0,1,1,0,0,1,0,1,1,1,1,...,1,1,0,1,0,1,0,1,1,1
1,1,1,0,1,0,1,1,1,1,1,...,1,1,1,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,1,1,1,1,1,1,1,1,1,1
3,1,1,0,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,1,1,0
4,1,1,1,1,1,1,1,1,1,0,...,1,1,1,1,1,1,1,1,1,1
5,0,1,1,1,0,1,1,1,1,1,...,1,1,1,0,1,0,1,1,1,1
6,1,1,1,1,1,1,1,0,0,1,...,1,1,1,1,1,1,1,0,1,0
7,1,1,1,1,1,0,1,1,1,1,...,0,0,0,0,0,0,0,0,0,0
8,0,0,1,0,1,0,1,0,1,0,...,1,1,1,1,1,1,1,1,1,1
9,1,1,1,1,0,0,1,1,1,1,...,1,1,0,1,1,0,1,1,0,1


In [15]:
# Posterior over scenarios for every site; one entry (row) per site.
prob_matrix_all = []

for i in range(obs_geno_mat.shape[0]):
    
    print("Mutation Site" + str(i) + ": ", end = '')
    
    # Impute missing observations of this site and enumerate the completions
    # of whatever stays ambiguous.
    obs_geno_miss_dict = fill_missing(ncells, obs_geno_mat1.iloc[i,:], cells, cells_map_clone, clone_map_cells)
    obs_geno_i_mat = obs_geno_miss_dict['obs_geno_i_mat']
    ambiguity_weight = obs_geno_miss_dict['ambiguity_weight']
    # (missing before imputation, missing after imputation)
    count_miss = obs_geno_miss_dict['count_miss']
    print(count_miss)
#     if i == 0:
#         display(obs_geno_mat1.iloc[0,:])
#         display(obs_geno_i_mat)
        
    # One column per repetition; the columns are averaged below.
    generate_prob_br_all = np.zeros((true_geno_mat.shape[0], iterations))
     
    for j in range(iterations):
        
        # Posterior of each scenario given this site's observations.
        generate_prob_br = generate_prob_binary(fp, fn, obs_geno_i_mat, ambiguity_weight, true_geno_mat_cells, prob_Bj, d_dollo)

        generate_prob_br_all[:,j] = generate_prob_br
    
    generate_prob_br_all_mean = np.mean(generate_prob_br_all, axis = 1)
        
    prob_matrix_all.append(generate_prob_br_all_mean)
    
    print(u'\u2705')

# Rows = sites, columns = scenarios.
prob_matrix_all = np.array(prob_matrix_all)

# Position of the most probable scenario for each site.
max_prob_branch = np.argmax(prob_matrix_all, axis = 1)

prob_matrix_all_df = pd.DataFrame(prob_matrix_all)

# prob_matrix_all_ = pd.concat([true_loc, prob_matrix_all_df], axis = 1)

# Persist the full posterior matrix.
prob_matrix_all_df.to_csv(file_out)

# display(prob_matrix_all_df)
# print(max_prob_branch) 

Mutation Site0: (0, 0)
✅
Mutation Site1: (0, 0)
✅
Mutation Site2: (0, 0)
✅
Mutation Site3: (0, 0)
✅
Mutation Site4: (0, 0)
✅
Mutation Site5: (0, 0)
✅
Mutation Site6: (0, 0)
✅
Mutation Site7: (0, 0)
✅
Mutation Site8: (0, 0)
✅
Mutation Site9: (0, 0)
✅
Mutation Site10: (0, 0)
✅
Mutation Site11: (0, 0)
✅
Mutation Site12: (0, 0)
✅
Mutation Site13: (0, 0)
✅
Mutation Site14: (0, 0)
✅
Mutation Site15: (0, 0)
✅
Mutation Site16: (0, 0)
✅
Mutation Site17: (0, 0)
✅
Mutation Site18: (0, 0)
✅
Mutation Site19: (0, 0)
✅
Mutation Site20: (0, 0)
✅
Mutation Site21: (0, 0)
✅
Mutation Site22: (0, 0)
✅
Mutation Site23: (0, 0)
✅
Mutation Site24: (0, 0)
✅
Mutation Site25: (0, 0)
✅
Mutation Site26: (0, 0)
✅
Mutation Site27: (0, 0)
✅
Mutation Site28: (0, 0)
✅
Mutation Site29: (0, 0)
✅
Mutation Site30: (0, 0)
✅
Mutation Site31: (0, 0)
✅
Mutation Site32: (0, 0)
✅
Mutation Site33: (0, 0)
✅
Mutation Site34: (0, 0)
✅
Mutation Site35: (0, 0)
✅
Mutation Site36: (0, 0)
✅
Mutation Site37: (0, 0)
✅
Mutation Site38: (0, 0

In [16]:
# For every site take the most probable scenario row and keep only its
# descriptor columns (type and branch ids).
location = true_geno_mat.iloc[max_prob_branch, : d_dollo + 2]
# Relabel the rows 0..n-1 so that row i refers to site i.
location = location.set_index([pd.Index(range(location.shape[0]))])

print("Predicted mutation location: ")
display(location)

# Written before the ids are replaced below, so the CSV holds numeric branch ids.
location.to_csv(file_loc)

# Replace each branch id by its entry in edge_lst, stopping at the first '-'
# slot (the rendered output shows entries such as (R, I0)).
for i in range(location.shape[0]):
    j = 1
    while j < (d_dollo + 2) and location.iloc[i, j] != '-':
        location.iloc[i,j] = edge_lst[location.iloc[i,j]]
        j += 1
        
display(location)

Predicted mutation location: 


Unnamed: 0,type,branch1,branch2,branch3,branch4
0,N,1,-,-,-
1,B,1,12,22,29
2,B,1,3,10,21
3,N,1,-,-,-
4,N,1,-,-,-
5,N,1,-,-,-
6,B,1,9,19,-
7,B,1,17,25,-
8,B,1,7,22,-
9,N,1,-,-,-


Unnamed: 0,type,branch1,branch2,branch3,branch4
0,N,"(R, I0)",-,-,-
1,B,"(R, I0)","(I5, CN5)","(I11, CN9)","(I13, CN14)"
2,B,"(R, I0)","(I1, I2)","(I3, I5)","(I10, I11)"
3,N,"(R, I0)",-,-,-
4,N,"(R, I0)",-,-,-
5,N,"(R, I0)",-,-,-
6,B,"(R, I0)","(I4, CN3)","(I8, CN8)",-
7,B,"(R, I0)","(I9, CN6)","(I6, I12)",-
8,B,"(R, I0)","(I3, I4)","(I11, CN9)",-
9,N,"(R, I0)",-,-,-


In [17]:
# Result table: the predicted scenario columns placed in front of a copy of
# the observation file's contents.
genotype_mat = obs_geno_mat.copy()
location_pre = location.copy()

# NOTE(review): inserting a Series aligns on the index, so this presumably
# relies on obs_geno_mat being indexed 0..n-1 like `location` - confirm.
genotype_mat.insert(0, "pre_type", location_pre['type']) 
for i in range(1, d_dollo + 2):
    genotype_mat.insert(i, "pre_branch" + str(i), location_pre['branch' + str(i)])


display(genotype_mat)

genotype_mat.to_csv(file_result)

Unnamed: 0,pre_type,pre_branch1,pre_branch2,pre_branch3,pre_branch4,C0,C1,C2,C3,C4,...,C90,C91,C92,C93,C94,C95,C96,C97,C98,C99
0,N,"(R, I0)",-,-,-,1,1,0,0,1,...,1,1,0,1,0,1,0,1,1,1
1,B,"(R, I0)","(I5, CN5)","(I11, CN9)","(I13, CN14)",1,1,0,1,0,...,1,1,1,0,0,0,0,0,0,0
2,B,"(R, I0)","(I1, I2)","(I3, I5)","(I10, I11)",0,0,0,0,0,...,1,1,1,1,1,1,1,1,1,1
3,N,"(R, I0)",-,-,-,1,1,0,1,1,...,1,1,1,1,1,1,1,1,1,0
4,N,"(R, I0)",-,-,-,1,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1
5,N,"(R, I0)",-,-,-,0,1,1,1,0,...,1,1,1,0,1,0,1,1,1,1
6,B,"(R, I0)","(I4, CN3)","(I8, CN8)",-,1,1,1,1,1,...,1,1,1,1,1,1,1,0,1,0
7,B,"(R, I0)","(I9, CN6)","(I6, I12)",-,1,1,1,1,1,...,0,0,0,0,0,0,0,0,0,0
8,B,"(R, I0)","(I3, I4)","(I11, CN9)",-,0,0,1,0,1,...,1,1,1,1,1,1,1,1,1,1
9,N,"(R, I0)",-,-,-,1,1,1,1,0,...,1,1,0,1,1,0,1,1,0,1


In [18]:
# Render the tree with the project helper.
utils.show_tree(tree)