In [1]:
import pickle
import pandas as pd
import matplotlib.pyplot as plt
import glob
import os
from collections import defaultdict
import csv
import ete3

In [2]:
taxa = sorted([x.split("/")[-1] for x in glob.glob("./output_dicts/*") if os.path.isdir(x)], key=float)

In [3]:
taxa

['10', '20', '30', '40', '50']

In [6]:
methods = ['PSP', 'PSML', 'SICP', 'SICML', 'BEP', 'BEML', 'fastmlp', 'fastmlml']

output_file = "indel_output.csv"

methods_dict = defaultdict(dict)
for taxon in taxa:
    for method in methods:
        with open(f'./output_dicts/{taxon}/gaps_{method}_1.p', 'rb') as handle:
            methods_dict[method][taxon] = pickle.load(handle)
        
methods_dict

defaultdict(dict,
            {'PSP': {'10': {'N0_N1': [],
               'N0_sequence5': [<gap_checker.Indel at 0x1191997b8>,
                <gap_checker.Indel at 0x1191995c0>,
                <gap_checker.Indel at 0x1191994e0>,
                <gap_checker.Indel at 0x119199518>,
                <gap_checker.Indel at 0x119199320>],
               'N1_sequence3': [<gap_checker.Indel at 0x119199390>,
                <gap_checker.Indel at 0x1191993c8>,
                <gap_checker.Indel at 0x119199400>],
               'N1_N2': [],
               'N2_N3': [],
               'N2_N7': [],
               'N3_N4': [<gap_checker.Indel at 0x119199208>],
               'N3_N5': [],
               'N7_sequence8': [<gap_checker.Indel at 0x1191991d0>,
                <gap_checker.Indel at 0x1191990b8>,
                <gap_checker.Indel at 0x1191990f0>],
               'N7_N8': [<gap_checker.Indel at 0x119199160>],
               'N4_sequence4': [],
               'N4_sequence6': [<gap_checker.In

In [7]:


def indel_present(indelible_branch, indelible_indel, method_gap_dict):
    for method_indel in method_gap_dict[branch]:
        if method_indel.category == indelible_indel.category and method_indel.start == indelible_indel.start and method_indel.end == indelible_indel.end:
            return True
        else:
            return False
    else:
        return False

with open (output_file, 'w+') as output:
    writer = csv.writer(output, delimiter=',', quotechar='\"', quoting=csv.QUOTE_MINIMAL)
    writer.writerow(['Taxa', 'Name', 'Category', 'Start', 'End', 'Terminal', 'Content', 'Gapped_Content', 'Width', 'Descendants', 'Root_Dist', 'Child_Extant'] + methods)

    for taxon in taxa:
        
        indelible_tree = ete3.Tree(f"./indelible_output/{taxon}/cleaned_N0_trees/1.nwk", format=1)
        with open(f'./output_dicts/{taxon}/gaps_indelible_1.p', 'rb') as handle:
            indelible_gap_dict = pickle.load(handle)


        for branch, indel_list in indelible_gap_dict.items():
            child_node = branch.split("_")[1]
            tree_node = indelible_tree&child_node
            descendants = len([x for x in tree_node.iter_descendants()])
            root_dist = tree_node.get_distance(indelible_tree.get_tree_root())
            child_extant = False
            for child in tree_node.children:
                if child.is_leaf():
                    child_extant = True

            for indel in indel_list:
                print (indel)
                method_presence = [indel_present(branch, indel, methods_dict[method][taxon]) for method in methods]
                print (method_presence)
                print (branch)
                print (branch.split("_")[1])
                
            
                
                writer.writerow([taxon, f'{branch}_{indel.start}_{indel.end}', indel.category, indel.start, indel.end, indel.terminal, indel.content, indel.gapped_content, indel.width, descendants, root_dist, child_extant] + method_presence)


                    
    "with open (output_file, 'w+') as output:\n",
    "    \n",
    "    writer = csv.writer(output, delimiter=',', quotechar='\"', quoting=csv.QUOTE_MINIMAL)\n",
    "    writer.writerow(['Name', 'Root', 'Type', 'Kingdom', 'Phylum', 'Class', 'Order', 'Family', 'Genus', 'Species', 'Extra'])\n",

            

<gap_checker.Indel object at 0x11982f9e8>
[False, False, False, False, False, False, False, False]
N0_N1
N1
<gap_checker.Indel object at 0x11982fc50>
[True, True, True, False, True, True, True, True]
N0_sequence5
sequence5
<gap_checker.Indel object at 0x11982fcc0>
[False, False, False, False, False, False, False, False]
N0_sequence5
sequence5
<gap_checker.Indel object at 0x11982fd30>
[False, False, False, False, False, False, False, False]
N0_sequence5
sequence5
<gap_checker.Indel object at 0x11982fd68>
[False, False, False, False, False, False, False, False]
N0_sequence5
sequence5
<gap_checker.Indel object at 0x11982fe10>
[True, True, True, False, True, True, True, True]
N1_sequence3
sequence3
<gap_checker.Indel object at 0x11982fe48>
[False, False, False, False, False, False, False, False]
N1_sequence3
sequence3
<gap_checker.Indel object at 0x11982fe80>
[False, False, False, False, False, False, False, False]
N1_sequence3
sequence3
<gap_checker.Indel object at 0x11982ff98>
[True, Tru

<gap_checker.Indel object at 0x119852c18>
[True, True, True, False, True, True, True, True]
N24_sequence6
sequence6
<gap_checker.Indel object at 0x119852c50>
[False, False, False, False, False, False, False, False]
N24_sequence6
sequence6
<gap_checker.Indel object at 0x119852cc0>
[False, False, False, False, False, False, False, False]
N24_sequence6
sequence6
<gap_checker.Indel object at 0x119852cf8>
[True, True, True, False, True, True, True, True]
N25_sequence32
sequence32
<gap_checker.Indel object at 0x119852da0>
[True, True, True, False, True, True, True, True]
N32_sequence49
sequence49
<gap_checker.Indel object at 0x119852dd8>
[True, True, True, False, True, True, True, True]
N34_sequence3
sequence3
<gap_checker.Indel object at 0x119852e48>
[True, True, True, True, True, True, True, True]
N41_sequence23
sequence23
<gap_checker.Indel object at 0x119852eb8>
[False, False, False, False, False, False, False, False]
N41_sequence23
sequence23
<gap_checker.Indel object at 0x119852ef0>
[F