In [30]:
import pandas as pd
import numpy as np
from Bio import SeqIO
import itertools
import subprocess
import random
import os

In [54]:
class Neighbor_Joining_Tree(object):
    '''
    Builds a phylogenetic tree with the neighbor-joining algorithm.

    Typical use:
        1. Provide distances, either with upload_D() (precomputed matrix) or
           with upload_msa()/set_msa() + set_identifiers() followed by parse_D().
        2. Call run() to fill self.tree with edges [parent_id, child_id, length].
        3. Call print_tree() to write the edge list / Newick files and plot them.

    Leaves are numbered 1..n in input order; internal nodes are numbered from
    n + 1 upwards in the order they are created, so the last node created
    (the root used for traversal) has id 2n - 2.
    '''

    def __init__(self, verbose=True):
        '''
        verbose -- when True (the default) progress information, including
                   every Q matrix, is printed while the tree is built.
        '''
        np.set_printoptions(suppress=True)  # NOTE: changes numpy's global print options
        self.verbose = verbose
        self.D = None                # square distance matrix
        self.identifiers = None      # leaf labels, in the same order as D
        self.tree = np.zeros(shape=(0, 3))  # rows of [parent_id, child_id, branch_length]
        self.sums = None             # column sums of the current D
        self.n = 0                   # number of nodes currently in D
        self.node_ids = None         # node id for each row/column of the current D
        self.internal_node_id_tracker = 0  # id given to the next internal node
        self.Q = None                # most recent join-score matrix
        self.msa = None              # aligned sequences
        self.root_id = 0

    def _log(self, *args):
        # Print progress output unless verbose has been switched off.
        if getattr(self, 'verbose', True):
            print(*args)

    def initialize_other_variables(self):
        '''
        Derives the bookkeeping state (sums, n, node ids, root id) from self.D.

        self.D is converted to a float array first so that later averaged
        distances are never truncated by an integer dtype.
        Raises ValueError if self.D is not a square 2-D matrix.
        '''
        D = np.asarray(self.D, dtype=float)
        if D.ndim != 2 or D.shape[0] != D.shape[1]:
            raise ValueError('Distance matrix must be square, got shape ' + str(D.shape))
        self.D = D
        self.sums = self.D.sum(axis=0)
        self.n = len(self.D)
        self.node_ids = np.arange(1, self.n + 1, 1) # 1 based counting, not 0 based
        self.internal_node_id_tracker = self.n + 1
        self.root_id = self.n * 2 - 2

    def upload_D(self, filename):
        '''
        Reads a tab-delimited distance matrix whose first column holds row labels
        and whose header row holds the identifiers.
        '''
        D = pd.read_csv(filename, delimiter='\t', index_col=0)
        self.identifiers = D.columns
        self.msa = D.columns # placeholder kept from the original code: no sequences exist for this input
        self.D = np.asarray(D.values, dtype=float)

    def upload_msa(self, fna):
        '''
        Parses a FASTA file, storing the sequences in self.msa and their ids in
        self.identifiers (same order).
        '''
        self.msa = []
        self.identifiers = []
        for record in SeqIO.parse(fna, 'fasta'):
            self.msa.append(record.seq)
            self.identifiers.append(record.id)

    def set_msa(self, msa):
        '''Stores an already-parsed list of aligned sequences.'''
        self.msa = msa

    def set_identifiers(self, identifiers):
        '''Stores the leaf labels (one per sequence, same order as the msa).'''
        self.identifiers = identifiers

    def parse_D(self):
        '''
        Fills self.D with the pairwise dissimilarity of every pair in self.msa.

        Each unordered pair is scored once and mirrored across the diagonal;
        the diagonal stays 0.
        '''
        count = len(self.msa)
        D = np.zeros((count, count), dtype=float)
        for i, j in itertools.combinations(range(count), 2):
            score = self.calculate_dissimilarity_score(self.msa[i], self.msa[j])
            D[i, j] = score
            D[j, i] = score
        self.D = D

    def calculate_dissimilarity_score(self, a, b):
        '''
        Fraction of aligned positions at which sequences a and b differ.

        Sequences a and b must be the same, non-zero length.
        Returns dissimilarity score as float.
        Raises ValueError for empty or unequal-length sequences.
        '''
        length = len(a)
        if length == 0 or length != len(b):
            raise ValueError('Sequences must be non-empty and of equal length')

        mismatches = sum(1 for char_a, char_b in zip(a, b) if char_a != char_b)
        return float(mismatches) / length

    def calculate_q_value(self, distance, sums_a, sums_b):
        '''Join score of a pair: (n - 2) * d(a, b) - sum(a) - sum(b).'''
        return (self.n - 2) * distance - sums_a - sums_b

    def calculate_Q(self):
        '''
        Returns the matrix of join scores for the current D.
        The diagonal is set to +inf so it is never chosen as the minimum.
        '''
        self._log('Calculating Q matrix...')
        D = np.asarray(self.D, dtype=float)
        sums = np.asarray(self.sums, dtype=float)

        # Broadcasting evaluates the same formula, in the same operand order,
        # as scoring each cell individually: Q[j, i] uses D[i, j], sums[i], sums[j].
        Q = self.calculate_q_value(D.T, sums[np.newaxis, :], sums[:, np.newaxis])
        np.fill_diagonal(Q, np.inf)
        return Q

    def calculate_branch_length(self, distance, sums_f, sums_g):
        '''
        Length of the branch from f to the new node u joining f and g:

            d(f, g) / 2 + (sum(f) - sum(g)) / (2 * (n - 2))

        The difference of sums is signed: taking its absolute value would give
        f the longer branch even when g is the more distant member of the pair.
        With only two nodes left the correction term is undefined, so the
        distance is split evenly.
        '''
        if self.n <= 2:
            return distance / 2
        return distance / 2 + (sums_f - sums_g) / (2 * (self.n - 2))

    def calculate_next_branch_length(self, distance, first_branch_length):
        '''
        Length of the remaining branch once the first one is known.

        NOTE(review): the absolute value is kept from the original code so the
        reported length is never negative; the textbook value is the signed
        difference distance - first_branch_length.
        '''
        return abs(distance - first_branch_length)

    def get_branches(self, f, g, h=None):
        '''
        Creates the new internal node u joining rows f and g of D (and h, when
        given, for the final three-way join).

        Returns (branch_fu, branch_gu, branch_hu), each [u_id, child_id, length];
        branch_hu is None when h is None.
        Side effects: advances the internal node id counter and prepends u's id
        to self.node_ids.
        '''
        # Get node ids for the pair
        f_id = self.node_ids[f]
        g_id = self.node_ids[g]

        # Track new, ancestral node id
        u_id = self.internal_node_id_tracker
        self.internal_node_id_tracker += 1

        # Calculate lengths of branches joining f and g to u
        distance_fg = self.D[f, g]
        delta_fu = self.calculate_branch_length(distance_fg, self.sums[f], self.sums[g])
        delta_gu = self.calculate_next_branch_length(distance_fg, delta_fu)

        branch_fu = [u_id, f_id, delta_fu]
        branch_gu = [u_id, g_id, delta_gu]
        branch_hu = None

        # Compare against None: index 0 is a valid row and must not be skipped
        if h is not None:
            h_id = self.node_ids[h]
            distance_fh = self.D[f, h]
            delta_hu = self.calculate_next_branch_length(distance_fh, delta_fu)
            branch_hu = [u_id, h_id, delta_hu]

        # Insert new node last: doing it earlier would shift the f/g/h lookups above
        self.node_ids = np.insert(self.node_ids, 0, u_id, axis=0)

        return branch_fu, branch_gu, branch_hu

    def calculate_uk(self, fk, gk, fg):
        '''
        Calculates and returns distance of node u to node k.

        fk -- distance of node f to node k
        gk -- distance of node g to node k
        fg -- distance of node f to node g

        where f and g are members of the pair just joined.
        '''
        return (fk + gk - fg)/2

    def update_D(self, f, g):
        '''
        Prepends a row and column for the new node u (joining f and g) to self.D.

        Rows/columns f and g are NOT removed here; they end up at f + 1 and g + 1.
        The result is always a float matrix, so averaged distances such as 3.5
        are preserved even if D held integers.
        '''
        D = np.asarray(self.D, dtype=float)
        row_u = self.calculate_uk(D[f, :], D[g, :], D[f, g])

        size = len(D)
        grown = np.zeros((size + 1, size + 1), dtype=float)  # [0, 0] is u's own diagonal
        grown[0, 1:] = row_u
        grown[1:, 0] = row_u
        grown[1:, 1:] = D
        self.D = grown

    def preorder_traversal(self, root):
        '''
        Returns the edges below root as a list of tree rows, parent before
        children (each child's subtree follows its own edge).
        '''
        traversed_tree = []
        children = np.where(self.tree[:,0] == root)[0]

        for child in children:
            traversed_tree.append(self.tree[child])
            traversed_tree += self.preorder_traversal(self.tree[child][1])

        return traversed_tree

    def tree_to_newick(self, root):
        '''
        Returns the Newick string (without the trailing ';') of the subtree
        below root, or '' when root has no children.

        Node ids up to len(self.identifiers) are leaves and are written with
        their identifier; larger ids are internal nodes and carry only a length.
        '''
        children = np.where(self.tree[:,0] == root)[0]
        if children.size == 0:
            return ''

        leaf_count = len(self.identifiers)
        parts = []
        for node in children:
            # Get its id and branch length to root
            node_id = int(self.tree[node][1])
            branch_length = self.tree[node][2]

            label = '' if node_id > leaf_count else str(self.identifiers[node_id - 1])
            parts.append(self.tree_to_newick(node_id) + label + ':' + str(branch_length))

        return '(' + ','.join(parts) + ')'

    def run(self):
        '''
        Runs neighbor joining on self.D and stores the edges in self.tree.

        The input matrix is restored afterwards and the tree is rebuilt from
        scratch, so run() can safely be called more than once.
        Raises ValueError if no distance matrix has been provided.
        '''
        if self.D is None:
            raise ValueError('No distance matrix: call upload_D() or parse_D() before run()')

        self.initialize_other_variables()
        input_D = self.D  # never modified in place: every update below builds a new array
        self.tree = np.zeros(shape=(0, 3))

        try:
            while len(self.D) >= 3:
                self._log(len(self.D))
                # Calculate the join score of each pair
                self.Q = self.calculate_Q()
                self._log(self.Q)

                # Find the pair f,g with the minimum join score
                if len(self.D) == 3:
                    # If on the last iteration, we have 3 nodes to join
                    f, g, h = 0, 1, 2
                else:
                    # First minimum in row-major order: row -> g, column -> f
                    g, f = np.unravel_index(np.argmin(self.Q), self.Q.shape)
                    g, f = int(g), int(f)
                    h = None
                self._log('Joining ' + str(self.node_ids[f]) + ' and ' + str(self.node_ids[g]))
                if h is not None:
                    self._log(self.node_ids[h])

                # Get branch lengths for f,g to their ancestral node u
                branch_fu, branch_gu, branch_hu = self.get_branches(f, g, h)
                self.tree = np.vstack((self.tree, branch_fu, branch_gu))
                if branch_hu is not None:
                    self.tree = np.vstack((self.tree, branch_hu))
                    break # since we're on the last iteration, we can quit

                # Update D to include distances between u and every other node k
                self.update_D(f, g)

                # Remove f,g from D (shifted by one because u was prepended)
                joined = [f + 1, g + 1]
                self.D = np.delete(self.D, joined, axis=1)
                self.D = np.delete(self.D, joined, axis=0)

                # update variables
                self.sums = self.D.sum(axis=0)
                self.node_ids = np.delete(self.node_ids, joined)
                self._log(self.node_ids)
                self.n = len(self.D)

                self._log()
        finally:
            self.D = input_D

    def print_tree(self, additional_id):
        '''
        Writes edges/edges<additional_id>.txt and newick/tree<additional_id>.txt,
        then runs the Newick plotting R script on the result.

        A missing Rscript executable or a failing script is reported with a
        printed warning instead of being silently ignored.
        '''
        edges_dir = 'edges/'
        newick_dir = 'newick/'
        os.makedirs(edges_dir, exist_ok=True)
        os.makedirs(newick_dir, exist_ok=True)

        self._log('PRINTING TREE FILES FOR ' + str(additional_id) + '...')
        # Make edges text file
        traversed_tree = self.preorder_traversal(self.root_id)
        edges_file = edges_dir + 'edges' + str(additional_id) + '.txt'
        # reshape keeps the three-column layout even when there are no edges
        np.savetxt(edges_file, np.reshape(traversed_tree, (-1, 3)), fmt='%i\t%i\t%1.10f')

        # Make newick tree
        newick_tree = self.tree_to_newick(self.root_id) + ';'
        newick_file = newick_dir + 'tree' + str(additional_id) + '.txt'
        with open(newick_file, 'w') as text_file:
            text_file.write(newick_tree)

        # Visualize tree from the newick file. The command is passed as an
        # argument list so the file name is never interpreted by a shell.
        cmd_newick = ['Rscript', 'hw3-plot-newick.r', newick_file, 'tip-labels.txt']
        self._log(' '.join(cmd_newick))
        try:
            result = subprocess.run(cmd_newick, universal_newlines=True,
                                    stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        except OSError as err:
            print('WARNING: could not run Rscript: ' + str(err))
        else:
            if result.returncode != 0:
                print('WARNING: Rscript exited with code ' + str(result.returncode))
                print(result.stderr)


In [55]:
# Build the neighbor-joining tree for the alignment in hw3.fna.
# `njt` is read again by the bootstrapping cell further down (njt.msa, njt.identifiers).
njt = Neighbor_Joining_Tree()
njt.upload_msa('hw3.fna')  # load the aligned sequences and their ids
njt.parse_D()  # pairwise dissimilarity matrix from the alignment
njt.run()  # join nodes until the tree is complete
njt.print_tree('')  # empty suffix -> edges/edges.txt and newick/tree.txt, then the R plot script

61
Calculating Q matrix...
[[         inf -39.74831763 -37.30215343 ... -15.3795424  -14.09959623
  -14.11507402]
 [-39.74831763          inf -37.20188425 ... -16.90713324 -11.93472409
  -10.63997308]
 [-37.30215343 -37.20188425          inf ... -15.2153432  -12.62516824
  -12.2833109 ]
 ...
 [-15.3795424  -16.90713324 -15.2153432  ...          inf -12.89703903
  -12.5551817 ]
 [-14.09959623 -11.93472409 -12.62516824 ... -12.89703903          inf
  -15.04710633]
 [-14.11507402 -10.63997308 -12.2833109  ... -12.5551817  -15.04710633
           inf]]
Joining 2 and 1
[62  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49
 50 51 52 53 54 55 56 57 58 59 60 61]

60
Calculating Q matrix...
[[         inf -36.77153432 -13.63829071 ... -15.6628533  -12.53667564
  -11.89703903]
 [-36.77153432          inf -13.21063257 ... -14.70827725 -12.1090175
  -11.76211306]
 [-13.63829071 -13.21063257          inf ...

Joining 63 and 91
[99 98 97 95 84 83 82 79 74  6 12 19 24 36 38 41 42 43 45 47 53 58 61]

23
Calculating Q matrix...
[[        inf -4.01166379 -3.87675523 -3.41868532 -3.42599968 -3.3193467
  -2.89695647 -3.20839974 -3.15995277 -2.97817545 -3.6520693  -3.25957792
  -3.39474048 -3.71359668 -2.84487402 -3.20909108 -3.49112969 -3.11350625
  -3.24039366 -3.00770103 -3.8292753  -3.07673072 -2.90073392]
 [-4.01166379         inf -4.06663914 -3.87320591 -3.68184566 -3.36326942
  -2.62313231 -3.31028532 -3.73625058 -2.99946506 -3.33331011 -2.94965117
  -3.95392544 -3.76579981 -2.60936054 -3.3683875  -3.34901923 -2.56024589
  -3.38467493 -3.0769066  -3.06801107 -3.41223424 -2.53207147]
 [-3.87675523 -4.06663914         inf -3.29853001 -3.97437698 -3.2336379
  -3.03514992 -3.40002943 -3.53706362 -3.48341183 -3.19324527 -3.18750419
  -3.3600942  -3.49777505 -3.02121255 -3.22412725 -3.38604473 -3.01626008
  -3.0988749  -3.00352665 -3.25010934 -2.818403   -3.06658393]
 [-3.41868532 -3.87320591 -3.2

In [None]:
# Sanity check on a precomputed distance matrix.
# A separate name is used so this cell does not replace `njt`: the bootstrapping
# cell below resamples njt.msa, and an instance filled by upload_D() holds
# column labels there instead of sequences.
# NOTE(review): the empty suffix writes to the same edges/edges.txt and
# newick/tree.txt files as the hw3.fna cell above - confirm that is intended.
njt_example = Neighbor_Joining_Tree()
njt_example.upload_D('example.txt')
njt_example.run()
njt_example.print_tree('')

### Bootstrapping

In [None]:
random.seed(100)  # fixed seed so the bootstrap replicates are reproducible

alignment_length = len(njt.msa[0])

for i in range(0, 100):
    print('DOING BOOTSTRAP ID: ' + str(i))
    # Sample alignment columns with replacement: the same column may be drawn more than once.
    bootstrap = [random.randint(0, alignment_length - 1) for _ in range(alignment_length)]

    # Apply the same sampled columns to every sequence. Indexing the sequence
    # directly avoids rebuilding list(seq) for each sampled column.
    permuted_msa = [''.join(seq[column] for column in bootstrap) for seq in njt.msa]

    njt2 = Neighbor_Joining_Tree()
    njt2.set_msa(permuted_msa)
    print(njt2.msa[0])
    njt2.set_identifiers(njt.identifiers)
    njt2.parse_D()
    njt2.run()
    njt2.print_tree(i)

DOING BOOTSTRAP ID: 0
AGAGAAACGG-ACCCACA-AGG-AA-GAGG-A-GAGCTAACT-GACA-AACCAGTGCATCG-GG-TTCAT-ACTGTGGACCGGT-GTCGTATGGCTGGGGG-GGGGCCACAATCGTCA-CA-G-CG-TTG-CTCCCCTGAGGGACTAATG-CAAGGGTGGTCAAAAC-CATGCTCTGCTG-A-GACTTTGAACGCAGCTGGCA-GATGTGCT--GGCGCAAAAT-CATCGGGGT-AGAGAGTAATCGGGGGCGAGCATAGCG-CGTGGAGCGCGCCTATGACAGGGGGCCT-TGAACCGCGAC--GGAGGTTTCAACCCTGGAGAAACATA-CGAG-CGCTTTCAAATTGAC-CCGGGGGC--AGGA--ACTAGTC-CAAGGTCGAACTTATG-CC-ATTTTCATTACCGTCCC-CCGGAGGCAGGTGAGGTGG-GGTTACAT-A-AAAAT-GCGCTCCCA-AAGTAGTCGATGTGTGCTG-GATATCAGGGCCGTG--GTAGAG-CTG-AC-GGGAAGGCCTCCAT-GCACCGTGAGG---CAT-GAGA-GGCAGT-TAAA--GGTAGGTGGTGGCGGCCCGGA--GCCCGATTATCTGGAA--TGTGCAGGGGTCGATA-TCC-CG-CT-CTCTAGATTCGGGGCTACA-CCGGCGGAGAGGACTCCGGAGACGAGCTTCCTACCTCGGAGAATTATAATGAG-TTAGGATTGATTCGGTG-TT-G-CCTTCGGCTAATT-AGACCAGG-ACA-ACACGCAACCCGGCAGCTTAATGTCT--GAACG-GG-GCGCT-CTTTAGTCAGCGATAGCCGTCT-GCTGGCCGC-CC--CCAT-AGCTATAGGGG-GGGG-CATGATCGGGTA-TAGGTCAGAG-GCT-GA-CCCACCTCTCGTTATCAGAGTGA-TCTG-GAGTGGCAAG-TGATGGAGCATGC-TG-C-AGC-G-AATCGGTGCTTGATGTCCTCAC-T

[110 109 108 107 106 105 104 102  82   6  12  36]

12
Calculating Q matrix...
[[        inf -1.01742492 -1.0543217  -0.98329666 -0.77301568 -1.04346426
  -0.9938449  -1.00664577 -0.95091007 -1.22525957 -1.00747627 -0.87276651]
 [-1.01742492         inf -1.08885035 -1.12802046 -1.1372886  -1.0836709
  -0.83450026 -0.87567139 -0.93749121 -1.02425659 -0.92760384 -0.87364778]
 [-1.0543217  -1.08885035         inf -1.18258211 -1.18301782 -0.90312152
  -0.8712886  -0.9054181  -0.93485232 -1.05442389 -0.96450062 -0.78604927]
 [-0.98329666 -1.12802046 -1.18258211         inf -1.14879459 -0.94386885
  -0.83777677 -1.0018939  -0.89326873 -1.01872859 -0.93048769 -0.85970794]
 [-0.77301568 -1.1372886  -1.18301782 -1.14879459         inf -0.86239423
  -1.12201363 -0.85202047 -0.88066608 -0.79793281 -1.07728948 -1.09399291]
 [-1.04346426 -1.0836709  -0.90312152 -0.94386885 -0.86239423         inf
  -1.13737387 -0.98159696 -0.98687342 -0.92958597 -1.0390234  -1.01745293]
 [-0.9938449  -0.83450026 -0.

DOING BOOTSTRAP ID: 1
TTTGGGTGCGTGGG-ACTTG---GGTAGTT-TGGGAGAGGTTATCGGAAAATCATGCGGCCC-A-GGCAGT-CCTGG-CACA-GAGGCGA-AC-AGGCGAAGGCGAATACAAGGATGGTAAATCC-AAT-ACG-TGTC-TCTATTGCTACCGGCGAG-CTTC-G-GATCTCTACGCCA-ACCCGCCGAGGG-ACATAATCCA-TAGGAAGGGG-A-G-TCGTTACTGA-GCCTTTGA-AGTTGCTCAGCG-CAGTTCGACGCGTCGT-CT-AGAGCAGC-ATAACCT-GA-T-GTCTAGGTGCA-CCCACGCGGC-TG--TTGACATCTACGGAAGGACCGAAAGAACTACATG-G--TAAGGGCTCAGCG--CGACTGCGGAGT-CGAGC-AT--G--GTCCCGTACATTAA-GGCCCGGTG-CCCGCA-T--CTCGTGGCTCCGATCAGATCGG-ACAATTTCGTG-CCCATGTGGAATAT-G-GCC-GGCCAGGCAGA-GGGC-GGCCTGCGTC-ACCAACTACTCTTTC-CCGGCGCCCTGTCCCCG--CGAAC-TCTAAATCGTCAAG--AGT--TGTCAACTTACCTGCAAGACTC-TAACCCGACGTCCTTATACCCGGAG-GG--C--CAAGTAGCGAGGGCGGCAGCCGACGGCGCGGATGATTGAGCAATATGGGTC-G--GA-GA-TAGAAAAGA-CAGTAGGGTCCCTGCTATAGAAGCGGGAGGTGGCGGATTCTGTTCGG-TAAAAGTGGGGC-GAC-TC--GCGTATT-ACGGATTATCAGGGTGAACA-TGCG-GTTACAGGTCATACTAAAT-GAGGG-TATCCCCCTTCGACC--GACGGTAGA-ATATACCGGGGGCGCTAGCT-TGCGAGTAGCG-AAGCAGGCTATGTGACTCTGCACAACCGTCATG-AGGCTGGCCGGAACCCTTCTGGCATGGGAGC-GGTGCAGGTAGC-GC-

  -2.46433904         inf]]
Joining 39 and 98
[103 102 101 100  99  87  84  81  77   6  12  36  38  41  43  47  53  58
  61]

19
Calculating Q matrix...
[[        inf -2.27279873 -3.0700562  -3.20529039 -2.78648703 -2.69169804
  -2.08761198 -2.46426018 -2.80198257 -2.29194093 -2.50972357 -2.8025714
  -2.08223366 -2.71417343 -1.96295371 -2.483752   -2.54884915 -2.56905861
  -1.96352151]
 [-2.27279873         inf -2.59548889 -2.45620518 -2.62437765 -2.10192526
  -2.41161781 -2.58520409 -2.127839   -3.0563909  -2.6535477  -2.57459203
  -2.74658269 -1.97710927 -2.5701022  -2.54463535 -2.85283479 -2.69286255
  -2.46484901]
 [-3.0700562  -2.59548889         inf -3.14335162 -2.62934064 -2.48438946
  -2.10177674 -2.51238775 -2.27023968 -2.37295881 -2.77271066 -2.86070907
  -2.1164186  -2.3050542  -2.37787974 -2.18653132 -3.00238555 -2.3533487
  -2.15393543]
 [-3.20529039 -2.45620518 -3.14335162         inf -2.91082723 -2.23231344
  -2.2833523  -2.57710441 -2.67641674 -2.60896427 -2.47353358 -2

DOING BOOTSTRAP ID: 2
CCGTACCCCGATCAAAAAGACGGACGACTCCTATGCAGAGGTCCGAGC--CAATCAGCTGCCGCA-CCAAGAGGGCAGGCCGG-TGGCAGGCATG-GATAAG-AAGGCGCGACACAGGCAGAAGAAGAGG-TGG-GTCTCACGGGA-TGGC-TGTGGGGCCTA-TGTCGTCTTTAA-GCAT-TGAGGCT-GAAAAA-CAAAATCCCCCTAGGCTG-CCGG-A-CGCCCCCGATGGC-CTCTGGGGAAT-GCTAACAGGGTTCG-CATAT-TGGAGG-ATA-CAGACCGCTGGTCCCGTTCAAAGATACTGTA--GGAGGGTACGGGGACAC-TGCTCGGGGCAAC-TTTACCCCATGGTCCT-CATGATACTG--G-CCCGGGACAAGGGAGCAAGAGGTGA-GTGGGTAAATACC-ACTTTTACTTG-TCTAGTA-GACATCGTAAA-AGAA--C-GTCGTCTT-GTAATATCGTGAGAGGGGG--TTCC-GTGTGA--CGGTACC-AGCACT-CCC-CTAAGCGAGAG-C-GC-GCAACGAAAGACATCGGCGTGTGCTCATCTTGTAAGAGGTTGA-AA-G-CTGGAATTGC-CGGAGGCACCCCGCCCCCG---CG-TTGGGG-AGCGTCAC-GCCTGGC-GTCTAG-TTGG-G-ATC-AGAGACTAGGCAGAAC-CAGG-T-TTTAGGGCTCC-CAACCCACTAGGCACAGGCCG-TC-ACGCCACAGGG-GCGGAAGG--TGGCACCG--CGTAGTGACTACGA-GCA-GGTGGG-TGGACTCGAC-ATGCGTACC-CCTAGTAGCCTCCCCGCCCCAGTA-ATACGTGGGGGTG-ATTAGCCGCTGGGATAA-TCGGATTG-GTAG-C-GACAG-AAGTGTCAGCAG-AAGTGGCAA-ATCAACTGCTG-T-ACAT-ATAAGCTAGGCAAAGCTCGGT-CGT-GTT-CAGCGGG-T-ATGGCACGGCGCCC

  -2.51398913 -2.55591165 -2.93440323 -2.5062522  -2.22498249         inf]]
Joining 47 and 38
[105 104 103 102 101  99  86   6  12  19  31  36  41  42  43  58  61]

17
Calculating Q matrix...
[[        inf -2.15751714 -2.34171588 -2.07010293 -2.05507256 -2.51550934
  -2.11532983 -2.33754906 -2.29281842 -2.38948036 -2.28353253 -1.83807217
  -2.2885757  -2.23897736 -2.25138549 -2.48947484 -2.4578818 ]
 [-2.15751714         inf -2.15545755 -2.36794378 -2.26581548 -2.0201749
  -2.11804226 -2.17345069 -2.18045289 -2.33965163 -2.38219913 -2.3906629
  -2.22084551 -2.37478751 -2.38530297 -2.20613859 -2.38455248]
 [-2.34171588 -2.15545755         inf -2.38185112 -2.192321   -2.44314514
  -2.08334248 -2.10241568 -2.30499325 -1.99788669 -2.088776   -2.08241389
  -2.52282321 -2.11992743 -2.34431402 -2.46631993 -2.49529216]
 [-2.07010293 -2.36794378 -2.38185112         inf -2.45589169 -2.25032175
  -2.35389422 -2.15321129 -2.42393465 -2.14120601 -1.99213105 -2.52881589
  -2.4705573  -2.39895674 -1.

DOING BOOTSTRAP ID: 3
TCTTGACAGAGACGC-AGA-TCCAGCGCCGCGGTGTGTGT--TT-T-G--GCCGCCGGC-GTGTTGTGCGACCAAG-T--GTGTCTAATAATTCTGCTCAGATGTTTGTGGA-GAATTCCTGGGG-GTGTGTCGGGATTAGGG--A-T-A-GA-CAAG-TATGATCGGAG-AC-CCCAACC-CGGGTACCG-AGCAACCGGGGGTGCATCTGAAGC-GATACTGGCAGGT-CTCGGCGGTTGGTGAACAGGCATGCA-GTA-CTCCAT-GAGGATC-GGGATCT-AGTCTCGGGAGCCTAGG-TA-GACTTTGGGGAGGGCTG-GTGTATAAACGT----CGTCGCGCG-CGCGCGCGAAAACACCTTGCGGACAG-TGCTCTAACTA-CGATAACGGGT-CCAAGTTCGTAGAGTCA-GG-GGA--TC-GGATACCGGTCGCA-GCGTAACTCGACGGTATGCACATGGACACCGGCGTGAGTTACCGAAACCGCAGCAGTCGTGAGTCGAGCG-TCG-CGCATGGTATGCGGGCAGGGGCAATTGTCC-TGGTAGAGAAGACCGTGCCGTGGGCG-GCGCGAGAAATG-CCATTTTCGGGATTC-AC-CACGT-ACTTCTAACCCGAGTTTCCGC-AGCAGGGG--TACG-AGAC-CGAGGCTTCGATGG--CAAAGAGGCGGAACCTC-TGTGG-CAATG-GGTATTCACGA-AG-GTGC-AG-CAGGCATATACC-CAACCCCTGTT-GTGGCGCGGTGGTCAGACT-ACGCATAATCGGGAGCAAGCTAATTGCAAACGTCGCACCTGTGCGCCGATAAATGGGCGTATC-C-GG-GCGGGACCGTGTGAATTCCGCCGACGT--T-CGATTATGG-AAAGATT-ACGTAA-GCACCTGACGACTTCTTCCGCGG-CTACACACG-ACT-CGGGAGACTGTGCGGACCGTCT-TCAGACA-AATCT-GCTCTG

[112 111 110 107 106 104  99  63  12  58]

10
Calculating Q matrix...
[[        inf -1.18981285 -1.06529911 -0.97809097 -1.00211631 -1.1478032
  -1.13181922 -1.09479873 -1.07715357 -1.02366213]
 [-1.18981285         inf -1.17009508 -0.97142737 -0.91923878 -1.08265101
  -1.09156477 -1.23253495 -1.02878826 -1.02444302]
 [-1.06529911 -1.17009508         inf -1.14821505 -1.14074855 -0.89823936
  -0.9549324  -1.07273321 -1.12097382 -1.13931952]
 [-0.97809097 -0.97142737 -1.14821505         inf -1.19384996 -1.17896526
  -1.00608856 -0.94279289 -1.11687468 -1.17425135]
 [-1.00211631 -0.91923878 -1.14074855 -1.19384996         inf -1.15753511
  -1.18278887 -0.96863729 -1.00442838 -1.14121284]
 [-1.1478032  -1.08265101 -0.89823936 -1.17896526 -1.15753511         inf
  -1.16328818 -1.00725193 -1.01343332 -1.06138872]
 [-1.13181922 -1.09156477 -0.9549324  -1.00608856 -1.18278887 -1.16328818
          inf -1.20386547 -0.97880528 -0.99740334]
 [-1.09479873 -1.23253495 -1.07273321 -0.94279289 -0.968

  -1.73145648 -1.56112453         inf]]
Joining 41 and 75
[108 107 106 104 101  99  96   6  12  38  43  47  58  61]

14
Calculating Q matrix...
[[        inf -1.33494057 -1.70843307 -1.75001501 -1.40156138 -1.69651316
  -1.84627027 -1.36233032 -1.64358295 -1.53336676 -1.50515817 -1.67922025
  -1.583249   -1.63680353]
 [-1.33494057         inf -1.71826441 -1.68940768 -1.80580831 -1.50425521
  -1.33099751 -1.86051651 -1.64513388 -1.64191635 -1.61774544 -1.43245355
  -1.57470572 -1.52529928]
 [-1.70843307 -1.71826441         inf -1.8558212  -1.6770398  -1.43472218
  -1.51258957 -1.73906138 -1.65944591 -1.40507175 -1.53572083 -1.46935138
  -1.50195516 -1.4639678 ]
 [-1.75001501 -1.68940768 -1.8558212          inf -1.80445607 -1.3796205
  -1.64782688 -1.64101197 -1.70533366 -1.35254093 -1.46587213 -1.53918135
  -1.47062219 -1.37973485]
 [-1.40156138 -1.80580831 -1.6770398  -1.80445607         inf -1.52318087
  -1.5130078  -1.42135419 -1.69541469 -1.45334285 -1.75799092 -1.41598638
  -1.4435

DOING BOOTSTRAP ID: 5
