## Imports

In [None]:
#!conda install ipyrad ipcoal -c conda-forge -c bioconda -y

In [None]:
#!conda install -c conda-forge msprime -y

In [None]:
#This library is needed for msprime
#!conda install -c anaconda libopenblas -y

In [6]:
import ipyrad.analysis as ipa
import ipcoal
import toytree
import toyplot
import pandas as pd
from ipyrad.analysis.baba21 import Drawing

## Methods

### generate_tests_from_names

In [7]:
import random


def generate_tests_from_names(sources, targets_raw, outgroup, no_repeat=True):
    """Build a list of four-taxon test dictionaries from tip names.

    For every source, the target names are copied, the source itself and all
    outgroup names are dropped, and the remaining targets are paired in list
    order: a target becomes ``p1`` and the target right after it becomes
    ``p2``.  The last target has no successor, so it is paired with a randomly
    chosen *other* target (``random.choice``).

    Parameters
    ----------
    sources : name or list of names
        Names used, one per test, as ``p3``.  A value that is not a list is
        wrapped in a one-element list.  A source that also appears in
        ``outgroup`` is skipped.
    targets_raw : iterable of names
        Candidate names for ``p1``/``p2``.  It is never modified.
    outgroup : name or list of names
        Used as ``p4`` of every test (wrapped in a list when needed).
    no_repeat : bool, default True
        If true, a target that was placed in a pair is marked as used and is
        not taken as ``p1`` again, giving consecutive non-overlapping pairs.
        If false, every target is used as ``p1`` once (sliding pairs).

    Returns
    -------
    list of dict
        One ``{'p1': [...], 'p2': [...], 'p3': [source], 'p4': outgroup}``
        dictionary per test.  The list is empty when no pair can be formed,
        e.g. when fewer than two targets remain for a source.
    """
    if not isinstance(sources, list):
        sources = [sources]
    if not isinstance(outgroup, list):
        outgroup = [outgroup]

    tests = []

    for source in sources:
        # A source that is part of the outgroup cannot be tested against it.
        if source in outgroup:
            continue

        # Fresh, independent target list for this source, without the source
        # itself and without any outgroup name.
        targets = [
            name for name in targets_raw
            if name != source and name not in outgroup
        ]

        used = []
        last_index = len(targets) - 1

        for index, current in enumerate(targets):
            # Skip targets already consumed by a previous pair (no_repeat).
            if current in used:
                continue
            if no_repeat:
                used.append(current)

            if index < last_index:
                partner = targets[index + 1]
                if partner in used:
                    # Only reachable when the target list holds duplicated
                    # names; there is no valid p2, so no test is emitted.
                    continue
                if no_repeat:
                    used.append(partner)
            else:
                # Last target (odd count or sliding pairs): pick any other
                # target at random as p2.
                candidates = [name for name in targets if name != current]
                if not candidates:
                    # A single target cannot form a p1/p2 pair.
                    continue
                partner = random.choice(candidates)

            tests.append({'p1': [current], 'p2': [partner], 'p3': [source], 'p4': outgroup})

    return tests

### get_significant_donee

In [8]:
def get_significant_donee(df_result, test):
    """Return the side of the test (``p1`` or ``p2``) selected by the sign of D.

    Parameters
    ----------
    df_result : table-like
        Result of a single test; only ``df_result["D"].values[0]`` is read.
    test : dict
        Test definition holding the ``"p1"`` and ``"p2"`` name lists.

    Returns
    -------
    list
        ``test["p2"]`` when D is positive, ``test["p1"]`` when D is negative.
        The list object stored in ``test`` is returned, not a copy.

    Raises
    ------
    ValueError
        If D is exactly zero or NaN, because neither side can be chosen.
    """
    d_value = df_result["D"].values[0]
    if d_value > 0:
        return test["p2"]
    if d_value < 0:
        return test["p1"]
    raise ValueError(
        f"cannot choose a donee: D is {d_value!r} (expected a non-zero, non-NaN value)"
    )

### run_tests_depuring

In [507]:
def run_tests_depuring(baba, tests, zscoreTH=2.5, verbose=False):
#TODO make it independent for tests with multiple donors
    """First version of the false-positive filter for a list of tests.

    Each test is run with ``baba.run_test(test, nboots=100, quiet=True)``.
    When the first-row ``Z`` of the result exceeds ``zscoreTH`` the donee is
    chosen with ``get_significant_donee`` and then re-tested against every
    "sister" tip of its clade; depending on those nested results a
    ``(donor, donee)`` tuple is appended to the returned list.

    NOTE: this function reads ``tree`` from the enclosing (module) scope, it
    is not a parameter, and calls ``display`` when ``verbose`` is true
    (presumably IPython's ``display``; it is not defined in this file).
    NOTE: a later cell of this file defines a second function with the same
    name and the signature ``(data, tests, tree, ...)``, which replaces this
    one once that cell has been executed.

    Parameters:
        baba: object exposing ``run_test(test, nboots=..., quiet=...)`` whose
            result supports ``["Z"].values[0]`` and ``["D"].values[0]``.
        tests: iterable of dicts with the list-valued keys p1, p2, p3, p4.
        zscoreTH: threshold that ``Z`` must exceed to call a test significant.
        verbose: print a trace of every decision when true.

    Returns:
        list of tuples built as ``(*donor, *donee)``.
    """
    
    # Key: str(sorted donee names + donor list); value: significant donee list,
    # or None when the test was not significant.
    tests_performed_dict = {}
    final_network = []

    # Process the tests one by one
    for i in tests:
        if verbose: print("\n*** Testing:", i, "***")
        donor = i["p3"]
        outgroup = i["p4"]

        # Result table of this test
        df_result = baba.run_test(i, nboots=100, quiet=True)

        # Register the test as performed; None means "no significant donee" so far
        tests_performed_dict[str(sorted([*i["p1"], *i["p2"]]) + donor)] = None

        # Significant test (only the first row of the result is inspected)
        if df_result["Z"].values[0] > zscoreTH:

            # Choose p1 or p2 from the sign of D
            significative_donee = get_significant_donee(df_result, i)

            # Store the significant donee for this test
            tests_performed_dict[str(sorted([*i["p1"], *i["p2"]]) + donor)] = significative_donee

            ## Check the phylogenetic neighborhood to see whether the signal is due to shared ancestry

            # Tip labels below the first node returned by get_ancestors() for the
            # donee's MRCA node (presumably the parent clade -- confirm with toytree).
            # `tree` is a global here, not an argument.
            sisters_significant_donee = tree.get_tip_labels(tree.idx_dict[tree.get_mrca_idx_from_tip_labels(significative_donee)].get_ancestors()[0].idx) 

            # Remove donor, outgroup and the donee itself from the sisters list to
            # avoid donor-donor, donor-outgroup and donee-donee tests
            for ele in donor:
                if ele in sisters_significant_donee:
                    sisters_significant_donee.remove(ele)        
            for ele in outgroup:
                if ele in sisters_significant_donee:
                    sisters_significant_donee.remove(ele)
            for ele in significative_donee:
                if ele in sisters_significant_donee:
                    sisters_significant_donee.remove(ele)

            if verbose: print("donor: ", i["p3"])
            if verbose: print("significative donee: ", significative_donee)
            


            # in_clade_sd keeps the outcome of the most recent sister test that
            # assigned it; a non-significant new test leaves the previous value.
            in_clade_sd = None
            # Significant donees found by the nested tests run in this call
            in_clade_result = []
            # Nested tests with two fixed members: the donor and the significant donee
            if len(sisters_significant_donee) > 0:   
                
                if verbose: print("--- Testing against sisters: ", sisters_significant_donee, "---")

                # Test against every sister
                for sister in sisters_significant_donee:

                    # Same key format as above: sorted p1/p2 names followed by the donor
                    p1_p2_in_clade = str(sorted([*significative_donee, sister]) + donor)
                    if verbose: print("...in clade test: ", donor , "vs." , p1_p2_in_clade)

                    # Pair not tested yet: run it and look at its significance
                    if p1_p2_in_clade not in tests_performed_dict.keys():
                        
                        # Create the test
                        in_clade_test = {"p1":significative_donee,"p2":[sister],"p3":donor,"p4":outgroup}
                        
                        # Run the nested test
                        in_clade_df_result = baba.run_test(in_clade_test, nboots=100, quiet=True)
                        
                        # Significant: record the donee for this pair and add it to the clade results
                        if in_clade_df_result["Z"].values[0] > zscoreTH:
                            in_clade_sd = get_significant_donee(in_clade_df_result, in_clade_test)
                            tests_performed_dict[str(sorted([*significative_donee, sister]) + donor)] = in_clade_sd
                            in_clade_result.append(in_clade_sd)
                            if verbose: print("significative donee found: ", in_clade_sd)
                        else:
                            tests_performed_dict[str(sorted([*significative_donee, sister]) + donor)] = None
                            if verbose: print("no significative donee found in: ", p1_p2_in_clade)

                    # Pair already tested: reuse the stored result (not added to in_clade_result)
                    else:
                        in_clade_sd = tests_performed_dict[p1_p2_in_clade]
                        if verbose: print("test already performed (skipped) previous significative donee: ", tests_performed_dict[p1_p2_in_clade])


            # More than one significant nested result: reported as shared ancestry,
            # nothing is added to the network yet (ToDo below)
            if len(in_clade_result) > 1:
                if verbose: print("True shared ancestry (ToDo): ", in_clade_result)
#                 ancestror = tree.get_mrca_idx_from_tip_labels(significative_donee)].get_ancestors()[0].idx
#                 final_network.append((*donor, ancestror))
    
            else:
                # The last nested result confirms the donee of the main test
                if in_clade_sd == significative_donee:
                    if verbose: print("True positive may be: ", in_clade_sd)
                    if (*donor, *in_clade_sd) not in final_network:
                        final_network.append((*donor, *in_clade_sd))
                else:
                    # A different donee came out of the nested tests: keep that one instead
                    if in_clade_sd:
                        if verbose: print("False positive due share ancestry: ", significative_donee, ". True positive may be: ", in_clade_sd)
                        if (*donor, *in_clade_sd) not in final_network:
                            final_network.append((*donor, *in_clade_sd))
                    # No nested result at all: the main test is discarded
                    else:
                        if verbose: print("False positive due share ancestry: ", significative_donee, ". No true positive in this test")

        # No significant result in the main test
        else:
            if verbose: print("No significant donee in test")

    if verbose: display(tests_performed_dict)
    return final_network

### run_tests_depuring v2

In [376]:
def run_tests_depuring(data, tests, tree, zscoreTH=2.5, verbose=False, nboots=100):
    """Run a list of tests and keep only the donor/donee pairs that survive
    two follow-up checks (version 2).

    For every test whose first-row ``Z`` exceeds ``zscoreTH``:

    1. the donee is chosen from the sign of D (``get_significant_donee``);
    2. *in-clade check*: the donee is re-tested against each of its sister
       tips (donor, outgroup and the donee itself excluded) and the names of
       the donees of the significant nested tests are collected;
    3. if the donee of the main test is among the collected names, a
       *distant test* is run with ``p1 = outgroup``,
       ``p2 = collected names``, ``p3 = donor`` and ``p4 = sister tips of the
       donor``; the pair is accepted only when that test is significant and
       selects ``p2``.

    Results of the main tests and of the non-significant nested tests are
    cached per donor, so a p1/p2 pair already seen for a donor is not run
    again.

    Parameters
    ----------
    data : object
        Must expose ``run_test(test, nboots=..., quiet=True)`` returning a
        table that supports ``["Z"].values[0]`` and ``["D"].values[0]``.
    tests : iterable of dict
        Test definitions with the list-valued keys ``p1``, ``p2``, ``p3``
        and ``p4``.  ``p3`` and the selected donee must hold exactly one
        name, because they are converted with ``str(*names)``.
    tree : tree object
        Must expose ``get_tip_labels``, ``idx_dict`` and
        ``get_mrca_idx_from_tip_labels`` (used to look up sister tips).
    zscoreTH : float, default 2.5
        Threshold ``Z`` has to exceed for a test to count as significant.
    verbose : bool, default False
        Print a trace of every decision and the final counters.
    nboots : int, default 100
        Value forwarded as ``nboots`` to every ``run_test`` call.

    Returns
    -------
    list of tuple
        Accepted pairs, built as ``(*donor, *accepted_names)``, without
        duplicates.
    """
    # {donor name: {str(sorted p1/p2 names): donee name or None}}
    tests_performed_dict = {}
    truepositives_network = []
    n_tests = 0
    n_fp = 0

    for i in tests:
        if verbose: print("\n*** Testing:", i, "***")
        donor = i["p3"]
        outgroup = i["p4"]
        donees = [*i["p1"], *i["p2"]]
        donor_key = str(*donor)
        donees_key = str(sorted(donees))

        # Per-donor registry of the pairs already tested
        donor_tests = tests_performed_dict.setdefault(donor_key, {})

        # Pair already tested for this donor: nothing new to learn
        if donees_key in donor_tests:
            if verbose: print("test already performed (skipped) previous significative donee: ", donor_tests[donees_key])
            continue

        df_result = data.run_test(i, nboots=nboots, quiet=True)
        n_tests += 1

        # Not significant (only the first row of the result is inspected)
        if not df_result["Z"].values[0] > zscoreTH:
            if verbose: print("No significant donee in test")
            donor_tests[donees_key] = None
            continue

        # Choose p1 or p2 from the sign of D and register it
        significative_donee = get_significant_donee(df_result, i)
        donee_name = str(*significative_donee)
        donor_tests[donees_key] = donee_name

        ## Check the phylogenetic neighborhood to see whether the signal is
        ## due to shared ancestry

        # Tip labels below the first node returned by get_ancestors() for the
        # donee's MRCA node.
        sisters_significant_donee = tree.get_tip_labels(
            tree.idx_dict[tree.get_mrca_idx_from_tip_labels(significative_donee)].get_ancestors()[0].idx
        )

        # Drop donor, outgroup and the donee itself to avoid donor-donor,
        # donor-outgroup and donee-donee tests
        for ele in (*donor, *outgroup, *significative_donee):
            if ele in sisters_significant_donee:
                sisters_significant_donee.remove(ele)

        if verbose: print("donor: ", donor)
        if verbose: print("significative donee: ", significative_donee)

        # Names (strings) of the donees of the significant in-clade tests
        in_clade_significants = []

        ## Test shared ancestry
        # Nested tests with two fixed members: the donor and the significant
        # donee.  Assumption: if a vs b is significant, that significance is
        # kept in a test involving a vs b vs b_sister; if the significance of
        # b is lost, it is reported as a false positive.
        if len(sisters_significant_donee) > 0:
            if verbose: print("--- Testing against sisters: ", sisters_significant_donee, "---")

            for sister in sisters_significant_donee:
                s_sd = str(sorted([sister, *significative_donee]))

                if s_sd not in donor_tests:
                    in_clade_test = {"p1": significative_donee, "p2": [sister], "p3": donor, "p4": outgroup}
                    in_clade_df_result = data.run_test(in_clade_test, nboots=nboots, quiet=True)
                    n_tests += 1

                    if in_clade_df_result["Z"].values[0] > zscoreTH:
                        in_clade_sd = get_significant_donee(in_clade_df_result, in_clade_test)
                        in_clade_name = str(*in_clade_sd)
                        # TODO: significant nested results are deliberately not
                        # written to the registry, because it could then
                        # include untested significants.

                        # Compare by name: the list stores strings, not the
                        # one-element lists returned by get_significant_donee,
                        # so each donee is collected only once.
                        if in_clade_name not in in_clade_significants:
                            in_clade_significants.append(in_clade_name)

                        if verbose: print("- significative donee found for", s_sd, ": ", in_clade_name)
                    else:
                        donor_tests[s_sd] = None
                        if verbose: print("- no significative donee found in", s_sd)
                else:
                    # Test already done: reuse the stored result
                    in_clade_sd = donor_tests[s_sd]
                    if verbose: print("- test already performed (skipped) " + s_sd + ". Previous significative donee: ", donor_tests[s_sd])
                    # A stored donee (not None) also counts for the interpretation
                    if in_clade_sd:
                        if in_clade_sd not in in_clade_significants:
                            in_clade_significants.append(in_clade_sd)
        else:
            # Assumption: if mixing between two sister species is significant
            # it cannot be discriminated with this test (ToDo: think about it).
            # No sisters are left because all of them were in the main test.
            if verbose: print("All sisters already in the test")
            if verbose: print("Shared ancestry or too close related")
            # TODO: test the donee with its sister against a very different
            # species to see whether the significance is lost.

        if verbose: print("in_clade_significants", in_clade_significants)

        # Nothing significant inside the clade: main result discarded
        if not in_clade_significants:
            if verbose: print("false")
            n_fp += 1
            continue

        # Only other members of the clade were significant
        if donee_name not in in_clade_significants:
            if verbose: print("false but others in the clade?")
            n_fp += 1
            continue

        if len(in_clade_significants) > 1:
            if verbose: print("shared true")
        else:
            if verbose: print("true")

        ## Distance test: a result confirmed inside the clade may still be an
        ## artifact when the compared pair is very distant from the donor.
        ## The donor's sisters are used as outgroup (p4) and the original
        ## outgroup as p1; the pair is kept only if this test is significant
        ## and still selects the candidate donee(s).
        sisters_donor = tree.get_tip_labels(
            tree.idx_dict[tree.get_mrca_idx_from_tip_labels(donor)].get_ancestors()[0].idx
        )
        if donor_key in sisters_donor:
            sisters_donor.remove(donor_key)

        # Without donor sisters no distant test can be built; the pair is
        # then neither accepted nor counted as a false positive.
        if not sisters_donor:
            continue

        distant_test = {"p1": outgroup, "p2": in_clade_significants, "p3": donor, "p4": sisters_donor}
        if verbose: print("distant test to verify significant donee:", distant_test)

        distant_df_result = data.run_test(distant_test, nboots=nboots, quiet=True)
        n_tests += 1

        if distant_df_result["Z"].values[0] > zscoreTH:
            distant_sd = get_significant_donee(distant_df_result, distant_test)
            if distant_sd == in_clade_significants:
                if (*donor, *in_clade_significants) not in truepositives_network:
                    truepositives_network.append((*donor, *in_clade_significants))
                if verbose: print("true verified", distant_sd)
            else:
                if verbose: print("false by distance", distant_sd)
                n_fp += 1
        else:
            if verbose: print("no signficant in this test true positive rejected")
            n_fp += 1

    if verbose: print ("\nNumber of tests performed:" + str(n_tests))
    if verbose: print ("False positives depured:" + str(n_fp))
    return truepositives_network

## Generate simulated data

In [401]:
# Generate a balanced tree with 6 tips (treeheight=10e6); it is stored as
# `tree1` and used by the 6-tip sections of this notebook.
tree1 = toytree.rtree.baltree(ntips=6, treeheight=10e6)

# Draw the tree with the admixture edge (2, 3); the trailing semicolon keeps
# the notebook from echoing the objects returned by draw().
tree1.draw(ts='p', admixture_edges=(2, 3));

In [320]:
# Create a simulation model for the 6-tip tree of this section.  `tree1` is
# passed because it is the tree drawn above and used to analyse this database;
# `tree` is only defined in a later section.
# Admixture edge tuple: (src, dest, time-prop., admix-prop.)
model = ipcoal.Model(tree=tree1, nsamples=2, Ne=4e5, admixture_edges=(2, 3, 0.5, 0.15))

# simulate N loci
model.sim_loci(nloci=3000, nsites=50)

# drop 50% as missing
model.apply_missing_mask(0.5)

# write result to a database file (read back below as "test-baba-miss50.snps.hdf5")
model.write_snps_to_hdf5(name="test-baba-miss50", diploid=True)

wrote 45920 SNPs to /home/carlos/AutoABBA/test-baba-miss50.snps.hdf5


## Test 1 (single source)
Version 2 works well with this

### Get snps from hdf5

In [420]:
# Open the SNP database written by the simulation cell above
# (name="test-baba-miss50") as an ipa.baba21 analysis object.
baba = ipa.baba21("test-baba-miss50.snps.hdf5")

### Generate tests given unique donor

In [315]:
# Single-donor test set: r2 is the only source (p3), every tip of tree1 is a
# candidate target and r5 is the outgroup (p4).
source = "r2"
targets = tree1.get_tip_labels()
outgroup = ["r5"]

# no_repeat=False -> every target is used once as p1 (overlapping pairs)
tests1 = generate_tests_from_names(source, targets, outgroup, no_repeat=False)
# bare name on the last line: the notebook displays the generated list
tests1

[{'p1': ['r0'], 'p2': ['r1'], 'p3': ['r2'], 'p4': ['r5']},
 {'p1': ['r1'], 'p2': ['r3'], 'p3': ['r2'], 'p4': ['r5']},
 {'p1': ['r3'], 'p2': ['r4'], 'p3': ['r2'], 'p4': ['r5']},
 {'p1': ['r4'], 'p2': ['r0'], 'p3': ['r2'], 'p4': ['r5']}]

### Do Baba tests using baba.run

In [110]:
# Run all tests of tests1 through the baba tool without any filtering
# (nboots=100; presumably the number of bootstrap replicates -- confirm in ipyrad).
baba.run(tests1, auto=True, nboots=100)

[####################] 100% 0:00:09 | abba-baba tests 


In [477]:
# Draw the results of the run above next to the 6-tip tree of this section.
# `tree1` is used because it is the tree this database belongs to; `tree` is
# only defined in a later section.
canvas = baba.draw(tree=tree1, sort=False, fade=True, width=700, height=500)

### Do Babas using false positive depuration

In [403]:
# Same tests through the false-positive filter; the returned list of
# (donor, donee) tuples is displayed by the notebook.
run_tests_depuring(baba, tests1, tree1, verbose=False)

[('r2', 'r3')]

## Test 2 (multiple sources)
Version 2 works well with this

In [421]:
# Multiple-donor test set (all vs. all): every tip of tree1 is used both as
# source and as target.
source = tree1.get_tip_labels()
targets = tree1.get_tip_labels()
# Plain string here; generate_tests_from_names wraps it in a list, and the
# source "r5" is skipped because it is the outgroup.
outgroup = "r5"

tests2 = generate_tests_from_names(source, targets, outgroup, no_repeat=False)
# display the generated list
tests2

[{'p1': ['r1'], 'p2': ['r2'], 'p3': ['r0'], 'p4': ['r5']},
 {'p1': ['r2'], 'p2': ['r3'], 'p3': ['r0'], 'p4': ['r5']},
 {'p1': ['r3'], 'p2': ['r4'], 'p3': ['r0'], 'p4': ['r5']},
 {'p1': ['r4'], 'p2': ['r3'], 'p3': ['r0'], 'p4': ['r5']},
 {'p1': ['r0'], 'p2': ['r2'], 'p3': ['r1'], 'p4': ['r5']},
 {'p1': ['r2'], 'p2': ['r3'], 'p3': ['r1'], 'p4': ['r5']},
 {'p1': ['r3'], 'p2': ['r4'], 'p3': ['r1'], 'p4': ['r5']},
 {'p1': ['r4'], 'p2': ['r2'], 'p3': ['r1'], 'p4': ['r5']},
 {'p1': ['r0'], 'p2': ['r1'], 'p3': ['r2'], 'p4': ['r5']},
 {'p1': ['r1'], 'p2': ['r3'], 'p3': ['r2'], 'p4': ['r5']},
 {'p1': ['r3'], 'p2': ['r4'], 'p3': ['r2'], 'p4': ['r5']},
 {'p1': ['r4'], 'p2': ['r3'], 'p3': ['r2'], 'p4': ['r5']},
 {'p1': ['r0'], 'p2': ['r1'], 'p3': ['r3'], 'p4': ['r5']},
 {'p1': ['r1'], 'p2': ['r2'], 'p3': ['r3'], 'p4': ['r5']},
 {'p1': ['r2'], 'p2': ['r4'], 'p3': ['r3'], 'p4': ['r5']},
 {'p1': ['r4'], 'p2': ['r0'], 'p3': ['r3'], 'p4': ['r5']},
 {'p1': ['r0'], 'p2': ['r1'], 'p3': ['r4'], 'p4': ['r5']

In [468]:
# Unfiltered run of the all-vs-all test set (nboots=100).
baba.run(tests2, auto=True, nboots=100)

[####################] 100% 0:00:10 | abba-baba tests 


In [469]:
# Draw the results of the run above next to tree1; the result is kept in
# `canvas`, so nothing is echoed by the assignment itself.
canvas = baba.draw(tree=tree1, sort=False, fade=True, width=700, height=500)

In [422]:
# Filtered run of the all-vs-all test set (default zscoreTH and verbose).
run_tests_depuring(baba, tests2, tree1)
# Author's note: if repetition is allowed, a lot of false positives are shown.

[('r2', 'r3'), ('r3', 'r2')]

## Test 3 (admixture between two distant lineages)
Version 2 works badly with a reduced (no-repeat) test set; otherwise it works pretty well.

In [311]:
# Generate a balanced tree with 10 tips (treeheight=10e6), stored as `tree`.
tree = toytree.rtree.baltree(ntips=10, treeheight=10e6)

# Draw the tree with the admixture edge (2, 7); the trailing semicolon keeps
# the notebook from echoing the objects returned by draw().
tree.draw(ts='p', admixture_edges=(2, 7));

In [407]:
# Create a simulation model for the 10-tip tree with one admixture edge.
# Admixture edge tuple: (src, dest, time-prop., admix-prop.)
model = ipcoal.Model(tree=tree, nsamples=2, Ne=4e5, admixture_edges=(2, 7, 0.5, 0.15))

# simulate N loci
model.sim_loci(nloci=3000, nsites=50)

# drop 50% as missing
model.apply_missing_mask(0.5)

# write result to a database file (read back below as "test-baba_distant-miss50.snps.hdf5")
model.write_snps_to_hdf5(name="test-baba_distant-miss50", diploid=True)

wrote 64134 SNPs to /home/carlos/AutoABBA/test-baba_distant-miss50.snps.hdf5


In [320]:
# Open the SNP database of the distant-admixture simulation
# (name="test-baba_distant-miss50") as an ipa.baba21 analysis object.
baba_distant = ipa.baba21("test-baba_distant-miss50.snps.hdf5")

In [327]:
# Multiple-donor test set (all vs. all) on the 10-tip tree; r8 and r9 form
# the outgroup, so they are neither used as source nor as target.
source = tree.get_tip_labels()
targets = tree.get_tip_labels()
outgroup = ["r8", "r9"]

tests = generate_tests_from_names(source, targets, outgroup, no_repeat=False)
# display the generated list
tests

[{'p1': ['r1'], 'p2': ['r2'], 'p3': ['r0'], 'p4': ['r8', 'r9']},
 {'p1': ['r2'], 'p2': ['r3'], 'p3': ['r0'], 'p4': ['r8', 'r9']},
 {'p1': ['r3'], 'p2': ['r4'], 'p3': ['r0'], 'p4': ['r8', 'r9']},
 {'p1': ['r4'], 'p2': ['r5'], 'p3': ['r0'], 'p4': ['r8', 'r9']},
 {'p1': ['r5'], 'p2': ['r6'], 'p3': ['r0'], 'p4': ['r8', 'r9']},
 {'p1': ['r6'], 'p2': ['r7'], 'p3': ['r0'], 'p4': ['r8', 'r9']},
 {'p1': ['r7'], 'p2': ['r4'], 'p3': ['r0'], 'p4': ['r8', 'r9']},
 {'p1': ['r0'], 'p2': ['r2'], 'p3': ['r1'], 'p4': ['r8', 'r9']},
 {'p1': ['r2'], 'p2': ['r3'], 'p3': ['r1'], 'p4': ['r8', 'r9']},
 {'p1': ['r3'], 'p2': ['r4'], 'p3': ['r1'], 'p4': ['r8', 'r9']},
 {'p1': ['r4'], 'p2': ['r5'], 'p3': ['r1'], 'p4': ['r8', 'r9']},
 {'p1': ['r5'], 'p2': ['r6'], 'p3': ['r1'], 'p4': ['r8', 'r9']},
 {'p1': ['r6'], 'p2': ['r7'], 'p3': ['r1'], 'p4': ['r8', 'r9']},
 {'p1': ['r7'], 'p2': ['r0'], 'p3': ['r1'], 'p4': ['r8', 'r9']},
 {'p1': ['r0'], 'p2': ['r1'], 'p3': ['r2'], 'p4': ['r8', 'r9']},
 {'p1': ['r1'], 'p2': ['r

In [322]:
# Unfiltered run of the all-vs-all test set on the distant-admixture data (nboots=100).
baba_distant.run(tests, auto=True, nboots=100)

[####################] 100% 0:00:29 | abba-baba tests 


In [323]:
# Draw the results of the run above next to the 10-tip tree.
canvas = baba_distant.draw(tree=tree, sort=False, fade=True, width=700, height=500)

In [328]:
# Filtered run with verbose=True: prints the decision trace of every test and
# the notebook displays the returned (donor, donee) tuples.
run_tests_depuring(baba_distant, tests, tree, verbose=True)


*** Testing: {'p1': ['r1'], 'p2': ['r2'], 'p3': ['r0'], 'p4': ['r8', 'r9']} ***
donor:  ['r0']
significative donee:  ['r1']
All sisters already in the test
false because shared ancestry or too close related
in_clade_significants []
false

*** Testing: {'p1': ['r2'], 'p2': ['r3'], 'p3': ['r0'], 'p4': ['r8', 'r9']} ***
donor:  ['r0']
significative donee:  ['r2']
--- Testing against sisters:  ['r1'] ---
- test already performed (skipped) ['r1', 'r2']. Previous significative donee:  r1
in_clade_significants ['r1']
false but others in the clade?

*** Testing: {'p1': ['r3'], 'p2': ['r4'], 'p3': ['r0'], 'p4': ['r8', 'r9']} ***
No significant donee in test

*** Testing: {'p1': ['r4'], 'p2': ['r5'], 'p3': ['r0'], 'p4': ['r8', 'r9']} ***
donor:  ['r0']
significative donee:  ['r4']
--- Testing against sisters:  ['r3'] ---
- test already performed (skipped) ['r3', 'r4']. Previous significative donee:  None
in_clade_significants []
false

*** Testing: {'p1': ['r5'], 'p2': ['r6'], 'p3': ['r0'], 'p4

No significant donee in test

*** Testing: {'p1': ['r4'], 'p2': ['r6'], 'p3': ['r5'], 'p4': ['r8', 'r9']} ***
donor:  ['r5']
significative donee:  ['r6']
All sisters already in the test
false because shared ancestry or too close related
in_clade_significants []
false

*** Testing: {'p1': ['r6'], 'p2': ['r7'], 'p3': ['r5'], 'p4': ['r8', 'r9']} ***
donor:  ['r5']
significative donee:  ['r6']
All sisters already in the test
false because shared ancestry or too close related
in_clade_significants []
false

*** Testing: {'p1': ['r7'], 'p2': ['r4'], 'p3': ['r5'], 'p4': ['r8', 'r9']} ***
donor:  ['r5']
significative donee:  ['r7']
--- Testing against sisters:  ['r6'] ---
- test already performed (skipped) ['r6', 'r7']. Previous significative donee:  r6
in_clade_significants ['r6']
false but others in the clade?

*** Testing: {'p1': ['r0'], 'p2': ['r1'], 'p3': ['r6'], 'p4': ['r8', 'r9']} ***
No significant donee in test

*** Testing: {'p1': ['r1'], 'p2': ['r2'], 'p3': ['r6'], 'p4': ['r8', 'r9'

[('r2', 'r7'), ('r7', 'r2')]

## Test 4 (directionality)

In [330]:
# Generate a balanced tree with 10 tips (treeheight=10e6); rebinds `tree`.
tree = toytree.rtree.baltree(ntips=10, treeheight=10e6)

# Draw the tree with two admixture edges, (7, 2) and (1, 3).  There is no
# trailing semicolon, so the notebook echoes the objects returned by draw().
tree.draw(ts='p', admixture_edges=[(7, 2), (1, 3)])

(<toyplot.canvas.Canvas at 0x7f9239c9a1d0>,
 <toyplot.coordinates.Cartesian at 0x7f9239bfd5d0>,
 <toytree.Render.ToytreeMark at 0x7f91d832e090>)

In [87]:
# Create a simulation model for the 10-tip tree with two admixture edges.
# Each edge tuple: (src, dest, time-prop., admix-prop.)
model = ipcoal.Model(tree=tree, nsamples=2, Ne=4e5, admixture_edges=[(7, 2, 0.5, 0.15),(1, 3, 0.5, 0.15)])

# simulate N loci
model.sim_loci(nloci=3000, nsites=50)

# drop 50% as missing
model.apply_missing_mask(0.5)

# write result to a database file (read back below as "test-baba_two-miss50.snps.hdf5")
model.write_snps_to_hdf5(name="test-baba_two-miss50", diploid=True)

wrote 64301 SNPs to /home/carlos/AutoABBA/test-baba_two-miss50.snps.hdf5


In [331]:
# Open the SNP database of the two-edge simulation
# (name="test-baba_two-miss50") as an ipa.baba21 analysis object.
baba_two = ipa.baba21("test-baba_two-miss50.snps.hdf5")

In [332]:
# Single-donor test set with r7 as source (p3); r8 and r9 form the outgroup.
source = "r7"
targets = tree.get_tip_labels()
outgroup = ["r8", "r9"]

tests = generate_tests_from_names(source, targets, outgroup, no_repeat=False)
# display the generated list
tests

[{'p1': ['r0'], 'p2': ['r1'], 'p3': ['r7'], 'p4': ['r8', 'r9']},
 {'p1': ['r1'], 'p2': ['r2'], 'p3': ['r7'], 'p4': ['r8', 'r9']},
 {'p1': ['r2'], 'p2': ['r3'], 'p3': ['r7'], 'p4': ['r8', 'r9']},
 {'p1': ['r3'], 'p2': ['r4'], 'p3': ['r7'], 'p4': ['r8', 'r9']},
 {'p1': ['r4'], 'p2': ['r5'], 'p3': ['r7'], 'p4': ['r8', 'r9']},
 {'p1': ['r5'], 'p2': ['r6'], 'p3': ['r7'], 'p4': ['r8', 'r9']},
 {'p1': ['r6'], 'p2': ['r1'], 'p3': ['r7'], 'p4': ['r8', 'r9']}]

In [334]:
# Simulated admixture edges: 7>2 & 1>3.  Only the tests with donor r7 are
# filtered here, with the decision trace printed.
run_tests_depuring(baba_two, tests, tree, verbose=True)


*** Testing: {'p1': ['r0'], 'p2': ['r1'], 'p3': ['r7'], 'p4': ['r8', 'r9']} ***
No significant donee in test

*** Testing: {'p1': ['r1'], 'p2': ['r2'], 'p3': ['r7'], 'p4': ['r8', 'r9']} ***
donor:  ['r7']
significative donee:  ['r2']
--- Testing against sisters:  ['r0', 'r1'] ---
- significative donee found for ['r0', 'r2'] :  r2
- test already performed (skipped) ['r1', 'r2']. Previous significative donee:  r2
in_clade_significants ['r2']
true
distant test to verify significant donee: {'p1': ['r8', 'r9'], 'p2': ['r2'], 'p3': ['r7'], 'p4': ['r5', 'r6']}
['r2']
true verified ['r2']

*** Testing: {'p1': ['r2'], 'p2': ['r3'], 'p3': ['r7'], 'p4': ['r8', 'r9']} ***
donor:  ['r7']
significative donee:  ['r2']
--- Testing against sisters:  ['r0', 'r1'] ---
- significative donee found for ['r0', 'r2'] :  r2
- test already performed (skipped) ['r1', 'r2']. Previous significative donee:  r2
in_clade_significants ['r2']
true
distant test to verify significant donee: {'p1': ['r8', 'r9'], 'p2': ['

[('r7', 'r2')]

In [338]:
# Single-donor test set with r2 as source (p3); r8 and r9 form the outgroup.
source = "r2"
targets = tree.get_tip_labels()
outgroup = ["r8", "r9"]

tests = generate_tests_from_names(source, targets, outgroup, no_repeat=False)
# display the generated list
tests

[{'p1': ['r0'], 'p2': ['r1'], 'p3': ['r2'], 'p4': ['r8', 'r9']},
 {'p1': ['r1'], 'p2': ['r3'], 'p3': ['r2'], 'p4': ['r8', 'r9']},
 {'p1': ['r3'], 'p2': ['r4'], 'p3': ['r2'], 'p4': ['r8', 'r9']},
 {'p1': ['r4'], 'p2': ['r5'], 'p3': ['r2'], 'p4': ['r8', 'r9']},
 {'p1': ['r5'], 'p2': ['r6'], 'p3': ['r2'], 'p4': ['r8', 'r9']},
 {'p1': ['r6'], 'p2': ['r7'], 'p3': ['r2'], 'p4': ['r8', 'r9']},
 {'p1': ['r7'], 'p2': ['r5'], 'p3': ['r2'], 'p4': ['r8', 'r9']}]

In [339]:
# Filter the r2-donor tests on the two-edge data, printing the decision trace.
run_tests_depuring(baba_two, tests, tree, verbose=True)


*** Testing: {'p1': ['r0'], 'p2': ['r1'], 'p3': ['r2'], 'p4': ['r8', 'r9']} ***
donor:  ['r2']
significative donee:  ['r0']
--- Testing against sisters:  ['r1'] ---
- test already performed (skipped) ['r0', 'r1']. Previous significative donee:  r0
in_clade_significants ['r0']
true
distant test to verify significant donee: {'p1': ['r8', 'r9'], 'p2': ['r0'], 'p3': ['r2'], 'p4': ['r0', 'r1']}
['r8', 'r9']
false by distance ['r8', 'r9']

*** Testing: {'p1': ['r1'], 'p2': ['r3'], 'p3': ['r2'], 'p4': ['r8', 'r9']} ***
donor:  ['r2']
significative donee:  ['r1']
--- Testing against sisters:  ['r0'] ---
- test already performed (skipped) ['r0', 'r1']. Previous significative donee:  r0
in_clade_significants ['r0']
false but others in the clade?

*** Testing: {'p1': ['r3'], 'p2': ['r4'], 'p3': ['r2'], 'p4': ['r8', 'r9']} ***
No significant donee in test

*** Testing: {'p1': ['r4'], 'p2': ['r5'], 'p3': ['r2'], 'p4': ['r8', 'r9']} ***
donor:  ['r2']
significative donee:  ['r4']
--- Testing agains

[('r2', 'r7')]

In [340]:
# Single-donor test set with r1 as source (p3); r8 and r9 form the outgroup.
source = "r1"
targets = tree.get_tip_labels()
outgroup = ["r8", "r9"]

tests = generate_tests_from_names(source, targets, outgroup, no_repeat=False)
# display the generated list
tests

[{'p1': ['r0'], 'p2': ['r2'], 'p3': ['r1'], 'p4': ['r8', 'r9']},
 {'p1': ['r2'], 'p2': ['r3'], 'p3': ['r1'], 'p4': ['r8', 'r9']},
 {'p1': ['r3'], 'p2': ['r4'], 'p3': ['r1'], 'p4': ['r8', 'r9']},
 {'p1': ['r4'], 'p2': ['r5'], 'p3': ['r1'], 'p4': ['r8', 'r9']},
 {'p1': ['r5'], 'p2': ['r6'], 'p3': ['r1'], 'p4': ['r8', 'r9']},
 {'p1': ['r6'], 'p2': ['r7'], 'p3': ['r1'], 'p4': ['r8', 'r9']},
 {'p1': ['r7'], 'p2': ['r0'], 'p3': ['r1'], 'p4': ['r8', 'r9']}]

In [341]:
# Filter the r1-donor tests on the two-edge data, printing the decision trace.
run_tests_depuring(baba_two, tests, tree, verbose=True)


*** Testing: {'p1': ['r0'], 'p2': ['r2'], 'p3': ['r1'], 'p4': ['r8', 'r9']} ***
donor:  ['r1']
significative donee:  ['r0']
All sisters already in the test
false because shared ancestry or too close related
in_clade_significants []
false

*** Testing: {'p1': ['r2'], 'p2': ['r3'], 'p3': ['r1'], 'p4': ['r8', 'r9']} ***
donor:  ['r1']
significative donee:  ['r2']
--- Testing against sisters:  ['r0'] ---
- test already performed (skipped) ['r0', 'r2']. Previous significative donee:  r0
in_clade_significants ['r0']
false but others in the clade?

*** Testing: {'p1': ['r3'], 'p2': ['r4'], 'p3': ['r1'], 'p4': ['r8', 'r9']} ***
donor:  ['r1']
significative donee:  ['r3']
--- Testing against sisters:  ['r4'] ---
- test already performed (skipped) ['r3', 'r4']. Previous significative donee:  r3
in_clade_significants ['r3']
true
distant test to verify significant donee: {'p1': ['r8', 'r9'], 'p2': ['r3'], 'p3': ['r1'], 'p4': ['r0']}
['r3']
true verified ['r3']

*** Testing: {'p1': ['r4'], 'p2': [

[('r1', 'r3')]

In [342]:
# Single-donor test set with r3 as source (p3); r8 and r9 form the outgroup.
source = "r3"
targets = tree.get_tip_labels()
outgroup = ["r8", "r9"]

tests = generate_tests_from_names(source, targets, outgroup, no_repeat=False)
# display the generated list
tests

[{'p1': ['r0'], 'p2': ['r1'], 'p3': ['r3'], 'p4': ['r8', 'r9']},
 {'p1': ['r1'], 'p2': ['r2'], 'p3': ['r3'], 'p4': ['r8', 'r9']},
 {'p1': ['r2'], 'p2': ['r4'], 'p3': ['r3'], 'p4': ['r8', 'r9']},
 {'p1': ['r4'], 'p2': ['r5'], 'p3': ['r3'], 'p4': ['r8', 'r9']},
 {'p1': ['r5'], 'p2': ['r6'], 'p3': ['r3'], 'p4': ['r8', 'r9']},
 {'p1': ['r6'], 'p2': ['r7'], 'p3': ['r3'], 'p4': ['r8', 'r9']},
 {'p1': ['r7'], 'p2': ['r6'], 'p3': ['r3'], 'p4': ['r8', 'r9']}]

In [343]:
# Filter the r3-donor tests on the two-edge data, printing the decision trace.
run_tests_depuring(baba_two, tests, tree, verbose=True)


*** Testing: {'p1': ['r0'], 'p2': ['r1'], 'p3': ['r3'], 'p4': ['r8', 'r9']} ***
donor:  ['r3']
significative donee:  ['r1']
--- Testing against sisters:  ['r0'] ---
- test already performed (skipped) ['r0', 'r1']. Previous significative donee:  r1
in_clade_significants ['r1']
true
distant test to verify significant donee: {'p1': ['r8', 'r9'], 'p2': ['r1'], 'p3': ['r3'], 'p4': ['r4']}
['r1']
true verified ['r1']

*** Testing: {'p1': ['r1'], 'p2': ['r2'], 'p3': ['r3'], 'p4': ['r8', 'r9']} ***
donor:  ['r3']
significative donee:  ['r1']
--- Testing against sisters:  ['r0'] ---
- test already performed (skipped) ['r0', 'r1']. Previous significative donee:  r1
in_clade_significants ['r1']
true
distant test to verify significant donee: {'p1': ['r8', 'r9'], 'p2': ['r1'], 'p3': ['r3'], 'p4': ['r4']}
['r1']
true verified ['r1']

*** Testing: {'p1': ['r2'], 'p2': ['r4'], 'p3': ['r3'], 'p4': ['r8', 'r9']} ***
donor:  ['r3']
significative donee:  ['r4']
All sisters already in the test
false becau

[('r3', 'r1')]

In [344]:
# All-vs-all test set on the 10-tip tree: every tip is used as source and as
# target; r8 and r9 form the outgroup.
source = tree.get_tip_labels()
targets = tree.get_tip_labels()
outgroup = ["r8", "r9"]

tests = generate_tests_from_names(source, targets, outgroup, no_repeat=False)
# display the generated list
tests

[{'p1': ['r1'], 'p2': ['r2'], 'p3': ['r0'], 'p4': ['r8', 'r9']},
 {'p1': ['r2'], 'p2': ['r3'], 'p3': ['r0'], 'p4': ['r8', 'r9']},
 {'p1': ['r3'], 'p2': ['r4'], 'p3': ['r0'], 'p4': ['r8', 'r9']},
 {'p1': ['r4'], 'p2': ['r5'], 'p3': ['r0'], 'p4': ['r8', 'r9']},
 {'p1': ['r5'], 'p2': ['r6'], 'p3': ['r0'], 'p4': ['r8', 'r9']},
 {'p1': ['r6'], 'p2': ['r7'], 'p3': ['r0'], 'p4': ['r8', 'r9']},
 {'p1': ['r7'], 'p2': ['r5'], 'p3': ['r0'], 'p4': ['r8', 'r9']},
 {'p1': ['r0'], 'p2': ['r2'], 'p3': ['r1'], 'p4': ['r8', 'r9']},
 {'p1': ['r2'], 'p2': ['r3'], 'p3': ['r1'], 'p4': ['r8', 'r9']},
 {'p1': ['r3'], 'p2': ['r4'], 'p3': ['r1'], 'p4': ['r8', 'r9']},
 {'p1': ['r4'], 'p2': ['r5'], 'p3': ['r1'], 'p4': ['r8', 'r9']},
 {'p1': ['r5'], 'p2': ['r6'], 'p3': ['r1'], 'p4': ['r8', 'r9']},
 {'p1': ['r6'], 'p2': ['r7'], 'p3': ['r1'], 'p4': ['r8', 'r9']},
 {'p1': ['r7'], 'p2': ['r3'], 'p3': ['r1'], 'p4': ['r8', 'r9']},
 {'p1': ['r0'], 'p2': ['r1'], 'p3': ['r2'], 'p4': ['r8', 'r9']},
 {'p1': ['r1'], 'p2': ['r

In [347]:
#Full test not
run_tests_depuring(baba_two, tests, tree, verbose=False)

['r3']
['r3']
['r8', 'r9']
['r7']
['r8', 'r9']
['r1']
['r1']
['r8', 'r9']
['r8', 'r9']
['r2']
['r2']
80
{'r0': {"['r1', 'r2']": 'r1', "['r2', 'r3']": 'r2', "['r3', 'r4']": None, "['r4', 'r5']": 'r4', "['r5', 'r6']": None, "['r6', 'r7']": 'r7', "['r5', 'r7']": 'r7'}, 'r1': {"['r0', 'r2']": 'r0', "['r2', 'r3']": 'r2', "['r3', 'r4']": 'r3', "['r4', 'r5']": 'r4', "['r5', 'r6']": None, "['r6', 'r7']": 'r7', "['r3', 'r7']": 'r3'}, 'r2': {"['r0', 'r1']": 'r0', "['r1', 'r3']": 'r1', "['r3', 'r4']": None, "['r4', 'r5']": 'r4', "['r5', 'r6']": None, "['r6', 'r7']": 'r7', "['r0', 'r7']": 'r0'}, 'r3': {"['r0', 'r1']": 'r1', "['r1', 'r2']": 'r1', "['r2', 'r4']": 'r4', "['r4', 'r5']": 'r4', "['r5', 'r6']": None, "['r6', 'r7']": 'r7', "['r5', 'r7']": 'r7'}, 'r4': {"['r0', 'r1']": 'r1', "['r1', 'r2']": 'r1', "['r2', 'r3']": 'r3', "['r3', 'r5']": 'r3', "['r5', 'r6']": None, "['r6', 'r7']": 'r7', "['r0', 'r7']": 'r0'}, 'r5': {"['r0', 'r1']": None, "['r1', 'r2']": None, "['r2', 'r3']": None, "['r3', 'r4'

[('r1', 'r3'), ('r2', 'r7'), ('r3', 'r1'), ('r7', 'r2')]

## Test 5 (another simple test)
Version 2: fully functional for this case.

In [115]:
# init a baba tool from your SNPs database
baba = ipa.baba21("test-baba-miss50.snps.hdf5")

In [248]:
baba.run(tests, auto=True, nboots=100)

[####################] 100% 0:00:08 | abba-baba tests 


In [249]:
canvas = baba.draw(tree=tree1, sort=False, fade=True, width=700, height=500)

In [309]:
# The real (expected) result is the edge 2 -> 3
# Every tip of tree1 is used both as a candidate source (p3) and as a candidate target (p1/p2)
source = tree1.get_tip_labels()
targets = tree1.get_tip_labels()
# Tip used as p4 in every generated test
outgroup = ["r5"]

# no_repeat=False: targets are not marked as used, so one target can appear in several (p1, p2) pairs
tests = generate_tests_from_names(source, targets, outgroup, no_repeat=False)
tests

[{'p1': ['r1'], 'p2': ['r2'], 'p3': ['r0'], 'p4': ['r5']},
 {'p1': ['r2'], 'p2': ['r3'], 'p3': ['r0'], 'p4': ['r5']},
 {'p1': ['r3'], 'p2': ['r4'], 'p3': ['r0'], 'p4': ['r5']},
 {'p1': ['r4'], 'p2': ['r3'], 'p3': ['r0'], 'p4': ['r5']},
 {'p1': ['r0'], 'p2': ['r2'], 'p3': ['r1'], 'p4': ['r5']},
 {'p1': ['r2'], 'p2': ['r3'], 'p3': ['r1'], 'p4': ['r5']},
 {'p1': ['r3'], 'p2': ['r4'], 'p3': ['r1'], 'p4': ['r5']},
 {'p1': ['r4'], 'p2': ['r0'], 'p3': ['r1'], 'p4': ['r5']},
 {'p1': ['r0'], 'p2': ['r1'], 'p3': ['r2'], 'p4': ['r5']},
 {'p1': ['r1'], 'p2': ['r3'], 'p3': ['r2'], 'p4': ['r5']},
 {'p1': ['r3'], 'p2': ['r4'], 'p3': ['r2'], 'p4': ['r5']},
 {'p1': ['r4'], 'p2': ['r3'], 'p3': ['r2'], 'p4': ['r5']},
 {'p1': ['r0'], 'p2': ['r1'], 'p3': ['r3'], 'p4': ['r5']},
 {'p1': ['r1'], 'p2': ['r2'], 'p3': ['r3'], 'p4': ['r5']},
 {'p1': ['r2'], 'p2': ['r4'], 'p3': ['r3'], 'p4': ['r5']},
 {'p1': ['r4'], 'p2': ['r2'], 'p3': ['r3'], 'p4': ['r5']},
 {'p1': ['r0'], 'p2': ['r1'], 'p3': ['r4'], 'p4': ['r5']

In [310]:
run_tests_depuring(baba, tests, tree1, verbose=False)

false because shared ancestry or too close related
in_clade_significants []
false
in_clade_significants ['r1']
false but others in the clade?
false because shared ancestry or too close related
in_clade_significants []
false
in_clade_significants ['r0']
false but others in the clade?
false because shared ancestry or too close related
in_clade_significants []
false
in_clade_significants []
false
in_clade_significants ['r3']
true
distant test to verify significant donee: {'p1': ['r5'], 'p2': ['r3'], 'p3': ['r2'], 'p4': ['r0', 'r1']}
['r3']
in_clade_significants ['r2']
true
distant test to verify significant donee: {'p1': ['r5'], 'p2': ['r2'], 'p3': ['r3'], 'p4': ['r4']}
['r2']
false because shared ancestry or too close related
in_clade_significants []
false
in_clade_significants ['r2']
true
distant test to verify significant donee: {'p1': ['r5'], 'p2': ['r2'], 'p3': ['r4'], 'p4': ['r3']}
['r5']
false by distance
false because shared ancestry or too close related
in_clade_significants []
f

[('r2', 'r3'), ('r3', 'r2')]

## Test 6 multiple admixture edges

In [358]:
# generate a balanced tree with 12 tips
tree_multiple = toytree.rtree.baltree(ntips=12, treeheight=10e6)

# draw the tree with four admixture edges
tree_multiple.draw(ts='p', admixture_edges=[(2, 3),(10, 8),(10, 9),(0, 6)]);

In [359]:
# create a simulation model for this tree/network; each admixture edge is
# (src, dest, time-prop., admix-prop.)
model = ipcoal.Model(tree=tree_multiple, nsamples=2, Ne=4e5, admixture_edges=[(2, 3, 0.5, 0.15),
                                                                     (10, 8, 0.5, 0.15),
                                                                     (10, 9, 0.5, 0.15),
                                                                     (0, 6, 0.5, 0.15),])

# simulate 3000 loci of 50 sites each
model.sim_loci(nloci=3000, nsites=50)

# drop 50% as missing
model.apply_missing_mask(0.5)

# write the result to a database file (loaded by ipa.baba21 in the next cell)
model.write_snps_to_hdf5(name="test-baba_multiple-miss50", diploid=True)

wrote 69661 SNPs to /home/carlos/AutoABBA/test-baba_multiple-miss50.snps.hdf5


In [360]:
baba_multiple = ipa.baba21("test-baba_multiple-miss50.snps.hdf5")

In [363]:
source = tree_multiple.get_tip_labels()
targets = tree_multiple.get_tip_labels()
outgroup = ["r11"]

tests_multiple = generate_tests_from_names(source, targets, outgroup, no_repeat=False)

In [364]:
## The true (simulated) admixture edges are: (2, 3),(10, 8),(10, 9),(0, 6)
run_tests_depuring(baba_multiple, tests_multiple, tree_multiple, verbose=False)

[('r0', 'r6'),
 ('r2', 'r3'),
 ('r3', 'r2'),
 ('r6', 'r0'),
 ('r8', 'r10'),
 ('r10', 'r8')]

In [370]:
source = ["r10", "r6"]
targets = tree_multiple.get_tip_labels()
outgroup = ["r11"]

tests_multiple = generate_tests_from_names(source, targets, outgroup, no_repeat=False)

In [384]:
## The true (simulated) admixture edges from r10 are: (10, 8),(10, 9)
run_tests_depuring(baba_multiple, tests_multiple, tree_multiple, verbose=True)


*** Testing: {'p1': ['r0'], 'p2': ['r1'], 'p3': ['r10'], 'p4': ['r11']} ***
No significant donee in test

*** Testing: {'p1': ['r1'], 'p2': ['r2'], 'p3': ['r10'], 'p4': ['r11']} ***
No significant donee in test

*** Testing: {'p1': ['r2'], 'p2': ['r3'], 'p3': ['r10'], 'p4': ['r11']} ***
No significant donee in test

*** Testing: {'p1': ['r3'], 'p2': ['r4'], 'p3': ['r10'], 'p4': ['r11']} ***
No significant donee in test

*** Testing: {'p1': ['r4'], 'p2': ['r5'], 'p3': ['r10'], 'p4': ['r11']} ***
No significant donee in test

*** Testing: {'p1': ['r5'], 'p2': ['r6'], 'p3': ['r10'], 'p4': ['r11']} ***
donor:  ['r10']
significative donee:  ['r6']
--- Testing against sisters:  ['r7'] ---
- no significative donee found in ['r6', 'r7']
in_clade_significants []
false

*** Testing: {'p1': ['r6'], 'p2': ['r7'], 'p3': ['r10'], 'p4': ['r11']} ***
test already performed (skipped) previous significative donee:  None

*** Testing: {'p1': ['r7'], 'p2': ['r8'], 'p3': ['r10'], 'p4': ['r11']} ***
donor:

[('r10', 'r8'), ('r10', 'r9'), ('r6', 'r0'), ('r6', 'r7')]

## Test 7 multiple admix from one source

In [466]:
# generate a balanced tree with 12 tips
tree_madmix_distantOut = toytree.rtree.baltree(ntips=12, treeheight=10e6)

# draw the tree with five admixture edges, all starting from node 2
tree_madmix_distantOut.draw(ts='p', admixture_edges=[(2, 0),(2, 8),(2, 9),(2, 6),(2, 5)]);

In [459]:
# create a simulation model for this tree/network; each admixture edge is
# (src, dest, time-prop., admix-prop.)
model = ipcoal.Model(tree=tree_madmix_distantOut, nsamples=2, Ne=4e5, admixture_edges=[(2, 0, 0.5, 0.15),
                                                                     (2, 8, 0.5, 0.15),
                                                                     (2, 9, 0.5, 0.15),
                                                                     (2, 6, 0.5, 0.15),
                                                                     (2, 5, 0.5, 0.15),])

# simulate 3000 loci of 50 sites each
model.sim_loci(nloci=3000, nsites=50)

# drop 50% as missing
model.apply_missing_mask(0.5)

# write the result to a database file
model.write_snps_to_hdf5(name="test-baba_distantOut-miss50", diploid=True)

wrote 89721 SNPs to /home/carlos/AutoABBA/test-baba_distantOut-miss50.snps.hdf5


In [443]:
baba_madmix = ipa.baba21("test-baba_madmix-miss50.snps.hdf5")

In [449]:
source = "r2"
targets = tree_madmix.get_tip_labels()
outgroup = ["r11"]

tests_madmix = generate_tests_from_names(source, targets, outgroup, no_repeat=False)

In [457]:
## The true (simulated) admixture edges are: (2, 0),(2, 8),(2, 9),(2, 6),(2, 5)
run_tests_depuring(baba_madmix, tests_madmix, tree_madmix, verbose=True)

# Problems: 2>0, 2>5 and 2>8 are not recovered.
# 2>0 and 2>5 are rejected by the distant test.
# 2>8 is rejected by the sisters test: 2>6 is a true edge and, apparently, testing 2 vs 6 vs 8 makes both come out as false.


*** Testing: {'p1': ['r0'], 'p2': ['r1'], 'p3': ['r2'], 'p4': ['r11']} ***
donor:  ['r2']
significative donee:  ['r0']
--- Testing against sisters:  ['r1'] ---
- test already performed (skipped) ['r0', 'r1']. Previous significative donee:  r0
in_clade_significants ['r0']
true positive for verification
>>> distant test to verify significant donee: {'p1': ['r11'], 'p2': ['r0'], 'p3': ['r2'], 'p4': ['r1']} <<<
falsed by distance ['r11']

*** Testing: {'p1': ['r1'], 'p2': ['r3'], 'p3': ['r2'], 'p4': ['r11']} ***
donor:  ['r2']
significative donee:  ['r1']
--- Testing against sisters:  ['r0'] ---
- test already performed (skipped) ['r0', 'r1']. Previous significative donee:  r0
in_clade_significants ['r0']
false but others in the clade?

*** Testing: {'p1': ['r3'], 'p2': ['r4'], 'p3': ['r2'], 'p4': ['r11']} ***
No significant donee in test

*** Testing: {'p1': ['r4'], 'p2': ['r5'], 'p3': ['r2'], 'p4': ['r11']} ***
donor:  ['r2']
significative donee:  ['r5']
--- Testing against sisters:  ['

[('r2', 'r6'), ('r2', 'r9')]

In [461]:
baba_distantOut = ipa.baba21("test-baba_distantOut-miss50.snps.hdf5")

In [464]:
source = "r2"
targets = tree_madmix_distantOut.get_tip_labels()
outgroup = ["r17"]

tests_madmix_distantOut = generate_tests_from_names(source, targets, outgroup, no_repeat=False)

In [465]:
## The true (simulated) admixture edges are: (2, 0),(2, 8),(2, 9),(2, 6),(2, 5)
run_tests_depuring(baba_distantOut, tests_madmix_distantOut, tree_madmix_distantOut, verbose=True)

# Problems: 2>0, 2>5 and 2>8 are not recovered.


*** Testing: {'p1': ['r0'], 'p2': ['r1'], 'p3': ['r2'], 'p4': ['r17']} ***
donor:  ['r2']
significative donee:  ['r0']
--- Testing against sisters:  ['r1'] ---
- test already performed (skipped) ['r0', 'r1']. Previous significative donee:  r0
in_clade_significants ['r0']
true positive for verification
>>> distant test to verify significant donee: {'p1': ['r17'], 'p2': ['r0'], 'p3': ['r2'], 'p4': ['r1']} <<<
falsed by distance ['r17']

*** Testing: {'p1': ['r1'], 'p2': ['r3'], 'p3': ['r2'], 'p4': ['r17']} ***
donor:  ['r2']
significative donee:  ['r1']
--- Testing against sisters:  ['r0'] ---
- test already performed (skipped) ['r0', 'r1']. Previous significative donee:  r0
in_clade_significants ['r0']
false but others in the clade?

*** Testing: {'p1': ['r3'], 'p2': ['r4'], 'p3': ['r2'], 'p4': ['r17']} ***
No significant donee in test

*** Testing: {'p1': ['r4'], 'p2': ['r5'], 'p3': ['r2'], 'p4': ['r17']} ***
No significant donee in test

*** Testing: {'p1': ['r5'], 'p2': ['r6'], 'p3'

[('r2', 'r9')]

## Tests for resolving sister paradox

In [437]:
# im = {"p1":["r1"],"p2":["r9"],"p3":["r10"],"p4":["r11"]}
# test = baba_multiple.run_test(im, nboots=100, quiet=True)
# display(test)
# print(im["p3"],">", get_significant_donee(test, im))



# im = {"p1":["r1"],"p2":["r9"],"p3_1":["r10"],"p3_2":["r0"],"p4":["r11"]}
# test = baba_multiple.run_partitioned_test(im, nboots=100, quiet=True)
# display(test)

# im = {"p1":["r1"],"p2":["r9"],"p3_1":["r0"],"p3_2":["r10"],"p4":["r11"]}
# test = baba_multiple.run_partitioned_test(im, nboots=100, quiet=True)
# display(test)




# im = {'p1': ['r10'], 'p2': ['r7'], 'p3': ['r6'], 'p4': ['r11']}
# test = baba_multiple.run_test(im, nboots=100, quiet=True)
# display(test)
# print(im["p3"],">", get_significant_donee(test, im))



# im = {'p1': ['r10'],'p2': ['r7'], 'p3_1': ['r6'], 'p3_2': ['r9'], 'p4': ['r11']}
# test = baba_multiple.run_partitioned_test(im, nboots=100, quiet=True)
# display(test)



# im = {'p1': ['r10'],'p2': ['r7'], 'p3_1': ['r9'], 'p3_2': ['r6'], 'p4': ['r11']}
# test = baba_multiple.run_partitioned_test(im, nboots=100, quiet=True)
# display(test)


# Plain four-taxon test with r0 as p3, then print which of p1/p2 comes out as the significant donee
im = {'p1': ['r2'], 'p2': ['r1'], 'p3': ['r0'], 'p4': ['r5']}
test = baba.run_test(im, nboots=100, quiet=True)
display(test)
print(im["p3"],">", get_significant_donee(test, im))


# Partitioned test with the same p1/p2/p4, where p3 is split into p3_1 (r0) and p3_2 (r3)
im = {'p1': ['r2'],'p2': ['r1'], 'p3_1': ['r0'], 'p3_2': ['r3'], 'p4': ['r5']}
test = baba.run_partitioned_test(im, nboots=100, quiet=True)
display(test)




Unnamed: 0,D,bootstd,Z,ABBA,BABA,nSNPs,nloci
0,0.862,0.014,61.692,1533.25,113.25,12208,960


['r0'] > ['r1']


Unnamed: 0,D12,D1,D2,boot12std,boot1std,boot2std,Z12,Z1,Z2,ABBBA,BABBA,ABBAA,BABAA,ABABA,BAABA,nSNPs,nloci
0,0.323,0.914,-0.794,0.103,0.012,0.036,3.125,76.831,21.818,54.25,27.75,1080.0,48.5,25.5,222.0,10820,742


In [440]:
im = {'p1': ['r2'], 'p2': ['r1'], 'p3': ['r0'], 'p4': ['r5']}
test = baba.run_test(im, nboots=100, quiet=True)
display(test)
print(im["p3"],">", get_significant_donee(test, im))



Unnamed: 0,D,bootstd,Z,ABBA,BABA,nSNPs,nloci
0,0.862,0.012,74.266,1533.25,113.25,12208,960


['r0'] > ['r1']


## run_tests_depuring v3

In [456]:
def run_tests_depuring(data, tests, tree, zscoreTH=2.5, verbose=False):
    """Run ABBA-BABA tests and return the donor/donee edges that survive filtering (v3).

    Every test whose Z score is above ``zscoreTH`` is re-checked in two steps:

    1. Sisters test: the significant donee is tested against every other tip
       of its parent clade, keeping donor and outgroup fixed. The donee has
       to come out as significant again in at least one of those tests.
    2. Distant test: the outgroup is used as p1, the in-clade significant
       donee(s) as p2 and the remaining tips of the donor's parent clade as
       p4. The edge is accepted only if this test is significant and reports
       the same donee(s).

    Parameters
    ----------
    data : object
        Must provide ``run_test(test, nboots=100, quiet=True)`` whose result
        can be read as ``result["Z"].values[0]`` (presumably an ``ipa.baba21``
        tool returning a pandas DataFrame -- confirm against the caller).
    tests : iterable of dict
        Tests with keys "p1", "p2", "p3" (donor) and "p4" (outgroup), each a
        list of tip names. "p3" is expected to hold a single name.
    tree : object
        Tree providing ``get_tip_labels``, ``get_mrca_idx_from_tip_labels``
        and ``idx_dict`` (presumably a toytree tree).
    zscoreTH : float
        A test is significant when its Z score is strictly greater than this.
    verbose : bool
        Print a trace of every decision.

    Returns
    -------
    list of tuple
        Accepted edges as ``(donor, donee, ...)`` tuples, without duplicates.

    Notes
    -----
    Relies on ``get_significant_donee(df_result, test)``, defined elsewhere in
    this notebook; it is assumed to return a one-element list of tip names.
    """
    # v3
    # {donor: {str(sorted donee pair): significant donee name or None}}
    tests_performed_dict = {}
    truepositives_network = []
    n_tests = 0
    n_fp = 0

    # Do test by test
    for i in tests:
        if verbose: print("\n*** Testing:", i, "***")
        donor = i["p3"]
        outgroup = i["p4"]
        donees = [*i["p1"], *i["p2"]]
        donor_key = str(*donor)
        donees_key = str(sorted(donees))

        # Results are stored per donor; create the donor entry on first use
        donor_tests = tests_performed_dict.setdefault(donor_key, {})

        # Skip donee pairs already tested for this donor
        if donees_key in donor_tests:
            if verbose: print("test already performed (skipped) previous significative donee: ", donor_tests[donees_key])
            continue

        df_result = data.run_test(i, nboots=100, quiet=True)
        n_tests += 1

        # No significant result in test
        if not df_result["Z"].values[0] > zscoreTH:
            if verbose: print("No significant donee in test")
            donor_tests[donees_key] = None
            continue

        # Which of p1/p2 is the significant one
        significative_donee = get_significant_donee(df_result, i)
        donee_key = str(*significative_donee)
        donor_tests[donees_key] = donee_key

        ## Check the phylogenetic neighbourhood to see if the signal is due to shared ancestry

        # Tips below the parent node of the significant donee
        donee_parent_idx = tree.idx_dict[tree.get_mrca_idx_from_tip_labels(significative_donee)].get_ancestors()[0].idx
        sisters_significant_donee = tree.get_tip_labels(donee_parent_idx)

        # Drop donor, outgroup and the donee itself to avoid donor-donor,
        # donor-outgroup or donee-donee tests
        for ele in (*donor, *outgroup, *significative_donee):
            if ele in sisters_significant_donee:
                sisters_significant_donee.remove(ele)

        if verbose: print("donor: ", donor)
        if verbose: print("significative donee: ", significative_donee)

        # Names of the donees found significant inside the clade (no duplicates)
        in_clade_significants = []

        ## Test shared ancestry
        # Assumption: if a vs b is significant, it should stay significant when
        # b is tested against each of its sisters. If not, it is a false positive.
        if len(sisters_significant_donee) > 0:
            if verbose: print("--- Testing against sisters: ", sisters_significant_donee, "---")

            for sister in sisters_significant_donee:
                s_sd = str(sorted([sister, *significative_donee]))

                if s_sd not in donor_tests:
                    in_clade_test = {"p1":significative_donee,"p2":[sister],"p3":donor,"p4":outgroup}
                    in_clade_df_result = data.run_test(in_clade_test, nboots=100, quiet=True)
                    n_tests += 1

                    if in_clade_df_result["Z"].values[0] > zscoreTH:
                        # Compare by name: the list holds strings, so checking the
                        # raw list returned by the helper would never match.
                        in_clade_sd = str(*get_significant_donee(in_clade_df_result, in_clade_test))
                        # ToDo: not stored in tests_performed_dict on purpose,
                        # the registry could otherwise include untested significants
                        if in_clade_sd not in in_clade_significants:
                            in_clade_significants.append(in_clade_sd)
                        if verbose: print("- significative donee found for", s_sd, ": ", in_clade_sd)
                    else:
                        donor_tests[s_sd] = None
                        if verbose: print("- no significative donee found in", s_sd)
                else:
                    # Test already done: reuse the stored result
                    in_clade_sd = donor_tests[s_sd]
                    if verbose: print("- test already performed (skipped) " + s_sd + ". Previous significative donee: ", in_clade_sd)
                    if in_clade_sd and in_clade_sd not in in_clade_significants:
                        in_clade_significants.append(in_clade_sd)
        else:
            # Every sister was already part of the main test.
            # For now this is assumed to be a false positive. ToDo: explore more about it.
            if verbose: print("Shared ancestry or too close related, assumed as false positive")

        ## Decision maker
        if verbose: print("in_clade_significants", in_clade_significants)

        if not in_clade_significants:
            if verbose: print("false")
            n_fp += 1
            continue

        if donee_key not in in_clade_significants:
            if verbose: print("false but others in the clade?")
            n_fp += 1
            continue

        if len(in_clade_significants) > 1:
            if verbose: print("shared true")
            # ToDo: maybe return ancestral node instead of tip in tree. Do some simulations with this scenario
        else:
            if verbose: print("true positive for verification")

        ## Distant test: a positive can still be an artifact when the compared pair
        # is very distant from the donor. Using the donor's sisters as outgroup and
        # the outgroup as p1 maximises that distance; if the donee is still
        # significant the edge is verified, otherwise it is rejected.
        donor_parent_idx = tree.idx_dict[tree.get_mrca_idx_from_tip_labels(donor)].get_ancestors()[0].idx
        sisters_donor = tree.get_tip_labels(donor_parent_idx)
        if donor_key in sisters_donor:
            sisters_donor.remove(donor_key)
        # Remove every in-clade significant donee; unpacking the list into
        # str() raised TypeError as soon as it held more than one name.
        for ele in in_clade_significants:
            if ele in sisters_donor:
                sisters_donor.remove(ele)

        if not sisters_donor:
            # No sisters left to test the distance bias; the edge is not accepted
            if verbose: print("imposible to do distant tests")
            continue

        distant_test = {"p1":outgroup,"p2":in_clade_significants,"p3":donor,"p4":sisters_donor}
        if verbose: print(">>> distant test to verify significant donee:", distant_test, "<<<")

        distant_df_result = data.run_test(distant_test, nboots=100, quiet=True)
        n_tests += 1

        if distant_df_result["Z"].values[0] > zscoreTH:
            distant_sd = get_significant_donee(distant_df_result, distant_test)
            if distant_sd == in_clade_significants:
                edge = (*donor, *in_clade_significants)
                if edge not in truepositives_network:
                    truepositives_network.append(edge)
                if verbose: print("true verified", distant_sd)
            else:
                if verbose: print("falsed by distance", distant_sd)
                n_fp += 1
        else:
            if verbose: print("no signficant in this test true positive rejected")
            n_fp += 1

    if verbose: print ("\nNumber of tests performed:" + str(n_tests))
    if verbose: print ("False positives depured:" + str(n_fp))
    return truepositives_network

main_test (donor):
 simple baba of the donor against the other donees
 

sisters test (donor, significant_donee, depth):
    get the sisters of significant_donee
    baba of the donor against significant_donee and all its sisters, down to a given depth
    


distant_test (donor, donee):



partitioned_test (donor, donor2, donee):


## run_tests_depuring v4

In [16]:
def run_tests_depuring(data, tests, tree, zscoreTH=2.5, distant=True, verbose=False):
    """Run ABBA-BABA tests and return the donor/donee edges that survive filtering (v4).

    Every test whose Z score is above ``zscoreTH`` is re-checked:

    1. Sisters test: the significant donee is tested against every other tip
       of its parent clade, keeping donor and outgroup fixed. The donee has
       to come out as significant again in at least one of those tests.
    2. Distant test (only when ``distant`` is true): the outgroup is used as
       p1, the in-clade significant donee(s) as p2 and the remaining tips of
       the donor's parent clade as p4. The edge is accepted only if this test
       is significant and reports the same donee(s). When ``distant`` is
       false, edges that pass the sisters test are accepted directly.

    Parameters
    ----------
    data : object
        Must provide ``run_test(test, nboots=100, quiet=True)`` whose result
        can be read as ``result["Z"].values[0]`` (presumably an ``ipa.baba21``
        tool returning a pandas DataFrame -- confirm against the caller).
    tests : iterable of dict
        Tests with keys "p1", "p2", "p3" (donor) and "p4" (outgroup), each a
        list of tip names. "p3" is expected to hold a single name.
    tree : object
        Tree providing ``get_tip_labels``, ``get_mrca_idx_from_tip_labels``
        and ``idx_dict`` (presumably a toytree tree).
    zscoreTH : float
        A test is significant when its Z score is strictly greater than this.
    distant : bool
        Whether to run the distant verification test.
    verbose : bool
        Print a trace of every decision.

    Returns
    -------
    list of tuple
        Accepted edges as ``(donor, donee, ...)`` tuples, without duplicates.

    Notes
    -----
    Relies on ``get_significant_donee(df_result, test)``, defined elsewhere in
    this notebook; it is assumed to return a one-element list of tip names.
    """
    # v4
    # {donor: {str(sorted donee pair): significant donee name or None}}
    tests_performed_dict = {}
    truepositives_network = []
    n_tests = 0
    n_fp = 0

    # Do test by test
    for i in tests:
        if verbose: print("\n*** Testing:", i, "***")
        donor = i["p3"]
        outgroup = i["p4"]
        donees = [*i["p1"], *i["p2"]]
        donor_key = str(*donor)
        donees_key = str(sorted(donees))

        # Results are stored per donor; create the donor entry on first use
        donor_tests = tests_performed_dict.setdefault(donor_key, {})

        # Skip donee pairs already tested for this donor
        if donees_key in donor_tests:
            if verbose: print("test already performed (skipped) previous significative donee: ", donor_tests[donees_key])
            continue

        df_result = data.run_test(i, nboots=100, quiet=True)
        n_tests += 1

        # No significant result in test
        if not df_result["Z"].values[0] > zscoreTH:
            if verbose: print("No significant donee in test")
            donor_tests[donees_key] = None
            continue

        # Which of p1/p2 is the significant one
        significative_donee = get_significant_donee(df_result, i)
        donee_key = str(*significative_donee)
        donor_tests[donees_key] = donee_key

        ## Check the phylogenetic neighbourhood to see if the signal is due to shared ancestry

        # Tips below the parent node of the significant donee
        donee_parent_idx = tree.idx_dict[tree.get_mrca_idx_from_tip_labels(significative_donee)].get_ancestors()[0].idx
        sisters_significant_donee = tree.get_tip_labels(donee_parent_idx)

        # Drop donor, outgroup and the donee itself to avoid donor-donor,
        # donor-outgroup or donee-donee tests
        for ele in (*donor, *outgroup, *significative_donee):
            if ele in sisters_significant_donee:
                sisters_significant_donee.remove(ele)

        if verbose: print("donor: ", donor)
        if verbose: print("significative donee: ", significative_donee)

        # Names of the donees found significant inside the clade (no duplicates)
        in_clade_significants = []

        ## Test shared ancestry
        # Assumption: if a vs b is significant, it should stay significant when
        # b is tested against each of its sisters. If not, it is a false positive.
        if len(sisters_significant_donee) > 0:
            if verbose: print("--- Testing against sisters: ", sisters_significant_donee, "---")

            for sister in sisters_significant_donee:
                s_sd = str(sorted([sister, *significative_donee]))

                if s_sd not in donor_tests:
                    in_clade_test = {"p1":significative_donee,"p2":[sister],"p3":donor,"p4":outgroup}
                    in_clade_df_result = data.run_test(in_clade_test, nboots=100, quiet=True)
                    n_tests += 1

                    if in_clade_df_result["Z"].values[0] > zscoreTH:
                        # Compare by name: the list holds strings, so checking the
                        # raw list returned by the helper would never match.
                        in_clade_sd = str(*get_significant_donee(in_clade_df_result, in_clade_test))
                        # ToDo: not stored in tests_performed_dict on purpose,
                        # the registry could otherwise include untested significants
                        if in_clade_sd not in in_clade_significants:
                            in_clade_significants.append(in_clade_sd)
                        if verbose: print("- significative donee found for", s_sd, ": ", in_clade_sd)
                    else:
                        donor_tests[s_sd] = None
                        if verbose: print("- no significative donee found in", s_sd)
                else:
                    # Test already done: reuse the stored result
                    in_clade_sd = donor_tests[s_sd]
                    if verbose: print("- test already performed (skipped) " + s_sd + ". Previous significative donee: ", in_clade_sd)
                    if in_clade_sd and in_clade_sd not in in_clade_significants:
                        in_clade_significants.append(in_clade_sd)
        else:
            # Every sister was already part of the main test.
            # For now this is assumed to be a false positive. ToDo: explore more about it.
            if verbose: print("Shared ancestry or too close related, assumed as false positive")

        ## Decision maker
        if verbose: print("in_clade_significants", in_clade_significants)

        if not in_clade_significants:
            if verbose: print("false")
            n_fp += 1
            continue

        if donee_key not in in_clade_significants:
            if verbose: print("false but others in the clade?")
            n_fp += 1
            continue

        if len(in_clade_significants) > 1:
            if verbose: print("shared true")
            # ToDo: maybe return ancestral node instead of tip in tree. Do some simulations with this scenario
        else:
            if verbose: print("true positive for verification")

        edge = (*donor, *in_clade_significants)

        if not distant:
            # Distant verification disabled: accept the edge once, the same
            # way the distant branch does.
            if edge not in truepositives_network:
                truepositives_network.append(edge)
            continue

        ## Distant test: a positive can still be an artifact when the compared pair
        # is very distant from the donor. Using the donor's sisters as outgroup and
        # the outgroup as p1 maximises that distance; if the donee is still
        # significant the edge is verified, otherwise it is rejected.
        donor_parent_idx = tree.idx_dict[tree.get_mrca_idx_from_tip_labels(donor)].get_ancestors()[0].idx
        sisters_donor = tree.get_tip_labels(donor_parent_idx)
        if donor_key in sisters_donor:
            sisters_donor.remove(donor_key)
        # Remove every in-clade significant donee; unpacking the list into
        # str() raised TypeError as soon as it held more than one name.
        for ele in in_clade_significants:
            if ele in sisters_donor:
                sisters_donor.remove(ele)

        if not sisters_donor:
            # No sisters left to test the distance bias; the edge is not accepted
            if verbose: print("imposible to do distant tests")
            continue

        distant_test = {"p1":outgroup,"p2":in_clade_significants,"p3":donor,"p4":sisters_donor}
        if verbose: print(">>> distant test to verify significant donee:", distant_test, "<<<")

        distant_df_result = data.run_test(distant_test, nboots=100, quiet=True)
        n_tests += 1

        if distant_df_result["Z"].values[0] > zscoreTH:
            distant_sd = get_significant_donee(distant_df_result, distant_test)
            if distant_sd == in_clade_significants:
                if edge not in truepositives_network:
                    truepositives_network.append(edge)
                if verbose: print("true verified", distant_sd)
            else:
                if verbose: print("falsed by distance", distant_sd)
                n_fp += 1
        else:
            if verbose: print("no signficant in this test true positive rejected")
            n_fp += 1

    if verbose: print ("\nNumber of tests performed:" + str(n_tests))
    if verbose: print ("False positives depured:" + str(n_fp))
    return truepositives_network

## run_tests_depuring v5

In [23]:
def _sister_tips(tree, tip_labels):
    """Return the tip labels under the first ancestor of the MRCA of *tip_labels*.

    The MRCA node is looked up through ``tree.get_mrca_idx_from_tip_labels`` and
    ``tree.idx_dict``; the first element of its ``get_ancestors()`` result
    (presumably the parent node -- confirm against the tree library) defines
    the clade whose tips are returned.
    """
    mrca_idx = tree.get_mrca_idx_from_tip_labels(tip_labels)
    first_ancestor = tree.idx_dict[mrca_idx].get_ancestors()[0]
    return tree.get_tip_labels(first_ancestor.idx)


def run_tests_depuring(data, tests, tree, zscoreTH=2.5, distant=True, verbose=False):
    """Run a list of tests and keep only the donor/donee pairs that survive re-testing (v5).

    For every test whose Z score exceeds ``zscoreTH`` the significant donee is
    re-tested against each of its sister tips (shared-ancestry check) and,
    when ``distant`` is true, in an additional test that swaps the outgroup
    and the donor's sisters (distance check).

    Parameters
    ----------
    data
        Object exposing ``run_test(test, nboots=..., quiet=...)``; the result
        is indexed with ``["Z"].values[0]`` (in this notebook an
        ``ipa.baba21`` instance is passed).
    tests
        Iterable of dicts with keys ``"p1"``, ``"p2"``, ``"p3"`` (donor) and
        ``"p4"`` (outgroup), each a list of tip labels. ``"p3"`` must hold
        exactly one label because it is unpacked into ``str(...)``.
    tree
        Tree exposing ``get_tip_labels``, ``get_mrca_idx_from_tip_labels`` and
        ``idx_dict``.
    zscoreTH
        A test counts as significant when its first Z value is greater than
        this threshold.
    distant
        When true, a candidate is only accepted after passing the distance
        test; when false it is accepted right after the sister tests.
    verbose
        Print a trace of every decision.

    Returns
    -------
    list of tuple
        Unique ``(donor, donee, ...)`` tuples accepted as true positives.
    """

    def log(*args):
        # Single place for the optional trace output.
        if verbose:
            print(*args)

    # Results of already performed tests: {donor: {str(sorted donees): donee or None}}
    tests_performed_dict = {}
    truepositives_network = []
    n_tests = 0
    n_fp = 0

    # Do test by test
    for i in tests:
        log("\n*** Testing:", i, "***")
        donor = i["p3"]
        outgroup = i["p4"]
        donees = [*i["p1"], *i["p2"]]
        donor_key = str(*donor)
        donees_key = str(sorted(donees))

        # Every test is stored per donor with its significant donee as value
        donor_results = tests_performed_dict.setdefault(donor_key, {})

        # Skip tests that were already performed for this donor
        if donees_key in donor_results:
            log("test already performed (skipped) previous significative donee: ", donor_results[donees_key])
            continue

        n_tests += 1
        try:
            df_result = data.run_test(i, nboots=100, quiet=True)
        except Exception as err:
            # Best effort: a failing test is skipped, but KeyboardInterrupt and
            # SystemExit are no longer swallowed as they were by a bare except.
            log("test failed (skipped):", err)
            continue

        # No significant result in test
        if not (df_result["Z"].values[0] > zscoreTH):
            log("No significant donee in test")
            donor_results[donees_key] = None
            continue

        # Get if ABBA or BABA is the significant one using D
        significative_donee = get_significant_donee(df_result, i)
        sd_key = str(*significative_donee)
        donor_results[donees_key] = sd_key

        ## Check all the phylo neighborhood to see if the signal is due to shared ancestry

        # Sisters of the significant donee, without donor, outgroup and the donee
        # itself, to avoid donor-donor, donor-outgroup or donee-donee tests
        excluded = (*donor, *outgroup, *significative_donee)
        sisters_significant_donee = [
            tip for tip in _sister_tips(tree, significative_donee) if tip not in excluded
        ]

        log("donor: ", donor)
        log("significative donee: ", significative_donee)

        in_clade_significants = []

        ## Test shared ancestry
        # Nested tests with two fixed elements: the donor and the significant donee.
        # Assumption: if a vs b is significant, that significance is maintained in a
        # test involving a vs b vs b_sister. If it is lost, b is a false positive.
        if sisters_significant_donee:
            log("--- Testing against sisters: ", sisters_significant_donee, "---")

            for sister in sisters_significant_donee:
                s_sd = str(sorted([sister, *significative_donee]))

                # If the test was already done, just reuse its result
                if s_sd in donor_results:
                    in_clade_sd = donor_results[s_sd]
                    log("- test already performed (skipped) " + s_sd + ". Previous significative donee: ", in_clade_sd)
                    if in_clade_sd and in_clade_sd not in in_clade_significants:
                        in_clade_significants.append(in_clade_sd)
                    continue

                in_clade_test = {"p1": significative_donee, "p2": [sister], "p3": donor, "p4": outgroup}

                n_tests += 1
                try:
                    in_clade_df_result = data.run_test(in_clade_test, nboots=100, quiet=True)
                except Exception as err:
                    log("test failed (skipped):", err)
                    continue

                if in_clade_df_result["Z"].values[0] > zscoreTH:
                    in_clade_sd = get_significant_donee(in_clade_df_result, in_clade_test)
                    in_clade_key = str(*in_clade_sd)
                    # ToDo: the result is not stored in donor_results because
                    # that registry could then include untested significants.

                    # Compare the label, not the list returned above: comparing the
                    # list against a list of strings never matched and appended the
                    # same donee once per sister.
                    if in_clade_key not in in_clade_significants:
                        in_clade_significants.append(in_clade_key)

                    log("- significative donee found for", s_sd, ": ", in_clade_key)
                else:
                    donor_results[s_sd] = None
                    log("- no significative donee found in", s_sd)
        else:
            # No sisters left in the clade because all of them were in the main test.
            # For now it is assumed to be a false positive. ToDo: explore more about it.
            log("Shared ancestry or too close related, assumed as false positive")

        ## Decision maker, based on the in-clade significant donees found above
        log("in_clade_significants", in_clade_significants)

        if not in_clade_significants:
            log("false")
            n_fp += 1
            continue

        if sd_key not in in_clade_significants:
            log("false but others in the clade?")
            n_fp += 1
            continue

        if len(in_clade_significants) > 1:
            # ToDo: maybe return the ancestral node instead of the tips.
            log("shared true")
        else:
            log("true positive for verification")

        edge = (*donor, *in_clade_significants)

        if not distant:
            # Same duplicate guard as the verified branch below.
            if edge not in truepositives_network:
                truepositives_network.append(edge)
            continue

        ## Distance test: a positive may still be an artifact when the compared pair
        # is very distant from the donor. Using the donor's sisters as outgroup and
        # the outgroup as pair induces a maximal distance distortion: if the donee
        # is still significant the positive is verified, otherwise it is rejected.
        sisters_donor = [
            tip
            for tip in _sister_tips(tree, donor)
            # Filter every in-clade donee; str(*in_clade_significants) raised
            # TypeError as soon as more than one donee was present.
            if tip != donor_key and tip not in in_clade_significants
        ]

        # When there are no sisters left the distance bias cannot be tested
        if not sisters_donor:
            log("imposible to do distant tests")
            continue

        distant_test = {"p1": outgroup, "p2": in_clade_significants, "p3": donor, "p4": sisters_donor}
        log(">>> distant test to verify significant donee:", distant_test, "<<<")

        n_tests += 1
        try:
            distant_df_result = data.run_test(distant_test, nboots=100, quiet=True)
        except Exception as err:
            log("test failed (skipped):", err)
            continue

        if distant_df_result["Z"].values[0] > zscoreTH:
            distant_sd = get_significant_donee(distant_df_result, distant_test)
            if distant_sd == in_clade_significants:
                if edge not in truepositives_network:
                    truepositives_network.append(edge)
                log("true verified", distant_sd)
            else:
                log("falsed by distance", distant_sd)
                n_fp += 1
        else:
            log("no signficant in this test true positive rejected")
            n_fp += 1

    log("\nNumber of tests performed:" + str(n_tests))
    log("False positives depured:" + str(n_fp))
    return truepositives_network

In [24]:
# Generate a balanced tree with 6 tips and a tree height of 10e6
tree1 = toytree.rtree.baltree(ntips=6, treeheight=10e6)

# Draw the tree with an admixture edge; (2, 3) are presumably the indices of
# the two nodes joined by the edge -- confirm against the toytree docs
tree1.draw(ts='p', admixture_edges=(2, 3));

In [25]:
# Init a baba tool from the SNPs database file; this object is the `data`
# argument passed to run_tests_depuring in the cells below
baba = ipa.baba21("test-baba-miss50.snps.hdf5")

In [26]:
# Multiple-donor test, all vs. all: every tip of tree1 is offered both as a
# source (donor) and as a target, with "r5" as the outgroup
source = tree1.get_tip_labels()
targets = tree1.get_tip_labels()
outgroup = "r5"

# no_repeat=False: targets are not marked as used, so the same tip can appear
# in more than one p1/p2 pair for a given source
tests2 = generate_tests_from_names(source, targets, outgroup, no_repeat=False)
tests2

[{'p1': ['r1'], 'p2': ['r2'], 'p3': ['r0'], 'p4': ['r5']},
 {'p1': ['r2'], 'p2': ['r3'], 'p3': ['r0'], 'p4': ['r5']},
 {'p1': ['r3'], 'p2': ['r4'], 'p3': ['r0'], 'p4': ['r5']},
 {'p1': ['r4'], 'p2': ['r1'], 'p3': ['r0'], 'p4': ['r5']},
 {'p1': ['r0'], 'p2': ['r2'], 'p3': ['r1'], 'p4': ['r5']},
 {'p1': ['r2'], 'p2': ['r3'], 'p3': ['r1'], 'p4': ['r5']},
 {'p1': ['r3'], 'p2': ['r4'], 'p3': ['r1'], 'p4': ['r5']},
 {'p1': ['r4'], 'p2': ['r3'], 'p3': ['r1'], 'p4': ['r5']},
 {'p1': ['r0'], 'p2': ['r1'], 'p3': ['r2'], 'p4': ['r5']},
 {'p1': ['r1'], 'p2': ['r3'], 'p3': ['r2'], 'p4': ['r5']},
 {'p1': ['r3'], 'p2': ['r4'], 'p3': ['r2'], 'p4': ['r5']},
 {'p1': ['r4'], 'p2': ['r0'], 'p3': ['r2'], 'p4': ['r5']},
 {'p1': ['r0'], 'p2': ['r1'], 'p3': ['r3'], 'p4': ['r5']},
 {'p1': ['r1'], 'p2': ['r2'], 'p3': ['r3'], 'p4': ['r5']},
 {'p1': ['r2'], 'p2': ['r4'], 'p3': ['r3'], 'p4': ['r5']},
 {'p1': ['r4'], 'p2': ['r1'], 'p3': ['r3'], 'p4': ['r5']},
 {'p1': ['r0'], 'p2': ['r1'], 'p3': ['r4'], 'p4': ['r5']

In [27]:
# Run the tests with the default settings (zscoreTH=2.5, distant=True)
run_tests_depuring(baba, tests2, tree1, verbose=False)

[('r2', 'r3'), ('r3', 'r2')]

In [28]:
# Same run with the distance test disabled (distant=False), for comparison
run_tests_depuring(baba, tests2, tree1, verbose=False, distant=False)

[('r2', 'r3'), ('r3', 'r2'), ('r4', 'r2')]