# Running Hogtie Using different genealogies

In [1]:
import ipcoal
import toytree
import os
import numpy as np
import pandas as pd

In [2]:
#get a random tree
tree = toytree.rtree.imbtree(ntips=10, treeheight=1e5)
tree.draw(ts='p')

(<toyplot.canvas.Canvas at 0x7fbc62b54070>,
 <toyplot.coordinates.Cartesian at 0x7fbc664b25e0>,
 <toytree.Render.ToytreeMark at 0x7fbc664d54c0>)

In [3]:
#create model
mod_introgress =  ipcoal.Model(tree=tree, Ne=1e5, admixture_edges=[(3, 8, 0.5, 0.5)], nsamples=1)
mod_introgress.sim_loci(nloci=1, nsites=1000) #1 haploid chromosome
genos_introgress=mod_introgress.write_vcf() #i need a matrix of 1's and 0's
genos_introgress

Unnamed: 0,CHROM,POS,ID,REF,ALT,QUAL,FILTER,INFO,FORMAT,r0,r1,r2,r3,r4,r5,r6,r7,r8,r9
0,0,24,.,C,A,99,PASS,.,GT,0,0,0,0,0,0,0,1,0,0
1,0,48,.,G,C,99,PASS,.,GT,0,0,0,1,0,0,0,0,0,0
2,0,101,.,A,T,99,PASS,.,GT,0,0,0,1,0,0,0,0,0,0
3,0,127,.,T,C,99,PASS,.,GT,0,0,1,0,0,0,0,0,0,0
4,0,171,.,C,T,99,PASS,.,GT,0,0,0,1,0,0,0,0,0,0
5,0,199,.,G,T,99,PASS,.,GT,1,0,0,0,0,0,0,1,1,0
6,0,292,.,A,T,99,PASS,.,GT,0,0,0,0,0,0,1,0,0,0
7,0,391,.,G,T,99,PASS,.,GT,0,0,0,1,0,0,0,0,0,0
8,0,433,.,C,A,99,PASS,.,GT,0,1,0,0,1,0,0,0,0,0
9,0,452,.,A,C,99,PASS,.,GT,0,0,0,0,0,0,0,1,0,0


In [128]:
mod_introgress.df.iloc[1, 2]

1000

In [135]:
mod_snps =  ipcoal.Model(tree=tree, Ne=1e5, admixture_edges=[(3, 8, 0.5, 0.5)], nsamples=1)
mod_snps.sim_snps(nsnps=10)
mod_snps.write_vcf()

Unnamed: 0,CHROM,POS,ID,REF,ALT,QUAL,FILTER,INFO,FORMAT,r0,r1,r2,r3,r4,r5,r6,r7,r8,r9
0,0,1,.,G,T,99,PASS,.,GT,0,0,0,0,0,0,0,1,0,0
1,1,1,.,C,G,99,PASS,.,GT,0,0,0,0,0,1,1,0,1,1
2,2,1,.,T,A,99,PASS,.,GT,0,0,0,0,0,1,0,0,0,0
3,3,1,.,C,T,99,PASS,.,GT,1,0,0,0,1,0,0,0,0,1
4,4,1,.,T,A,99,PASS,.,GT,1,1,1,0,1,1,1,0,0,0
5,5,1,.,A,G,99,PASS,.,GT,0,0,0,1,0,0,0,0,0,0
6,6,1,.,A,G,99,PASS,.,GT,0,0,0,1,0,0,0,0,0,0
7,7,1,.,T,C,99,PASS,.,GT,1,0,0,0,0,0,0,0,0,0
8,8,1,.,G,C,99,PASS,.,GT,0,0,0,0,0,0,0,0,0,1
9,9,1,.,G,T,99,PASS,.,GT,0,0,0,0,0,0,0,0,0,1


In [136]:
mod_snps.df

Unnamed: 0,locus,start,end,nbps,nsnps,tidx,genealogy
0,0,0,1,1,1,0,"(r0:692999,((r6:103284,(..."
1,1,0,1,1,1,0,"((r1:186833,(r4:116533,(..."
2,2,0,1,1,1,0,"((r1:97171.6,r7:97171.6)..."
3,3,0,1,1,1,0,"((r9:102859,(r0:58635.4,..."
4,4,0,1,1,1,0,"((r7:137893,(r9:124499,(..."
5,5,0,1,1,1,0,"((r1:122382,r9:122382):7..."
6,6,0,1,1,1,0,"(r5:343703,(((r3:38845.3..."
7,7,0,1,1,1,0,"((r3:111560,(r5:93424.1,..."
8,8,0,1,1,1,0,"(((r3:70053.8,r8:70053.8..."
9,9,0,1,1,1,0,"((r5:126759,((r1:52555.2..."


In [3]:
#works for simulated loci, not snps
HOGTIEDIR = os.path.dirname(os.getcwd())
def genealogy_try2(model):
    """
    model must be an ipcoal model object
    objective: make a unique dataframe for each genealogy with the sites that follow
    that genealogy
    
    TO DO: concatenate the reordered dataframes into one big dataframe that can be run through MatrixParser
    """
    vcf = model.write_vcf()
    
    dataframe = 0
    for idx in model.df.index:
        dataframe += 1
        start = model.df.iloc[idx, 1]
        end = model.df.iloc[idx ,2]
        gen = toytree.tree(model.df.iloc[idx, 6], tree_format=0)
        
        count = 0
        df = pd.DataFrame()
        for row in vcf.index:
            count += 1
            if start < vcf.iloc[row, 1] < end:
                df[f'{count}'] = vcf.iloc[row, 9:]
        df = df.reindex(gen.get_tip_labels())
        file = os.path.join(HOGTIEDIR, "sampledata", f"genealogy{dataframe}.csv")
        df.to_csv(file)

In [137]:
genealogy_try2(mod_introgress)

0
308
    1  2  3  4  5  6  7
r8  0  0  0  0  0  1  0
r7  1  0  0  0  0  1  0
r0  0  0  0  0  0  1  0
r2  0  0  0  1  0  0  0
r4  0  0  0  0  0  0  0
r1  0  0  0  0  0  0  0
r5  0  0  0  0  0  0  0
r6  0  0  0  0  0  0  1
r9  0  0  0  0  0  0  0
r3  0  1  1  0  1  0  0
308
1000
    8  9 10 11 12 13 14 15 16 17 18
r4  0  1  0  0  0  0  0  0  1  0  1
r1  0  1  0  0  0  0  0  0  1  0  0
r5  0  0  0  0  0  0  0  0  1  0  0
r6  0  0  0  0  0  0  0  1  1  0  0
r9  0  0  0  0  0  0  1  0  1  0  0
r8  0  0  0  1  0  0  0  0  0  0  0
r7  0  0  1  0  0  0  0  0  0  1  0
r0  0  0  0  0  0  0  0  0  0  0  0
r2  0  0  0  0  0  1  0  0  0  0  0
r3  1  0  0  0  1  0  0  0  0  0  0


In [79]:
test.reorder()
test.df

Unnamed: 0,1,2,3,4,5,6,7
r0,0,0,0,0,0,1,0
r1,0,0,0,0,0,0,0
r2,0,0,0,1,0,0,0
r3,0,1,1,0,1,0,0
r4,0,0,0,0,0,0,0
r5,0,0,0,0,0,0,0
r6,0,0,0,0,0,0,1
r7,1,0,0,0,0,1,0
r8,0,0,0,0,0,1,0
r9,0,0,0,0,0,0,0


In [70]:
gen = toytree.tree(mod_introgress.df.iloc[0, 6], tree_format=0)
print(gen.get_tip_labels())
data.reindex(gen.get_tip_labels())

['r8', 'r7', 'r0', 'r2', 'r4', 'r1', 'r5', 'r6', 'r9', 'r3']


Unnamed: 0,1,2,3,4,5,6,7
r8,0,0,0,0,0,1,0
r7,1,0,0,0,0,1,0
r0,0,0,0,0,0,1,0
r2,0,0,0,1,0,0,0
r4,0,0,0,0,0,0,0
r1,0,0,0,0,0,0,0
r5,0,0,0,0,0,0,0
r6,0,0,0,0,0,0,1
r9,0,0,0,0,0,0,0
r3,0,1,1,0,1,0,0


In [30]:
HOGTIEDIR = os.path.dirname(os.getcwd())
def genealogy(model):
    
    vcf = model.write_vcf()
    matrix = vcf.iloc[:, 9:]
    position = vcf["POS"]
    
    count = 0
    for row in model.df.index:
        count += 1
        print (row)
        start = df.iloc[row, 1]
        print(start)
        end = df.iloc[row, 2]
        print(end)
        newick = mod_introgress.df.iloc[row, 6]
        gen = toytree.tree(newick, tree_format=0)

        for idx in vcf.index:
            #trait = np.empty((len(gen.get_tip_labels()), 0),float)
            if start < position[idx] < end:
                values = list(matrix.iloc[idx, :])
                df = pd.DataFrame()
                df[f'gen{count}'] = values
        
        print (df)        
        matrix.iloc[start:end,:].T.reindex(gen.get_tip_labels())
        count+=1
        file = os.path.join(HOGTIEDIR, "sampledata", f"genealogy{count}.csv")
        new_matrix.to_csv(file)

In [31]:
genealogy(mod_introgress)

0
0
308
   gen1
0     0
1     0
2     0
3     0
4     0
5     0
6     1
7     0
8     0
9     0
1


IndexError: single positional indexer is out-of-bounds