# Creating and Testing Simulations

In [1]:
import toytree
import ipcoal

## Random tree

In [2]:
tree = toytree.rtree.imbtree(ntips=10, treeheight=1e5)
tree.draw(ts='p')

(<toyplot.canvas.Canvas at 0x7ff1dfcfa520>,
 <toyplot.coordinates.Cartesian at 0x7ff1dfcfa550>,
 <toytree.Render.ToytreeMark at 0x7ff1dfd15460>)

## Simulating Introgression

In [3]:
tree.draw(ts='p', admixture_edges=[(3,8)])

(<toyplot.canvas.Canvas at 0x7ff1dfd159a0>,
 <toyplot.coordinates.Cartesian at 0x7ff1dfd155e0>,
 <toytree.Render.ToytreeMark at 0x7ff1dfd51340>)

In [4]:
mod_introgress =  ipcoal.Model(tree=tree, Ne=1e5, admixture_edges=[(3, 8, 0.5, 0.5)], nsamples=1)
mod_introgress.sim_loci(nloci=1, nsites=100000) #1 haploid chromosome
genos_introgress=mod_introgress.write_vcf().iloc[:, 9:].T #i need a matrix of 1's and 0's
genos_introgress

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1915,1916,1917,1918,1919,1920,1921,1922,1923,1924
r0,0,0,0,1,1,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
r1,0,0,1,1,1,0,0,1,1,0,...,0,0,0,0,0,0,0,1,0,0
r2,0,0,1,1,1,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
r3,0,0,1,1,1,1,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
r4,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
r5,0,0,0,1,1,0,0,0,0,0,...,0,0,0,1,0,1,0,0,1,0
r6,0,0,0,1,1,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
r7,1,1,0,0,0,0,1,0,0,0,...,0,1,0,0,1,0,0,0,0,0
r8,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
r9,0,0,0,1,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,1


## Simulating High ILS

I want to include high ILS as an option for null comparison

In [7]:
mod_highILS =  ipcoal.Model(tree=tree, Ne=1e8, nsamples=1)
mod_highILS.sim_loci(nloci=1, nsites=1000) #1 haploid chromosome
genos_highILS=mod_highILS.write_vcf()
data_highILS = genos_highILS.iloc[:, 9:].T
genos_highILS

#getting 2's and 3's

Unnamed: 0,CHROM,POS,ID,REF,ALT,QUAL,FILTER,INFO,FORMAT,r0,r1,r2,r3,r4,r5,r6,r7,r8,r9
0,0,1,.,T,"C,G",99,PASS,.,GT,1,0,0,0,0,1,1,1,2,1
1,0,2,.,T,"A,C,G",99,PASS,.,GT,3,1,1,3,2,2,1,3,3,2
2,0,3,.,G,"A,C",99,PASS,.,GT,2,1,1,2,0,0,2,2,2,2
3,0,4,.,G,"A,C",99,PASS,.,GT,2,2,2,1,0,2,0,2,1,2
4,0,5,.,A,"C,G,T",99,PASS,.,GT,3,0,0,3,2,2,0,3,3,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
991,0,996,.,G,"A,C,T",99,PASS,.,GT,1,0,0,3,1,3,1,2,2,3
992,0,997,.,A,"C,G,T",99,PASS,.,GT,1,1,1,3,1,2,1,2,0,3
993,0,998,.,A,"C,T",99,PASS,.,GT,1,2,2,0,0,2,0,1,0,0
994,0,999,.,A,"C,G,T",99,PASS,.,GT,3,1,1,0,0,3,1,0,0,2


In [8]:
mod_highILS.df

Unnamed: 0,locus,start,end,nbps,nsnps,tidx,genealogy
0,0,0,1,1,1,0,"((r4:4.96439e+07,r6:4.96..."
1,0,1,5,4,4,1,"((r4:4.96439e+07,r6:4.96..."
2,0,5,7,2,2,2,"((r4:4.96439e+07,r6:4.96..."
3,0,7,14,7,7,3,"((r4:4.96439e+07,r6:4.96..."
4,0,14,19,5,5,4,"((r4:4.96439e+07,r6:4.96..."
...,...,...,...,...,...,...,...
574,0,993,995,2,2,574,"((r5:1.60541e+08,r8:1.60..."
575,0,995,996,1,1,575,"((((r1:314172,r2:314172)..."
576,0,996,997,1,1,576,"((((r1:314172,r2:314172)..."
577,0,997,999,2,2,577,"(r6:3.23338e+08,((r5:1.6..."


In [10]:
# a dictionary of arguments to style the drawings
kwargs = {
    "ts": "c",
    "tip_labels": True,
    "shared_axis": True,
    "width": 600,
    "height": 200,
    "node_sizes": 6,
}
toytree.mtree(mod_highILS.df.genealogy).draw()

(<toyplot.canvas.Canvas at 0x7ff1e592b2e0>,
 [<toyplot.coordinates.Cartesian at 0x7ff1e592b280>,
  <toyplot.coordinates.Cartesian at 0x7ff1e592bd60>,
  <toyplot.coordinates.Cartesian at 0x7ff1e59306d0>,
  <toyplot.coordinates.Cartesian at 0x7ff1e5933040>],
 [<toytree.Render.ToytreeMark at 0x7ff1e5933970>,
  <toytree.Render.ToytreeMark at 0x7ff1e5933a30>,
  <toytree.Render.ToytreeMark at 0x7ff1e5933a60>,
  <toytree.Render.ToytreeMark at 0x7ff1e5933a90>])

## Slow mutation rate

In [23]:
mod_slowmut =  ipcoal.Model(tree=tree, Ne=1e6, nsamples=1)
mod_slowmut.sim_loci(nloci=1, nsites=100) #1 haploid chromosome
genos_slowmut=mod_slowmut.write_vcf().iloc[:, 9:].T
genos_slowmut

#still getting 2's...sim_loci probably isn't right for what I want

Unnamed: 0,0,1,2,3,4
r0,0,0,0,1,1
r1,0,0,0,1,1
r2,0,0,0,1,1
r3,1,1,1,0,0
r4,0,0,0,1,1
r5,0,0,0,1,0
r6,1,1,1,0,0
r7,0,0,0,1,0
r8,1,1,1,0,0
r9,0,0,0,1,1


## Trying to simulate SNPs

In [24]:
model = ipcoal.Model(tree, Ne=1e5, nsamples=1) #initialize model

In [27]:
model.sim_snps(nsnps=100) #simulate snps

In [29]:
model.df #look at dataframe

Unnamed: 0,locus,start,end,nbps,nsnps,tidx,genealogy
0,0,0,1,1,1,0,"((r7:170632,(r8:158097,(..."
1,1,0,1,1,1,0,"((r6:83180.6,(r0:49943.6..."
2,2,0,1,1,1,0,"(r7:324046,((r8:148604,(..."
3,3,0,1,1,1,0,"((r7:237297,(r1:99571.5,..."
4,4,0,1,1,1,0,"((r9:127230,(r7:88196.8,..."
...,...,...,...,...,...,...,...
95,95,0,1,1,1,0,"((r8:186675,(r7:89779.1,..."
96,96,0,1,1,1,0,"(r9:264550,((r6:88373.9,..."
97,97,0,1,1,1,0,"((r2:49652.4,(r0:17336.4..."
98,98,0,1,1,1,0,"((r6:132784,(r8:99237.3,..."


In [30]:
kwargs = {
    "ts": "c",
    "tip_labels": True,
    "width": 600,
    "height": 200,
    "node_sizes": 6,
}
toytree.mtree(model.df.genealogy).draw(**kwargs); #draw genealogies

In [64]:
snps=model.write_vcf()
snp_data = snps.iloc[:, 9:].T
snps

Unnamed: 0,CHROM,POS,ID,REF,ALT,QUAL,FILTER,INFO,FORMAT,r0,r1,r2,r3,r4,r5,r6,r7,r8,r9
0,0,1,.,C,T,99,PASS,.,GT,0,0,0,1,0,0,0,0,0,0
1,1,1,.,T,A,99,PASS,.,GT,0,0,0,1,0,0,0,1,0,1
2,2,1,.,C,A,99,PASS,.,GT,0,0,0,0,0,1,0,0,1,0
3,3,1,.,A,G,99,PASS,.,GT,0,0,1,0,0,0,0,0,0,0
4,4,1,.,C,T,99,PASS,.,GT,0,0,0,0,1,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,95,1,.,A,G,99,PASS,.,GT,0,0,0,0,0,0,0,1,0,0
96,96,1,.,A,G,99,PASS,.,GT,0,0,0,0,0,0,0,0,1,0
97,97,1,.,T,G,99,PASS,.,GT,0,0,0,0,0,0,0,1,0,0
98,98,1,.,T,A,99,PASS,.,GT,0,0,1,0,0,0,0,0,1,0


In [66]:
snp_data.to_csv()

',0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99\nr0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,1,1,0,1,0,1,1,1,1,0,0,0,0,0,0,0,1,0,1,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0\nr1,0,0,0,0,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,1,0,0,0,1,0,1,1,0,0,0,0,1,0,1,0,0,1,1,0,0,0,0,1,0,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0\nr2,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,1,0,1,1,1,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,1\nr3,1,1,0,0,0,0,1,0,0,0,1,0,0,1,0,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,0,0,1,1,1,0,0,0,0,0,0,1,0,0,1,

In [48]:
model.df

Unnamed: 0,locus,start,end,nbps,nsnps,tidx,genealogy
0,0,0,1,1,1,0,"((r6:130956,(r7:130407,(..."
1,1,0,1,1,1,0,"((r4:125522,(r8:103182,(..."
2,2,0,1,1,1,0,"(r9:356573,(r1:210923,(r..."
3,3,0,1,1,1,0,"((r9:118795,(r0:96411,(r..."
4,4,0,1,1,1,0,"(((r4:62954,(r0:44648.2,..."
...,...,...,...,...,...,...,...
95,95,0,1,1,1,0,"((r9:148718,(r0:97078,r8..."
96,96,0,1,1,1,0,"(r8:968471,((r5:131232,(..."
97,97,0,1,1,1,0,"(r7:265789,(((r0:86569.4..."
98,98,0,1,1,1,0,"((r6:103972,(r8:98686.5,..."


## Simulating SNPs in the Presence of Introgression

I want to simulate an admixture event close to recent between divergent lineages (mimic horizontal gene transfer). I want a simulated dataset that I can run through Hogtie as a test.

In [34]:
hgt_tree = toytree.rtree.imbtree(ntips=10, treeheight=1e10)
tree.draw(ts='p', admixture_edges=(2,8))

(<toyplot.canvas.Canvas at 0x7ff1e60fe6a0>,
 <toyplot.coordinates.Cartesian at 0x7ff1e60fdac0>,
 <toytree.Render.ToytreeMark at 0x7ff1e67709d0>)

In [35]:
snp_introgression_model =  ipcoal.Model(tree=tree, Ne=1e8, admixture_edges=[(2, 8, 0.6, 0.9)], nsamples=1)

In [40]:
snp_introgression_model.sim_snps(nsnps=100, repeat_on_trees=1)

In [41]:
snp_introgression_model.df

Unnamed: 0,locus,start,end,nbps,nsnps,tidx,genealogy
0,0,0,1,1,1,0,"((r0:1.14091e+07,(r1:1.8..."
1,1,0,1,1,1,0,"(r4:2.4852e+08,(r9:1.017..."
2,2,0,1,1,1,0,"((r7:8.84027e+07,(r0:7.6..."
3,3,0,1,1,1,0,"((r6:2.00546e+07,r9:2.00..."
4,4,0,1,1,1,0,"((r6:7.93512e+07,(r0:2.4..."
...,...,...,...,...,...,...,...
95,95,0,1,1,1,0,"((r0:1.2943e+08,((r5:2.0..."
96,96,0,1,1,1,0,"(r2:1.32251e+08,((r1:1.7..."
97,97,0,1,1,1,0,"(r7:7.24663e+08,((r4:1.3..."
98,98,0,1,1,1,0,"(((r3:7.45509e+06,r5:7.4..."


In [47]:
data = snp_introgression_model.write_vcf().iloc[:, 9:].T
data

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,90,91,92,93,94,95,96,97,98,99
r0,1,2,0,0,2,3,3,2,2,1,...,3,1,2,1,3,2,0,3,2,1
r1,1,2,0,2,0,1,0,1,3,2,...,1,2,0,0,3,1,0,1,1,1
r2,1,2,2,1,1,0,3,3,1,3,...,1,1,0,2,3,1,0,1,3,1
r3,0,2,1,1,2,3,3,2,1,2,...,0,2,0,2,1,1,0,3,1,1
r4,0,0,1,3,0,2,0,2,1,2,...,2,3,1,1,3,1,0,0,1,2
r5,0,2,0,0,2,0,2,0,2,3,...,3,2,0,1,3,1,1,1,1,1
r6,0,1,1,2,1,2,3,2,1,0,...,1,2,0,2,1,1,0,0,1,1
r7,0,2,0,1,0,2,1,3,1,1,...,2,2,1,2,0,1,0,1,1,1
r8,0,2,2,0,0,2,0,0,2,1,...,0,3,2,0,2,2,0,1,1,1
r9,0,0,0,3,0,3,2,0,1,2,...,2,2,0,0,3,1,0,2,1,2


In [50]:
for index, row in data.iterrows():
    if index[row] != 0:
        index[row].str.replace(index[row], '1')

TypeError: string indices must be integers