# Creating and Testing Simulations

In [1]:
import toytree
import ipcoal

## Random tree

In [22]:
tree = toytree.rtree.imbtree(ntips=10, treeheight=1e5)
tree.draw(ts='p')

(<toyplot.canvas.Canvas at 0x7ff557493eb0>,
 <toyplot.coordinates.Cartesian at 0x7ff557fb1280>,
 <toytree.Render.ToytreeMark at 0x7ff553dcaee0>)

## Simulating Introgression

In [23]:
tree.draw(ts='p', admixture_edges=[(3,8)])

(<toyplot.canvas.Canvas at 0x7ff5555589a0>,
 <toyplot.coordinates.Cartesian at 0x7ff5587ddf40>,
 <toytree.Render.ToytreeMark at 0x7ff5584073d0>)

In [24]:
mod_introgress =  ipcoal.Model(tree=tree, Ne=1e5, admixture_edges=[(3, 8, 0.5, 0.5)], nsamples=1)
mod_introgress.sim_loci(nloci=1, nsites=100000) #1 haploid chromosome
genos_introgress=mod_introgress.write_vcf().iloc[:, 9:].T #i need a matrix of 1's and 0's
genos_introgress

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1581,1582,1583,1584,1585,1586,1587,1588,1589,1590
r0,0,0,1,0,0,0,1,0,0,1,...,0,0,0,0,0,1,0,0,0,0
r1,0,0,1,0,0,0,1,0,0,0,...,0,1,1,1,0,0,0,1,0,0
r2,1,1,0,0,0,1,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
r3,0,0,1,0,0,0,1,0,0,0,...,0,0,0,0,0,0,1,0,0,1
r4,1,1,0,0,0,1,0,0,0,0,...,0,0,0,0,1,0,1,0,0,1
r5,0,0,1,1,0,0,1,0,1,0,...,0,0,0,0,0,0,1,0,0,1
r6,1,1,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
r7,0,0,1,0,0,0,1,0,0,0,...,1,0,0,0,0,0,0,0,0,0
r8,0,0,1,0,1,0,1,1,0,0,...,0,0,0,0,0,0,0,0,1,0
r9,1,1,0,0,0,1,0,0,2,0,...,0,0,0,0,0,0,1,0,0,0


## Simulating High ILS

I want to include high ILS as an option for null comparison

In [25]:
mod_highILS =  ipcoal.Model(tree=tree, Ne=1e8, nsamples=1)
mod_highILS.sim_loci(nloci=1, nsites=1000) #1 haploid chromosome
genos_highILS=mod_highILS.write_vcf()
data_highILS = genos_highILS
genos_highILS

#getting 2's and 3's

Unnamed: 0,CHROM,POS,ID,REF,ALT,QUAL,FILTER,INFO,FORMAT,r0,r1,r2,r3,r4,r5,r6,r7,r8,r9
0,0,1,.,G,"A,C,T",99,PASS,.,GT,0,3,1,0,3,1,1,1,2,3
1,0,2,.,G,"A,C,T",99,PASS,.,GT,2,0,1,2,0,2,1,1,3,0
2,0,3,.,T,"A,C,G",99,PASS,.,GT,3,1,1,3,2,1,0,0,0,2
3,0,4,.,T,"C,G",99,PASS,.,GT,2,2,0,2,0,0,1,1,0,0
4,0,5,.,T,"A,C",99,PASS,.,GT,0,2,0,2,2,1,2,0,1,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
982,0,996,.,A,G,99,PASS,.,GT,1,0,1,0,0,0,0,0,0,0
983,0,997,.,C,"A,G",99,PASS,.,GT,2,1,2,0,0,2,1,0,0,2
984,0,998,.,G,"A,C,T",99,PASS,.,GT,1,2,1,1,1,3,1,1,0,2
985,0,999,.,T,"A,C,G",99,PASS,.,GT,1,0,1,1,2,2,1,2,3,3


In [26]:
mod_highILS.df

Unnamed: 0,locus,start,end,nbps,nsnps,tidx,genealogy
0,0,0,1,1,1,0,"(r7:1.92172e+08,((r9:1.8..."
1,0,1,2,1,1,1,"(r7:1.58873e+08,((r9:1.8..."
2,0,2,4,2,2,2,"(r7:1.58873e+08,((r5:3.5..."
3,0,4,5,1,1,3,"(r7:2.40038e+08,((r5:3.5..."
4,0,5,6,1,1,4,"(r7:2.22232e+08,((r5:3.5..."
...,...,...,...,...,...,...,...
594,0,990,991,1,1,594,"(((r0:1.23393e+06,r2:1.2..."
595,0,991,993,2,2,595,"(((r0:1.23393e+06,r2:1.2..."
596,0,993,998,5,5,596,"(((r0:1.23393e+06,r2:1.2..."
597,0,998,999,1,1,597,"(((r0:1.23393e+06,r2:1.2..."


In [27]:
# a dictionary of arguments to style the drawings
kwargs = {
    "ts": "c",
    "tip_labels": True,
    "shared_axis": True,
    "width": 600,
    "height": 200,
    "node_sizes": 6,
}
toytree.mtree(mod_highILS.df.genealogy).draw()

(<toyplot.canvas.Canvas at 0x7ff55933b400>,
 [<toyplot.coordinates.Cartesian at 0x7ff55933b3a0>,
  <toyplot.coordinates.Cartesian at 0x7ff55933be80>,
  <toyplot.coordinates.Cartesian at 0x7ff5593487f0>,
  <toyplot.coordinates.Cartesian at 0x7ff55934c160>],
 [<toytree.Render.ToytreeMark at 0x7ff55934ca90>,
  <toytree.Render.ToytreeMark at 0x7ff55934cb50>,
  <toytree.Render.ToytreeMark at 0x7ff55934cb80>,
  <toytree.Render.ToytreeMark at 0x7ff55934cbb0>])

## Slow mutation rate

In [28]:
mod_slowmut =  ipcoal.Model(tree=tree, Ne=1e6, nsamples=1)
mod_slowmut.sim_loci(nloci=1, nsites=100) #1 haploid chromosome
genos_slowmut=mod_slowmut.write_vcf().iloc[:, 9:].T
genos_slowmut

#still getting 2's...sim_loci probably isn't right for what I want

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
r0,0,0,0,0,1,0,0,0,0,0
r1,0,0,0,0,1,0,0,0,0,0
r2,1,0,1,0,0,1,0,0,0,0
r3,0,0,0,0,0,0,1,1,0,0
r4,0,0,1,0,0,1,0,0,0,0
r5,0,1,1,0,0,1,0,0,0,0
r6,0,0,0,0,0,0,0,0,1,0
r7,0,0,1,0,0,1,0,0,0,1
r8,0,0,1,1,0,1,0,0,0,1
r9,1,0,1,0,0,1,0,0,0,1


## Trying to simulate SNPs

In [31]:
model = ipcoal.Model(tree, Ne=1e5, nsamples=1) #initialize model

In [32]:
model.sim_snps(nsnps=100) #simulate snps

In [33]:
model.df #look at dataframe

Unnamed: 0,locus,start,end,nbps,nsnps,tidx,genealogy
0,0,0,1,1,1,0,"(r6:367457,(r8:171024,((..."
1,1,0,1,1,1,0,"((r5:116213,(r8:113972,r..."
2,2,0,1,1,1,0,"(((r0:55561.9,r5:55561.9..."
3,3,0,1,1,1,0,"((r1:176125,(r9:113921,(..."
4,4,0,1,1,1,0,"((r3:141210,(r8:123326,(..."
...,...,...,...,...,...,...,...
95,95,0,1,1,1,0,"((r6:93426,(r2:55068.3,r..."
96,96,0,1,1,1,0,"(((r6:126447,(r4:107894,..."
97,97,0,1,1,1,0,"(r7:190087,((r0:76920.3,..."
98,98,0,1,1,1,0,"((r9:138946,(r3:105167,(..."


In [34]:
kwargs = {
    "ts": "c",
    "tip_labels": True,
    "width": 600,
    "height": 200,
    "node_sizes": 6,
}
toytree.mtree(model.df.genealogy).draw(**kwargs); #draw genealogies

In [35]:
snps=model.write_vcf()
snp_data = snps.iloc[:, 9:].T
snps

Unnamed: 0,CHROM,POS,ID,REF,ALT,QUAL,FILTER,INFO,FORMAT,r0,r1,r2,r3,r4,r5,r6,r7,r8,r9
0,0,1,.,C,A,99,PASS,.,GT,0,0,1,0,0,0,0,0,0,0
1,1,1,.,G,T,99,PASS,.,GT,0,0,0,0,0,0,0,0,0,1
2,2,1,.,C,A,99,PASS,.,GT,1,1,0,0,0,1,1,0,1,0
3,3,1,.,T,A,99,PASS,.,GT,1,0,0,0,0,0,0,0,0,0
4,4,1,.,T,G,99,PASS,.,GT,0,1,0,0,1,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,95,1,.,T,A,99,PASS,.,GT,0,0,1,1,0,0,1,0,0,0
96,96,1,.,T,A,99,PASS,.,GT,0,0,0,0,0,1,0,0,0,0
97,97,1,.,T,C,99,PASS,.,GT,0,0,0,0,0,0,0,0,1,0
98,98,1,.,T,G,99,PASS,.,GT,0,0,0,0,0,0,0,1,0,0


In [45]:
csv = snp_data.to_csv(file1)

In [6]:
mydata = pd.read_csv(file1, index_col=0)
#mydata.drop(["Unnamed: 0"], axis=1)
mydata

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,90,91,92,93,94,95,96,97,98,99
r0,0,0,1,1,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
r1,0,0,1,0,1,0,1,0,0,0,...,0,0,0,1,0,0,0,0,0,0
r2,1,0,0,0,0,1,0,0,0,0,...,0,0,0,1,0,1,0,0,0,0
r3,0,0,0,0,0,0,0,0,0,0,...,0,1,0,0,1,1,0,0,0,0
r4,0,0,0,0,1,0,0,0,1,0,...,1,0,0,0,0,0,0,0,0,1
r5,0,0,1,0,1,0,0,0,0,0,...,0,0,1,0,0,0,1,0,0,0
r6,0,0,1,0,0,0,0,0,0,0,...,0,0,1,0,0,1,0,0,0,0
r7,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
r8,0,0,1,0,0,0,0,0,0,1,...,0,0,1,0,0,0,0,1,0,0
r9,0,1,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0


In [48]:
model.df

Unnamed: 0,locus,start,end,nbps,nsnps,tidx,genealogy
0,0,0,1,1,1,0,"((r6:130956,(r7:130407,(..."
1,1,0,1,1,1,0,"((r4:125522,(r8:103182,(..."
2,2,0,1,1,1,0,"(r9:356573,(r1:210923,(r..."
3,3,0,1,1,1,0,"((r9:118795,(r0:96411,(r..."
4,4,0,1,1,1,0,"(((r4:62954,(r0:44648.2,..."
...,...,...,...,...,...,...,...
95,95,0,1,1,1,0,"((r9:148718,(r0:97078,r8..."
96,96,0,1,1,1,0,"(r8:968471,((r5:131232,(..."
97,97,0,1,1,1,0,"(r7:265789,(((r0:86569.4..."
98,98,0,1,1,1,0,"((r6:103972,(r8:98686.5,..."


## Simulating SNPs in the Presence of Introgression

I want to simulate an admixture event close to recent between divergent lineages (mimic horizontal gene transfer). I want a simulated dataset that I can run through Hogtie as a test.

In [5]:
hgt_tree = toytree.rtree.imbtree(ntips=10, treeheight=1e10)
hgt_tree.draw(ts='p', admixture_edges=(2,8))

(<toyplot.canvas.Canvas at 0x7ff550c5fca0>,
 <toyplot.coordinates.Cartesian at 0x7ff550c5ffd0>,
 <toytree.Render.ToytreeMark at 0x7ff553d8bc10>)

In [7]:
snp_introgression_model =  ipcoal.Model(tree=hgt_tree, Ne=1e8, admixture_edges=[(2, 8, 0.6, 0.9)], nsamples=1)

In [8]:
snp_introgression_model.sim_snps(nsnps=100, repeat_on_trees=1)

In [9]:
snp_introgression_model.df

Unnamed: 0,locus,start,end,nbps,nsnps,tidx,genealogy
0,0,0,1,1,1,0,"(r9:1.01123e+10,((r2:1.8..."
1,1,0,1,1,1,0,"(r9:1.01334e+10,((r2:1.6..."
2,2,0,1,1,1,0,"(r9:1.00651e+10,((r2:1.4..."
3,3,0,1,1,1,0,"(r9:1.00256e+10,((r2:1.4..."
4,4,0,1,1,1,0,"(r9:1.06606e+10,((r2:1.3..."
...,...,...,...,...,...,...,...
95,95,0,1,1,1,0,"(r9:1.01264e+10,((r2:1.3..."
96,96,0,1,1,1,0,"(r9:1.01847e+10,((r2:1.4..."
97,97,0,1,1,1,0,"(r9:1.03485e+10,((r2:1.4..."
98,98,0,1,1,1,0,"(r9:1.01603e+10,(r8:9.72..."


In [13]:
data = snp_introgression_model.write_vcf().iloc[:, 9:].T
list(data[0])

[2, 1, 0, 1, 1, 3, 0, 0, 0, 0]

In [19]:
data.to_numpy(dtype=int,copy=True)

array([[1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1,
        0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0,
        1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1,
        1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1,
        1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0,
        1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1],
       [0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1,
        1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1,
        1, 1, 1, 0, 1,

In [2]:
import numpy as np
import pandas as pd

In [15]:
for column in data:
    data[column] = data[column].replace([2,3],1)

In [5]:
import os
HOGTIEDIR = os.path.dirname(os.getcwd())
file1 = os.path.join(HOGTIEDIR, "sampledata", "testmatrix.csv")
file2 = os.path.join(HOGTIEDIR, "sampledata", "testtree.txt")

In [1]:
data.to_csv(path_or_buf=file1)
file1

NameError: name 'data' is not defined

In [3]:
col1 = np.array([3.3, 4.6, 6.8, 4.2, 3.1, 9.9, 6.5, 4.4, 6.1, 7.2])
df = pd.DataFrame(col1)

devs = [] #would prefer to append to an empty np.array
for like in list(df[0]):
    if like >= 5:
        devs.append(1)
    else:
        devs.append(0)

df['deviation_score'] = np.array(devs)

In [4]:
import toyplot

In [14]:
df['rollingav']= df[0].rolling(2, win_type='triang').mean()
        
colormap = toyplot.color.brewer.map("Dark2")
color = df.deviation_score


a, b, c = toyplot.plot(
    df['rollingav'],
    width = 500,
    height=500,
    color = 'blue'
);
b.hlines(
    7,
    style={"stroke": "red", "stroke-width": 2},
);

In [17]:
def graph(data):
    a, b, c = toyplot.plot(
        data,
        width = 500,
        height=500,
        color='blue'
    )
    b.hlines(7, style={"stroke": "red", "stroke-width": 2});

In [18]:
graph(df['rollingav'])