In [1]:
import ipyrad.analysis as ipa
import toyplot
import toytree
import ipcoal

The `window_extracter` filtering parameters are adjusted to retain a subset of ~200-220 taxa. The resulting subsampled alignment can then be analyzed in MrBayes, with the goal of generating trees with good-looking branch lengths.

In [18]:
# Set up a window extracter restricted to scaffold index 5 of the assembled
# sequence database, with coverage filters applied.
# NOTE(review): mincov / rmincov are presumably the site- and sample-level
# coverage thresholds -- confirm against the ipyrad documentation.
wex05_pt = ipa.window_extracter(
    data="/moto/eaton/users/hnl2109/analysis-ipyrad/robur_ref_basic_outfiles/robur_ref_basic.seqs.hdf5",
    workdir="/moto/eaton/users/hnl2109/analysis-window_extracter",
    name="pt_scaffold05",
    scaffold_idxs=[5],
    mincov=220,
    rmincov=0.9,
)

# Show the pre-/post-filter summary table as the cell output.
wex05_pt.stats

Unnamed: 0,scaffold,start,end,sites,snps,missing,samples
prefilter,Qrob_Chr06,0,,1661653,64087,0.92,261
postfilter,Qrob_Chr06,0,,30836,3643,0.05,217


In [19]:
# Export the filtered alignment in NEXUS format, the input format used for
# the MrBayes analysis.
wex05_pt.run(nexus=True)

Wrote data to /moto/eaton/users/hnl2109/analysis-window_extracter/pt_scaffold05.nex


In [49]:
# Export the same alignment again with NEXUS output disabled; this writes the
# PHYLIP file that is passed to RAxML.
wex05_pt.run(nexus=False)

Wrote data to /moto/eaton/users/hnl2109/analysis-window_extracter/pt_scaffold05.phy


In [50]:
# Configure a RAxML job on the extracter's output file.
# N and T are forwarded to the raxml command line as -N and -T.
rax05_pt = ipa.raxml(
    data=wex05_pt.outfile,
    name="pt_scaffold05",
    workdir="/moto/eaton/users/hnl2109/analysis-raxml",
    N=100,
    T=12,
)

# Display the exact command string that the job will execute.
rax05_pt.command

'/moto/home/hnl2109/miniconda3/bin/raxmlHPC-PTHREADS-AVX2 -f a -T 12 -m GTRGAMMA -n pt_scaffold05 -w /moto/eaton/users/hnl2109/analysis-raxml -s /moto/eaton/users/hnl2109/analysis-window_extracter/pt_scaffold05.phy -p 54321 -N 100 -x 12345'

In [51]:
# Launch the RAxML job for the scaffold-5 alignment.
rax05_pt.run()

job pt_scaffold05 finished successfully


In [8]:
# Set up a window extracter over scaffold indices 0 through 11, with no
# coverage filters specified.
wex_full = ipa.window_extracter(
    data="/moto/eaton/users/hnl2109/analysis-ipyrad/robur_ref_basic_outfiles/robur_ref_basic.seqs.hdf5",
    workdir="/moto/eaton/users/hnl2109/analysis-window_extracter",
    name="full_scaffold",
    scaffold_idxs=list(range(12)),
)

# Show the summary table for the concatenated scaffolds as the cell output.
wex_full.stats

Unnamed: 0,scaffold,start,end,sites,snps,missing,samples
0,concatenated,0,18176689,18176689,793477,0.919,261


In [35]:
# Export the concatenated alignment in NEXUS format.
wex_full.run(nexus=True)

Wrote data to /moto/eaton/users/hnl2109/analysis-window_extracter/full_scaffold.nex


In [9]:
# Export the concatenated alignment with NEXUS output disabled (PHYLIP file).
wex_full.run(nexus=False)

Wrote data to /moto/eaton/users/hnl2109/analysis-window_extracter/full_scaffold.phy


In [16]:
# Configure a RAxML job on the full-scaffold extracter's output file.
# N and T are forwarded to the raxml command line as -N and -T.
rax_full = ipa.raxml(
    data=wex_full.outfile,
    name="full_scaffold",
    workdir="/moto/eaton/users/hnl2109/analysis-raxml",
    N=100,
    T=12,
)

# Display the exact command string that the job will execute.
rax_full.command

'/moto/home/hnl2109/miniconda3/bin/raxmlHPC-PTHREADS-AVX2 -f a -T 12 -m GTRGAMMA -n full_scaffold -w /moto/eaton/users/hnl2109 -s /moto/eaton/users/hnl2109/analysis-window_extracter/full_scaffold.phy -p 54321 -N 100 -x 12345'

In [4]:
# Load the tree file from the tetrad analysis directory; TREE is reused later
# as the topology constraint passed to MrBayes.
TREE = toytree.tree(
    "/moto/eaton/users/hnl2109/analysis-tetrad/robur-ref-basic.tree"
)

# Build an ipcoal model on that tree.
model = ipcoal.Model(
    TREE,
    Ne=2e4,
    nsamples=2,
    recomb=0,
)

In [38]:
# Setup for test under a relaxed clock model.
mbt = ipa.mrbayes(
    data="/moto/eaton/users/hnl2109/analysis-window_extracter/pt_scaffold05.nex",
    name="itest-1",
    workdir="/moto/eaton/users/hnl2109/analysis-mrbayes/",
    clock_model=2,
    ngen=int(1e6),
    nruns=2,
)

print(mb1.params)

brlenspr      clock:uniform       
clockratepr   normal(0.01,0.005)  
clockvarpr    igr                 
igrvarpr      exp(10.0)           
nchains       4                   
ngen          1000000             
nruns         2                   
samplefreq    1000                
topologypr    uniform             



In [39]:
# Run the itest-1 MrBayes job.
# NOTE(review): force=True presumably overwrites existing results -- confirm.
mb1.run(force=True)

job itest-1 finished successfully


In [13]:
# Tree constraints.
mb2 = ipa.mrbayes(
    data="/moto/eaton/users/hnl2109/analysis-ipyrad/robur_ref_basic_outfiles/robur_ref_basic.nex",
    name="itest-2",
    workdir="/moto/eaton/users/hnl2109/analysis-mrbayes/",
    constraints=TREE,
    clock_model=2,
    ngen=int(1e6),
    nruns=2,
)

print(mb2.params)

brlenspr      clock:uniform       
clockratepr   normal(0.01,0.005)  
clockvarpr    igr                 
igrvarpr      exp(10.0)           
nchains       4                   
ngen          1000000             
nruns         2                   
samplefreq    1000                
topologypr    fixed(fixedtree)    



In [14]:
# Print the generated NEXUS command file text to check it before running.
print(mb2.nexus_string)

#NEXUS

[log block]
log start filename=/moto/eaton/users/hnl2109/analysis-mrbayes/itest-2.nex.log replace;

[data block]
execute /moto/eaton/users/hnl2109/analysis-ipyrad/robur_ref_basic_outfiles/robur_ref_basic.nex;

[tree block]
begin trees;
  tree fixedtree = ((SRR5632571,(SRR5632521,(SRR5632378,(SRR5284330,SRR5632566)))),((SRR5632394,SRR5632524),(((SRR1915561,SRR1915539,SRR1915525),((SRR1915528,SRR1915547,SRR1915542),(SRR1915552,(SRR1915534,(SRR1915549,(SRR1915533,SRR1915548)))))),(SRR5632586,(((SRR5284342,(SRR5284372,SRR5284351)),((SRR5632592,(SRR5284345,((SRR8860721,SRR5632587),(SRR5984320,(SRR5632548,SRR5632357))))),(((reference,SRR8860678),(SRR5632365,(SRR8860589,SRR8860611))),(SRR8860703,(SRR5632599,((SRR8860536,(SRR5632410,(SRR8860656,SRR8860609))),(SRR8860698,((SRR5632407,SRR8860672),((SRR8860650,SRR8860521),(SRR5632542,SRR8860665)))))))))),(((SRR5632344,SRR5632428),(SRR5632377,((SRR5632345,SRR5632411),(SRR5632413,(SRR5632382,SRR5632369))))),((SRR5284355,SRR5632514,SRR528435

In [16]:
# Run the constrained itest-2 MrBayes job.
# NOTE(review): force=True presumably overwrites existing results -- confirm.
mb2.run(force=True)

IPyradError: 

                            MrBayes 3.2.7 x86_64

                      (Bayesian Analysis of Phylogeny)

              Distributed under the GNU General Public License


               Type "help" or "help <command>" for information
                     on the commands that are available.

                   Type "about" for authorship and general
                       information about the program.



   Executing file "/moto/eaton/users/hnl2109/analysis-mrbayes/itest-2.nex"
   UNIX line termination
   Longest line length = 3382
   Parsing file
   Expecting NEXUS formatted file
   Logging screen output to file "/moto/eaton/users/hnl2109/analysis-mrbayes/itest-2.nex.log"

   Executing file "/moto/eaton/users/hnl2109/analysis-ipyrad/robur_ref_basic_outfiles/robur_ref_basic.nex"...

   UNIX line termination
   Longest line length = 118
   Parsing file
   Expecting NEXUS formatted file
   Reading data block
      Allocated taxon set
      Allocated matrix
      Defining new matrix with 260 taxa and 23497605 characters
      Data is Dna
      Missing data coded as N
      Gaps coded as -
      Data matrix is interleaved
      Taxon   1 -> SRR1915525
      Taxon   2 -> SRR1915528
      Taxon   3 -> SRR1915533
      Taxon   4 -> SRR1915534
      Taxon   5 -> SRR1915539
      Taxon   6 -> SRR1915542
      Taxon   7 -> SRR1915547
      Taxon   8 -> SRR1915548
      Taxon   9 -> SRR1915549
      Taxon  10 -> SRR1915552
      Taxon  11 -> SRR1915561
      Taxon  12 -> SRR5284330
      Taxon  13 -> SRR5284335
      Taxon  14 -> SRR5284337
      Taxon  15 -> SRR5284339
      Taxon  16 -> SRR5284342
      Taxon  17 -> SRR5284345
      Taxon  18 -> SRR5284351
      Taxon  19 -> SRR5284352
      Taxon  20 -> SRR5284355
      Taxon  21 -> SRR5284357
      Taxon  22 -> SRR5284372
      Taxon  23 -> SRR5632344
      Taxon  24 -> SRR5632345
      Taxon  25 -> SRR5632346
      Taxon  26 -> SRR5632348
      Taxon  27 -> SRR5632353
      Taxon  28 -> SRR5632355
      Taxon  29 -> SRR5632357
      Taxon  30 -> SRR5632358
      Taxon  31 -> SRR5632361
      Taxon  32 -> SRR5632363
      Taxon  33 -> SRR5632365
      Taxon  34 -> SRR5632369
      Taxon  35 -> SRR5632371
      Taxon  36 -> SRR5632373
      Taxon  37 -> SRR5632374
      Taxon  38 -> SRR5632377
      Taxon  39 -> SRR5632378
      Taxon  40 -> SRR5632379
      Taxon  41 -> SRR5632380
      Taxon  42 -> SRR5632381
      Taxon  43 -> SRR5632382
      Taxon  44 -> SRR5632383
      Taxon  45 -> SRR5632384
      Taxon  46 -> SRR5632385
      Taxon  47 -> SRR5632390
      Taxon  48 -> SRR5632393
      Taxon  49 -> SRR5632394
      Taxon  50 -> SRR5632397
      Taxon  51 -> SRR5632401
      Taxon  52 -> SRR5632403
      Taxon  53 -> SRR5632404
      Taxon  54 -> SRR5632406
      Taxon  55 -> SRR5632407
      Taxon  56 -> SRR5632410
      Taxon  57 -> SRR5632411
      Taxon  58 -> SRR5632413
      Taxon  59 -> SRR5632416
      Taxon  60 -> SRR5632417
      Taxon  61 -> SRR5632418
      Taxon  62 -> SRR5632421
      Taxon  63 -> SRR5632424
      Taxon  64 -> SRR5632425
      Taxon  65 -> SRR5632426
      Taxon  66 -> SRR5632427
      Taxon  67 -> SRR5632428
      Taxon  68 -> SRR5632429
      Taxon  69 -> SRR5632431
      Taxon  70 -> SRR5632433
      Taxon  71 -> SRR5632434
      Taxon  72 -> SRR5632436
      Taxon  73 -> SRR5632438
      Taxon  74 -> SRR5632439
      Taxon  75 -> SRR5632440
      Taxon  76 -> SRR5632441
      Taxon  77 -> SRR5632443
      Taxon  78 -> SRR5632446
      Taxon  79 -> SRR5632448
      Taxon  80 -> SRR5632449
      Taxon  81 -> SRR5632451
      Taxon  82 -> SRR5632452
      Taxon  83 -> SRR5632454
      Taxon  84 -> SRR5632457
      Taxon  85 -> SRR5632462
      Taxon  86 -> SRR5632465
      Taxon  87 -> SRR5632466
      Taxon  88 -> SRR5632470
      Taxon  89 -> SRR5632474
      Taxon  90 -> SRR5632476
      Taxon  91 -> SRR5632478
      Taxon  92 -> SRR5632480
      Taxon  93 -> SRR5632481
