In [20]:
import ipyrad.analysis as ipa
import toyplot
import toytree
import ipcoal

Window extracter parameters are altered to produce a subset of ~200-220 taxa.  The subsampling can then be implemented in mrbayes, with the goal of generating trees with good-looking branch lengths.

In [18]:
wex05_pt = ipa.window_extracter(
    name = "pt_scaffold05",
    data = "/moto/eaton/users/hnl2109/analysis-ipyrad/robur_ref_basic_outfiles/robur_ref_basic.seqs.hdf5",
    workdir = "/moto/eaton/users/hnl2109/analysis-window_extracter",
    scaffold_idxs = [5],
    mincov = 220,
    rmincov = 0.9
)

wex05_pt.stats

Unnamed: 0,scaffold,start,end,sites,snps,missing,samples
prefilter,Qrob_Chr06,0,,1661653,64087,0.92,261
postfilter,Qrob_Chr06,0,,30836,3643,0.05,217


In [19]:
# Write to a nexus file for implementation in mrbayes.
wex05_pt.run(nexus = True)

Wrote data to /moto/eaton/users/hnl2109/analysis-window_extracter/pt_scaffold05.nex


In [49]:
# Write to phylip to run through RAxML.  This is an attempt to make a constraint tree that matches the subsampling.
wex05_pt.run(nexus = False)

Wrote data to /moto/eaton/users/hnl2109/analysis-window_extracter/pt_scaffold05.phy


In [50]:
rax05_pt = ipa.raxml(
    name = "pt_scaffold05",
    data = wex05_pt.outfile,
    workdir = "/moto/eaton/users/hnl2109/analysis-raxml",
    N = 100,
    T = 12,
)

rax05_pt.command

'/moto/home/hnl2109/miniconda3/bin/raxmlHPC-PTHREADS-AVX2 -f a -T 12 -m GTRGAMMA -n pt_scaffold05 -w /moto/eaton/users/hnl2109/analysis-raxml -s /moto/eaton/users/hnl2109/analysis-window_extracter/pt_scaffold05.phy -p 54321 -N 100 -x 12345'

In [51]:
rax05_pt.run()

job pt_scaffold05 finished successfully


In [33]:
wex_full = ipa.window_extracter(
    name = "full_scaffold",
    data = "/moto/eaton/users/hnl2109/analysis-ipyrad/robur_ref_basic_outfiles/robur_ref_basic.seqs.hdf5",
    workdir = "/moto/eaton/users/hnl2109/analysis-window_extracter",
    scaffold_idxs = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
)

wex_full.stats

Unnamed: 0,scaffold,start,end,sites,snps,missing,samples
0,concatenated,0,18176689,18176689,793477,0.919,261


In [35]:
wex_full.run(nexus = True)

Wrote data to /moto/eaton/users/hnl2109/analysis-window_extracter/full_scaffold.nex


In [102]:
TREE = toytree.tree("/moto/eaton/users/hnl2109/analysis-raxml/RAxML_bipartitions.pt_scaffold05")
model = ipcoal.Model(TREE, Ne=2e4, nsamples=2, recomb=0)

In [38]:
# Setup for test under a relaxed clock model.
mbt = ipa.mrbayes(
    data="/moto/eaton/users/hnl2109/analysis-window_extracter/pt_scaffold05.nex",
    name="mbtest-t",
    workdir="/moto/eaton/users/hnl2109/analysis-mrbayes/",
    constraints=TREE,
    clock_model=2,
    ngen=int(1e6),
    nruns=2,
)

print(mb1.params)

brlenspr      clock:uniform       
clockratepr   normal(0.01,0.005)  
clockvarpr    igr                 
igrvarpr      exp(10.0)           
nchains       4                   
ngen          1000000             
nruns         2                   
samplefreq    1000                
topologypr    uniform             



In [39]:
mb1.run(force=True)

job itest-1 finished successfully


In [87]:
# Same nexus file, but with a constraining tree, generated from a RAxML run on a phylip file from the same subsample.
mb2 = ipa.mrbayes(
    data="/moto/eaton/users/hnl2109/analysis-window_extracter/pt_scaffold05.nex",
    name="itest-2",
    workdir="/moto/eaton/users/hnl2109/analysis-mrbayes/",
    constraints=TREE,
    clock_model=2,
    ngen=int(1e6),
    nruns=2,
)

print(mb2.params)

brlenspr      clock:uniform       
clockratepr   normal(0.01,0.005)  
clockvarpr    igr                 
igrvarpr      exp(10.0)           
nchains       4                   
ngen          1000000             
nruns         2                   
samplefreq    1000                
topologypr    fixed(fixedtree)    



In [83]:
print(mb2.nexus_string)

#NEXUS

[log block]
log start filename=/moto/eaton/users/hnl2109/analysis-mrbayes/itest-2.nex.log replace;

[data block]
execute /moto/eaton/users/hnl2109/analysis-window_extracter/pt_scaffold05.nex;

[tree block]
begin trees;
  tree fixedtree = (reference,SRR8860678,((SRR8860611,(SRR5632365,SRR8860589)),((SRR5632407,(((SRR8860650,SRR5632542),(SRR8860536,(SRR8860665,SRR8860521))),((SRR8860698,SRR5632599),(SRR8860703,(SRR8860656,(SRR5632410,SRR8860609)))))),((SRR5284345,(SRR5632587,(SRR5632592,((SRR5632548,SRR5632357),(SRR8860721,SRR5984320))))),((SRR5632586,(SRR5284342,(SRR5284351,SRR5284372))),(((SRR5284339,SRR5284337),(SRR5284355,(SRR5632514,SRR5284357))),(((SRR5632377,((SRR5632428,SRR5632436),(SRR5632344,(SRR5632345,(SRR5632413,(SRR5632411,SRR5632382)))))),(SRR5632617,((SRR8860622,SRR8860565),(((SRR5632393,SRR8860541),((SRR5632438,(SRR5632451,SRR5632353)),(SRR8860543,(SRR8860696,(SRR8860593,((SRR8860709,SRR8860727),(SRR5632545,SRR5632439))))))),((SRR5632449,SRR5632416),((SRR5632452,

In [88]:
mb2.run(force = True)

IPyradError: 

                            MrBayes 3.2.7 x86_64

                      (Bayesian Analysis of Phylogeny)

              Distributed under the GNU General Public License


               Type "help" or "help <command>" for information
                     on the commands that are available.

                   Type "about" for authorship and general
                       information about the program.



   Executing file "/moto/eaton/users/hnl2109/analysis-mrbayes/itest-2.nex"
   UNIX line termination
   Longest line length = 2836
   Parsing file
   Expecting NEXUS formatted file
   Logging screen output to file "/moto/eaton/users/hnl2109/analysis-mrbayes/itest-2.nex.log"

   Executing file "/moto/eaton/users/hnl2109/analysis-window_extracter/pt_scaffold05.nex"...

   UNIX line termination
   Longest line length = 114
   Parsing file
   Expecting NEXUS formatted file
   Reading data block
      Allocated taxon set
      Allocated matrix
      Defining new matrix with 217 taxa and 30836 characters
      Data is Dna
      Missing data coded as N
      Gaps coded as -
      Data matrix is interleaved
      Taxon   1 -> reference
      Taxon   2 -> SRR1915525
      Taxon   3 -> SRR1915528
      Taxon   4 -> SRR1915533
      Taxon   5 -> SRR1915534
      Taxon   6 -> SRR1915539
      Taxon   7 -> SRR1915542
      Taxon   8 -> SRR1915547
      Taxon   9 -> SRR1915549
      Taxon  10 -> SRR1915552
      Taxon  11 -> SRR1915561
      Taxon  12 -> SRR5284330
      Taxon  13 -> SRR5284335
      Taxon  14 -> SRR5284337
      Taxon  15 -> SRR5284339
      Taxon  16 -> SRR5284342
      Taxon  17 -> SRR5284345
      Taxon  18 -> SRR5284351
      Taxon  19 -> SRR5284352
      Taxon  20 -> SRR5284355
      Taxon  21 -> SRR5284357
      Taxon  22 -> SRR5284372
      Taxon  23 -> SRR5632344
      Taxon  24 -> SRR5632345
      Taxon  25 -> SRR5632353
      Taxon  26 -> SRR5632355
      Taxon  27 -> SRR5632357
      Taxon  28 -> SRR5632358
      Taxon  29 -> SRR5632361
      Taxon  30 -> SRR5632365
      Taxon  31 -> SRR5632373
      Taxon  32 -> SRR5632374
      Taxon  33 -> SRR5632377
      Taxon  34 -> SRR5632378
      Taxon  35 -> SRR5632379
      Taxon  36 -> SRR5632380
      Taxon  37 -> SRR5632381
      Taxon  38 -> SRR5632382
      Taxon  39 -> SRR5632383
      Taxon  40 -> SRR5632384
      Taxon  41 -> SRR5632393
      Taxon  42 -> SRR5632394
      Taxon  43 -> SRR5632397
      Taxon  44 -> SRR5632401
      Taxon  45 -> SRR5632403
      Taxon  46 -> SRR5632404
      Taxon  47 -> SRR5632406
      Taxon  48 -> SRR5632407
      Taxon  49 -> SRR5632410
      Taxon  50 -> SRR5632411
      Taxon  51 -> SRR5632413
      Taxon  52 -> SRR5632416
      Taxon  53 -> SRR5632418
      Taxon  54 -> SRR5632421
      Taxon  55 -> SRR5632424
      Taxon  56 -> SRR5632425
      Taxon  57 -> SRR5632428
      Taxon  58 -> SRR5632436
      Taxon  59 -> SRR5632438
      Taxon  60 -> SRR5632439
      Taxon  61 -> SRR5632440
      Taxon  62 -> SRR5632441
      Taxon  63 -> SRR5632443
      Taxon  64 -> SRR5632448
      Taxon  65 -> SRR5632449
      Taxon  66 -> SRR5632451
      Taxon  67 -> SRR5632452
      Taxon  68 -> SRR5632457
      Taxon  69 -> SRR5632462
      Taxon  70 -> SRR5632465
      Taxon  71 -> SRR5632470
      Taxon  72 -> SRR5632474
      Taxon  73 -> SRR5632478
      Taxon  74 -> SRR5632480
      Taxon  75 -> SRR5632486
      Taxon  76 -> SRR5632491
      Taxon  77 -> SRR5632493
      Taxon  78 -> SRR5632496
      Taxon  79 -> SRR5632502
      Taxon  80 -> SRR5632505
      Taxon  81 -> SRR5632507
      Taxon  82 -> SRR5632510
      Taxon  83 -> SRR5632512
      Taxon  84 -> SRR5632513
      Taxon  85 -> SRR5632514
      Taxon  86 -> SRR5632516
      Taxon  87 -> SRR5632519
      Taxon  88 -> SRR5632521
      Taxon  89 -> SRR5632522
      Taxon  90 -> SRR5632524
      Taxon  91 -> SRR5632526
      Taxon  92 -> SRR5632528
      Taxon  93 -> SRR5632536
      Taxon  94 -> SRR5632537
      Taxon  95 -> SRR5632542
      Taxon  96 -> SRR5632545
      Taxon  97 -> SRR5632546
      Taxon  98 -> SRR5632548
      Taxon  99 -> SRR5632549
      Taxon 100 -> SRR5632550
      Taxon 101 -> SRR5632552
      Taxon 102 -> SRR5632560
      Taxon 103 -> SRR5632563
      Taxon 104 -> SRR5632564
      Taxon 105 -> SRR5632566
      Taxon 106 -> SRR5632569
      Taxon 107 -> SRR5632579
      Taxon 108 -> SRR5632586
      Taxon 109 -> SRR5632587
      Taxon 110 -> SRR5632589
      Taxon 111 -> SRR5632592
      Taxon 112 -> SRR5632597
      Taxon 113 -> SRR5632599
      Taxon 114 -> SRR5632616
      Taxon 115 -> SRR5632617
      Taxon 116 -> SRR5632619
      Taxon 117 -> SRR5632626
      Taxon 118 -> SRR5984279
      Taxon 119 -> SRR5984280
      Taxon 120 -> SRR5984281
      Taxon 121 -> SRR5984284
      Taxon 122 -> SRR5984285
      Taxon 123 -> SRR5984287
      Taxon 124 -> SRR5984288
      Taxon 125 -> SRR5984289
      Taxon 126 -> SRR5984290
      Taxon 127 -> SRR5984291
      Taxon 128 -> SRR5984292
      Taxon 129 -> SRR5984293
      Taxon 130 -> SRR5984294
      Taxon 131 -> SRR5984295
      Taxon 132 -> SRR5984296
      Taxon 133 -> SRR5984297
      Taxon 134 -> SRR5984298
      Taxon 135 -> SRR5984300
      Taxon 136 -> SRR5984302
      Taxon 137 -> SRR5984303
      Taxon 138 -> SRR5984304
      Taxon 139 -> SRR5984305
      Taxon 140 -> SRR5984306
      Taxon 141 -> SRR5984307
      Taxon 142 -> SRR5984308
      Taxon 143 -> SRR5984310
      Taxon 144 -> SRR5984311
      Taxon 145 -> SRR5984312
      Taxon 146 -> SRR5984314
      Taxon 147 -> SRR5984315
      Taxon 148 -> SRR5984316
      Taxon 149 -> SRR5984317
      Taxon 150 -> SRR5984318
      Taxon 151 -> SRR5984319
      Taxon 152 -> SRR5984320
      Taxon 153 -> SRR8860507
      Taxon 154 -> SRR8860511
      Taxon 155 -> SRR8860515
      Taxon 156 -> SRR8860517
      Taxon 157 -> SRR8860521
      Taxon 158 -> SRR8860536
      Taxon 159 -> SRR8860541
      Taxon 160 -> SRR8860543
      Taxon 161 -> SRR8860544
      Taxon 162 -> SRR8860551
      Taxon 163 -> SRR8860552
      Taxon 164 -> SRR8860553
      Taxon 165 -> SRR8860556
      Taxon 166 -> SRR8860558
      Taxon 167 -> SRR8860565
      Taxon 168 -> SRR8860568
      Taxon 169 -> SRR8860571
      Taxon 170 -> SRR8860573
      Taxon 171 -> SRR8860579
      Taxon 172 -> SRR8860589
      Taxon 173 -> SRR8860593
      Taxon 174 -> SRR8860595
      Taxon 175 -> SRR8860599
      Taxon 176 -> SRR8860607
      Taxon 177 -> SRR8860609
      Taxon 178 -> SRR8860610
      Taxon 179 -> SRR8860611
      Taxon 180 -> SRR8860612
      Taxon 181 -> SRR8860613
      Taxon 182 -> SRR8860621
      Taxon 183 -> SRR8860622
      Taxon 184 -> SRR8860624
      Taxon 185 -> SRR8860635
      Taxon 186 -> SRR8860642
      Taxon 187 -> SRR8860647
      Taxon 188 -> SRR8860650
      Taxon 189 -> SRR8860656
      Taxon 190 -> SRR8860659
      Taxon 191 -> SRR8860665
      Taxon 192 -> SRR8860671
      Taxon 193 -> SRR8860673
      Taxon 194 -> SRR8860678
      Taxon 195 -> SRR8860681
      Taxon 196 -> SRR8860688
      Taxon 197 -> SRR8860691
      Taxon 198 -> SRR8860694
      Taxon 199 -> SRR8860696
      Taxon 200 -> SRR8860698
      Taxon 201 -> SRR8860700
      Taxon 202 -> SRR8860701
      Taxon 203 -> SRR8860703
      Taxon 204 -> SRR8860704
      Taxon 205 -> SRR8860709
      Taxon 206 -> SRR8860715
      Taxon 207 -> SRR8860717
      Taxon 208 -> SRR8860719
      Taxon 209 -> SRR8860721
      Taxon 210 -> SRR8860726
      Taxon 211 -> SRR8860727
      Taxon 212 -> SRR8860731
      Taxon 213 -> SRR8860740
      Taxon 214 -> SRR8860744
      Taxon 215 -> SRR8860745
      Taxon 216 -> SRR8860746
      Taxon 217 -> SRR8860749
      Successfully read matrix
      Setting default partition (does not divide up characters)
      Setting model defaults
      Seed (for generating default start values) = 1596435677
      Setting output file names to "/moto/eaton/users/hnl2109/analysis-window_extracter/pt_scaffold05.nex.run<i>.<p|t>"
   Exiting data block
   Reached end of file

   Returning execution to calling file ...

   Reading trees block
      Successfully read tree 'fixedtree'
   Exiting mrbayes block
   Reading mrbayes block
      Setting autoclose to yes
      Setting nowarnings to yes
      Setting Nst to 6
      Setting Rates to Gamma
      Successfully set likelihood model parameters
      Setting Brlenspr to Clock:Uniform
      Successfully set prior model parameters
      Setting Clockvarpr to Igr
      Successfully set prior model parameters
      Setting Igrvarpr to Exponential(10.00)
      Successfully set prior model parameters
      Setting Clockratepr to Normal(0.010000,0.005000)
      Successfully set prior model parameters
      Setting Topologypr to Fixed
      Successfully set prior model parameters
      Could not fix topology because user tree 'fixedtree' differs in rootedness with the model tree.
      The user tree fixedtree is not rooted, while expected model tree is rooted.
      Check brlenspr is set correctly before fixing topology.
      Error in command "Prset"
      The error occurred when reading char. 36-36 on line 24
         in the file '/moto/eaton/users/hnl2109/analysis-mrbayes/itest-2.nex'

   Returning execution to command line ...

   Error in command "Execute"
   Will exit with signal 1 (error) because quitonerror is set to yes
   If you want control to be returned to the command line on error,
   use 'mb -i <filename>' (i is for interactive) or use 'set quitonerror=no'

