From 09c27bdd8ee79d08dc4c8fc38c6f9a2c8f80da5c Mon Sep 17 00:00:00 2001 From: Integer Limit <103940576+IntegerLimit@users.noreply.github.com> Date: Thu, 4 Jul 2024 11:48:40 +1000 Subject: [PATCH 1/3] Support Exporting DNA/RNA Analysis to MrBayes Block Files --- .gitignore | 1 + main/phyloanalysis.cpp | 15 ++++- main/phylotesting.h | 7 +++ tree/phylosupertree.cpp | 73 +++++++++++++++++------ tree/phylosupertree.h | 16 +++++- tree/phylotree.cpp | 124 +++++++++++++++++++++++++++++++++++++++- tree/phylotree.h | 26 +++++++++ utils/tools.cpp | 18 ++++-- utils/tools.h | 6 +- 9 files changed, 260 insertions(+), 26 deletions(-) diff --git a/.gitignore b/.gitignore index fe0d324c6..926678ab6 100644 --- a/.gitignore +++ b/.gitignore @@ -14,6 +14,7 @@ tags test_scripts/iqtree_test_cmds.txt pllrepo/ build/ +cmake-build-debug/ /Default-clang .idea/ .idea/codeStyleSettings.xml diff --git a/main/phyloanalysis.cpp b/main/phyloanalysis.cpp index e712707bc..48719d28a 100644 --- a/main/phyloanalysis.cpp +++ b/main/phyloanalysis.cpp @@ -1156,6 +1156,12 @@ void printOutfilesInfo(Params ¶ms, IQTree &tree) { if (params.optimize_linked_gtr) { cout << " GTRPMIX nex file: " << params.out_prefix << ".GTRPMIX.nex" << endl; } + + if (params.mr_bayes_output) { + cout << endl << "MrBayes block written to:" << endl; + cout << " Base template file: " << params.out_prefix << ".mr_bayes_scheme.nex" << endl; + cout << " Template file with parameters: " << params.out_prefix << ".mr_bayes_model.nex" << endl; + } cout << endl; } @@ -3100,8 +3106,13 @@ void runTreeReconstruction(Params ¶ms, IQTree* &iqtree) { } if (iqtree->isSuperTree()) { - ((PhyloSuperTree*) iqtree)->computeBranchLengths(); - ((PhyloSuperTree*) iqtree)->printBestPartitionParams((string(params.out_prefix) + ".best_model.nex").c_str()); + auto superTree = (PhyloSuperTree*) iqtree; + superTree->computeBranchLengths(); + superTree->printBestPartitionParams((string(params.out_prefix) + ".best_model.nex").c_str()); + } + if (params.mr_bayes_output) { + iqtree->printMrBayesBlock((string(params.out_prefix) + ".mr_bayes_scheme.nex").c_str(), false); + iqtree->printMrBayesBlock((string(params.out_prefix) + ".mr_bayes_model.nex").c_str(), true); } cout << "BEST SCORE FOUND : " << iqtree->getCurScore() << endl; diff --git a/main/phylotesting.h b/main/phylotesting.h index 20d7d188b..286ca377d 100644 --- a/main/phylotesting.h +++ b/main/phylotesting.h @@ -390,5 +390,12 @@ string convertSeqTypeToSeqTypeName(SeqType seq_type); string detectSeqTypeName(string model_name); +/** + * get string name from a SeqType object + * @param seq_type input sequence type + * @return name + */ +string getSeqTypeName(SeqType seq_type); + #endif /* PHYLOTESTING_H_ */ diff --git a/tree/phylosupertree.cpp b/tree/phylosupertree.cpp index 21fabf81d..711e2747b 100644 --- a/tree/phylosupertree.cpp +++ b/tree/phylosupertree.cpp @@ -23,6 +23,7 @@ #include "main/phylotesting.h" #include "model/partitionmodel.h" #include "utils/MPIHelper.h" +#include "utils/tools.h" PhyloSuperTree::PhyloSuperTree() : IQTree() @@ -1519,29 +1520,20 @@ void PhyloSuperTree::writeBranches(ostream &out) { } } -void PhyloSuperTree::printBestPartitionParams(const char *filename) { +void PhyloSuperTree::printBestPartitionParams(const char *filename) +{ try { ofstream out; out.exceptions(ios::failbit | ios::badbit); out.open(filename); out << "#nexus" << endl << "begin sets;" << endl; - int part; - SuperAlignment *saln = (SuperAlignment*)aln; - for (part = 0; part < size(); part++) { - string name = saln->partitions[part]->name; - replace(name.begin(), name.end(), '+', '_'); - out << " charset " << name << " = "; - if (!saln->partitions[part]->aln_file.empty()) out << saln->partitions[part]->aln_file << ": "; - /*if (saln->partitions[part]->seq_type == SEQ_CODON) - out << "CODON, ";*/ - out << saln->partitions[part]->sequence_type << ", "; - string pos = saln->partitions[part]->position_spec; - replace(pos.begin(), pos.end(), ',' , ' '); - out << pos << ";" << endl; - } + auto *saln = (SuperAlignment*)aln; + + printCharsets(out); + out << " charpartition mymodels =" << endl; - for (part = 0; part < size(); part++) { + for (int part = 0; part < size(); part++) { string name = saln->partitions[part]->name; replace(name.begin(), name.end(), '+', '_'); if (part > 0) out << "," << endl; @@ -1555,3 +1547,52 @@ void PhyloSuperTree::printBestPartitionParams(const char *filename) { outError(ERR_WRITE_OUTPUT, filename); } } + +void PhyloSuperTree::printCharsets(ofstream &out) +{ + auto *saln = (SuperAlignment*)aln; + for (int part = 0; part < size(); part++) { + string name = saln->partitions[part]->name; + replace(name.begin(), name.end(), '+', '_'); + out << " charset " << name << " = "; + if (!saln->partitions[part]->aln_file.empty()) out << saln->partitions[part]->aln_file << ": "; + /*if (saln->partitions[part]->seq_type == SEQ_CODON) + out << "CODON, ";*/ + out << saln->partitions[part]->sequence_type << ", "; + string pos = saln->partitions[part]->position_spec; + replace(pos.begin(), pos.end(), ',' , ' '); + out << pos << ";" << endl; + } +} + +void PhyloSuperTree::printMrBayesBlock(const char *filename, bool inclParams) +{ + ofstream out = startMrBayesBlock(filename); + printCharsets(out); + out << endl; + auto *saln = (SuperAlignment*)aln; + + // Create Partition + size_type listSize = size(); + out << " partition iqtree = " << listSize << ": "; + for (int part = 0; part < listSize; part++) { + string name = saln->partitions[part]->name; + out << name; + if (part != listSize - 1) out << ", "; + else out << ";" << endl; + } + + // Set Partition for Use + out << " set partition = iqtree;" << endl; + + // Set Outgroup (if available) + if (!rooted) out << " outgroup " << root << ";" << endl << endl; + + // Partition-Specific Model Information + for (int part = 0; part < listSize; part++) { + // MrBayes Partitions are 1-indexed + at(part)->printMrBayesModelText(out, convertIntToString(part + 1), inclParams); + } + out << "end;" << endl; + out.close(); +} diff --git a/tree/phylosupertree.h b/tree/phylosupertree.h index 60dbdcb9a..7def5a0ab 100644 --- a/tree/phylosupertree.h +++ b/tree/phylosupertree.h @@ -475,12 +475,24 @@ class PhyloSuperTree : public IQTree, public vector */ void printBestPartitionParams(const char *filename); - + /** + print mr bayes block with partition information and best model parameters + @param filename output file name + @param inclParams whether to include iqtree params + */ + virtual void printMrBayesBlock(const char *filename, bool inclParams); + /** True when mixed codon with other data type */ bool rescale_codon_brlen; - + int totalNNIs, evalNNIs; +private: + /** + print charset information of the best model into an ofstream + @param out output stream + */ + void printCharsets(ofstream &out); }; #endif diff --git a/tree/phylotree.cpp b/tree/phylotree.cpp index b6bfcb595..6c7daff8a 100644 --- a/tree/phylotree.cpp +++ b/tree/phylotree.cpp @@ -37,7 +37,7 @@ #include "model/modelmixture.h" #include "phylonodemixlen.h" #include "phylotreemixlen.h" - +#include "main/phylotesting.h" const int LH_MIN_CONST = 1; @@ -6246,3 +6246,125 @@ void PhyloTree::doNNI_simple(NNIMove &move) { } } +ofstream PhyloTree::startMrBayesBlock(const char *filename) { + ofstream out; + try { + out.exceptions(ios::failbit | ios::badbit); + out.open(filename); + + // Write Warning + out << "#nexus" << endl << endl + <<"[This MrBayes Block Declaration provides the retrieved values from the IQTree Run.]" << endl + << "[Note that MrBayes does not support a large collection of models, so defaults of 'nst=6' for DNA and 'wag' for Protein will be used if a model that does not exist in MrBayes is selected.]" << endl + << "[Furthermore, the Model Parameter '+R' and its derivatives will be ignored.]" << endl + << "[This should be used as a Template Only.]" << endl << endl; + + // Begin File, Print Charsets + out << "begin mrbayes;" << endl; + return out; + } catch (ios::failure &) { + outError(ERR_WRITE_OUTPUT, filename); + return out; + } +} + +void PhyloTree::printMrBayesModelTextDNA(ofstream &out, string partition, bool inclParams) +{ + string modelStr = model->getName(); + string modifiers = site_rate->name; + bool fixedFreq = model->freq_type == FREQ_EQUAL; + + short nst = 6; + string rateMod = "equal"; + + // Find NST value (1 for JC/JC69/F81, 2 for K80/K2P/HKY/HKY85, 6 for SYM/GTR) + // NST is set to 6 by default (above), so check for JC/J69/F81 and K80/K2P/HKY/HKY85 + if (strcmp(modelStr.c_str(), "JC") == 0 || strcmp(modelStr.c_str(), "JC69") == 0 || strcmp(modelStr.c_str(), "F81") == 0) + nst = 1; + else if (strcmp(modelStr.c_str(), "K80") == 0 || strcmp(modelStr.c_str(), "K2P") == 0 || strcmp(modelStr.c_str(), "HKY") == 0 || strcmp(modelStr.c_str(), "HKY85") == 0) + nst = 2; + + // Find Rate Value (propinv for +I, gamma for +G, invgamma for +I+G) + // Default is Equal (+E) + if (modifiers.find("+G") != string::npos) { + if (modifiers.find("+I") != string::npos) + rateMod = "invgamma"; + else rateMod = "gamma"; + } else if (modifiers.find("+I") != string::npos) + rateMod = "propinv"; + + // NucModel = 4By4: DNA Nucleotides (4 Options, A, C, G, T) + out << " lset applyto=(" << partition << ") nucmodel=4by4 nst=" << nst << " rates=" << rateMod << ";" << endl; + + // Priors (apart from Fixed Freq) + if (!inclParams) { + // If not include other params, simply set fixed frequency and return + if (!fixedFreq) return; + + out << " prset applyto=(" << partition << ") statefreqpr=fixed(equal);" << endl; + return; + } + + out << " prset applyto=(" << partition << ")"; + + // Gamma Distribution (+G) + // Dirichlet is not available here, use fixed + if (site_rate->getGammaShape() != 0.0) + out << " shapepr=fixed(" << site_rate->getGammaShape() << ")"; + + // Invariable Sites (+I) + // Dirichlet is not available here, use fixed + if (site_rate->getPInvar() != 0.0) + out << " pinvarpr=fixed(" << site_rate->getPInvar() << ")"; + + // Frequency of Nucleotides (+F) + if (fixedFreq) + out << " statefreqpr=fixed(equal)"; + else { + auto* freq = new double[model->num_states]; + model->getStateFrequency(freq); + out << " statefreqpr=dirichlet("; + for (int i = 0; i < model->num_states; ++i) { + if (i != 0) out << ", "; + out << freq[i]; + } + out << ")"; + } + + // Reversible Rate Matrix + int numRateEntries = model->getNumRateEntries(); + auto* rateMat = new double[numRateEntries]; + model->getRateMatrix(rateMat); + + out << " revmatpr=dirichlet("; + for (int i = 0; i < numRateEntries; ++i) { + if (i != 0) out << ", "; + out << rateMat[i]; + } + + // Close revmatpr + prset + out << ");" << endl; +} + +void PhyloTree::printMrBayesModelText(ofstream& out, string partition, bool inclParams) +{ + switch(aln->seq_type) { + case SEQ_DNA: + printMrBayesModelTextDNA(out, partition, inclParams); + out << endl; // Add extra endline after model text for readability + break; + default: + outWarning("MrBayes Block output not supported for sequence type " + getSeqTypeName(aln->seq_type)); + break; + } +} + +void PhyloTree::printMrBayesBlock(const char *filename, bool inclParams) { + ofstream out = startMrBayesBlock(filename); + // Set Outgroup (if available) + if (!rooted) out << " outgroup " << root << ";" << endl << endl; + + printMrBayesModelText(out, "all", inclParams); + out << "end;" << endl; + out.close(); +} diff --git a/tree/phylotree.h b/tree/phylotree.h index b8a19f986..a5e049b9a 100644 --- a/tree/phylotree.h +++ b/tree/phylotree.h @@ -2264,6 +2264,13 @@ class PhyloTree : public MTree, public Optimization, public CheckpointFactory { */ const string& getDistanceFileWritten() const; + /** + print mr bayes block with partition information and best model parameters + @param filename output file name + @param inclParams whether to include iqtree params + */ + virtual void printMrBayesBlock(const char *filename, bool inclParams); + protected: @@ -2509,6 +2516,25 @@ class PhyloTree : public MTree, public Optimization, public CheckpointFactory { void hideProgress(); void showProgress(); void doneProgress(); + + /** + * print MrBayes formatted text, of model information (and parameters), to an ofstream. + * @param tree tree to print info from + * @param out stream to print to + * @param partition the partition to 'apply' to (all to apply to all partitions) + * @param inclParams whether to include the parameters of the model + */ + void printMrBayesModelText(ofstream& out, string partition, bool inclParams); + + /** + * Start a MrBayes Block, as well as printing a warning. + * @param filename output file name + * @return ofstream for other functions to print to + */ + ofstream startMrBayesBlock(const char *filename); + +private: + void printMrBayesModelTextDNA(ofstream &out, string partition, bool inclParams); }; #endif diff --git a/utils/tools.cpp b/utils/tools.cpp index 10930e466..54d5fc472 100644 --- a/utils/tools.cpp +++ b/utils/tools.cpp @@ -1524,6 +1524,7 @@ void parseArg(int argc, char *argv[], Params ¶ms) { params.include_pre_mutations = false; params.mutation_file = ""; params.site_starting_index = 0; + params.mr_bayes_output = false; // store original params for (cnt = 1; cnt < argc; cnt++) { @@ -5662,7 +5663,10 @@ void parseArg(int argc, char *argv[], Params ¶ms) { params.make_consistent = true; continue; } - + if (strcmp(argv[cnt], "-mrbayes") == 0) { + params.mr_bayes_output = true; + continue; + } if (argv[cnt][0] == '-') { string err = "Invalid \""; err += argv[cnt]; @@ -5800,6 +5804,10 @@ void parseArg(int argc, char *argv[], Params ¶ms) { if (params.model_name.find("LINK") != string::npos || params.model_name.find("MERGE") != string::npos) if (params.partition_merge == MERGE_NONE) params.partition_merge = MERGE_RCLUSTERF; + + // Set MrBayes Block Output if -mset mrbayes + if (params.model_set == "mrbayes") + params.mr_bayes_output = true; if (params.alisim_active && !params.aln_file && !params.user_file && !params.partition_file && params.tree_gen == NONE) outError("A tree filepath is a mandatory input to execute AliSim when neither Inference mode nor Random mode (generating a random tree) is inactive. Use -t ; or Activate the inference mode by -s ; or Activate Random mode by -t RANDOM{,} where is yh, u, cat, bal, bd{,} stands for Yule-Harding, Uniform, Caterpillar, Balanced, Birth-Death model respectively."); @@ -6014,7 +6022,7 @@ void usage_iqtree(char* argv[], bool full_command) { // << " -pll Use phylogenetic likelihood library (PLL) (default: off)" << endl << " --ninit NUM Number of initial parsimony trees (default: 100)" << endl << " --ntop NUM Number of top initial trees (default: 20)" << endl - << " --nbest NUM Number of best trees retained during search (defaut: 5)" << endl + << " --nbest NUM Number of best trees retained during search (default: 5)" << endl << " -n NUM Fix number of iterations to stop (default: OFF)" << endl << " --nstop NUM Number of unsuccessful iterations to stop (default: 100)" << endl << " --perturb NUM Perturbation strength for randomized NNI (default: 0.5)" << endl @@ -6071,6 +6079,8 @@ void usage_iqtree(char* argv[], bool full_command) { << " -m ...+LMSS Additionally test strand-symmetric models" << endl << " --mset STRING Restrict search to models supported by other programs" << endl << " (raxml, phyml, mrbayes, beast1 or beast2)" << endl + << " If 'mrbayes' is selected, will auto update a MrBayes" << endl + << " Block File if Data Type is supported." << endl << " --mset STR,... Comma-separated model list (e.g. -mset WAG,LG,JTT)" << endl << " --msub STRING Amino-acid model source" << endl << " (nuclear, mitochondrial, chloroplast or viral)" << endl @@ -6304,8 +6314,8 @@ void usage_iqtree(char* argv[], bool full_command) { // << " -d Calculate the distance matrix inferred from tree" << endl // << " -stats Output some statistics about branch lengths" << endl // << " -comp Compare tree with each in the input trees" << endl; - - + << " -mrbayes Outputs a Mr Bayes block file, to use as a template for future analysis" << endl + << " Will not output if incompatible data types are used (anything apart from DNA, Codon and Protein)" << endl << endl; if (full_command) { diff --git a/utils/tools.h b/utils/tools.h index b6f24978a..75e538027 100644 --- a/utils/tools.h +++ b/utils/tools.h @@ -2781,6 +2781,11 @@ class Params { * site starting index (for predefined mutations in AliSim) */ int site_starting_index; + + /** + * Whether to output a MrBayes Block File + */ + bool mr_bayes_output; }; /** @@ -3728,5 +3733,4 @@ double frob_norm (double m[], int n, double scale=1.0); */ string getOutputNameWithExt(const InputType& format, const string& output_filepath); - #endif From 3aec9c7f44c1b64c94cb665eb3796d167df49257 Mon Sep 17 00:00:00 2001 From: Integer Limit <103940576+IntegerLimit@users.noreply.github.com> Date: Thu, 4 Jul 2024 12:31:56 +1000 Subject: [PATCH 2/3] Fix Formatting Issues --- tree/phylosupertree.cpp | 6 ++++-- utils/tools.cpp | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/tree/phylosupertree.cpp b/tree/phylosupertree.cpp index 711e2747b..31fee24b2 100644 --- a/tree/phylosupertree.cpp +++ b/tree/phylosupertree.cpp @@ -1558,7 +1558,8 @@ void PhyloSuperTree::printCharsets(ofstream &out) if (!saln->partitions[part]->aln_file.empty()) out << saln->partitions[part]->aln_file << ": "; /*if (saln->partitions[part]->seq_type == SEQ_CODON) out << "CODON, ";*/ - out << saln->partitions[part]->sequence_type << ", "; + if (!saln->partitions[part]->sequence_type.empty()) + out << saln->partitions[part]->sequence_type << ", "; string pos = saln->partitions[part]->position_spec; replace(pos.begin(), pos.end(), ',' , ' '); out << pos << ";" << endl; @@ -1577,6 +1578,7 @@ void PhyloSuperTree::printMrBayesBlock(const char *filename, bool inclParams) out << " partition iqtree = " << listSize << ": "; for (int part = 0; part < listSize; part++) { string name = saln->partitions[part]->name; + replace(name.begin(), name.end(), '+', '_'); out << name; if (part != listSize - 1) out << ", "; else out << ";" << endl; @@ -1586,7 +1588,7 @@ void PhyloSuperTree::printMrBayesBlock(const char *filename, bool inclParams) out << " set partition = iqtree;" << endl; // Set Outgroup (if available) - if (!rooted) out << " outgroup " << root << ";" << endl << endl; + if (!rooted) out << " outgroup " << root->name << ";" << endl << endl; // Partition-Specific Model Information for (int part = 0; part < listSize; part++) { diff --git a/utils/tools.cpp b/utils/tools.cpp index 54d5fc472..17bca7210 100644 --- a/utils/tools.cpp +++ b/utils/tools.cpp @@ -6079,7 +6079,7 @@ void usage_iqtree(char* argv[], bool full_command) { << " -m ...+LMSS Additionally test strand-symmetric models" << endl << " --mset STRING Restrict search to models supported by other programs" << endl << " (raxml, phyml, mrbayes, beast1 or beast2)" << endl - << " If 'mrbayes' is selected, will auto update a MrBayes" << endl + << " If 'mrbayes' is selected, will output a MrBayes" << endl << " Block File if Data Type is supported." << endl << " --mset STR,... Comma-separated model list (e.g. -mset WAG,LG,JTT)" << endl << " --msub STRING Amino-acid model source" << endl From ac60fdc0a64ab646c50b642a54e12f0835ae71f2 Mon Sep 17 00:00:00 2001 From: Integer Limit <103940576+IntegerLimit@users.noreply.github.com> Date: Thu, 4 Jul 2024 16:59:14 +1000 Subject: [PATCH 3/3] Move Functions to Supplementary, Fix +R Remapping --- model/modeldna.cpp | 4 + model/modeldna.h | 6 ++ model/modelsubst.h | 6 ++ tree/phylosupertree.cpp | 48 ++++++----- tree/phylosupertree.h | 15 ++-- tree/phylotree.cpp | 184 ++++++++++++++++++++++++++++------------ tree/phylotree.h | 45 +++++----- utils/tools.cpp | 3 +- 8 files changed, 208 insertions(+), 103 deletions(-) diff --git a/model/modeldna.cpp b/model/modeldna.cpp index c10317c2f..f2cbc0be0 100644 --- a/model/modeldna.cpp +++ b/model/modeldna.cpp @@ -564,3 +564,7 @@ void ModelDNA::setVariables(double *variables) { // j++; // } } + +string ModelDNA::getModelDNACode() { + return param_spec; +} diff --git a/model/modeldna.h b/model/modeldna.h index 6694edde1..3f9bd7a61 100644 --- a/model/modeldna.h +++ b/model/modeldna.h @@ -114,6 +114,12 @@ class ModelDNA : public ModelMarkov */ virtual void computeTipLikelihood(PML::StateType state, double *state_lk); + /** + * Get the Model DNA 'code', in form 'abcdef', used with ModelDNA model + * Returns empty string by default (this is not a dna specific model) + */ + virtual string getModelDNACode(); + protected: /** diff --git a/model/modelsubst.h b/model/modelsubst.h index 37835a8f2..07e7444cf 100644 --- a/model/modelsubst.h +++ b/model/modelsubst.h @@ -392,6 +392,12 @@ class ModelSubst: public Optimization, public CheckpointFactory */ virtual ModelSubst *getMutationModel() { return this; } + /** + * Get the Model DNA 'code', in form 'abcdef', used with ModelDNA model + * Returns empty string by default (this is not a dna specific model) + */ + virtual string getModelDNACode() { return ""; } + /***************************************************** Checkpointing facility *****************************************************/ diff --git a/tree/phylosupertree.cpp b/tree/phylosupertree.cpp index 31fee24b2..9d0213051 100644 --- a/tree/phylosupertree.cpp +++ b/tree/phylosupertree.cpp @@ -1520,6 +1520,23 @@ void PhyloSuperTree::writeBranches(ostream &out) { } } +void printCharsets(SuperAlignment* saln, size_t size, ofstream &out) +{ + for (int part = 0; part < size; part++) { + string name = saln->partitions[part]->name; + replace(name.begin(), name.end(), '+', '_'); + out << " charset " << name << " = "; + if (!saln->partitions[part]->aln_file.empty()) out << saln->partitions[part]->aln_file << ": "; + /*if (saln->partitions[part]->seq_type == SEQ_CODON) + out << "CODON, ";*/ + if (!saln->partitions[part]->sequence_type.empty()) + out << saln->partitions[part]->sequence_type << ", "; + string pos = saln->partitions[part]->position_spec; + replace(pos.begin(), pos.end(), ',' , ' '); + out << pos << ";" << endl; + } +} + void PhyloSuperTree::printBestPartitionParams(const char *filename) { try { @@ -1530,7 +1547,7 @@ void PhyloSuperTree::printBestPartitionParams(const char *filename) << "begin sets;" << endl; auto *saln = (SuperAlignment*)aln; - printCharsets(out); + printCharsets(saln, size(), out); out << " charpartition mymodels =" << endl; for (int part = 0; part < size(); part++) { @@ -1548,30 +1565,21 @@ void PhyloSuperTree::printBestPartitionParams(const char *filename) } } -void PhyloSuperTree::printCharsets(ofstream &out) -{ - auto *saln = (SuperAlignment*)aln; - for (int part = 0; part < size(); part++) { - string name = saln->partitions[part]->name; - replace(name.begin(), name.end(), '+', '_'); - out << " charset " << name << " = "; - if (!saln->partitions[part]->aln_file.empty()) out << saln->partitions[part]->aln_file << ": "; - /*if (saln->partitions[part]->seq_type == SEQ_CODON) - out << "CODON, ";*/ - if (!saln->partitions[part]->sequence_type.empty()) - out << saln->partitions[part]->sequence_type << ", "; - string pos = saln->partitions[part]->position_spec; - replace(pos.begin(), pos.end(), ',' , ' '); - out << pos << ";" << endl; - } +void PhyloSuperTree::printMrBayesFreeRateReplacement(RateHeterogeneity *rate, string &charset, ofstream &out) { + // Call PhyloTree's function inside the correct checkpoint struct + checkpoint->startStruct("PartitionModelPlen"); + checkpoint->startStruct(charset); + PhyloTree::printMrBayesFreeRateReplacement(rate, charset, out); + checkpoint->endStruct(); + checkpoint->endStruct(); } void PhyloSuperTree::printMrBayesBlock(const char *filename, bool inclParams) { ofstream out = startMrBayesBlock(filename); - printCharsets(out); - out << endl; auto *saln = (SuperAlignment*)aln; + printCharsets(saln, size(), out); + out << endl; // Create Partition size_type listSize = size(); @@ -1593,7 +1601,7 @@ void PhyloSuperTree::printMrBayesBlock(const char *filename, bool inclParams) // Partition-Specific Model Information for (int part = 0; part < listSize; part++) { // MrBayes Partitions are 1-indexed - at(part)->printMrBayesModelText(out, convertIntToString(part + 1), inclParams); + printMrBayesModelText(this, at(part), out, convertIntToString(part + 1), saln->partitions[part]->name, inclParams); } out << "end;" << endl; out.close(); diff --git a/tree/phylosupertree.h b/tree/phylosupertree.h index 7def5a0ab..f2304fcab 100644 --- a/tree/phylosupertree.h +++ b/tree/phylosupertree.h @@ -482,17 +482,18 @@ class PhyloSuperTree : public IQTree, public vector */ virtual void printMrBayesBlock(const char *filename, bool inclParams); + /** + * Prints the replacement prior settings for +R or +R+I. + * @param rate the heterogeneity rate + * @param charset the (original) charset of the current partition. An empty string if not a partitioned tree + * @param out the ofstream to print to + */ + virtual void printMrBayesFreeRateReplacement(RateHeterogeneity* rate, string &charset, ofstream &out); + /** True when mixed codon with other data type */ bool rescale_codon_brlen; int totalNNIs, evalNNIs; - -private: - /** - print charset information of the best model into an ofstream - @param out output stream - */ - void printCharsets(ofstream &out); }; #endif diff --git a/tree/phylotree.cpp b/tree/phylotree.cpp index 6c7daff8a..8b1f6bce8 100644 --- a/tree/phylotree.cpp +++ b/tree/phylotree.cpp @@ -6246,52 +6246,73 @@ void PhyloTree::doNNI_simple(NNIMove &move) { } } -ofstream PhyloTree::startMrBayesBlock(const char *filename) { - ofstream out; - try { - out.exceptions(ios::failbit | ios::badbit); - out.open(filename); +/* Private Supplementary Logic Functions */ - // Write Warning - out << "#nexus" << endl << endl - <<"[This MrBayes Block Declaration provides the retrieved values from the IQTree Run.]" << endl - << "[Note that MrBayes does not support a large collection of models, so defaults of 'nst=6' for DNA and 'wag' for Protein will be used if a model that does not exist in MrBayes is selected.]" << endl - << "[Furthermore, the Model Parameter '+R' and its derivatives will be ignored.]" << endl - << "[This should be used as a Template Only.]" << endl << endl; - - // Begin File, Print Charsets - out << "begin mrbayes;" << endl; - return out; - } catch (ios::failure &) { - outError(ERR_WRITE_OUTPUT, filename); - return out; +/** + * Find Rate Value (propinv for +I, gamma for +G/+R, invgamma for +I+G/+I+R)
+ * Default is Equal (+E) + */ +string getMrBayesRHASName(RateHeterogeneity* rate) +{ + bool hasGamma = rate->getGammaShape() != 0.0; + bool hasInvariable = rate->getPInvar() != 0.0; + if (hasGamma) { + if (hasInvariable) + return "invgamma"; + return "gamma"; } + if (hasInvariable) + return "propinv"; + return "equal"; } -void PhyloTree::printMrBayesModelTextDNA(ofstream &out, string partition, bool inclParams) +void printMrBayesRHASPrior(PhyloTree* originalTree, RateHeterogeneity* rate, string &charset, ofstream &out) { - string modelStr = model->getName(); - string modifiers = site_rate->name; - bool fixedFreq = model->freq_type == FREQ_EQUAL; + bool hasI = rate->getPInvar() > 0.0; + + // Freerate (+R) + // Get replacement gamma shape + if (rate->isFreeRate()) { + originalTree ->printMrBayesFreeRateReplacement(rate, charset, out); + } + + // Gamma Distribution (+G/+R) + // Dirichlet is not available here, use fixed + if (rate->getGammaShape() > 0.0) + out << " shapepr=fixed(" << rate->getGammaShape() << ")"; + + // Invariable Sites (+I) + // Dirichlet is not available here, use fixed + if (hasI) + out << " pinvarpr=fixed(" << rate->getPInvar() << ")"; +} +void printMrBayesModelTextDNA(PhyloTree* originalTree, PhyloTree* tree, ofstream &out, string &partition, string &charset, bool inclParams) +{ + ModelSubst* model = tree->getModel(); + bool equalFreq = model->freq_type == FREQ_EQUAL; short nst = 6; - string rateMod = "equal"; + string rateMod = getMrBayesRHASName(tree->getRate()); // Find NST value (1 for JC/JC69/F81, 2 for K80/K2P/HKY/HKY85, 6 for SYM/GTR) - // NST is set to 6 by default (above), so check for JC/J69/F81 and K80/K2P/HKY/HKY85 - if (strcmp(modelStr.c_str(), "JC") == 0 || strcmp(modelStr.c_str(), "JC69") == 0 || strcmp(modelStr.c_str(), "F81") == 0) - nst = 1; - else if (strcmp(modelStr.c_str(), "K80") == 0 || strcmp(modelStr.c_str(), "K2P") == 0 || strcmp(modelStr.c_str(), "HKY") == 0 || strcmp(modelStr.c_str(), "HKY85") == 0) - nst = 2; - - // Find Rate Value (propinv for +I, gamma for +G, invgamma for +I+G) - // Default is Equal (+E) - if (modifiers.find("+G") != string::npos) { - if (modifiers.find("+I") != string::npos) - rateMod = "invgamma"; - else rateMod = "gamma"; - } else if (modifiers.find("+I") != string::npos) - rateMod = "propinv"; + // NST is set to 6 by default (above), so check for JC/JC69/F81 and K80/K2P/HKY/HKY85 + // If it returns a valid dna code, we can extract information from there (needed for user-defined models) + string dnaCode = model->getModelDNACode(); + string modelStr = model->getName(); + if (!dnaCode.empty()) { + // if all equal + if (dnaCode.find_first_not_of(dnaCode[0]) == string::npos) + nst = 1; + // if A-G=C-T and everything else is equal to first part of dna code + else if (dnaCode[1] == dnaCode[4] && dnaCode[0] == dnaCode[2] && dnaCode[0] == dnaCode[3] && dnaCode[0] == dnaCode[5]) + nst = 2; + } else if (!modelStr.empty()) { + // Check the name of the model + if (strcmp(modelStr.c_str(), "JC") == 0 || strcmp(modelStr.c_str(), "JC69") == 0 || strcmp(modelStr.c_str(), "F81") == 0) + nst = 1; + else if (strcmp(modelStr.c_str(), "K80") == 0 || strcmp(modelStr.c_str(), "K2P") == 0 || strcmp(modelStr.c_str(), "HKY") == 0 || strcmp(modelStr.c_str(), "HKY85") == 0) + nst = 2; + } // NucModel = 4By4: DNA Nucleotides (4 Options, A, C, G, T) out << " lset applyto=(" << partition << ") nucmodel=4by4 nst=" << nst << " rates=" << rateMod << ";" << endl; @@ -6299,7 +6320,7 @@ void PhyloTree::printMrBayesModelTextDNA(ofstream &out, string partition, bool i // Priors (apart from Fixed Freq) if (!inclParams) { // If not include other params, simply set fixed frequency and return - if (!fixedFreq) return; + if (!equalFreq) return; out << " prset applyto=(" << partition << ") statefreqpr=fixed(equal);" << endl; return; @@ -6307,18 +6328,10 @@ void PhyloTree::printMrBayesModelTextDNA(ofstream &out, string partition, bool i out << " prset applyto=(" << partition << ")"; - // Gamma Distribution (+G) - // Dirichlet is not available here, use fixed - if (site_rate->getGammaShape() != 0.0) - out << " shapepr=fixed(" << site_rate->getGammaShape() << ")"; - - // Invariable Sites (+I) - // Dirichlet is not available here, use fixed - if (site_rate->getPInvar() != 0.0) - out << " pinvarpr=fixed(" << site_rate->getPInvar() << ")"; + printMrBayesRHASPrior(originalTree, tree->getRate(), charset, out); // Frequency of Nucleotides (+F) - if (fixedFreq) + if (equalFreq) out << " statefreqpr=fixed(equal)"; else { auto* freq = new double[model->num_states]; @@ -6329,6 +6342,8 @@ void PhyloTree::printMrBayesModelTextDNA(ofstream &out, string partition, bool i out << freq[i]; } out << ")"; + + delete[] freq; } // Reversible Rate Matrix @@ -6344,27 +6359,84 @@ void PhyloTree::printMrBayesModelTextDNA(ofstream &out, string partition, bool i // Close revmatpr + prset out << ");" << endl; + + delete[] rateMat; } -void PhyloTree::printMrBayesModelText(ofstream& out, string partition, bool inclParams) +/* Public Supplementary Functions (Used by PhyloSuperTree) */ +ofstream startMrBayesBlock(const char *filename) { + ofstream out; + try { + out.exceptions(ios::failbit | ios::badbit); + out.open(filename); + + // Write Warning + out << "#nexus" << endl << endl + <<"[This MrBayes Block Declaration provides the retrieved values from the IQTree Run.]" << endl + << "[Note that MrBayes does not support a large collection of models, so defaults of 'nst=6' for DNA and 'wag' for Protein will be used if a model that does not exist in MrBayes is selected.]" << endl + << "[Furthermore, the Model Parameter '+R' will be replaced by '+G'.]" << endl + << "[This should be used as a Template Only.]" << endl << endl; + + // Begin File, Print Charsets + out << "begin mrbayes;" << endl; + } catch (ios::failure &) { + outError(ERR_WRITE_OUTPUT, filename); + } + return out; +} + +void printMrBayesModelText(PhyloTree* originalTree, PhyloTree* tree, ofstream& out, string partition, string charset, bool inclParams) { - switch(aln->seq_type) { + switch(tree->aln->seq_type) { case SEQ_DNA: - printMrBayesModelTextDNA(out, partition, inclParams); - out << endl; // Add extra endline after model text for readability + printMrBayesModelTextDNA(originalTree, tree, out, partition, charset, inclParams); break; - default: - outWarning("MrBayes Block output not supported for sequence type " + getSeqTypeName(aln->seq_type)); + case SEQ_CODON: break; + case SEQ_PROTEIN: + break; + case SEQ_BINARY: + break; + case SEQ_MORPH: + break; + default: + outWarning("MrBayes Block output not supported for sequence type " + getSeqTypeName(tree->aln->seq_type)); + return; } + out << endl; // Add extra endline after model text for readability +} + +/* Protected Overrided Function */ +void PhyloTree::printMrBayesFreeRateReplacement(RateHeterogeneity *rate, string &charset, ofstream &out) { + bool hasInvar = rate->getPInvar() > 0.0; + string key = hasInvar ? "RateGammaInvar" : "RateGamma"; + + double gamma_shape = 0.0; + double p_invar = 0.0; + + checkpoint->startStruct(key); + + CKP_RESTORE(gamma_shape); + if (hasInvar) + CKP_RESTORE(p_invar); + + checkpoint->endStruct(); + + // Sanity Check + if (gamma_shape > 0.0) + out << " shapepr=fixed(" << gamma_shape << ")"; + + if (p_invar > 0.0) + out << " pinvarpr=fixed(" << p_invar << ")"; } +/* Main Function */ void PhyloTree::printMrBayesBlock(const char *filename, bool inclParams) { ofstream out = startMrBayesBlock(filename); // Set Outgroup (if available) - if (!rooted) out << " outgroup " << root << ";" << endl << endl; + if (!rooted) out << " outgroup " << root->name << ";" << endl << endl; - printMrBayesModelText(out, "all", inclParams); + printMrBayesModelText(this, this, out, "all", "", inclParams); out << "end;" << endl; out.close(); } diff --git a/tree/phylotree.h b/tree/phylotree.h index a5e049b9a..f1f2029e0 100644 --- a/tree/phylotree.h +++ b/tree/phylotree.h @@ -2265,12 +2265,20 @@ class PhyloTree : public MTree, public Optimization, public CheckpointFactory { const string& getDistanceFileWritten() const; /** - print mr bayes block with partition information and best model parameters + print MrBayes block with partition information, and best model parameters (if inclParams is true) @param filename output file name @param inclParams whether to include iqtree params */ virtual void printMrBayesBlock(const char *filename, bool inclParams); + /** + * Prints the replacement prior settings for +R or +R+I. + * @param rate the heterogeneity rate + * @param charset the (original) charset of the current partition. An empty string if not a partitioned tree + * @param out the ofstream to print to + */ + virtual void printMrBayesFreeRateReplacement(RateHeterogeneity* rate, string &charset, ofstream &out); + protected: @@ -2516,25 +2524,24 @@ class PhyloTree : public MTree, public Optimization, public CheckpointFactory { void hideProgress(); void showProgress(); void doneProgress(); +}; - /** - * print MrBayes formatted text, of model information (and parameters), to an ofstream. - * @param tree tree to print info from - * @param out stream to print to - * @param partition the partition to 'apply' to (all to apply to all partitions) - * @param inclParams whether to include the parameters of the model - */ - void printMrBayesModelText(ofstream& out, string partition, bool inclParams); - - /** - * Start a MrBayes Block, as well as printing a warning. - * @param filename output file name - * @return ofstream for other functions to print to - */ - ofstream startMrBayesBlock(const char *filename); +/** + * print MrBayes formatted text, of model information (and parameters), to an ofstream. + * @param originalTree the original tree, of which is called to retrieve values in checkpoint file (to allow for overriding) + * @param tree tree to print info from + * @param out stream to print to + * @param partition the partition to 'apply' to (all to apply to all partitions) + * @param charset the (original) charset of the current partition. Used to retrieve values in checkpoint file. Input an empty string if this is not a partitioned tree. + * @param inclParams whether to include the parameters of the model + */ +void printMrBayesModelText(PhyloTree* originalTree, PhyloTree* tree, ofstream& out, string partition, string charset, bool inclParams); -private: - void printMrBayesModelTextDNA(ofstream &out, string partition, bool inclParams); -}; +/** + * Start a MrBayes Block, as well as printing a warning. + * @param filename output file name + * @return ofstream for other functions to print to + */ +ofstream startMrBayesBlock(const char *filename); #endif diff --git a/utils/tools.cpp b/utils/tools.cpp index 17bca7210..db21f7917 100644 --- a/utils/tools.cpp +++ b/utils/tools.cpp @@ -6315,7 +6315,8 @@ void usage_iqtree(char* argv[], bool full_command) { // << " -stats Output some statistics about branch lengths" << endl // << " -comp Compare tree with each in the input trees" << endl; << " -mrbayes Outputs a Mr Bayes block file, to use as a template for future analysis" << endl - << " Will not output if incompatible data types are used (anything apart from DNA, Codon and Protein)" << endl + << " Will not output if incompatible data types are used" << endl + << " (MrBayes only supports DNA, Codon, Protein, Binary and Morphological data)" << endl << endl; if (full_command) {