From 19b1fdc6455da7fd60ddf391427fe2278d1b8a78 Mon Sep 17 00:00:00 2001 From: Integer Limit <103940576+IntegerLimit@users.noreply.github.com> Date: Thu, 4 Jul 2024 17:14:26 +1000 Subject: [PATCH 01/15] Support Exporting DNA/RNA Analysis to MrBayes Block Files (#260) * Support Exporting DNA/RNA Analysis to MrBayes Block Files * Fix Formatting Issues * Move Functions to Supplementary, Fix +R Remapping --- .gitignore | 1 + main/phyloanalysis.cpp | 15 ++- main/phylotesting.h | 7 ++ model/modeldna.cpp | 4 + model/modeldna.h | 6 ++ model/modelsubst.h | 6 ++ tree/phylosupertree.cpp | 90 ++++++++++++++---- tree/phylosupertree.h | 19 +++- tree/phylotree.cpp | 196 +++++++++++++++++++++++++++++++++++++++- tree/phylotree.h | 33 +++++++ utils/tools.cpp | 19 +++- utils/tools.h | 6 +- 12 files changed, 371 insertions(+), 31 deletions(-) diff --git a/.gitignore b/.gitignore index fe0d324c6..926678ab6 100644 --- a/.gitignore +++ b/.gitignore @@ -14,6 +14,7 @@ tags test_scripts/iqtree_test_cmds.txt pllrepo/ build/ +cmake-build-debug/ /Default-clang .idea/ .idea/codeStyleSettings.xml diff --git a/main/phyloanalysis.cpp b/main/phyloanalysis.cpp index 7fafdd978..3c3cb5d70 100644 --- a/main/phyloanalysis.cpp +++ b/main/phyloanalysis.cpp @@ -1312,6 +1312,12 @@ void printOutfilesInfo(Params ¶ms, IQTree &tree) { if (params.optimize_linked_gtr) { cout << " GTRPMIX nex file: " << params.out_prefix << ".GTRPMIX.nex" << endl; } + + if (params.mr_bayes_output) { + cout << endl << "MrBayes block written to:" << endl; + cout << " Base template file: " << params.out_prefix << ".mr_bayes_scheme.nex" << endl; + cout << " Template file with parameters: " << params.out_prefix << ".mr_bayes_model.nex" << endl; + } cout << endl; } @@ -3815,8 +3821,13 @@ void runTreeReconstruction(Params ¶ms, IQTree* &iqtree) { } if (iqtree->isSuperTree()) { - ((PhyloSuperTree*) iqtree)->computeBranchLengths(); - ((PhyloSuperTree*) iqtree)->printBestPartitionParams((string(params.out_prefix) + ".best_model.nex").c_str()); + auto superTree = (PhyloSuperTree*) iqtree; + superTree->computeBranchLengths(); + superTree->printBestPartitionParams((string(params.out_prefix) + ".best_model.nex").c_str()); + } + if (params.mr_bayes_output) { + iqtree->printMrBayesBlock((string(params.out_prefix) + ".mr_bayes_scheme.nex").c_str(), false); + iqtree->printMrBayesBlock((string(params.out_prefix) + ".mr_bayes_model.nex").c_str(), true); } cout << "BEST SCORE FOUND : " << iqtree->getCurScore() << endl; diff --git a/main/phylotesting.h b/main/phylotesting.h index 6de9a4213..a206a56f6 100644 --- a/main/phylotesting.h +++ b/main/phylotesting.h @@ -807,6 +807,13 @@ string convertSeqTypeToSeqTypeName(SeqType seq_type); string detectSeqTypeName(string model_name); +/** + * get string name from a SeqType object + * @param seq_type input sequence type + * @return name + */ +string getSeqTypeName(SeqType seq_type); + /****************************************************/ /* Q MATRICES NESTING CHECK */ /****************************************************/ diff --git a/model/modeldna.cpp b/model/modeldna.cpp index c10317c2f..f2cbc0be0 100644 --- a/model/modeldna.cpp +++ b/model/modeldna.cpp @@ -564,3 +564,7 @@ void ModelDNA::setVariables(double *variables) { // j++; // } } + +string ModelDNA::getModelDNACode() { + return param_spec; +} diff --git a/model/modeldna.h b/model/modeldna.h index 6694edde1..3f9bd7a61 100644 --- a/model/modeldna.h +++ b/model/modeldna.h @@ -114,6 +114,12 @@ class ModelDNA : public ModelMarkov */ virtual void computeTipLikelihood(PML::StateType state, double *state_lk); + /** + * Get the Model DNA 'code', in form 'abcdef', used with ModelDNA model + * Returns empty string by default (this is not a dna specific model) + */ + virtual string getModelDNACode(); + protected: /** diff --git a/model/modelsubst.h b/model/modelsubst.h index 662dc0a0c..6e93f0754 100644 --- a/model/modelsubst.h +++ b/model/modelsubst.h @@ -395,6 +395,12 @@ class ModelSubst: public Optimization, public CheckpointFactory */ virtual ModelSubst *getMutationModel() { return this; } + /** + * Get the Model DNA 'code', in form 'abcdef', used with ModelDNA model + * Returns empty string by default (this is not a dna specific model) + */ + virtual string getModelDNACode() { return ""; } + /***************************************************** Checkpointing facility *****************************************************/ diff --git a/tree/phylosupertree.cpp b/tree/phylosupertree.cpp index 653f87c5f..91c3f667e 100644 --- a/tree/phylosupertree.cpp +++ b/tree/phylosupertree.cpp @@ -23,6 +23,7 @@ #include "main/phylotesting.h" #include "model/partitionmodel.h" #include "utils/MPIHelper.h" +#include "utils/tools.h" PhyloSuperTree::PhyloSuperTree() : IQTree() @@ -1526,33 +1527,40 @@ void PhyloSuperTree::writeBranches(ostream &out) { } } -void PhyloSuperTree::printBestPartitionParams(const char *filename) { +void printCharsets(SuperAlignment* saln, size_t size, ofstream &out) +{ + for (int part = 0; part < size; part++) { + string name = saln->partitions[part]->name; + replace(name.begin(), name.end(), '+', '_'); + out << " charset " << name << " = "; + if (!saln->partitions[part]->aln_file.empty()) out << saln->partitions[part]->aln_file << ": "; + /*if (saln->partitions[part]->seq_type == SEQ_CODON) + out << "CODON, ";*/ + if (!saln->partitions[part]->sequence_type.empty()) + out << saln->partitions[part]->sequence_type << ", "; + string pos = saln->partitions[part]->position_spec; + replace(pos.begin(), pos.end(), ',' , ' '); + if (!saln->partitions[part]->sequence_type.empty() && !pos.empty()) { + out << ", "; + } + out << pos << ";" << endl; + } +} + +void PhyloSuperTree::printBestPartitionParams(const char *filename) +{ try { ofstream out; out.exceptions(ios::failbit | ios::badbit); out.open(filename); out << "#nexus" << endl << "begin sets;" << endl; - int part; - SuperAlignment *saln = (SuperAlignment*)aln; - for (part = 0; part < size(); part++) { - string name = saln->partitions[part]->name; - replace(name.begin(), name.end(), '+', '_'); - out << " charset " << name << " = "; - if (!saln->partitions[part]->aln_file.empty()) out << saln->partitions[part]->aln_file << ": "; - /*if (saln->partitions[part]->seq_type == SEQ_CODON) - out << "CODON, ";*/ - if (!saln->partitions[part]->sequence_type.empty()) - out << saln->partitions[part]->sequence_type << ","; - string pos = saln->partitions[part]->position_spec; - replace(pos.begin(), pos.end(), ',' , ' '); - if (!saln->partitions[part]->sequence_type.empty() && !pos.empty()) { - out << ", "; - } - out << pos << ";" << endl; - } + auto *saln = (SuperAlignment*)aln; + + printCharsets(saln, size(), out); + out << " charpartition mymodels =" << endl; - for (part = 0; part < size(); part++) { + for (int part = 0; part < size(); part++) { string name = saln->partitions[part]->name; replace(name.begin(), name.end(), '+', '_'); if (part > 0) out << "," << endl; @@ -1566,3 +1574,45 @@ void PhyloSuperTree::printBestPartitionParams(const char *filename) { outError(ERR_WRITE_OUTPUT, filename); } } + +void PhyloSuperTree::printMrBayesFreeRateReplacement(RateHeterogeneity *rate, string &charset, ofstream &out) { + // Call PhyloTree's function inside the correct checkpoint struct + checkpoint->startStruct("PartitionModelPlen"); + checkpoint->startStruct(charset); + PhyloTree::printMrBayesFreeRateReplacement(rate, charset, out); + checkpoint->endStruct(); + checkpoint->endStruct(); +} + +void PhyloSuperTree::printMrBayesBlock(const char *filename, bool inclParams) +{ + ofstream out = startMrBayesBlock(filename); + auto *saln = (SuperAlignment*)aln; + printCharsets(saln, size(), out); + out << endl; + + // Create Partition + size_type listSize = size(); + out << " partition iqtree = " << listSize << ": "; + for (int part = 0; part < listSize; part++) { + string name = saln->partitions[part]->name; + replace(name.begin(), name.end(), '+', '_'); + out << name; + if (part != listSize - 1) out << ", "; + else out << ";" << endl; + } + + // Set Partition for Use + out << " set partition = iqtree;" << endl; + + // Set Outgroup (if available) + if (!rooted) out << " outgroup " << root->name << ";" << endl << endl; + + // Partition-Specific Model Information + for (int part = 0; part < listSize; part++) { + // MrBayes Partitions are 1-indexed + printMrBayesModelText(this, at(part), out, convertIntToString(part + 1), saln->partitions[part]->name, inclParams); + } + out << "end;" << endl; + out.close(); +} diff --git a/tree/phylosupertree.h b/tree/phylosupertree.h index 140d2dcb7..19f8afbc0 100644 --- a/tree/phylosupertree.h +++ b/tree/phylosupertree.h @@ -475,12 +475,25 @@ class PhyloSuperTree : public IQTree, public vector */ void printBestPartitionParams(const char *filename); - + /** + print mr bayes block with partition information and best model parameters + @param filename output file name + @param inclParams whether to include iqtree params + */ + virtual void printMrBayesBlock(const char *filename, bool inclParams); + + /** + * Prints the replacement prior settings for +R or +R+I. + * @param rate the heterogeneity rate + * @param charset the (original) charset of the current partition. An empty string if not a partitioned tree + * @param out the ofstream to print to + */ + virtual void printMrBayesFreeRateReplacement(RateHeterogeneity* rate, string &charset, ofstream &out); + /** True when mixed codon with other data type */ bool rescale_codon_brlen; - - int totalNNIs, evalNNIs; + int totalNNIs, evalNNIs; }; #endif diff --git a/tree/phylotree.cpp b/tree/phylotree.cpp index 90d928116..708491002 100644 --- a/tree/phylotree.cpp +++ b/tree/phylotree.cpp @@ -37,7 +37,7 @@ #include "model/modelmixture.h" #include "phylonodemixlen.h" #include "phylotreemixlen.h" - +#include "main/phylotesting.h" const int LH_MIN_CONST = 1; @@ -6283,3 +6283,197 @@ void PhyloTree::doNNI_simple(NNIMove &move) { } } +/* Private Supplementary Logic Functions */ + +/** + * Find Rate Value (propinv for +I, gamma for +G/+R, invgamma for +I+G/+I+R)
+ * Default is Equal (+E) + */ +string getMrBayesRHASName(RateHeterogeneity* rate) +{ + bool hasGamma = rate->getGammaShape() != 0.0; + bool hasInvariable = rate->getPInvar() != 0.0; + if (hasGamma) { + if (hasInvariable) + return "invgamma"; + return "gamma"; + } + if (hasInvariable) + return "propinv"; + return "equal"; +} + +void printMrBayesRHASPrior(PhyloTree* originalTree, RateHeterogeneity* rate, string &charset, ofstream &out) +{ + bool hasI = rate->getPInvar() > 0.0; + + // Freerate (+R) + // Get replacement gamma shape + if (rate->isFreeRate()) { + originalTree ->printMrBayesFreeRateReplacement(rate, charset, out); + } + + // Gamma Distribution (+G/+R) + // Dirichlet is not available here, use fixed + if (rate->getGammaShape() > 0.0) + out << " shapepr=fixed(" << rate->getGammaShape() << ")"; + + // Invariable Sites (+I) + // Dirichlet is not available here, use fixed + if (hasI) + out << " pinvarpr=fixed(" << rate->getPInvar() << ")"; +} + +void printMrBayesModelTextDNA(PhyloTree* originalTree, PhyloTree* tree, ofstream &out, string &partition, string &charset, bool inclParams) +{ + ModelSubst* model = tree->getModel(); + bool equalFreq = model->freq_type == FREQ_EQUAL; + short nst = 6; + string rateMod = getMrBayesRHASName(tree->getRate()); + + // Find NST value (1 for JC/JC69/F81, 2 for K80/K2P/HKY/HKY85, 6 for SYM/GTR) + // NST is set to 6 by default (above), so check for JC/JC69/F81 and K80/K2P/HKY/HKY85 + // If it returns a valid dna code, we can extract information from there (needed for user-defined models) + string dnaCode = model->getModelDNACode(); + string modelStr = model->getName(); + if (!dnaCode.empty()) { + // if all equal + if (dnaCode.find_first_not_of(dnaCode[0]) == string::npos) + nst = 1; + // if A-G=C-T and everything else is equal to first part of dna code + else if (dnaCode[1] == dnaCode[4] && dnaCode[0] == dnaCode[2] && dnaCode[0] == dnaCode[3] && dnaCode[0] == dnaCode[5]) + nst = 2; + } else if (!modelStr.empty()) { + // Check the name of the model + if (strcmp(modelStr.c_str(), "JC") == 0 || strcmp(modelStr.c_str(), "JC69") == 0 || strcmp(modelStr.c_str(), "F81") == 0) + nst = 1; + else if (strcmp(modelStr.c_str(), "K80") == 0 || strcmp(modelStr.c_str(), "K2P") == 0 || strcmp(modelStr.c_str(), "HKY") == 0 || strcmp(modelStr.c_str(), "HKY85") == 0) + nst = 2; + } + + // NucModel = 4By4: DNA Nucleotides (4 Options, A, C, G, T) + out << " lset applyto=(" << partition << ") nucmodel=4by4 nst=" << nst << " rates=" << rateMod << ";" << endl; + + // Priors (apart from Fixed Freq) + if (!inclParams) { + // If not include other params, simply set fixed frequency and return + if (!equalFreq) return; + + out << " prset applyto=(" << partition << ") statefreqpr=fixed(equal);" << endl; + return; + } + + out << " prset applyto=(" << partition << ")"; + + printMrBayesRHASPrior(originalTree, tree->getRate(), charset, out); + + // Frequency of Nucleotides (+F) + if (equalFreq) + out << " statefreqpr=fixed(equal)"; + else { + auto* freq = new double[model->num_states]; + model->getStateFrequency(freq); + out << " statefreqpr=dirichlet("; + for (int i = 0; i < model->num_states; ++i) { + if (i != 0) out << ", "; + out << freq[i]; + } + out << ")"; + + delete[] freq; + } + + // Reversible Rate Matrix + int numRateEntries = model->getNumRateEntries(); + auto* rateMat = new double[numRateEntries]; + model->getRateMatrix(rateMat); + + out << " revmatpr=dirichlet("; + for (int i = 0; i < numRateEntries; ++i) { + if (i != 0) out << ", "; + out << rateMat[i]; + } + + // Close revmatpr + prset + out << ");" << endl; + + delete[] rateMat; +} + +/* Public Supplementary Functions (Used by PhyloSuperTree) */ +ofstream startMrBayesBlock(const char *filename) { + ofstream out; + try { + out.exceptions(ios::failbit | ios::badbit); + out.open(filename); + + // Write Warning + out << "#nexus" << endl << endl + <<"[This MrBayes Block Declaration provides the retrieved values from the IQTree Run.]" << endl + << "[Note that MrBayes does not support a large collection of models, so defaults of 'nst=6' for DNA and 'wag' for Protein will be used if a model that does not exist in MrBayes is selected.]" << endl + << "[Furthermore, the Model Parameter '+R' will be replaced by '+G'.]" << endl + << "[This should be used as a Template Only.]" << endl << endl; + + // Begin File, Print Charsets + out << "begin mrbayes;" << endl; + } catch (ios::failure &) { + outError(ERR_WRITE_OUTPUT, filename); + } + return out; +} + +void printMrBayesModelText(PhyloTree* originalTree, PhyloTree* tree, ofstream& out, string partition, string charset, bool inclParams) +{ + switch(tree->aln->seq_type) { + case SEQ_DNA: + printMrBayesModelTextDNA(originalTree, tree, out, partition, charset, inclParams); + break; + case SEQ_CODON: + break; + case SEQ_PROTEIN: + break; + case SEQ_BINARY: + break; + case SEQ_MORPH: + break; + default: + outWarning("MrBayes Block output not supported for sequence type " + getSeqTypeName(tree->aln->seq_type)); + return; + } + out << endl; // Add extra endline after model text for readability +} + +/* Protected Overrided Function */ +void PhyloTree::printMrBayesFreeRateReplacement(RateHeterogeneity *rate, string &charset, ofstream &out) { + bool hasInvar = rate->getPInvar() > 0.0; + string key = hasInvar ? "RateGammaInvar" : "RateGamma"; + + double gamma_shape = 0.0; + double p_invar = 0.0; + + checkpoint->startStruct(key); + + CKP_RESTORE(gamma_shape); + if (hasInvar) + CKP_RESTORE(p_invar); + + checkpoint->endStruct(); + + // Sanity Check + if (gamma_shape > 0.0) + out << " shapepr=fixed(" << gamma_shape << ")"; + + if (p_invar > 0.0) + out << " pinvarpr=fixed(" << p_invar << ")"; +} + +/* Main Function */ +void PhyloTree::printMrBayesBlock(const char *filename, bool inclParams) { + ofstream out = startMrBayesBlock(filename); + // Set Outgroup (if available) + if (!rooted) out << " outgroup " << root->name << ";" << endl << endl; + + printMrBayesModelText(this, this, out, "all", "", inclParams); + out << "end;" << endl; + out.close(); +} diff --git a/tree/phylotree.h b/tree/phylotree.h index efb6fae90..0d8c52d0e 100644 --- a/tree/phylotree.h +++ b/tree/phylotree.h @@ -2285,6 +2285,21 @@ class PhyloTree : public MTree, public Optimization, public CheckpointFactory { */ const string& getDistanceFileWritten() const; + /** + print MrBayes block with partition information, and best model parameters (if inclParams is true) + @param filename output file name + @param inclParams whether to include iqtree params + */ + virtual void printMrBayesBlock(const char *filename, bool inclParams); + + /** + * Prints the replacement prior settings for +R or +R+I. + * @param rate the heterogeneity rate + * @param charset the (original) charset of the current partition. An empty string if not a partitioned tree + * @param out the ofstream to print to + */ + virtual void printMrBayesFreeRateReplacement(RateHeterogeneity* rate, string &charset, ofstream &out); + protected: @@ -2532,4 +2547,22 @@ class PhyloTree : public MTree, public Optimization, public CheckpointFactory { void doneProgress(); }; +/** + * print MrBayes formatted text, of model information (and parameters), to an ofstream. + * @param originalTree the original tree, of which is called to retrieve values in checkpoint file (to allow for overriding) + * @param tree tree to print info from + * @param out stream to print to + * @param partition the partition to 'apply' to (all to apply to all partitions) + * @param charset the (original) charset of the current partition. Used to retrieve values in checkpoint file. Input an empty string if this is not a partitioned tree. + * @param inclParams whether to include the parameters of the model + */ +void printMrBayesModelText(PhyloTree* originalTree, PhyloTree* tree, ofstream& out, string partition, string charset, bool inclParams); + +/** + * Start a MrBayes Block, as well as printing a warning. + * @param filename output file name + * @return ofstream for other functions to print to + */ +ofstream startMrBayesBlock(const char *filename); + #endif diff --git a/utils/tools.cpp b/utils/tools.cpp index cddd08f45..ebc1c7fb7 100644 --- a/utils/tools.cpp +++ b/utils/tools.cpp @@ -1611,6 +1611,7 @@ void parseArg(int argc, char *argv[], Params ¶ms) { params.include_pre_mutations = false; params.mutation_file = ""; params.site_starting_index = 0; + params.mr_bayes_output = false; // ----------- SPRTA ---------- params.compute_SPRTA = false; @@ -5905,7 +5906,10 @@ void parseArg(int argc, char *argv[], Params ¶ms) { params.make_consistent = true; continue; } - + if (strcmp(argv[cnt], "-mrbayes") == 0) { + params.mr_bayes_output = true; + continue; + } if (argv[cnt][0] == '-') { string err = "Invalid \""; err += argv[cnt]; @@ -6046,6 +6050,10 @@ void parseArg(int argc, char *argv[], Params ¶ms) { if (params.model_name.find("LINK") != string::npos || params.model_name.find("MERGE") != string::npos) if (params.partition_merge == MERGE_NONE) params.partition_merge = MERGE_RCLUSTERF; + + // Set MrBayes Block Output if -mset mrbayes + if (params.model_set == "mrbayes") + params.mr_bayes_output = true; if (params.alisim_active && !params.aln_file && !params.user_file && !params.partition_file && params.tree_gen == NONE) outError("A tree filepath is a mandatory input to execute AliSim when neither Inference mode nor Random mode (generating a random tree) is inactive. Use -t ; or Activate the inference mode by -s ; or Activate Random mode by -t RANDOM{,} where is yh, u, cat, bal, bd{,} stands for Yule-Harding, Uniform, Caterpillar, Balanced, Birth-Death model respectively."); @@ -6259,7 +6267,7 @@ void usage_iqtree(char* argv[], bool full_command) { // << " -pll Use phylogenetic likelihood library (PLL) (default: off)" << endl << " --ninit NUM Number of initial parsimony trees (default: 100)" << endl << " --ntop NUM Number of top initial trees (default: 20)" << endl - << " --nbest NUM Number of best trees retained during search (defaut: 5)" << endl + << " --nbest NUM Number of best trees retained during search (default: 5)" << endl << " -n NUM Fix number of iterations to stop (default: OFF)" << endl << " --nstop NUM Number of unsuccessful iterations to stop (default: 100)" << endl << " --perturb NUM Perturbation strength for randomized NNI (default: 0.5)" << endl @@ -6319,6 +6327,8 @@ void usage_iqtree(char* argv[], bool full_command) { << " -m ...+LMSS Additionally test strand-symmetric models" << endl << " --mset STRING Restrict search to models supported by other programs" << endl << " (raxml, phyml, mrbayes, beast1 or beast2)" << endl + << " If 'mrbayes' is selected, will output a MrBayes" << endl + << " Block File if Data Type is supported." << endl << " --mset STR,... Comma-separated model list (e.g. -mset WAG,LG,JTT)" << endl << " --msub STRING Amino-acid model source" << endl << " (nuclear, mitochondrial, chloroplast or viral)" << endl @@ -6558,8 +6568,9 @@ void usage_iqtree(char* argv[], bool full_command) { // << " -d Calculate the distance matrix inferred from tree" << endl // << " -stats Output some statistics about branch lengths" << endl // << " -comp Compare tree with each in the input trees" << endl; - - + << " -mrbayes Outputs a Mr Bayes block file, to use as a template for future analysis" << endl + << " Will not output if incompatible data types are used" << endl + << " (MrBayes only supports DNA, Codon, Protein, Binary and Morphological data)" << endl << endl; if (full_command) { diff --git a/utils/tools.h b/utils/tools.h index 749bf6cf4..d652556f1 100644 --- a/utils/tools.h +++ b/utils/tools.h @@ -2852,6 +2852,11 @@ class Params { * site starting index (for predefined mutations in AliSim) */ int site_starting_index; + + /** + * Whether to output a MrBayes Block File + */ + bool mr_bayes_output; }; /** @@ -3812,5 +3817,4 @@ double frob_norm (double m[], int n, double scale=1.0); */ string getOutputNameWithExt(const InputType& format, const string& output_filepath); - #endif From 22dae835e02200b842284f25a5df6d7374248b1e Mon Sep 17 00:00:00 2001 From: Integer Limit <103940576+IntegerLimit@users.noreply.github.com> Date: Fri, 5 Jul 2024 21:39:17 +1000 Subject: [PATCH 02/15] Support Exporting Protein, Binary and Morphological Data to MrBayes (#264) * Protein Model * Morphological Models Support * Cleanup Move Model Specific Functions to each model class Move other functions from phylotree and phylosupertree to phyloanalysis * Cleanup Imports * Binary Model Support * Misc Cleanup Misc Cleanup * Output Files Readability, Default Warning & Help Message * Fix Edge Case: Importing Values < 0.01 into MrBayes * Fix Edge Case: Extra Characters in Charset * Fix +G+I or +R Inputs * Fix Issues with Binary Model * Fix Issues with Morphology Model --- main/phyloanalysis.cpp | 82 +++++++++++++++- model/modelbin.cpp | 53 +++++++++++ model/modelbin.h | 12 +++ model/modeldna.cpp | 98 ++++++++++++++++++- model/modeldna.h | 12 ++- model/modelmorphology.cpp | 51 ++++++++++ model/modelmorphology.h | 12 +++ model/modelprotein.cpp | 89 +++++++++++++++++ model/modelprotein.h | 12 +++ model/modelsubst.cpp | 41 ++++++++ model/modelsubst.h | 22 ++++- tree/phylosupertree.cpp | 90 ++++------------- tree/phylosupertree.h | 19 +--- tree/phylotree.cpp | 196 +------------------------------------- tree/phylotree.h | 33 ------- utils/tools.cpp | 34 +++++++ utils/tools.h | 17 ++++ 17 files changed, 549 insertions(+), 324 deletions(-) diff --git a/main/phyloanalysis.cpp b/main/phyloanalysis.cpp index 3c3cb5d70..bb6f6fa08 100644 --- a/main/phyloanalysis.cpp +++ b/main/phyloanalysis.cpp @@ -3302,6 +3302,82 @@ SuperNeighbor* findRootedNeighbour(SuperNeighbor* super_root, int part_id) { return nullptr; } +void printMrBayesBlockFile(const char* filename, IQTree* &iqtree, bool inclParams) { + ofstream out; + try { + out.exceptions(ios::failbit | ios::badbit); + out.open(filename); + + // Write Warning + out << "#nexus" << endl << endl + <<"[This MrBayes Block Declaration provides the retrieved values from the IQTree Run.]" << endl + << "[Note that MrBayes does not support a large collection of models, so defaults of 'nst=6' for DNA and 'wag' for Protein will be used if a model that does not exist in MrBayes is selected.]" << endl + << "[Furthermore, the Model Parameter '+R' will be replaced by '+G'.]" << endl + << "[This should be used as a Template Only.]" << endl << endl; + + // Begin File, Print Charsets + out << "begin mrbayes;" << endl; + } catch (ios::failure &) { + outError(ERR_WRITE_OUTPUT, filename); + } + + if (!iqtree->isSuperTree()) { + // Set Outgroup (if available) + if (!iqtree->rooted) out << " outgroup " << iqtree->root->name << ";" << endl << endl; + + out << " [Using Model '" << iqtree->getModelName() << "']" << endl; + iqtree->getModel()->printMrBayesModelText(iqtree->getRate(), out, "all", "", false, inclParams); + + out << endl << "end;" << endl; + out.close(); + return; + } + + auto superTree = (PhyloSuperTree*) iqtree; + auto saln = (SuperAlignment*) superTree->aln; + auto size = superTree->size(); + + for (int part = 0; part < size; part++) { + string name = saln->partitions[part]->name; + replace(name.begin(), name.end(), '+', '_'); + out << " charset " << name << " = "; + + string pos = saln->partitions[part]->position_spec; + replace(pos.begin(), pos.end(), ',' , ' '); + out << pos << ";" << endl; + } + + // Create Partition + out << endl << " partition iqtree = " << size << ": "; + for (int part = 0; part < size; part++) { + if (part != 0) out << ", "; + + string name = saln->partitions[part]->name; + replace(name.begin(), name.end(), '+', '_'); + out << name; + } + out << ";" << endl; + + // Set Partition for Use + out << " set partition = iqtree;" << endl; + + // Set Outgroup (if available) + if (!superTree->rooted) out << " outgroup " << superTree->root->name << ";" << endl << endl; + + // Partition-Specific Model Information + for (int part = 0; part < size; part++) { + PhyloTree* currentTree = superTree->at(part); + + // MrBayes Partitions are 1-indexed + out << " [Partition No. " << convertIntToString(part + 1) << ", Using Model '" << currentTree->getModelName() << "']" << endl; + currentTree->getModel()->printMrBayesModelText(currentTree->getRate(), out, + convertIntToString(part + 1), saln->partitions[part]->name, true, inclParams); + out << endl; + } + out << "end;" << endl; + out.close(); +} + /************************************************************ * MAIN TREE RECONSTRUCTION ***********************************************************/ @@ -3826,8 +3902,10 @@ void runTreeReconstruction(Params ¶ms, IQTree* &iqtree) { superTree->printBestPartitionParams((string(params.out_prefix) + ".best_model.nex").c_str()); } if (params.mr_bayes_output) { - iqtree->printMrBayesBlock((string(params.out_prefix) + ".mr_bayes_scheme.nex").c_str(), false); - iqtree->printMrBayesBlock((string(params.out_prefix) + ".mr_bayes_model.nex").c_str(), true); + cout << endl << "Writing MrBayes Block Files..." << endl; + printMrBayesBlockFile((string(params.out_prefix) + ".mr_bayes_scheme.nex").c_str(), iqtree, false); + printMrBayesBlockFile((string(params.out_prefix) + ".mr_bayes_model.nex").c_str(), iqtree, true); + cout << endl; } cout << "BEST SCORE FOUND : " << iqtree->getCurScore() << endl; diff --git a/model/modelbin.cpp b/model/modelbin.cpp index e8291e00a..262cbea80 100644 --- a/model/modelbin.cpp +++ b/model/modelbin.cpp @@ -76,3 +76,56 @@ string ModelBIN::getNameParams(bool show_fixed_params) { } return retname.str(); } + +void ModelBIN::printMrBayesModelText(RateHeterogeneity* rate, ofstream& out, string partition, string charset, bool isSuperTree, bool inclParams) { + // MrBayes does not support Invariable Modifier for Binary data + if (rate->isFreeRate() || rate->getPInvar() > 0.0) { + warnLogStream("MrBayes does not support Invariable Sites with Binary Data! +I has been ignored!", out); + } + + // Lset Parameters + out << " lset applyto=(" << partition << ") rates="; + + // Free Rate should be substituted by +G (+I not supported) + bool hasGamma = rate->getGammaShape() != 0.0 || rate->isFreeRate(); + if (hasGamma) { + // Rate Categories + Gamma + out << "gamma ngammacat=" << rate->getNRate(); + } else + out << "equal"; + + out << ";" << endl; + + if (!inclParams) { + if (freq_type == FREQ_EQUAL) out << " prset applyto=(" << partition << ") statefreqpr=fixed(equal);" << endl; + return; + } + + // Prset Parameters + out << " prset applyto=(" << partition << ")"; + + // Freerate (+R) + // Get replacement Gamma Shape + if (rate->isFreeRate()) { + printMrBayesFreeRateReplacement(rate, charset, out, false); + } + + // Gamma Distribution (+G/+R) + // Dirichlet is not available here, use fixed + if (rate->getGammaShape() > 0.0) + out << " shapepr=fixed(" << minValueCheckMrBayes(rate->getGammaShape()) << ")"; + + // State Frequencies + if (freq_type == FREQ_EQUAL) + out << " statefreqpr=fixed(equal)"; + else { + out << " statefreqpr=dirichlet("; + for (int i = 0; i < num_states; ++i) { + if (i != 0) out << ", "; + out << minValueCheckMrBayes(state_freq[i]); + } + out << ")"; + } + + out << ";" << endl; +} diff --git a/model/modelbin.h b/model/modelbin.h index 9c3ab6cf2..b2c257d9e 100644 --- a/model/modelbin.h +++ b/model/modelbin.h @@ -55,6 +55,18 @@ class ModelBIN : public ModelMarkov */ virtual void startCheckpoint(); + /** + * Print the model information in a format that can be accepted by MrBayes, using lset and prset.
+ * By default, it simply prints a warning to the log and to the stream, stating that this model is not supported by MrBayes. + * @param rate the rate information + * @param out the ofstream to print the result to + * @param partition the partition to apply lset and prset to + * @param charset the current partition's charset. Useful for getting information from the checkpoint file + * @param isSuperTree whether the tree is a super tree. Useful for retrieving information from the checkpoint file, which has different locations for PhyloTree and PhyloSuperTree + * @param inclParams whether to include IQTree optimized parameters for the model + */ + virtual void printMrBayesModelText(RateHeterogeneity* rate, ofstream& out, string partition, string charset, bool isSuperTree, bool inclParams); + }; #endif diff --git a/model/modeldna.cpp b/model/modeldna.cpp index f2cbc0be0..4a36e16be 100644 --- a/model/modeldna.cpp +++ b/model/modeldna.cpp @@ -565,6 +565,100 @@ void ModelDNA::setVariables(double *variables) { // } } -string ModelDNA::getModelDNACode() { - return param_spec; +void ModelDNA::printMrBayesModelText(RateHeterogeneity* rate, ofstream& out, string partition, string charset, bool isSuperTree, bool inclParams) { + bool equalFreq = freq_type == FREQ_EQUAL; + short nst = 6; + + // Find NST value (1 for JC/JC69/F81, 2 for K80/K2P/HKY/HKY85, 6 for SYM/GTR) + // NST is set to 6 by default (above), so check for JC/JC69/F81 and K80/K2P/HKY/HKY85 + // If it returns a valid dna code, we can extract information from there (needed for user-defined models) + if (!param_spec.empty()) { + // if all equal + if (param_spec.find_first_not_of(param_spec[0]) == string::npos) { + nst = 1; + // if A-G=C-T and everything else is equal to first part of dna code + } else if (param_spec[1] == param_spec[4] && + param_spec[0] == param_spec[2] && param_spec[0] == param_spec[3] && param_spec[0] == param_spec[5]) { + nst = 2; + } + } else if (!name.empty()) { + // Check the name of the model + if (strcmp(name.c_str(), "JC") == 0 || strcmp(name.c_str(), "JC69") == 0 || strcmp(name.c_str(), "F81") == 0) { + nst = 1; + } else if (strcmp(name.c_str(), "K80") == 0 || strcmp(name.c_str(), "K2P") == 0 || strcmp(name.c_str(), "HKY") == 0 || strcmp(name.c_str(), "HKY85") == 0) { + nst = 2; + } + } + + // NucModel = 4By4: DNA Nucleotides (4 Options, A, C, G, T) + out << " lset applyto=(" << partition << ") nucmodel=4by4 nst=" << nst << " rates="; + + // RHAS Specification + // Free Rate should be substituted by +G+I + bool hasGamma = rate->getGammaShape() != 0.0 || rate->isFreeRate(); + bool hasInvariable = rate->getPInvar() != 0.0 || rate->isFreeRate(); + if (hasGamma) { + if (hasInvariable) + out << "invgamma"; + else + out << "gamma"; + } else if (hasInvariable) + out << "propinv"; + else + out << "equal"; + + // Rate Categories + if (hasGamma) + out << " ngammacat=" << rate->getNRate(); + + out << ";" << endl; + + // Priors (apart from Fixed Freq) + if (!inclParams) { + // If not include other params, simply set fixed frequency and return + if (!equalFreq) return; + + out << " prset applyto=(" << partition << ") statefreqpr=fixed(equal);" << endl; + return; + } + + out << " prset applyto=(" << partition << ")"; + + // Freerate (+R) + // Get replacement Gamma Shape + Invariable Sites + if (rate->isFreeRate()) { + printMrBayesFreeRateReplacement(isSuperTree, charset, out); + } + + // Gamma Distribution (+G/+R) + // Dirichlet is not available here, use fixed + if (rate->getGammaShape() > 0.0) + out << " shapepr=fixed(" << minValueCheckMrBayes(rate->getGammaShape()) << ")"; + + // Invariable Sites (+I) + // Dirichlet is not available here, use fixed + if (rate->getPInvar() > 0.0) + out << " pinvarpr=fixed(" << minValueCheckMrBayes(rate->getPInvar()) << ")"; + + // Frequency of Nucleotides (+F) + if (equalFreq) + out << " statefreqpr=fixed(equal)"; + else { + out << " statefreqpr=dirichlet("; + for (int i = 0; i < num_states; ++i) { + if (i != 0) out << ", "; + out << minValueCheckMrBayes(state_freq[i]); + } + out << ")"; + } + + // Reversible Rate Matrix + out << " revmatpr=dirichlet("; + for (int i = 0; i < getNumRateEntries(); ++i) { + if (i != 0) out << ", "; + out << minValueCheckMrBayes(rates[i]); + } + + // Close revmatpr + prset + out << ");" << endl; } diff --git a/model/modeldna.h b/model/modeldna.h index 3f9bd7a61..0c252c178 100644 --- a/model/modeldna.h +++ b/model/modeldna.h @@ -115,10 +115,16 @@ class ModelDNA : public ModelMarkov virtual void computeTipLikelihood(PML::StateType state, double *state_lk); /** - * Get the Model DNA 'code', in form 'abcdef', used with ModelDNA model - * Returns empty string by default (this is not a dna specific model) + * Print the model information in a format that can be accepted by MrBayes, using lset and prset.
+ * By default, it simply prints a warning to the log and to the stream, stating that this model is not supported by MrBayes. + * @param rate the rate information + * @param out the ofstream to print the result to + * @param partition the partition to apply lset and prset to + * @param charset the current partition's charset. Useful for getting information from the checkpoint file + * @param isSuperTree whether the tree is a super tree. Useful for retrieving information from the checkpoint file, which has different locations for PhyloTree and PhyloSuperTree + * @param inclParams whether to include IQTree optimized parameters for the model */ - virtual string getModelDNACode(); + virtual void printMrBayesModelText(RateHeterogeneity* rate, ofstream& out, string partition, string charset, bool isSuperTree, bool inclParams); protected: diff --git a/model/modelmorphology.cpp b/model/modelmorphology.cpp index 0e3fe4a71..b1a546565 100644 --- a/model/modelmorphology.cpp +++ b/model/modelmorphology.cpp @@ -171,3 +171,54 @@ void ModelMorphology::writeInfo(ostream &out) { out << endl; } } + +void ModelMorphology::printMrBayesModelText(RateHeterogeneity* rate, ofstream& out, string partition, string charset, bool isSuperTree, bool inclParams) { + warnLogStream("MrBayes only supports Morphological Data with states from {0-9}!", out); + warnLogStream("Morphological Data with states {A-Z} may cause errors!", out); + warnLogStream("Use Morphological Models in MrBayes with Caution!", out); + + // MrBayes does not support Invariable Modifier for Morph data + if (rate->isFreeRate() || rate->getPInvar() > 0.0) { + warnLogStream("MrBayes does not support Invariable Sites with Morphological Data! +I has been ignored!", out); + } + // MrBayes does not support State Frequency for Morph data + if (freq_type != FREQ_EQUAL) { + warnLogStream("MrBayes does not support non-equal frequencies for Morphological Data! Frequencies are left as the default! (All equal)", out); + } + + // Lset Parameters + out << " lset applyto=(" << partition << ") rates="; + + // Free Rate should be substituted by +G (+I not supported) + bool hasGamma = rate->getGammaShape() != 0.0 || rate->isFreeRate(); + if (hasGamma) { + // Rate Categories + Gamma + out << "gamma ngammacat=" << rate->getNRate(); + } else + out << "equal"; + + out << ";" << endl; + + // ctype (ordered or not) + if (strcmp(name.c_str(), "ORDERED") == 0) + out << " ctype ordered: " << charset << ";" << endl; + + // Since morphological doesn't have state frequencies, if there are no parameters to prset, return + if (!inclParams || !rate->isFreeRate() && rate->getGammaShape() <= 0.0) return; + + // Prset Parameters + out << " prset applyto=(" << partition << ")"; + + // Freerate (+R) + // Get replacement Gamma Shape + if (rate->isFreeRate()) { + printMrBayesFreeRateReplacement(rate, charset, out, false); + } + + // Gamma Distribution (+G/+R) + // Dirichlet is not available here, use fixed + if (rate->getGammaShape() > 0.0) + out << " shapepr=fixed(" << minValueCheckMrBayes(rate->getGammaShape()) << ")"; + + out << ";" << endl; +} diff --git a/model/modelmorphology.h b/model/modelmorphology.h index 44b2889d1..9d0a0f427 100644 --- a/model/modelmorphology.h +++ b/model/modelmorphology.h @@ -83,6 +83,18 @@ class ModelMorphology: public ModelMarkov { virtual void readRates(istream &in) noexcept(false); virtual ~ModelMorphology(); + + /** + * Print the model information in a format that can be accepted by MrBayes, using lset and prset.
+ * By default, it simply prints a warning to the log and to the stream, stating that this model is not supported by MrBayes. + * @param rate the rate information + * @param out the ofstream to print the result to + * @param partition the partition to apply lset and prset to + * @param charset the current partition's charset. Useful for getting information from the checkpoint file + * @param isSuperTree whether the tree is a super tree. Useful for retrieving information from the checkpoint file, which has different locations for PhyloTree and PhyloSuperTree + * @param inclParams whether to include IQTree optimized parameters for the model + */ + virtual void printMrBayesModelText(RateHeterogeneity* rate, ofstream& out, string partition, string charset, bool isSuperTree, bool inclParams); }; #endif /* MODELMORPHOLOGY_H_ */ diff --git a/model/modelprotein.cpp b/model/modelprotein.cpp index 0b965fddb..44a7c78b3 100644 --- a/model/modelprotein.cpp +++ b/model/modelprotein.cpp @@ -1164,3 +1164,92 @@ void ModelProtein::computeTipLikelihood(PML::StateType state, double *state_lk) state_lk[ambi_aa[cstate*2+1]] = 1.0; } +void ModelProtein::printMrBayesModelText(RateHeterogeneity* rate, ofstream& out, string partition, string charset, bool isSuperTree, bool inclParams) { + // Lset Parameters + out << " lset applyto=(" << partition << ") nucmodel=protein rates="; + + // RHAS Specification + // Free Rate should be substituted by +G+I + bool hasGamma = rate->getGammaShape() != 0.0 || rate->isFreeRate(); + bool hasInvariable = rate->getPInvar() != 0.0 || rate->isFreeRate(); + if (hasGamma) { + if (hasInvariable) + out << "invgamma"; + else + out << "gamma"; + } else if (hasInvariable) + out << "propinv"; + else + out << "equal"; + + // Rate Categories + if (hasGamma) + out << " ngammacat=" << rate->getNRate(); + + out << ";" << endl; + + out << " prset applyto=(" << partition << ")"; + + // Get MrBayes Model + auto aaModelMap = getIqTreeToMrBayesAAModels(); + auto iter = aaModelMap.find(name); + string mappedModel = "gtr"; + + // If model is in map, set mappedModel to the value + if (iter != aaModelMap.end()) + mappedModel = iter->second; + + out << " aamodelpr=fixed(" << mappedModel << ")"; + + // GTR Customization + if (strcmp(mappedModel.c_str(), "gtr") == 0) { + // add rate matrix and state frequencies (mandatory for setting gtr values) + out << " aarevmatpr="; + + // if matrix is GTR20, use dirichlet, else use fixed + if (strcmp(name.c_str(), "GTR20") == 0) + out << "dirichlet("; + else + out << "fixed("; + + for (int i = 0; i < getNumRateEntries(); ++i) { + if (i != 0) out << ", "; + out << minValueCheckMrBayes(rates[i]); + } + out << ")"; + + // Frequency type is never equal to FREQ_EQUAL, even with Poisson + // Frequency is also auto-set if we use a model defined by MrBayes + out << " statefreqpr=dirichlet("; + for (int i = 0; i < num_states; ++i) { + if (i != 0) out << ", "; + out << minValueCheckMrBayes(state_freq[i]); + } + out << ")"; + } + + // if not to include the parameters (for Protein, simply +I, +G, +R) + if (!inclParams) { + out << ";"; + return; + } + + // Freerate (+R) + // Get replacement Gamma Shape + Invariable Sites + if (rate->isFreeRate()) { + printMrBayesFreeRateReplacement(isSuperTree, charset, out); + } + + // Gamma Distribution (+G/+R) + // Dirichlet is not available here, use fixed + if (rate->getGammaShape() > 0.0) + out << " shapepr=fixed(" << minValueCheckMrBayes(rate->getGammaShape()) << ")"; + + // Invariable Sites (+I) + // Dirichlet is not available here, use fixed + if (rate->getPInvar() > 0.0) + out << " pinvarpr=fixed(" << minValueCheckMrBayes(rate->getPInvar()) << ")"; + + out << ";" << endl; +} + diff --git a/model/modelprotein.h b/model/modelprotein.h index e6680eb4a..6dd1f69e6 100644 --- a/model/modelprotein.h +++ b/model/modelprotein.h @@ -79,6 +79,18 @@ class ModelProtein : public ModelMarkov */ virtual void computeTipLikelihood(PML::StateType state, double *state_lk); + /** + * Print the model information in a format that can be accepted by MrBayes, using lset and prset.
+ * By default, it simply prints a warning to the log and to the stream, stating that this model is not supported by MrBayes. + * @param rate the rate information + * @param out the ofstream to print the result to + * @param partition the partition to apply lset and prset to + * @param charset the current partition's charset. Useful for getting information from the checkpoint file + * @param isSuperTree whether the tree is a super tree. Useful for retrieving information from the checkpoint file, which has different locations for PhyloTree and PhyloSuperTree + * @param inclParams whether to include IQTree optimized parameters for the model + */ + virtual void printMrBayesModelText(RateHeterogeneity* rate, ofstream& out, string partition, string charset, bool isSuperTree, bool inclParams); + private: ModelsBlock *models_block; diff --git a/model/modelsubst.cpp b/model/modelsubst.cpp index f5533959d..efa546279 100644 --- a/model/modelsubst.cpp +++ b/model/modelsubst.cpp @@ -211,6 +211,47 @@ double *ModelSubst::newTransMatrix() { return new double[num_states * num_states]; } +void ModelSubst::printMrBayesFreeRateReplacement(bool isSuperTree, string &charset, ofstream &out, bool inclInvariable) +{ + double gamma_shape = 0.0; + double p_invar = 0.0; + + if (isSuperTree) { + checkpoint->startStruct("PartitionModelPlen"); + checkpoint->startStruct(charset); + } + checkpoint->startStruct("RateGammaInvar"); + + // Safety Check + if (!checkpoint->hasKey("gamma_shape") || !checkpoint->hasKey("p_invar")) { + outWarning("Could not find replacement for Freerate Distribution in MrBayes Block!"); + checkpoint->endStruct(); + return; + } + + outWarning("FreeRate Distributions (+R) are not available in MrBayes. It will be replaced by +G+I."); + + // Always replace +R with +G+I + CKP_RESTORE(gamma_shape); + if (inclInvariable) + CKP_RESTORE(p_invar); + + checkpoint->endStruct(); + if (isSuperTree) { + checkpoint->endStruct(); + checkpoint->endStruct(); + } + + out << " shapepr=fixed(" << minValueCheckMrBayes(gamma_shape) << ")"; + if (inclInvariable) + out << " pinvarpr=fixed(" << minValueCheckMrBayes(p_invar) << ")"; +} + +void ModelSubst::printMrBayesModelText(RateHeterogeneity* rate, ofstream& out, string partition, string charset, bool isSuperTree, bool inclParams) { + out << " [MrBayes Block Output is not supported by this model!]" << endl; + outWarning("MrBayes Block Output is not supported by this model of name '" + name + "'!"); +} + ModelSubst::~ModelSubst() { // mem space pointing to target model and thus avoid double free here diff --git a/model/modelsubst.h b/model/modelsubst.h index 6e93f0754..c1d419cd5 100644 --- a/model/modelsubst.h +++ b/model/modelsubst.h @@ -17,6 +17,7 @@ #include "utils/optimization.h" #include "utils/checkpoint.h" #include "phylo-yaml/statespace.h" +#include "model/rateheterogeneity.h" using namespace std; @@ -396,10 +397,25 @@ class ModelSubst: public Optimization, public CheckpointFactory virtual ModelSubst *getMutationModel() { return this; } /** - * Get the Model DNA 'code', in form 'abcdef', used with ModelDNA model - * Returns empty string by default (this is not a dna specific model) + * Prints the replacement prior settings, in MrBayes, for +R or +R+I. + * @param isSuperTree whether the tree is a super tree + * @param charset the (original) charset of the current partition. An empty string if not a partitioned tree + * @param out the ofstream to print to + * @param inclInvariable whether to include invariable sites. defaults to true */ - virtual string getModelDNACode() { return ""; } + void printMrBayesFreeRateReplacement(bool isSuperTree, string &charset, ofstream &out, bool inclInvariable = true); + + /** + * Print the model information in a format that can be accepted by MrBayes, using lset and prset.
+ * By default, it simply prints a warning to the log and to the stream, stating that this model is not supported by MrBayes. + * @param rate the rate information + * @param out the ofstream to print the result to + * @param partition the partition to apply lset and prset to + * @param charset the current partition's charset. Useful for getting information from the checkpoint file + * @param isSuperTree whether the tree is a super tree. Useful for retrieving information from the checkpoint file, which has different locations for PhyloTree and PhyloSuperTree + * @param inclParams whether to include IQTree optimized parameters for the model + */ + virtual void printMrBayesModelText(RateHeterogeneity* rate, ofstream& out, string partition, string charset, bool isSuperTree, bool inclParams); /***************************************************** Checkpointing facility diff --git a/tree/phylosupertree.cpp b/tree/phylosupertree.cpp index 91c3f667e..7b5d83b12 100644 --- a/tree/phylosupertree.cpp +++ b/tree/phylosupertree.cpp @@ -23,7 +23,6 @@ #include "main/phylotesting.h" #include "model/partitionmodel.h" #include "utils/MPIHelper.h" -#include "utils/tools.h" PhyloSuperTree::PhyloSuperTree() : IQTree() @@ -1527,40 +1526,33 @@ void PhyloSuperTree::writeBranches(ostream &out) { } } -void printCharsets(SuperAlignment* saln, size_t size, ofstream &out) -{ - for (int part = 0; part < size; part++) { - string name = saln->partitions[part]->name; - replace(name.begin(), name.end(), '+', '_'); - out << " charset " << name << " = "; - if (!saln->partitions[part]->aln_file.empty()) out << saln->partitions[part]->aln_file << ": "; - /*if (saln->partitions[part]->seq_type == SEQ_CODON) - out << "CODON, ";*/ - if (!saln->partitions[part]->sequence_type.empty()) - out << saln->partitions[part]->sequence_type << ", "; - string pos = saln->partitions[part]->position_spec; - replace(pos.begin(), pos.end(), ',' , ' '); - if (!saln->partitions[part]->sequence_type.empty() && !pos.empty()) { - out << ", "; - } - out << pos << ";" << endl; - } -} - -void PhyloSuperTree::printBestPartitionParams(const char *filename) -{ +void PhyloSuperTree::printBestPartitionParams(const char *filename) { try { ofstream out; out.exceptions(ios::failbit | ios::badbit); out.open(filename); out << "#nexus" << endl << "begin sets;" << endl; - auto *saln = (SuperAlignment*)aln; - - printCharsets(saln, size(), out); - + int part; + SuperAlignment *saln = (SuperAlignment*)aln; + for (part = 0; part < size(); part++) { + string name = saln->partitions[part]->name; + replace(name.begin(), name.end(), '+', '_'); + out << " charset " << name << " = "; + if (!saln->partitions[part]->aln_file.empty()) out << saln->partitions[part]->aln_file << ": "; + /*if (saln->partitions[part]->seq_type == SEQ_CODON) + out << "CODON, ";*/ + if (!saln->partitions[part]->sequence_type.empty()) + out << saln->partitions[part]->sequence_type << ", "; + string pos = saln->partitions[part]->position_spec; + replace(pos.begin(), pos.end(), ',' , ' '); + if (!saln->partitions[part]->sequence_type.empty() && !pos.empty()) { + out << ", "; + } + out << pos << ";" << endl; + } out << " charpartition mymodels =" << endl; - for (int part = 0; part < size(); part++) { + for (part = 0; part < size(); part++) { string name = saln->partitions[part]->name; replace(name.begin(), name.end(), '+', '_'); if (part > 0) out << "," << endl; @@ -1574,45 +1566,3 @@ void PhyloSuperTree::printBestPartitionParams(const char *filename) outError(ERR_WRITE_OUTPUT, filename); } } - -void PhyloSuperTree::printMrBayesFreeRateReplacement(RateHeterogeneity *rate, string &charset, ofstream &out) { - // Call PhyloTree's function inside the correct checkpoint struct - checkpoint->startStruct("PartitionModelPlen"); - checkpoint->startStruct(charset); - PhyloTree::printMrBayesFreeRateReplacement(rate, charset, out); - checkpoint->endStruct(); - checkpoint->endStruct(); -} - -void PhyloSuperTree::printMrBayesBlock(const char *filename, bool inclParams) -{ - ofstream out = startMrBayesBlock(filename); - auto *saln = (SuperAlignment*)aln; - printCharsets(saln, size(), out); - out << endl; - - // Create Partition - size_type listSize = size(); - out << " partition iqtree = " << listSize << ": "; - for (int part = 0; part < listSize; part++) { - string name = saln->partitions[part]->name; - replace(name.begin(), name.end(), '+', '_'); - out << name; - if (part != listSize - 1) out << ", "; - else out << ";" << endl; - } - - // Set Partition for Use - out << " set partition = iqtree;" << endl; - - // Set Outgroup (if available) - if (!rooted) out << " outgroup " << root->name << ";" << endl << endl; - - // Partition-Specific Model Information - for (int part = 0; part < listSize; part++) { - // MrBayes Partitions are 1-indexed - printMrBayesModelText(this, at(part), out, convertIntToString(part + 1), saln->partitions[part]->name, inclParams); - } - out << "end;" << endl; - out.close(); -} diff --git a/tree/phylosupertree.h b/tree/phylosupertree.h index 19f8afbc0..140d2dcb7 100644 --- a/tree/phylosupertree.h +++ b/tree/phylosupertree.h @@ -475,25 +475,12 @@ class PhyloSuperTree : public IQTree, public vector */ void printBestPartitionParams(const char *filename); - /** - print mr bayes block with partition information and best model parameters - @param filename output file name - @param inclParams whether to include iqtree params - */ - virtual void printMrBayesBlock(const char *filename, bool inclParams); - - /** - * Prints the replacement prior settings for +R or +R+I. - * @param rate the heterogeneity rate - * @param charset the (original) charset of the current partition. An empty string if not a partitioned tree - * @param out the ofstream to print to - */ - virtual void printMrBayesFreeRateReplacement(RateHeterogeneity* rate, string &charset, ofstream &out); - + /** True when mixed codon with other data type */ bool rescale_codon_brlen; - + int totalNNIs, evalNNIs; + }; #endif diff --git a/tree/phylotree.cpp b/tree/phylotree.cpp index 708491002..90d928116 100644 --- a/tree/phylotree.cpp +++ b/tree/phylotree.cpp @@ -37,7 +37,7 @@ #include "model/modelmixture.h" #include "phylonodemixlen.h" #include "phylotreemixlen.h" -#include "main/phylotesting.h" + const int LH_MIN_CONST = 1; @@ -6283,197 +6283,3 @@ void PhyloTree::doNNI_simple(NNIMove &move) { } } -/* Private Supplementary Logic Functions */ - -/** - * Find Rate Value (propinv for +I, gamma for +G/+R, invgamma for +I+G/+I+R)
- * Default is Equal (+E) - */ -string getMrBayesRHASName(RateHeterogeneity* rate) -{ - bool hasGamma = rate->getGammaShape() != 0.0; - bool hasInvariable = rate->getPInvar() != 0.0; - if (hasGamma) { - if (hasInvariable) - return "invgamma"; - return "gamma"; - } - if (hasInvariable) - return "propinv"; - return "equal"; -} - -void printMrBayesRHASPrior(PhyloTree* originalTree, RateHeterogeneity* rate, string &charset, ofstream &out) -{ - bool hasI = rate->getPInvar() > 0.0; - - // Freerate (+R) - // Get replacement gamma shape - if (rate->isFreeRate()) { - originalTree ->printMrBayesFreeRateReplacement(rate, charset, out); - } - - // Gamma Distribution (+G/+R) - // Dirichlet is not available here, use fixed - if (rate->getGammaShape() > 0.0) - out << " shapepr=fixed(" << rate->getGammaShape() << ")"; - - // Invariable Sites (+I) - // Dirichlet is not available here, use fixed - if (hasI) - out << " pinvarpr=fixed(" << rate->getPInvar() << ")"; -} - -void printMrBayesModelTextDNA(PhyloTree* originalTree, PhyloTree* tree, ofstream &out, string &partition, string &charset, bool inclParams) -{ - ModelSubst* model = tree->getModel(); - bool equalFreq = model->freq_type == FREQ_EQUAL; - short nst = 6; - string rateMod = getMrBayesRHASName(tree->getRate()); - - // Find NST value (1 for JC/JC69/F81, 2 for K80/K2P/HKY/HKY85, 6 for SYM/GTR) - // NST is set to 6 by default (above), so check for JC/JC69/F81 and K80/K2P/HKY/HKY85 - // If it returns a valid dna code, we can extract information from there (needed for user-defined models) - string dnaCode = model->getModelDNACode(); - string modelStr = model->getName(); - if (!dnaCode.empty()) { - // if all equal - if (dnaCode.find_first_not_of(dnaCode[0]) == string::npos) - nst = 1; - // if A-G=C-T and everything else is equal to first part of dna code - else if (dnaCode[1] == dnaCode[4] && dnaCode[0] == dnaCode[2] && dnaCode[0] == dnaCode[3] && dnaCode[0] == dnaCode[5]) - nst = 2; - } else if (!modelStr.empty()) { - // Check the name of the model - if (strcmp(modelStr.c_str(), "JC") == 0 || strcmp(modelStr.c_str(), "JC69") == 0 || strcmp(modelStr.c_str(), "F81") == 0) - nst = 1; - else if (strcmp(modelStr.c_str(), "K80") == 0 || strcmp(modelStr.c_str(), "K2P") == 0 || strcmp(modelStr.c_str(), "HKY") == 0 || strcmp(modelStr.c_str(), "HKY85") == 0) - nst = 2; - } - - // NucModel = 4By4: DNA Nucleotides (4 Options, A, C, G, T) - out << " lset applyto=(" << partition << ") nucmodel=4by4 nst=" << nst << " rates=" << rateMod << ";" << endl; - - // Priors (apart from Fixed Freq) - if (!inclParams) { - // If not include other params, simply set fixed frequency and return - if (!equalFreq) return; - - out << " prset applyto=(" << partition << ") statefreqpr=fixed(equal);" << endl; - return; - } - - out << " prset applyto=(" << partition << ")"; - - printMrBayesRHASPrior(originalTree, tree->getRate(), charset, out); - - // Frequency of Nucleotides (+F) - if (equalFreq) - out << " statefreqpr=fixed(equal)"; - else { - auto* freq = new double[model->num_states]; - model->getStateFrequency(freq); - out << " statefreqpr=dirichlet("; - for (int i = 0; i < model->num_states; ++i) { - if (i != 0) out << ", "; - out << freq[i]; - } - out << ")"; - - delete[] freq; - } - - // Reversible Rate Matrix - int numRateEntries = model->getNumRateEntries(); - auto* rateMat = new double[numRateEntries]; - model->getRateMatrix(rateMat); - - out << " revmatpr=dirichlet("; - for (int i = 0; i < numRateEntries; ++i) { - if (i != 0) out << ", "; - out << rateMat[i]; - } - - // Close revmatpr + prset - out << ");" << endl; - - delete[] rateMat; -} - -/* Public Supplementary Functions (Used by PhyloSuperTree) */ -ofstream startMrBayesBlock(const char *filename) { - ofstream out; - try { - out.exceptions(ios::failbit | ios::badbit); - out.open(filename); - - // Write Warning - out << "#nexus" << endl << endl - <<"[This MrBayes Block Declaration provides the retrieved values from the IQTree Run.]" << endl - << "[Note that MrBayes does not support a large collection of models, so defaults of 'nst=6' for DNA and 'wag' for Protein will be used if a model that does not exist in MrBayes is selected.]" << endl - << "[Furthermore, the Model Parameter '+R' will be replaced by '+G'.]" << endl - << "[This should be used as a Template Only.]" << endl << endl; - - // Begin File, Print Charsets - out << "begin mrbayes;" << endl; - } catch (ios::failure &) { - outError(ERR_WRITE_OUTPUT, filename); - } - return out; -} - -void printMrBayesModelText(PhyloTree* originalTree, PhyloTree* tree, ofstream& out, string partition, string charset, bool inclParams) -{ - switch(tree->aln->seq_type) { - case SEQ_DNA: - printMrBayesModelTextDNA(originalTree, tree, out, partition, charset, inclParams); - break; - case SEQ_CODON: - break; - case SEQ_PROTEIN: - break; - case SEQ_BINARY: - break; - case SEQ_MORPH: - break; - default: - outWarning("MrBayes Block output not supported for sequence type " + getSeqTypeName(tree->aln->seq_type)); - return; - } - out << endl; // Add extra endline after model text for readability -} - -/* Protected Overrided Function */ -void PhyloTree::printMrBayesFreeRateReplacement(RateHeterogeneity *rate, string &charset, ofstream &out) { - bool hasInvar = rate->getPInvar() > 0.0; - string key = hasInvar ? "RateGammaInvar" : "RateGamma"; - - double gamma_shape = 0.0; - double p_invar = 0.0; - - checkpoint->startStruct(key); - - CKP_RESTORE(gamma_shape); - if (hasInvar) - CKP_RESTORE(p_invar); - - checkpoint->endStruct(); - - // Sanity Check - if (gamma_shape > 0.0) - out << " shapepr=fixed(" << gamma_shape << ")"; - - if (p_invar > 0.0) - out << " pinvarpr=fixed(" << p_invar << ")"; -} - -/* Main Function */ -void PhyloTree::printMrBayesBlock(const char *filename, bool inclParams) { - ofstream out = startMrBayesBlock(filename); - // Set Outgroup (if available) - if (!rooted) out << " outgroup " << root->name << ";" << endl << endl; - - printMrBayesModelText(this, this, out, "all", "", inclParams); - out << "end;" << endl; - out.close(); -} diff --git a/tree/phylotree.h b/tree/phylotree.h index 0d8c52d0e..efb6fae90 100644 --- a/tree/phylotree.h +++ b/tree/phylotree.h @@ -2285,21 +2285,6 @@ class PhyloTree : public MTree, public Optimization, public CheckpointFactory { */ const string& getDistanceFileWritten() const; - /** - print MrBayes block with partition information, and best model parameters (if inclParams is true) - @param filename output file name - @param inclParams whether to include iqtree params - */ - virtual void printMrBayesBlock(const char *filename, bool inclParams); - - /** - * Prints the replacement prior settings for +R or +R+I. - * @param rate the heterogeneity rate - * @param charset the (original) charset of the current partition. An empty string if not a partitioned tree - * @param out the ofstream to print to - */ - virtual void printMrBayesFreeRateReplacement(RateHeterogeneity* rate, string &charset, ofstream &out); - protected: @@ -2547,22 +2532,4 @@ class PhyloTree : public MTree, public Optimization, public CheckpointFactory { void doneProgress(); }; -/** - * print MrBayes formatted text, of model information (and parameters), to an ofstream. - * @param originalTree the original tree, of which is called to retrieve values in checkpoint file (to allow for overriding) - * @param tree tree to print info from - * @param out stream to print to - * @param partition the partition to 'apply' to (all to apply to all partitions) - * @param charset the (original) charset of the current partition. Used to retrieve values in checkpoint file. Input an empty string if this is not a partitioned tree. - * @param inclParams whether to include the parameters of the model - */ -void printMrBayesModelText(PhyloTree* originalTree, PhyloTree* tree, ofstream& out, string partition, string charset, bool inclParams); - -/** - * Start a MrBayes Block, as well as printing a warning. - * @param filename output file name - * @return ofstream for other functions to print to - */ -ofstream startMrBayesBlock(const char *filename); - #endif diff --git a/utils/tools.cpp b/utils/tools.cpp index ebc1c7fb7..159d73448 100644 --- a/utils/tools.cpp +++ b/utils/tools.cpp @@ -6571,6 +6571,7 @@ void usage_iqtree(char* argv[], bool full_command) { << " -mrbayes Outputs a Mr Bayes block file, to use as a template for future analysis" << endl << " Will not output if incompatible data types are used" << endl << " (MrBayes only supports DNA, Codon, Protein, Binary and Morphological data)" << endl + << " There will be no substitution model data output for Lie Markov and Mixture models!" << endl << endl; if (full_command) { @@ -8234,3 +8235,36 @@ string getOutputNameWithExt(const InputType& format, const string& output_filepa return output_filepath + ".phy"; } } + +void warnLogStream(string warn, ofstream &out) { + outWarning(warn); + out << "[" << warn << "]" << endl; +} + +double minValueCheckMrBayes(double origValue) { + if (origValue < 0.01) { + outWarning("MrBayes does not support values < 0.01! Using 0.01 instead..."); + return 0.01; + } + return origValue; +} + +// Cached Map +unordered_map iqTreeToMrBayesAAModels; + +unordered_map getIqTreeToMrBayesAAModels() { + if (!iqTreeToMrBayesAAModels.empty()) return iqTreeToMrBayesAAModels; + + iqTreeToMrBayesAAModels["Poisson"] = "poisson"; + iqTreeToMrBayesAAModels["JTT"] = "jones"; + iqTreeToMrBayesAAModels["Dayhoff"] = "dayhoff"; + iqTreeToMrBayesAAModels["mtREV"] = "mtrev"; + iqTreeToMrBayesAAModels["mtMAM"] = "mtmam"; + iqTreeToMrBayesAAModels["WAG"] = "wag"; + iqTreeToMrBayesAAModels["rtREV"] = "rtrev"; + iqTreeToMrBayesAAModels["cpREV"] = "cprev"; + iqTreeToMrBayesAAModels["VT"] = "vt"; + iqTreeToMrBayesAAModels["Blosum62"] = "blosum"; + iqTreeToMrBayesAAModels["LG"] = "lg"; + return iqTreeToMrBayesAAModels; +} diff --git a/utils/tools.h b/utils/tools.h index d652556f1..9fde9597e 100644 --- a/utils/tools.h +++ b/utils/tools.h @@ -3817,4 +3817,21 @@ double frob_norm (double m[], int n, double scale=1.0); */ string getOutputNameWithExt(const InputType& format, const string& output_filepath); +/** + * Prints a warning message to the log and to the ofstream, in a NEXUS format. + */ +void warnLogStream(string warn, ofstream &out); + +/** + * ensures a number, to be inputted into MrBayes, is larger than the minimum value for MrBayes (0.01) + */ +double minValueCheckMrBayes(double origValue); + + +/** + * get a map of iqtree amino acid/protein substitution models to MrBayes amino acid/protein substitution models.
+ * models which are not supported by mrbayes are not included. GTR20 is assumed as default. + */ +unordered_map getIqTreeToMrBayesAAModels(); + #endif From de0c8acf252ded0aa1841a8db0049d2e11509a64 Mon Sep 17 00:00:00 2001 From: Integer Limit <103940576+IntegerLimit@users.noreply.github.com> Date: Mon, 8 Jul 2024 17:47:22 +1000 Subject: [PATCH 03/15] Support Exporting Codon Analysis to MrBayes Block Files (#266) * Codon Model * Fix Compiler Error due to Merge Conflicts * Fix Codon Model `NucModel` Parameter * Fix Empirical Warning + Indentation of Warnings * Improve Start-Of-File Warnings * Fix Indentation in alignment.cpp --- alignment/alignment.cpp | 66 +++++++++++++++++++++++++-------------- alignment/alignment.h | 6 ++++ main/phyloanalysis.cpp | 20 +++++++++--- model/modelbin.cpp | 4 ++- model/modelbin.h | 2 +- model/modelcodon.cpp | 66 +++++++++++++++++++++++++++++++++++++++ model/modelcodon.h | 11 +++++++ model/modeldna.cpp | 3 +- model/modeldna.h | 3 +- model/modelmorphology.cpp | 4 ++- model/modelmorphology.h | 3 +- model/modelprotein.cpp | 4 ++- model/modelprotein.h | 3 +- model/modelsubst.cpp | 2 +- model/modelsubst.h | 3 +- utils/tools.cpp | 42 +++++++++++++++---------- utils/tools.h | 11 ++++++- 17 files changed, 193 insertions(+), 60 deletions(-) diff --git a/alignment/alignment.cpp b/alignment/alignment.cpp index d02ccdd9c..f5fba4f00 100644 --- a/alignment/alignment.cpp +++ b/alignment/alignment.cpp @@ -23,6 +23,7 @@ #include #ifdef USE_BOOST +#include #include #endif @@ -62,6 +63,8 @@ char genetic_code23[] = "KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSS char genetic_code24[] = "KNKNTTTTSSKSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSSWCWCLFLF"; // Pterobranchia mitochondrial char genetic_code25[] = "KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSSGCWCLFLF"; // Candidate Division SR1 and Gracilibacteria +boost::bimap genetic_code_map; + Alignment::Alignment() : vector() { @@ -1843,7 +1846,32 @@ void Alignment::convertStateStr(string &str, SeqType seq_type) { (*it) = convertState(*it, seq_type); } */ - + +boost::bimap getGeneticCodeMap() { + if (!genetic_code_map.empty()) return genetic_code_map; + + genetic_code_map.insert({1, genetic_code1}); + genetic_code_map.insert({2, genetic_code2}); + genetic_code_map.insert({3, genetic_code3}); + genetic_code_map.insert({4, genetic_code4}); + genetic_code_map.insert({5, genetic_code5}); + genetic_code_map.insert({6, genetic_code6}); + genetic_code_map.insert({9, genetic_code9}); + genetic_code_map.insert({10, genetic_code10}); + genetic_code_map.insert({11, genetic_code11}); + genetic_code_map.insert({12, genetic_code12}); + genetic_code_map.insert({13, genetic_code13}); + genetic_code_map.insert({14, genetic_code14}); + genetic_code_map.insert({16, genetic_code16}); + genetic_code_map.insert({21, genetic_code21}); + genetic_code_map.insert({22, genetic_code22}); + genetic_code_map.insert({23, genetic_code23}); + genetic_code_map.insert({24, genetic_code24}); + genetic_code_map.insert({25, genetic_code25}); + + return genetic_code_map; +} + void Alignment::initCodon(char *gene_code_id) { // build index from 64 codons to non-stop codons int transl_table = 1; @@ -1853,30 +1881,13 @@ void Alignment::initCodon(char *gene_code_id) { } catch (string &str) { outError("Wrong genetic code ", gene_code_id); } - switch (transl_table) { - case 1: genetic_code = genetic_code1; break; - case 2: genetic_code = genetic_code2; break; - case 3: genetic_code = genetic_code3; break; - case 4: genetic_code = genetic_code4; break; - case 5: genetic_code = genetic_code5; break; - case 6: genetic_code = genetic_code6; break; - case 9: genetic_code = genetic_code9; break; - case 10: genetic_code = genetic_code10; break; - case 11: genetic_code = genetic_code11; break; - case 12: genetic_code = genetic_code12; break; - case 13: genetic_code = genetic_code13; break; - case 14: genetic_code = genetic_code14; break; - case 15: genetic_code = genetic_code15; break; - case 16: genetic_code = genetic_code16; break; - case 21: genetic_code = genetic_code21; break; - case 22: genetic_code = genetic_code22; break; - case 23: genetic_code = genetic_code23; break; - case 24: genetic_code = genetic_code24; break; - case 25: genetic_code = genetic_code25; break; - default: + auto codeMap = getGeneticCodeMap(); + auto found = codeMap.left.find(transl_table); + if (found == codeMap.left.end()) { outError("Wrong genetic code ", gene_code_id); - break; + return; } + genetic_code = found->second; } else { genetic_code = genetic_code1; } @@ -1909,6 +1920,15 @@ void Alignment::initCodon(char *gene_code_id) { // cout << "num_states = " << num_states << endl; } +int Alignment::getGeneticCodeId() { + if (seq_type != SEQ_CODON || genetic_code == nullptr) return 0; + + auto code_map = getGeneticCodeMap(); + auto found = code_map.right.find(genetic_code); + if (found == code_map.right.end()) return 0; + return found->second; +} + int getMorphStates(StrVector &sequences) { char maxstate = 0; for (StrVector::iterator it = sequences.begin(); it != sequences.end(); it++) diff --git a/alignment/alignment.h b/alignment/alignment.h index 520bb9e9c..bf2753c74 100644 --- a/alignment/alignment.h +++ b/alignment/alignment.h @@ -1042,6 +1042,12 @@ class Alignment : public vector, public CharSet, public StateSpace { */ void extractMapleFile(const std::string& aln_name, const InputType& format); + /** + * Get the numerical id of the genetic code + * @return id the genetic code id, or 0 if not a codon type + */ + int getGeneticCodeId(); + protected: diff --git a/main/phyloanalysis.cpp b/main/phyloanalysis.cpp index bb6f6fa08..ca1a3b662 100644 --- a/main/phyloanalysis.cpp +++ b/main/phyloanalysis.cpp @@ -3308,11 +3308,21 @@ void printMrBayesBlockFile(const char* filename, IQTree* &iqtree, bool inclParam out.exceptions(ios::failbit | ios::badbit); out.open(filename); + string provide = "basic models"; + if (inclParams) provide = "optimized values"; + else if (iqtree->isSuperTree()) provide = "basic partition structure and models"; + // Write Warning out << "#nexus" << endl << endl - <<"[This MrBayes Block Declaration provides the retrieved values from the IQTree Run.]" << endl - << "[Note that MrBayes does not support a large collection of models, so defaults of 'nst=6' for DNA and 'wag' for Protein will be used if a model that does not exist in MrBayes is selected.]" << endl - << "[Furthermore, the Model Parameter '+R' will be replaced by '+G'.]" << endl + <<"[This MrBayes Block Declaration provides the " << provide << " from the IQTree Run.]" << endl + << "[Note that MrBayes does not support a large collection of models, so defaults of 'nst=6' for DNA and 'wag' for Protein will be used if a model that does not exist in MrBayes is used.]" << endl; + + if (inclParams) + out << "[However, for those cases, there will still be a rate matrix provided.]" << endl + << "[For DNA, this will still mean the rates may vary outside the restrictions of the model.]" << endl + << "[For Protein, this is essentially a perfect replacement.]" << endl; + + out << "[Furthermore, the Model Parameter '+R' will be replaced by '+G'.]" << endl << "[This should be used as a Template Only.]" << endl << endl; // Begin File, Print Charsets @@ -3326,7 +3336,7 @@ void printMrBayesBlockFile(const char* filename, IQTree* &iqtree, bool inclParam if (!iqtree->rooted) out << " outgroup " << iqtree->root->name << ";" << endl << endl; out << " [Using Model '" << iqtree->getModelName() << "']" << endl; - iqtree->getModel()->printMrBayesModelText(iqtree->getRate(), out, "all", "", false, inclParams); + iqtree->getModel()->printMrBayesModelText(out, "all", "", false, inclParams); out << endl << "end;" << endl; out.close(); @@ -3370,7 +3380,7 @@ void printMrBayesBlockFile(const char* filename, IQTree* &iqtree, bool inclParam // MrBayes Partitions are 1-indexed out << " [Partition No. " << convertIntToString(part + 1) << ", Using Model '" << currentTree->getModelName() << "']" << endl; - currentTree->getModel()->printMrBayesModelText(currentTree->getRate(), out, + currentTree->getModel()->printMrBayesModelText(out, convertIntToString(part + 1), saln->partitions[part]->name, true, inclParams); out << endl; } diff --git a/model/modelbin.cpp b/model/modelbin.cpp index 262cbea80..e0100fc26 100644 --- a/model/modelbin.cpp +++ b/model/modelbin.cpp @@ -77,7 +77,9 @@ string ModelBIN::getNameParams(bool show_fixed_params) { return retname.str(); } -void ModelBIN::printMrBayesModelText(RateHeterogeneity* rate, ofstream& out, string partition, string charset, bool isSuperTree, bool inclParams) { +void ModelBIN::printMrBayesModelText(ofstream& out, string partition, string charset, bool isSuperTree, bool inclParams) { + RateHeterogeneity* rate = phylo_tree->getRate(); + // MrBayes does not support Invariable Modifier for Binary data if (rate->isFreeRate() || rate->getPInvar() > 0.0) { warnLogStream("MrBayes does not support Invariable Sites with Binary Data! +I has been ignored!", out); diff --git a/model/modelbin.h b/model/modelbin.h index b2c257d9e..b6a5024ec 100644 --- a/model/modelbin.h +++ b/model/modelbin.h @@ -65,7 +65,7 @@ class ModelBIN : public ModelMarkov * @param isSuperTree whether the tree is a super tree. Useful for retrieving information from the checkpoint file, which has different locations for PhyloTree and PhyloSuperTree * @param inclParams whether to include IQTree optimized parameters for the model */ - virtual void printMrBayesModelText(RateHeterogeneity* rate, ofstream& out, string partition, string charset, bool isSuperTree, bool inclParams); + virtual void printMrBayesModelText(ofstream& out, string partition, string charset, bool isSuperTree, bool inclParams); }; diff --git a/model/modelcodon.cpp b/model/modelcodon.cpp index f24a1889d..fad25694a 100644 --- a/model/modelcodon.cpp +++ b/model/modelcodon.cpp @@ -1182,6 +1182,72 @@ double ModelCodon::optimizeParameters(double gradient_epsilon) { return score; } +void ModelCodon::printMrBayesModelText(ofstream& out, string partition, string charset, bool isSuperTree, bool inclParams) { + // NST should be 1 if fix_kappa is true (no ts/tv ratio), else it should be 2 + int nst = fix_kappa ? 1 : 2; + int code = phylo_tree->aln->getGeneticCodeId(); + string mrBayesCode = getMrBayesGeneticCode(code); + RateHeterogeneity* rate = phylo_tree->getRate(); + + if (rate->isFreeRate() || rate->getGammaShape() > 0.0 || rate->getPInvar() > 0.0) { + warnLogStream("MrBayes Codon Models do not support any Heterogenity Rate Modifiers! (+I, +G, +R) They have been ignored!", out); + } + + if (mrBayesCode.empty()) { + warnLogStream("MrBayes Does Not Support Codon Code " + convertIntToString(code) + "! Defaulting to Universal (Code 1).", out); + mrBayesCode = "universal"; + } + + if (num_params == 0 || name.find('_') != string::npos) { // If this model is a Empirical / Semi-Empirical Model + warnLogStream("MrBayes does not support Empirical or Semi-Empirical Codon Models. State Frequency will still be set, but no rate matrix will be set.", out); + } + + out << " lset applyto=(" << partition << ") nucmodel=codon omegavar=equal nst=" << nst << " code=" << mrBayesCode << ";" << endl; + + if (!inclParams) return; + + out << " prset applyto=(" << partition << ") statefreqpr=dirichlet("; + + // State Frequency Prior + bool isFirst = true; + for (int i = 0; i < num_states; i++) { + if (phylo_tree->aln->isStopCodon(i)) continue; + + if (!isFirst) out << ", "; + else isFirst = false; + + out << state_freq[i]; + } + + out << ")"; + + /* + * TS/TV Ratio (Kappa) + * Ratio Should be: + * `beta(kappa, 1)` when `codon_kappa_style` is `CK_ONE_KAPPA` (kappa here represents the ts/tv rate ratio) + * `beta(kappa, 1)` when `codon_kappa_style` is `CK_ONE_KAPPA_TS` (kappa here represents the transition rate) + * `beta(1, kappa)` when `codon_kappa_style` is `CK_ONE_KAPPA_TV` (kappa here represents the transversion rate) + * `beta(kappa, kappa2)` when `codon_kappa_style` is `CK_TWO_KAPPA` (kappa here represents the transition rate, and kappa2 represents the transversion rate) + */ + if (!fix_kappa) { + double v1 = codon_kappa_style == CK_ONE_KAPPA_TV ? 1 : kappa; + double v2 = 1; + if (codon_kappa_style == CK_ONE_KAPPA_TV) + v2 = kappa; + else if (codon_kappa_style == CK_TWO_KAPPA) + v2 = kappa2; + + out << " tratiopr=beta(" << v1 << ", " << v2 << ")"; + } + + // DN/DS Ratio (Omega) + // Ratio is set to `dirichlet(omega, 1)` + if (!fix_omega) { + out << " omegapr=dirichlet(" << omega << ", 1)"; + } + + out << ";" << endl; +} void ModelCodon::writeInfo(ostream &out) { if (name.find('_') == string::npos) diff --git a/model/modelcodon.h b/model/modelcodon.h index ec85c70d6..2e0be5a6c 100644 --- a/model/modelcodon.h +++ b/model/modelcodon.h @@ -222,6 +222,17 @@ class ModelCodon: public ModelMarkov { */ virtual bool getVariables(double *variables); + /** + * Print the model information in a format that can be accepted by MrBayes, using lset and prset.
+ * By default, it simply prints a warning to the log and to the stream, stating that this model is not supported by MrBayes. + * @param out the ofstream to print the result to + * @param partition the partition to apply lset and prset to + * @param charset the current partition's charset. Useful for getting information from the checkpoint file + * @param isSuperTree whether the tree is a super tree. Useful for retrieving information from the checkpoint file, which has different locations for PhyloTree and PhyloSuperTree + * @param inclParams whether to include IQTree optimized parameters for the model + */ + virtual void printMrBayesModelText(ofstream& out, string partition, string charset, bool isSuperTree, bool inclParams); + }; #endif /* MODELCODON_H_ */ diff --git a/model/modeldna.cpp b/model/modeldna.cpp index 4a36e16be..5a4aae840 100644 --- a/model/modeldna.cpp +++ b/model/modeldna.cpp @@ -565,9 +565,10 @@ void ModelDNA::setVariables(double *variables) { // } } -void ModelDNA::printMrBayesModelText(RateHeterogeneity* rate, ofstream& out, string partition, string charset, bool isSuperTree, bool inclParams) { +void ModelDNA::printMrBayesModelText(ofstream& out, string partition, string charset, bool isSuperTree, bool inclParams) { bool equalFreq = freq_type == FREQ_EQUAL; short nst = 6; + RateHeterogeneity* rate = phylo_tree->getRate(); // Find NST value (1 for JC/JC69/F81, 2 for K80/K2P/HKY/HKY85, 6 for SYM/GTR) // NST is set to 6 by default (above), so check for JC/JC69/F81 and K80/K2P/HKY/HKY85 diff --git a/model/modeldna.h b/model/modeldna.h index 0c252c178..98fb70f1a 100644 --- a/model/modeldna.h +++ b/model/modeldna.h @@ -117,14 +117,13 @@ class ModelDNA : public ModelMarkov /** * Print the model information in a format that can be accepted by MrBayes, using lset and prset.
* By default, it simply prints a warning to the log and to the stream, stating that this model is not supported by MrBayes. - * @param rate the rate information * @param out the ofstream to print the result to * @param partition the partition to apply lset and prset to * @param charset the current partition's charset. Useful for getting information from the checkpoint file * @param isSuperTree whether the tree is a super tree. Useful for retrieving information from the checkpoint file, which has different locations for PhyloTree and PhyloSuperTree * @param inclParams whether to include IQTree optimized parameters for the model */ - virtual void printMrBayesModelText(RateHeterogeneity* rate, ofstream& out, string partition, string charset, bool isSuperTree, bool inclParams); + virtual void printMrBayesModelText(ofstream& out, string partition, string charset, bool isSuperTree, bool inclParams); protected: diff --git a/model/modelmorphology.cpp b/model/modelmorphology.cpp index b1a546565..59d1a91f0 100644 --- a/model/modelmorphology.cpp +++ b/model/modelmorphology.cpp @@ -172,11 +172,13 @@ void ModelMorphology::writeInfo(ostream &out) { } } -void ModelMorphology::printMrBayesModelText(RateHeterogeneity* rate, ofstream& out, string partition, string charset, bool isSuperTree, bool inclParams) { +void ModelMorphology::printMrBayesModelText(ofstream& out, string partition, string charset, bool isSuperTree, bool inclParams) { warnLogStream("MrBayes only supports Morphological Data with states from {0-9}!", out); warnLogStream("Morphological Data with states {A-Z} may cause errors!", out); warnLogStream("Use Morphological Models in MrBayes with Caution!", out); + RateHeterogeneity* rate = phylo_tree->getRate(); + // MrBayes does not support Invariable Modifier for Morph data if (rate->isFreeRate() || rate->getPInvar() > 0.0) { warnLogStream("MrBayes does not support Invariable Sites with Morphological Data! +I has been ignored!", out); diff --git a/model/modelmorphology.h b/model/modelmorphology.h index 9d0a0f427..52f4f518e 100644 --- a/model/modelmorphology.h +++ b/model/modelmorphology.h @@ -87,14 +87,13 @@ class ModelMorphology: public ModelMarkov { /** * Print the model information in a format that can be accepted by MrBayes, using lset and prset.
* By default, it simply prints a warning to the log and to the stream, stating that this model is not supported by MrBayes. - * @param rate the rate information * @param out the ofstream to print the result to * @param partition the partition to apply lset and prset to * @param charset the current partition's charset. Useful for getting information from the checkpoint file * @param isSuperTree whether the tree is a super tree. Useful for retrieving information from the checkpoint file, which has different locations for PhyloTree and PhyloSuperTree * @param inclParams whether to include IQTree optimized parameters for the model */ - virtual void printMrBayesModelText(RateHeterogeneity* rate, ofstream& out, string partition, string charset, bool isSuperTree, bool inclParams); + virtual void printMrBayesModelText(ofstream& out, string partition, string charset, bool isSuperTree, bool inclParams); }; #endif /* MODELMORPHOLOGY_H_ */ diff --git a/model/modelprotein.cpp b/model/modelprotein.cpp index 44a7c78b3..682e00893 100644 --- a/model/modelprotein.cpp +++ b/model/modelprotein.cpp @@ -1164,10 +1164,12 @@ void ModelProtein::computeTipLikelihood(PML::StateType state, double *state_lk) state_lk[ambi_aa[cstate*2+1]] = 1.0; } -void ModelProtein::printMrBayesModelText(RateHeterogeneity* rate, ofstream& out, string partition, string charset, bool isSuperTree, bool inclParams) { +void ModelProtein::printMrBayesModelText(ofstream& out, string partition, string charset, bool isSuperTree, bool inclParams) { // Lset Parameters out << " lset applyto=(" << partition << ") nucmodel=protein rates="; + RateHeterogeneity* rate = phylo_tree->getRate(); + // RHAS Specification // Free Rate should be substituted by +G+I bool hasGamma = rate->getGammaShape() != 0.0 || rate->isFreeRate(); diff --git a/model/modelprotein.h b/model/modelprotein.h index 6dd1f69e6..b24cb7c1d 100644 --- a/model/modelprotein.h +++ b/model/modelprotein.h @@ -82,14 +82,13 @@ class ModelProtein : public ModelMarkov /** * Print the model information in a format that can be accepted by MrBayes, using lset and prset.
* By default, it simply prints a warning to the log and to the stream, stating that this model is not supported by MrBayes. - * @param rate the rate information * @param out the ofstream to print the result to * @param partition the partition to apply lset and prset to * @param charset the current partition's charset. Useful for getting information from the checkpoint file * @param isSuperTree whether the tree is a super tree. Useful for retrieving information from the checkpoint file, which has different locations for PhyloTree and PhyloSuperTree * @param inclParams whether to include IQTree optimized parameters for the model */ - virtual void printMrBayesModelText(RateHeterogeneity* rate, ofstream& out, string partition, string charset, bool isSuperTree, bool inclParams); + virtual void printMrBayesModelText(ofstream& out, string partition, string charset, bool isSuperTree, bool inclParams); private: ModelsBlock *models_block; diff --git a/model/modelsubst.cpp b/model/modelsubst.cpp index efa546279..7af706a37 100644 --- a/model/modelsubst.cpp +++ b/model/modelsubst.cpp @@ -247,7 +247,7 @@ void ModelSubst::printMrBayesFreeRateReplacement(bool isSuperTree, string &chars out << " pinvarpr=fixed(" << minValueCheckMrBayes(p_invar) << ")"; } -void ModelSubst::printMrBayesModelText(RateHeterogeneity* rate, ofstream& out, string partition, string charset, bool isSuperTree, bool inclParams) { +void ModelSubst::printMrBayesModelText(ofstream& out, string partition, string charset, bool isSuperTree, bool inclParams) { out << " [MrBayes Block Output is not supported by this model!]" << endl; outWarning("MrBayes Block Output is not supported by this model of name '" + name + "'!"); } diff --git a/model/modelsubst.h b/model/modelsubst.h index c1d419cd5..a91abc7d2 100644 --- a/model/modelsubst.h +++ b/model/modelsubst.h @@ -408,14 +408,13 @@ class ModelSubst: public Optimization, public CheckpointFactory /** * Print the model information in a format that can be accepted by MrBayes, using lset and prset.
* By default, it simply prints a warning to the log and to the stream, stating that this model is not supported by MrBayes. - * @param rate the rate information * @param out the ofstream to print the result to * @param partition the partition to apply lset and prset to * @param charset the current partition's charset. Useful for getting information from the checkpoint file * @param isSuperTree whether the tree is a super tree. Useful for retrieving information from the checkpoint file, which has different locations for PhyloTree and PhyloSuperTree * @param inclParams whether to include IQTree optimized parameters for the model */ - virtual void printMrBayesModelText(RateHeterogeneity* rate, ofstream& out, string partition, string charset, bool isSuperTree, bool inclParams); + virtual void printMrBayesModelText(ofstream& out, string partition, string charset, bool isSuperTree, bool inclParams); /***************************************************** Checkpointing facility diff --git a/utils/tools.cpp b/utils/tools.cpp index 159d73448..30bf23efd 100644 --- a/utils/tools.cpp +++ b/utils/tools.cpp @@ -8236,9 +8236,9 @@ string getOutputNameWithExt(const InputType& format, const string& output_filepa } } -void warnLogStream(string warn, ofstream &out) { +void warnLogStream(string warn, ofstream &out, string indentation) { outWarning(warn); - out << "[" << warn << "]" << endl; + out << indentation << "[" << warn << "]" << endl; } double minValueCheckMrBayes(double origValue) { @@ -8249,22 +8249,30 @@ double minValueCheckMrBayes(double origValue) { return origValue; } -// Cached Map -unordered_map iqTreeToMrBayesAAModels; +const unordered_map iqTreeToMrBayesAAModels = { + {"Poisson", "poisson"}, + {"JTT", "jones"}, + {"Dayhoff", "dayhoff"}, + {"mtREV", "mtrev"}, + {"mtMAM", "mtmam"}, + {"WAG", "wag"}, + {"rtREV", "rtrev"}, + {"cpREV", "cprev"}, + {"VT", "vt"}, + {"Blosum62", "blosum"}, + {"LG", "lg"}, +}; + +// Anything outside of index 10 (Code No. 11) is invalid, leave that as empty string +const string indexedMrBayesGeneticCodes[25] = {"universal", "vertmt", "yeast", "mycoplasma", "invermt", + "ciliate", "", "", "echinoderm", "euplotid", "universal"}; unordered_map getIqTreeToMrBayesAAModels() { - if (!iqTreeToMrBayesAAModels.empty()) return iqTreeToMrBayesAAModels; - - iqTreeToMrBayesAAModels["Poisson"] = "poisson"; - iqTreeToMrBayesAAModels["JTT"] = "jones"; - iqTreeToMrBayesAAModels["Dayhoff"] = "dayhoff"; - iqTreeToMrBayesAAModels["mtREV"] = "mtrev"; - iqTreeToMrBayesAAModels["mtMAM"] = "mtmam"; - iqTreeToMrBayesAAModels["WAG"] = "wag"; - iqTreeToMrBayesAAModels["rtREV"] = "rtrev"; - iqTreeToMrBayesAAModels["cpREV"] = "cprev"; - iqTreeToMrBayesAAModels["VT"] = "vt"; - iqTreeToMrBayesAAModels["Blosum62"] = "blosum"; - iqTreeToMrBayesAAModels["LG"] = "lg"; return iqTreeToMrBayesAAModels; } + +string getMrBayesGeneticCode(int geneticCodeId) { + if (geneticCodeId == 0) return ""; + + return indexedMrBayesGeneticCodes[geneticCodeId - 1]; +} diff --git a/utils/tools.h b/utils/tools.h index 9fde9597e..869ebd56c 100644 --- a/utils/tools.h +++ b/utils/tools.h @@ -3819,8 +3819,11 @@ string getOutputNameWithExt(const InputType& format, const string& output_filepa /** * Prints a warning message to the log and to the ofstream, in a NEXUS format. + * @param warn the message to print + * @param out the ofstream to print to + * @param indentation optional indentation included only in ofstream. defaults to two spaces */ -void warnLogStream(string warn, ofstream &out); +void warnLogStream(string warn, ofstream &out, string indentation = " "); /** * ensures a number, to be inputted into MrBayes, is larger than the minimum value for MrBayes (0.01) @@ -3834,4 +3837,10 @@ double minValueCheckMrBayes(double origValue); */ unordered_map getIqTreeToMrBayesAAModels(); +/** + * get the MrBayes equivalent of a genetic code, given the id of the code. Returns and empty string if that code + * is not supported in MrBayes. + */ +string getMrBayesGeneticCode(int geneticCodeId); + #endif From 16be7099cc1ea97582ba0a6c2d6217988b1879f5 Mon Sep 17 00:00:00 2001 From: Integer Limit <103940576+IntegerLimit@users.noreply.github.com> Date: Wed, 23 Apr 2025 11:57:56 +1000 Subject: [PATCH 04/15] Initial Application of Reviews --- main/phyloanalysis.cpp | 32 ++++++---------------- model/modelcodon.cpp | 46 +------------------------------- model/modelcodon.h | 6 ++--- model/modeldna.cpp | 56 +++------------------------------------ model/modeldna.h | 6 ++--- model/modelmorphology.cpp | 23 ++-------------- model/modelmorphology.h | 6 ++--- model/modelprotein.cpp | 31 ++-------------------- model/modelprotein.h | 6 ++--- model/modelsubst.cpp | 40 ++-------------------------- model/modelsubst.h | 16 ++--------- 11 files changed, 28 insertions(+), 240 deletions(-) diff --git a/main/phyloanalysis.cpp b/main/phyloanalysis.cpp index ca1a3b662..038ba9310 100644 --- a/main/phyloanalysis.cpp +++ b/main/phyloanalysis.cpp @@ -1314,9 +1314,7 @@ void printOutfilesInfo(Params ¶ms, IQTree &tree) { } if (params.mr_bayes_output) { - cout << endl << "MrBayes block written to:" << endl; - cout << " Base template file: " << params.out_prefix << ".mr_bayes_scheme.nex" << endl; - cout << " Template file with parameters: " << params.out_prefix << ".mr_bayes_model.nex" << endl; + cout << endl << "MrBayes block written to:" << params.out_prefix << ".mr_bayes.nex" << endl; } cout << endl; @@ -3302,27 +3300,20 @@ SuperNeighbor* findRootedNeighbour(SuperNeighbor* super_root, int part_id) { return nullptr; } -void printMrBayesBlockFile(const char* filename, IQTree* &iqtree, bool inclParams) { +void printMrBayesBlockFile(const char* filename, IQTree* &iqtree) { ofstream out; try { out.exceptions(ios::failbit | ios::badbit); out.open(filename); string provide = "basic models"; - if (inclParams) provide = "optimized values"; - else if (iqtree->isSuperTree()) provide = "basic partition structure and models"; + if (iqtree->isSuperTree()) provide = "basic partition structure and models"; // Write Warning out << "#nexus" << endl << endl <<"[This MrBayes Block Declaration provides the " << provide << " from the IQTree Run.]" << endl - << "[Note that MrBayes does not support a large collection of models, so defaults of 'nst=6' for DNA and 'wag' for Protein will be used if a model that does not exist in MrBayes is used.]" << endl; - - if (inclParams) - out << "[However, for those cases, there will still be a rate matrix provided.]" << endl - << "[For DNA, this will still mean the rates may vary outside the restrictions of the model.]" << endl - << "[For Protein, this is essentially a perfect replacement.]" << endl; - - out << "[Furthermore, the Model Parameter '+R' will be replaced by '+G'.]" << endl + << "[Note that MrBayes does not support a large collection of models, so defaults of 'nst=6' for DNA and 'wag' for Protein will be used if a model that does not exist in MrBayes is used.]" << endl + << "[Furthermore, the Model Parameter '+R' will be replaced by '+G+I'.]" << endl << "[This should be used as a Template Only.]" << endl << endl; // Begin File, Print Charsets @@ -3332,11 +3323,8 @@ void printMrBayesBlockFile(const char* filename, IQTree* &iqtree, bool inclParam } if (!iqtree->isSuperTree()) { - // Set Outgroup (if available) - if (!iqtree->rooted) out << " outgroup " << iqtree->root->name << ";" << endl << endl; - out << " [Using Model '" << iqtree->getModelName() << "']" << endl; - iqtree->getModel()->printMrBayesModelText(out, "all", "", false, inclParams); + iqtree->getModel()->printMrBayesModelText(out, "all", ""); out << endl << "end;" << endl; out.close(); @@ -3371,9 +3359,6 @@ void printMrBayesBlockFile(const char* filename, IQTree* &iqtree, bool inclParam // Set Partition for Use out << " set partition = iqtree;" << endl; - // Set Outgroup (if available) - if (!superTree->rooted) out << " outgroup " << superTree->root->name << ";" << endl << endl; - // Partition-Specific Model Information for (int part = 0; part < size; part++) { PhyloTree* currentTree = superTree->at(part); @@ -3381,7 +3366,7 @@ void printMrBayesBlockFile(const char* filename, IQTree* &iqtree, bool inclParam // MrBayes Partitions are 1-indexed out << " [Partition No. " << convertIntToString(part + 1) << ", Using Model '" << currentTree->getModelName() << "']" << endl; currentTree->getModel()->printMrBayesModelText(out, - convertIntToString(part + 1), saln->partitions[part]->name, true, inclParams); + convertIntToString(part + 1), saln->partitions[part]->name); out << endl; } out << "end;" << endl; @@ -3913,8 +3898,7 @@ void runTreeReconstruction(Params ¶ms, IQTree* &iqtree) { } if (params.mr_bayes_output) { cout << endl << "Writing MrBayes Block Files..." << endl; - printMrBayesBlockFile((string(params.out_prefix) + ".mr_bayes_scheme.nex").c_str(), iqtree, false); - printMrBayesBlockFile((string(params.out_prefix) + ".mr_bayes_model.nex").c_str(), iqtree, true); + printMrBayesBlockFile((string(params.out_prefix) + ".mr_bayes.nex").c_str(), iqtree); cout << endl; } diff --git a/model/modelcodon.cpp b/model/modelcodon.cpp index fad25694a..adb7135e0 100644 --- a/model/modelcodon.cpp +++ b/model/modelcodon.cpp @@ -1182,7 +1182,7 @@ double ModelCodon::optimizeParameters(double gradient_epsilon) { return score; } -void ModelCodon::printMrBayesModelText(ofstream& out, string partition, string charset, bool isSuperTree, bool inclParams) { +void ModelCodon::printMrBayesModelText(ofstream& out, string partition, string charset) { // NST should be 1 if fix_kappa is true (no ts/tv ratio), else it should be 2 int nst = fix_kappa ? 1 : 2; int code = phylo_tree->aln->getGeneticCodeId(); @@ -1203,50 +1203,6 @@ void ModelCodon::printMrBayesModelText(ofstream& out, string partition, string c } out << " lset applyto=(" << partition << ") nucmodel=codon omegavar=equal nst=" << nst << " code=" << mrBayesCode << ";" << endl; - - if (!inclParams) return; - - out << " prset applyto=(" << partition << ") statefreqpr=dirichlet("; - - // State Frequency Prior - bool isFirst = true; - for (int i = 0; i < num_states; i++) { - if (phylo_tree->aln->isStopCodon(i)) continue; - - if (!isFirst) out << ", "; - else isFirst = false; - - out << state_freq[i]; - } - - out << ")"; - - /* - * TS/TV Ratio (Kappa) - * Ratio Should be: - * `beta(kappa, 1)` when `codon_kappa_style` is `CK_ONE_KAPPA` (kappa here represents the ts/tv rate ratio) - * `beta(kappa, 1)` when `codon_kappa_style` is `CK_ONE_KAPPA_TS` (kappa here represents the transition rate) - * `beta(1, kappa)` when `codon_kappa_style` is `CK_ONE_KAPPA_TV` (kappa here represents the transversion rate) - * `beta(kappa, kappa2)` when `codon_kappa_style` is `CK_TWO_KAPPA` (kappa here represents the transition rate, and kappa2 represents the transversion rate) - */ - if (!fix_kappa) { - double v1 = codon_kappa_style == CK_ONE_KAPPA_TV ? 1 : kappa; - double v2 = 1; - if (codon_kappa_style == CK_ONE_KAPPA_TV) - v2 = kappa; - else if (codon_kappa_style == CK_TWO_KAPPA) - v2 = kappa2; - - out << " tratiopr=beta(" << v1 << ", " << v2 << ")"; - } - - // DN/DS Ratio (Omega) - // Ratio is set to `dirichlet(omega, 1)` - if (!fix_omega) { - out << " omegapr=dirichlet(" << omega << ", 1)"; - } - - out << ";" << endl; } void ModelCodon::writeInfo(ostream &out) { diff --git a/model/modelcodon.h b/model/modelcodon.h index 2e0be5a6c..921dcbce7 100644 --- a/model/modelcodon.h +++ b/model/modelcodon.h @@ -227,11 +227,9 @@ class ModelCodon: public ModelMarkov { * By default, it simply prints a warning to the log and to the stream, stating that this model is not supported by MrBayes. * @param out the ofstream to print the result to * @param partition the partition to apply lset and prset to - * @param charset the current partition's charset. Useful for getting information from the checkpoint file - * @param isSuperTree whether the tree is a super tree. Useful for retrieving information from the checkpoint file, which has different locations for PhyloTree and PhyloSuperTree - * @param inclParams whether to include IQTree optimized parameters for the model + * @param charset the current partition's charset. */ - virtual void printMrBayesModelText(ofstream& out, string partition, string charset, bool isSuperTree, bool inclParams); + virtual void printMrBayesModelText(ofstream& out, string partition, string charset); }; diff --git a/model/modeldna.cpp b/model/modeldna.cpp index 5a4aae840..ad30d874c 100644 --- a/model/modeldna.cpp +++ b/model/modeldna.cpp @@ -565,7 +565,7 @@ void ModelDNA::setVariables(double *variables) { // } } -void ModelDNA::printMrBayesModelText(ofstream& out, string partition, string charset, bool isSuperTree, bool inclParams) { +void ModelDNA::printMrBayesModelText(ofstream& out, string partition, string charset) { bool equalFreq = freq_type == FREQ_EQUAL; short nst = 6; RateHeterogeneity* rate = phylo_tree->getRate(); @@ -607,59 +607,9 @@ void ModelDNA::printMrBayesModelText(ofstream& out, string partition, string cha out << "propinv"; else out << "equal"; - - // Rate Categories - if (hasGamma) - out << " ngammacat=" << rate->getNRate(); - out << ";" << endl; - // Priors (apart from Fixed Freq) - if (!inclParams) { - // If not include other params, simply set fixed frequency and return - if (!equalFreq) return; - - out << " prset applyto=(" << partition << ") statefreqpr=fixed(equal);" << endl; - return; - } - - out << " prset applyto=(" << partition << ")"; - - // Freerate (+R) - // Get replacement Gamma Shape + Invariable Sites - if (rate->isFreeRate()) { - printMrBayesFreeRateReplacement(isSuperTree, charset, out); - } - - // Gamma Distribution (+G/+R) - // Dirichlet is not available here, use fixed - if (rate->getGammaShape() > 0.0) - out << " shapepr=fixed(" << minValueCheckMrBayes(rate->getGammaShape()) << ")"; - - // Invariable Sites (+I) - // Dirichlet is not available here, use fixed - if (rate->getPInvar() > 0.0) - out << " pinvarpr=fixed(" << minValueCheckMrBayes(rate->getPInvar()) << ")"; - - // Frequency of Nucleotides (+F) - if (equalFreq) - out << " statefreqpr=fixed(equal)"; - else { - out << " statefreqpr=dirichlet("; - for (int i = 0; i < num_states; ++i) { - if (i != 0) out << ", "; - out << minValueCheckMrBayes(state_freq[i]); - } - out << ")"; - } - - // Reversible Rate Matrix - out << " revmatpr=dirichlet("; - for (int i = 0; i < getNumRateEntries(); ++i) { - if (i != 0) out << ", "; - out << minValueCheckMrBayes(rates[i]); - } + if (!equalFreq) return; - // Close revmatpr + prset - out << ");" << endl; + out << " prset applyto=(" << partition << ") statefreqpr=fixed(equal);" << endl; } diff --git a/model/modeldna.h b/model/modeldna.h index 98fb70f1a..51659386a 100644 --- a/model/modeldna.h +++ b/model/modeldna.h @@ -119,11 +119,9 @@ class ModelDNA : public ModelMarkov * By default, it simply prints a warning to the log and to the stream, stating that this model is not supported by MrBayes. * @param out the ofstream to print the result to * @param partition the partition to apply lset and prset to - * @param charset the current partition's charset. Useful for getting information from the checkpoint file - * @param isSuperTree whether the tree is a super tree. Useful for retrieving information from the checkpoint file, which has different locations for PhyloTree and PhyloSuperTree - * @param inclParams whether to include IQTree optimized parameters for the model + * @param charset the current partition's charset. */ - virtual void printMrBayesModelText(ofstream& out, string partition, string charset, bool isSuperTree, bool inclParams); + virtual void printMrBayesModelText(ofstream& out, string partition, string charset); protected: diff --git a/model/modelmorphology.cpp b/model/modelmorphology.cpp index 59d1a91f0..b7f4901da 100644 --- a/model/modelmorphology.cpp +++ b/model/modelmorphology.cpp @@ -172,7 +172,7 @@ void ModelMorphology::writeInfo(ostream &out) { } } -void ModelMorphology::printMrBayesModelText(ofstream& out, string partition, string charset, bool isSuperTree, bool inclParams) { +void ModelMorphology::printMrBayesModelText(ofstream& out, string partition, string charset) { warnLogStream("MrBayes only supports Morphological Data with states from {0-9}!", out); warnLogStream("Morphological Data with states {A-Z} may cause errors!", out); warnLogStream("Use Morphological Models in MrBayes with Caution!", out); @@ -195,7 +195,7 @@ void ModelMorphology::printMrBayesModelText(ofstream& out, string partition, str bool hasGamma = rate->getGammaShape() != 0.0 || rate->isFreeRate(); if (hasGamma) { // Rate Categories + Gamma - out << "gamma ngammacat=" << rate->getNRate(); + out << "gamma"; } else out << "equal"; @@ -204,23 +204,4 @@ void ModelMorphology::printMrBayesModelText(ofstream& out, string partition, str // ctype (ordered or not) if (strcmp(name.c_str(), "ORDERED") == 0) out << " ctype ordered: " << charset << ";" << endl; - - // Since morphological doesn't have state frequencies, if there are no parameters to prset, return - if (!inclParams || !rate->isFreeRate() && rate->getGammaShape() <= 0.0) return; - - // Prset Parameters - out << " prset applyto=(" << partition << ")"; - - // Freerate (+R) - // Get replacement Gamma Shape - if (rate->isFreeRate()) { - printMrBayesFreeRateReplacement(rate, charset, out, false); - } - - // Gamma Distribution (+G/+R) - // Dirichlet is not available here, use fixed - if (rate->getGammaShape() > 0.0) - out << " shapepr=fixed(" << minValueCheckMrBayes(rate->getGammaShape()) << ")"; - - out << ";" << endl; } diff --git a/model/modelmorphology.h b/model/modelmorphology.h index 52f4f518e..7f05be107 100644 --- a/model/modelmorphology.h +++ b/model/modelmorphology.h @@ -89,11 +89,9 @@ class ModelMorphology: public ModelMarkov { * By default, it simply prints a warning to the log and to the stream, stating that this model is not supported by MrBayes. * @param out the ofstream to print the result to * @param partition the partition to apply lset and prset to - * @param charset the current partition's charset. Useful for getting information from the checkpoint file - * @param isSuperTree whether the tree is a super tree. Useful for retrieving information from the checkpoint file, which has different locations for PhyloTree and PhyloSuperTree - * @param inclParams whether to include IQTree optimized parameters for the model + * @param charset the current partition's charset. */ - virtual void printMrBayesModelText(ofstream& out, string partition, string charset, bool isSuperTree, bool inclParams); + virtual void printMrBayesModelText(ofstream& out, string partition, string charset); }; #endif /* MODELMORPHOLOGY_H_ */ diff --git a/model/modelprotein.cpp b/model/modelprotein.cpp index 682e00893..ccdc02f57 100644 --- a/model/modelprotein.cpp +++ b/model/modelprotein.cpp @@ -1164,7 +1164,7 @@ void ModelProtein::computeTipLikelihood(PML::StateType state, double *state_lk) state_lk[ambi_aa[cstate*2+1]] = 1.0; } -void ModelProtein::printMrBayesModelText(ofstream& out, string partition, string charset, bool isSuperTree, bool inclParams) { +void ModelProtein::printMrBayesModelText(ofstream& out, string partition, string charset) { // Lset Parameters out << " lset applyto=(" << partition << ") nucmodel=protein rates="; @@ -1183,11 +1183,6 @@ void ModelProtein::printMrBayesModelText(ofstream& out, string partition, string out << "propinv"; else out << "equal"; - - // Rate Categories - if (hasGamma) - out << " ngammacat=" << rate->getNRate(); - out << ";" << endl; out << " prset applyto=(" << partition << ")"; @@ -1230,28 +1225,6 @@ void ModelProtein::printMrBayesModelText(ofstream& out, string partition, string out << ")"; } - // if not to include the parameters (for Protein, simply +I, +G, +R) - if (!inclParams) { - out << ";"; - return; - } - - // Freerate (+R) - // Get replacement Gamma Shape + Invariable Sites - if (rate->isFreeRate()) { - printMrBayesFreeRateReplacement(isSuperTree, charset, out); - } - - // Gamma Distribution (+G/+R) - // Dirichlet is not available here, use fixed - if (rate->getGammaShape() > 0.0) - out << " shapepr=fixed(" << minValueCheckMrBayes(rate->getGammaShape()) << ")"; - - // Invariable Sites (+I) - // Dirichlet is not available here, use fixed - if (rate->getPInvar() > 0.0) - out << " pinvarpr=fixed(" << minValueCheckMrBayes(rate->getPInvar()) << ")"; - - out << ";" << endl; + out << ";"; } diff --git a/model/modelprotein.h b/model/modelprotein.h index b24cb7c1d..dc5782576 100644 --- a/model/modelprotein.h +++ b/model/modelprotein.h @@ -84,11 +84,9 @@ class ModelProtein : public ModelMarkov * By default, it simply prints a warning to the log and to the stream, stating that this model is not supported by MrBayes. * @param out the ofstream to print the result to * @param partition the partition to apply lset and prset to - * @param charset the current partition's charset. Useful for getting information from the checkpoint file - * @param isSuperTree whether the tree is a super tree. Useful for retrieving information from the checkpoint file, which has different locations for PhyloTree and PhyloSuperTree - * @param inclParams whether to include IQTree optimized parameters for the model + * @param charset the current partition's charset. */ - virtual void printMrBayesModelText(ofstream& out, string partition, string charset, bool isSuperTree, bool inclParams); + virtual void printMrBayesModelText(ofstream& out, string partition, string charset); private: ModelsBlock *models_block; diff --git a/model/modelsubst.cpp b/model/modelsubst.cpp index 7af706a37..98dc953c3 100644 --- a/model/modelsubst.cpp +++ b/model/modelsubst.cpp @@ -211,44 +211,8 @@ double *ModelSubst::newTransMatrix() { return new double[num_states * num_states]; } -void ModelSubst::printMrBayesFreeRateReplacement(bool isSuperTree, string &charset, ofstream &out, bool inclInvariable) -{ - double gamma_shape = 0.0; - double p_invar = 0.0; - - if (isSuperTree) { - checkpoint->startStruct("PartitionModelPlen"); - checkpoint->startStruct(charset); - } - checkpoint->startStruct("RateGammaInvar"); - - // Safety Check - if (!checkpoint->hasKey("gamma_shape") || !checkpoint->hasKey("p_invar")) { - outWarning("Could not find replacement for Freerate Distribution in MrBayes Block!"); - checkpoint->endStruct(); - return; - } - - outWarning("FreeRate Distributions (+R) are not available in MrBayes. It will be replaced by +G+I."); - - // Always replace +R with +G+I - CKP_RESTORE(gamma_shape); - if (inclInvariable) - CKP_RESTORE(p_invar); - - checkpoint->endStruct(); - if (isSuperTree) { - checkpoint->endStruct(); - checkpoint->endStruct(); - } - - out << " shapepr=fixed(" << minValueCheckMrBayes(gamma_shape) << ")"; - if (inclInvariable) - out << " pinvarpr=fixed(" << minValueCheckMrBayes(p_invar) << ")"; -} - -void ModelSubst::printMrBayesModelText(ofstream& out, string partition, string charset, bool isSuperTree, bool inclParams) { - out << " [MrBayes Block Output is not supported by this model!]" << endl; +void ModelSubst::printMrBayesModelText(ofstream& out, string partition, string charset) { + out << " [MrBayes Block Output is not supported by model " << name << ", skipped." << endl; outWarning("MrBayes Block Output is not supported by this model of name '" + name + "'!"); } diff --git a/model/modelsubst.h b/model/modelsubst.h index a91abc7d2..33d014b2e 100644 --- a/model/modelsubst.h +++ b/model/modelsubst.h @@ -17,7 +17,6 @@ #include "utils/optimization.h" #include "utils/checkpoint.h" #include "phylo-yaml/statespace.h" -#include "model/rateheterogeneity.h" using namespace std; @@ -396,25 +395,14 @@ class ModelSubst: public Optimization, public CheckpointFactory */ virtual ModelSubst *getMutationModel() { return this; } - /** - * Prints the replacement prior settings, in MrBayes, for +R or +R+I. - * @param isSuperTree whether the tree is a super tree - * @param charset the (original) charset of the current partition. An empty string if not a partitioned tree - * @param out the ofstream to print to - * @param inclInvariable whether to include invariable sites. defaults to true - */ - void printMrBayesFreeRateReplacement(bool isSuperTree, string &charset, ofstream &out, bool inclInvariable = true); - /** * Print the model information in a format that can be accepted by MrBayes, using lset and prset.
* By default, it simply prints a warning to the log and to the stream, stating that this model is not supported by MrBayes. * @param out the ofstream to print the result to * @param partition the partition to apply lset and prset to - * @param charset the current partition's charset. Useful for getting information from the checkpoint file - * @param isSuperTree whether the tree is a super tree. Useful for retrieving information from the checkpoint file, which has different locations for PhyloTree and PhyloSuperTree - * @param inclParams whether to include IQTree optimized parameters for the model + * @param charset the current partition's charset. */ - virtual void printMrBayesModelText(ofstream& out, string partition, string charset, bool isSuperTree, bool inclParams); + virtual void printMrBayesModelText(ofstream& out, string partition, string charset); /***************************************************** Checkpointing facility From d7d9ea5f326b5608bbb4f851fe2785ee47b8bc94 Mon Sep 17 00:00:00 2001 From: Integer Limit <103940576+IntegerLimit@users.noreply.github.com> Date: Tue, 6 May 2025 19:03:28 +1000 Subject: [PATCH 05/15] Partition Type Options --- main/phyloanalysis.cpp | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/main/phyloanalysis.cpp b/main/phyloanalysis.cpp index 1f448e669..9fcb6a40d 100644 --- a/main/phyloanalysis.cpp +++ b/main/phyloanalysis.cpp @@ -3275,8 +3275,9 @@ SuperNeighbor* findRootedNeighbour(SuperNeighbor* super_root, int part_id) { return nullptr; } -void printMrBayesBlockFile(const char* filename, IQTree* &iqtree) { +void printMrBayesBlockFile(Params ¶ms, IQTree* &iqtree) { ofstream out; + string filename = string(params.out_prefix) + ".mr_bayes.nex"; try { out.exceptions(ios::failbit | ios::badbit); out.open(filename); @@ -3344,6 +3345,18 @@ void printMrBayesBlockFile(const char* filename, IQTree* &iqtree) { convertIntToString(part + 1), saln->partitions[part]->name); out << endl; } + + // Partition Type Settings + if (params.partition_type != TOPO_UNLINKED) { + out << "unlink statefreq=(all) revmat=(all) shape=(all) pinvar=(all) tratio=(all);" << endl; + if (params.partition_type != BRLEN_FIX) { + out << "prset applyto=(all) ratepr=variable;" << endl; + if (params.partition_type != BRLEN_SCALE) { + out << "unlink brlens=(all);" << endl; + } + } + } + out << "end;" << endl; out.close(); } @@ -3871,7 +3884,7 @@ void runTreeReconstruction(Params ¶ms, IQTree* &iqtree) { } if (params.mr_bayes_output) { cout << endl << "Writing MrBayes Block Files..." << endl; - printMrBayesBlockFile((string(params.out_prefix) + ".mr_bayes.nex").c_str(), iqtree); + printMrBayesBlockFile(params, iqtree); cout << endl; } From 8ce94b9dd03e4bac86fbf51c34bd7c411dd16f31 Mon Sep 17 00:00:00 2001 From: Integer Limit <103940576+IntegerLimit@users.noreply.github.com> Date: Sat, 10 May 2025 11:51:09 +1000 Subject: [PATCH 06/15] Initial Improvement of Comments, Always Output Model --- main/phyloanalysis.cpp | 5 ++-- model/modelbin.cpp | 48 ++++++++------------------------------- model/modelbin.h | 8 ++----- model/modelcodon.cpp | 25 +++++++++++++++----- model/modeldna.cpp | 33 +++++++++++++++++---------- model/modelmorphology.cpp | 35 ++++++++++++++++++---------- model/modelprotein.cpp | 48 ++++++++++++++++++++++----------------- model/modelsubst.cpp | 7 ++++-- utils/tools.cpp | 5 ---- utils/tools.h | 8 ------- 10 files changed, 109 insertions(+), 113 deletions(-) diff --git a/main/phyloanalysis.cpp b/main/phyloanalysis.cpp index 9fcb6a40d..cf4e7c2d1 100644 --- a/main/phyloanalysis.cpp +++ b/main/phyloanalysis.cpp @@ -3299,7 +3299,7 @@ void printMrBayesBlockFile(Params ¶ms, IQTree* &iqtree) { } if (!iqtree->isSuperTree()) { - out << " [Using Model '" << iqtree->getModelName() << "']" << endl; + out << " [IQTree inferred model " << iqtree->getModelName() << ", " << endl; iqtree->getModel()->printMrBayesModelText(out, "all", ""); out << endl << "end;" << endl; @@ -3339,8 +3339,7 @@ void printMrBayesBlockFile(Params ¶ms, IQTree* &iqtree) { for (int part = 0; part < size; part++) { PhyloTree* currentTree = superTree->at(part); - // MrBayes Partitions are 1-indexed - out << " [Partition No. " << convertIntToString(part + 1) << ", Using Model '" << currentTree->getModelName() << "']" << endl; + out << " [Subset #" << part + 1 << " IQTree inferred model " << currentTree->getModelName() << ", "; currentTree->getModel()->printMrBayesModelText(out, convertIntToString(part + 1), saln->partitions[part]->name); out << endl; diff --git a/model/modelbin.cpp b/model/modelbin.cpp index e0100fc26..aa80f6ecc 100644 --- a/model/modelbin.cpp +++ b/model/modelbin.cpp @@ -77,57 +77,29 @@ string ModelBIN::getNameParams(bool show_fixed_params) { return retname.str(); } -void ModelBIN::printMrBayesModelText(ofstream& out, string partition, string charset, bool isSuperTree, bool inclParams) { +void ModelBIN::printMrBayesModelText(ofstream& out, string partition, string charset) { RateHeterogeneity* rate = phylo_tree->getRate(); - // MrBayes does not support Invariable Modifier for Binary data + // Free Rate should be substituted by +G (+I not supported) + bool hasGamma = rate->getGammaShape() != 0.0 || rate->isFreeRate(); + + // MrBayes's Binary Model is 'F81-like'. + out << "using MrBayes model F81" << (hasGamma ? "+G" : "") << "]" << endl; if (rate->isFreeRate() || rate->getPInvar() > 0.0) { - warnLogStream("MrBayes does not support Invariable Sites with Binary Data! +I has been ignored!", out); + out << " [+I modifier ignored, not supported by MrBayes for binary data]" << endl; + outWarning("MrBayes does not support Invariable Sites with Binary Data! +I has been ignored!"); } // Lset Parameters out << " lset applyto=(" << partition << ") rates="; - - // Free Rate should be substituted by +G (+I not supported) - bool hasGamma = rate->getGammaShape() != 0.0 || rate->isFreeRate(); if (hasGamma) { // Rate Categories + Gamma - out << "gamma ngammacat=" << rate->getNRate(); + out << "gamma"; } else out << "equal"; out << ";" << endl; - if (!inclParams) { - if (freq_type == FREQ_EQUAL) out << " prset applyto=(" << partition << ") statefreqpr=fixed(equal);" << endl; - return; - } - - // Prset Parameters - out << " prset applyto=(" << partition << ")"; - - // Freerate (+R) - // Get replacement Gamma Shape - if (rate->isFreeRate()) { - printMrBayesFreeRateReplacement(rate, charset, out, false); - } - - // Gamma Distribution (+G/+R) - // Dirichlet is not available here, use fixed - if (rate->getGammaShape() > 0.0) - out << " shapepr=fixed(" << minValueCheckMrBayes(rate->getGammaShape()) << ")"; - - // State Frequencies if (freq_type == FREQ_EQUAL) - out << " statefreqpr=fixed(equal)"; - else { - out << " statefreqpr=dirichlet("; - for (int i = 0; i < num_states; ++i) { - if (i != 0) out << ", "; - out << minValueCheckMrBayes(state_freq[i]); - } - out << ")"; - } - - out << ";" << endl; + out << " prset applyto=(" << partition << ") statefreqpr=fixed(equal);" << endl; } diff --git a/model/modelbin.h b/model/modelbin.h index b6a5024ec..a03d059e3 100644 --- a/model/modelbin.h +++ b/model/modelbin.h @@ -58,15 +58,11 @@ class ModelBIN : public ModelMarkov /** * Print the model information in a format that can be accepted by MrBayes, using lset and prset.
* By default, it simply prints a warning to the log and to the stream, stating that this model is not supported by MrBayes. - * @param rate the rate information * @param out the ofstream to print the result to * @param partition the partition to apply lset and prset to - * @param charset the current partition's charset. Useful for getting information from the checkpoint file - * @param isSuperTree whether the tree is a super tree. Useful for retrieving information from the checkpoint file, which has different locations for PhyloTree and PhyloSuperTree - * @param inclParams whether to include IQTree optimized parameters for the model + * @param charset the current partition's charset. */ - virtual void printMrBayesModelText(ofstream& out, string partition, string charset, bool isSuperTree, bool inclParams); - + virtual void printMrBayesModelText(ofstream& out, string partition, string charset); }; #endif diff --git a/model/modelcodon.cpp b/model/modelcodon.cpp index adb7135e0..63f855dd8 100644 --- a/model/modelcodon.cpp +++ b/model/modelcodon.cpp @@ -1184,23 +1184,36 @@ double ModelCodon::optimizeParameters(double gradient_epsilon) { void ModelCodon::printMrBayesModelText(ofstream& out, string partition, string charset) { // NST should be 1 if fix_kappa is true (no ts/tv ratio), else it should be 2 + // GTR codon is not available in IQTREE int nst = fix_kappa ? 1 : 2; int code = phylo_tree->aln->getGeneticCodeId(); string mrBayesCode = getMrBayesGeneticCode(code); + bool codeNotSupported = mrBayesCode.empty(); RateHeterogeneity* rate = phylo_tree->getRate(); + string modelName = fix_kappa ? "JC" : "HKY"; + + // If this model is a Empirical / Semi-Empirical Model + if (num_params == 0 || name.find('_') != string::npos) { + nst = 6; + modelName = "GTR"; + } + + out << "using MrBayes model " << modelName << "]" << endl; + if (rate->isFreeRate() || rate->getGammaShape() > 0.0 || rate->getPInvar() > 0.0) { - warnLogStream("MrBayes Codon Models do not support any Heterogenity Rate Modifiers! (+I, +G, +R) They have been ignored!", out); + out << " [Rate modifiers (+I, +G, +R) ignored, not supported by MrBayes codon models]" << endl; + outWarning("MrBayes Codon Models do not support any Heterogenity Rate Modifiers! (+I, +G, +R) They have been ignored!"); } - if (mrBayesCode.empty()) { - warnLogStream("MrBayes Does Not Support Codon Code " + convertIntToString(code) + "! Defaulting to Universal (Code 1).", out); + if (codeNotSupported) { + outWarning("MrBayes Does Not Support Codon Code " + convertIntToString(code) + "! Defaulting to Universal (Code 1)."); mrBayesCode = "universal"; } - if (num_params == 0 || name.find('_') != string::npos) { // If this model is a Empirical / Semi-Empirical Model - warnLogStream("MrBayes does not support Empirical or Semi-Empirical Codon Models. State Frequency will still be set, but no rate matrix will be set.", out); - } + out << " [IQTree genetic code " << code << ", using MrBayes genetic code " << mrBayesCode << "]" << endl; + if (codeNotSupported) + out << " [Genetic code " << code << " is not supported by MrBayes, defaulting to universal (code 1)]" << endl; out << " lset applyto=(" << partition << ") nucmodel=codon omegavar=equal nst=" << nst << " code=" << mrBayesCode << ";" << endl; } diff --git a/model/modeldna.cpp b/model/modeldna.cpp index ad30d874c..191988106 100644 --- a/model/modeldna.cpp +++ b/model/modeldna.cpp @@ -568,6 +568,7 @@ void ModelDNA::setVariables(double *variables) { void ModelDNA::printMrBayesModelText(ofstream& out, string partition, string charset) { bool equalFreq = freq_type == FREQ_EQUAL; short nst = 6; + string modelName = "GTR"; RateHeterogeneity* rate = phylo_tree->getRate(); // Find NST value (1 for JC/JC69/F81, 2 for K80/K2P/HKY/HKY85, 6 for SYM/GTR) @@ -577,37 +578,45 @@ void ModelDNA::printMrBayesModelText(ofstream& out, string partition, string cha // if all equal if (param_spec.find_first_not_of(param_spec[0]) == string::npos) { nst = 1; + modelName = "JC"; // if A-G=C-T and everything else is equal to first part of dna code } else if (param_spec[1] == param_spec[4] && param_spec[0] == param_spec[2] && param_spec[0] == param_spec[3] && param_spec[0] == param_spec[5]) { nst = 2; + modelName = "HKY"; } } else if (!name.empty()) { // Check the name of the model if (strcmp(name.c_str(), "JC") == 0 || strcmp(name.c_str(), "JC69") == 0 || strcmp(name.c_str(), "F81") == 0) { nst = 1; + modelName = "JC"; } else if (strcmp(name.c_str(), "K80") == 0 || strcmp(name.c_str(), "K2P") == 0 || strcmp(name.c_str(), "HKY") == 0 || strcmp(name.c_str(), "HKY85") == 0) { nst = 2; + modelName = "HKY"; } } - // NucModel = 4By4: DNA Nucleotides (4 Options, A, C, G, T) - out << " lset applyto=(" << partition << ") nucmodel=4by4 nst=" << nst << " rates="; - // RHAS Specification // Free Rate should be substituted by +G+I bool hasGamma = rate->getGammaShape() != 0.0 || rate->isFreeRate(); bool hasInvariable = rate->getPInvar() != 0.0 || rate->isFreeRate(); + string rateStr = "equal"; if (hasGamma) { - if (hasInvariable) - out << "invgamma"; - else - out << "gamma"; - } else if (hasInvariable) - out << "propinv"; - else - out << "equal"; - out << ";" << endl; + if (hasInvariable) { + rateStr = "invgamma"; + modelName.append("+G+I"); + } else { + rateStr = "gamma"; + modelName.append("+G"); + } + } else if (hasInvariable) { + rateStr = "propinv"; + modelName.append("+I"); + } + + out << "using MrBayes model " << modelName << "]" << endl; + // NucModel = 4By4: DNA Nucleotides (4 Options, A, C, G, T) + out << " lset applyto=(" << partition << ") nucmodel=4by4 nst=" << nst << " rates=" << rateStr << ";" << endl; if (!equalFreq) return; diff --git a/model/modelmorphology.cpp b/model/modelmorphology.cpp index b7f4901da..63cf8bc1b 100644 --- a/model/modelmorphology.cpp +++ b/model/modelmorphology.cpp @@ -172,27 +172,38 @@ void ModelMorphology::writeInfo(ostream &out) { } } -void ModelMorphology::printMrBayesModelText(ofstream& out, string partition, string charset) { - warnLogStream("MrBayes only supports Morphological Data with states from {0-9}!", out); - warnLogStream("Morphological Data with states {A-Z} may cause errors!", out); - warnLogStream("Use Morphological Models in MrBayes with Caution!", out); - +void ModelMorphology::printMrBayesModelText(ofstream& out, string partition, string charset){ RateHeterogeneity* rate = phylo_tree->getRate(); - // MrBayes does not support Invariable Modifier for Morph data + // Free Rate should be substituted by +G (+I not supported) + bool hasGamma = rate->getGammaShape() != 0.0 || rate->isFreeRate(); + bool ordered = strcmp(name.c_str(), "ORDERED") == 0; + + // MrBayes' morph model is 'JC-like' + out << "using MrBayes model " << (ordered ? "ORDERED" : "JC") << (hasGamma ? "+G" : "") << "]" << endl; + + // Warnings + + // General Morph Data Warnings + out << " [Morphological data in MrBayes is only supported with states from {0-9}]" << endl; + out << " [Morphological data containing states {A-Z] may cause errors]" << endl; + outWarning("Use morphological data in MrBayes with caution; only states from {0-9} are supported!"); + + // Unsupported +I if (rate->isFreeRate() || rate->getPInvar() > 0.0) { - warnLogStream("MrBayes does not support Invariable Sites with Morphological Data! +I has been ignored!", out); + out << " [+I modifier ignored, not supported by MrBayes for morphological data]" << endl; + outWarning("MrBayes does not support Invariable Sites with Morphological Data! +I has been ignored!"); } - // MrBayes does not support State Frequency for Morph data + + // Unsupported Freq if (freq_type != FREQ_EQUAL) { - warnLogStream("MrBayes does not support non-equal frequencies for Morphological Data! Frequencies are left as the default! (All equal)", out); + out << " [Equal frequencies forced, not supported by MrBayes for morphological data]" << endl; + outWarning("MrBayes does not support non-equal frequencies for Morphological Data! Equal frequencies forced!"); } // Lset Parameters out << " lset applyto=(" << partition << ") rates="; - // Free Rate should be substituted by +G (+I not supported) - bool hasGamma = rate->getGammaShape() != 0.0 || rate->isFreeRate(); if (hasGamma) { // Rate Categories + Gamma out << "gamma"; @@ -202,6 +213,6 @@ void ModelMorphology::printMrBayesModelText(ofstream& out, string partition, str out << ";" << endl; // ctype (ordered or not) - if (strcmp(name.c_str(), "ORDERED") == 0) + if (ordered) out << " ctype ordered: " << charset << ";" << endl; } diff --git a/model/modelprotein.cpp b/model/modelprotein.cpp index ccdc02f57..4ed4a0bb2 100644 --- a/model/modelprotein.cpp +++ b/model/modelprotein.cpp @@ -1165,28 +1165,8 @@ void ModelProtein::computeTipLikelihood(PML::StateType state, double *state_lk) } void ModelProtein::printMrBayesModelText(ofstream& out, string partition, string charset) { - // Lset Parameters - out << " lset applyto=(" << partition << ") nucmodel=protein rates="; - RateHeterogeneity* rate = phylo_tree->getRate(); - // RHAS Specification - // Free Rate should be substituted by +G+I - bool hasGamma = rate->getGammaShape() != 0.0 || rate->isFreeRate(); - bool hasInvariable = rate->getPInvar() != 0.0 || rate->isFreeRate(); - if (hasGamma) { - if (hasInvariable) - out << "invgamma"; - else - out << "gamma"; - } else if (hasInvariable) - out << "propinv"; - else - out << "equal"; - out << ";" << endl; - - out << " prset applyto=(" << partition << ")"; - // Get MrBayes Model auto aaModelMap = getIqTreeToMrBayesAAModels(); auto iter = aaModelMap.find(name); @@ -1196,7 +1176,33 @@ void ModelProtein::printMrBayesModelText(ofstream& out, string partition, string if (iter != aaModelMap.end()) mappedModel = iter->second; - out << " aamodelpr=fixed(" << mappedModel << ")"; + out << "using MrBayes model " << mappedModel; + + // RHAS Specification + // Free Rate should be substituted by +G+I + bool hasGamma = rate->getGammaShape() != 0.0 || rate->isFreeRate(); + bool hasInvariable = rate->getPInvar() != 0.0 || rate->isFreeRate(); + string rateStr = "equal"; + if (hasGamma) { + if (hasInvariable) { + rateStr = "invgamma"; + out << "+G+I"; + } + else { + rateStr = "gamma"; + out << "+G"; + } + } else if (hasInvariable) { + rateStr = "propinv"; + out << "+I"; + } + + out << "]" << endl; + + // Lset Parameters + out << " lset applyto=(" << partition << ") nucmodel=protein rates=" << rateStr << ";" << endl; + + out << " prset applyto=(" << partition << ")" << " aamodelpr=fixed(" << mappedModel << ")"; // GTR Customization if (strcmp(mappedModel.c_str(), "gtr") == 0) { diff --git a/model/modelsubst.cpp b/model/modelsubst.cpp index 98dc953c3..ae8b425fd 100644 --- a/model/modelsubst.cpp +++ b/model/modelsubst.cpp @@ -212,8 +212,11 @@ double *ModelSubst::newTransMatrix() { } void ModelSubst::printMrBayesModelText(ofstream& out, string partition, string charset) { - out << " [MrBayes Block Output is not supported by model " << name << ", skipped." << endl; - outWarning("MrBayes Block Output is not supported by this model of name '" + name + "'!"); + out << "using MrBayes model GTR+G+I]" << endl; + out << " [Model not supported by MrBayes, defaulting to GTR+G+I (DNA)]" << endl; + outWarning("MrBayes output is not supported by model " + name + ", defaulting to GTR+G+I (DNA)!"); + + out << " lset applyto=(" << partition << ") nucmodel=4by4 nst=" << 6 << " rates=" << "invgamma" << ";" << endl; } ModelSubst::~ModelSubst() diff --git a/utils/tools.cpp b/utils/tools.cpp index b42752af2..c9d6e0ed9 100644 --- a/utils/tools.cpp +++ b/utils/tools.cpp @@ -8236,11 +8236,6 @@ string getOutputNameWithExt(const InputType& format, const string& output_filepa } } -void warnLogStream(string warn, ofstream &out, string indentation) { - outWarning(warn); - out << indentation << "[" << warn << "]" << endl; -} - double minValueCheckMrBayes(double origValue) { if (origValue < 0.01) { outWarning("MrBayes does not support values < 0.01! Using 0.01 instead..."); diff --git a/utils/tools.h b/utils/tools.h index 869ebd56c..e5928c661 100644 --- a/utils/tools.h +++ b/utils/tools.h @@ -3817,14 +3817,6 @@ double frob_norm (double m[], int n, double scale=1.0); */ string getOutputNameWithExt(const InputType& format, const string& output_filepath); -/** - * Prints a warning message to the log and to the ofstream, in a NEXUS format. - * @param warn the message to print - * @param out the ofstream to print to - * @param indentation optional indentation included only in ofstream. defaults to two spaces - */ -void warnLogStream(string warn, ofstream &out, string indentation = " "); - /** * ensures a number, to be inputted into MrBayes, is larger than the minimum value for MrBayes (0.01) */ From 6bb24c83b1188212380d19295509b87224cd0361 Mon Sep 17 00:00:00 2001 From: Integer Limit <103940576+IntegerLimit@users.noreply.github.com> Date: Sat, 10 May 2025 12:17:49 +1000 Subject: [PATCH 07/15] Improve Comment Structure --- main/phyloanalysis.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/main/phyloanalysis.cpp b/main/phyloanalysis.cpp index cf4e7c2d1..38e34739d 100644 --- a/main/phyloanalysis.cpp +++ b/main/phyloanalysis.cpp @@ -1314,7 +1314,7 @@ void printOutfilesInfo(Params ¶ms, IQTree &tree) { } if (params.mr_bayes_output) { - cout << endl << "MrBayes block written to:" << params.out_prefix << ".mr_bayes.nex" << endl; + cout << " MrBayes block written to:" << params.out_prefix << ".mr_bayes.nex" << endl; } cout << endl; @@ -3299,7 +3299,7 @@ void printMrBayesBlockFile(Params ¶ms, IQTree* &iqtree) { } if (!iqtree->isSuperTree()) { - out << " [IQTree inferred model " << iqtree->getModelName() << ", " << endl; + out << " [IQTree inferred model " << iqtree->getModelName() << ", "; iqtree->getModel()->printMrBayesModelText(out, "all", ""); out << endl << "end;" << endl; @@ -3333,13 +3333,13 @@ void printMrBayesBlockFile(Params ¶ms, IQTree* &iqtree) { out << ";" << endl; // Set Partition for Use - out << " set partition = iqtree;" << endl; + out << " set partition = iqtree;" << endl << endl; // Partition-Specific Model Information for (int part = 0; part < size; part++) { PhyloTree* currentTree = superTree->at(part); - out << " [Subset #" << part + 1 << " IQTree inferred model " << currentTree->getModelName() << ", "; + out << " [Subset #" << part + 1 << ": IQTree inferred model " << currentTree->getModelName() << ", "; currentTree->getModel()->printMrBayesModelText(out, convertIntToString(part + 1), saln->partitions[part]->name); out << endl; From 09ec559368e2eab7a7b42718d789d87df3a08f19 Mon Sep 17 00:00:00 2001 From: Integer Limit <103940576+IntegerLimit@users.noreply.github.com> Date: Sat, 10 May 2025 12:25:24 +1000 Subject: [PATCH 08/15] Print Model Equal Freq --- model/modelbin.cpp | 5 +++-- model/modeldna.cpp | 7 +++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/model/modelbin.cpp b/model/modelbin.cpp index aa80f6ecc..3524cd1fc 100644 --- a/model/modelbin.cpp +++ b/model/modelbin.cpp @@ -79,12 +79,13 @@ string ModelBIN::getNameParams(bool show_fixed_params) { void ModelBIN::printMrBayesModelText(ofstream& out, string partition, string charset) { RateHeterogeneity* rate = phylo_tree->getRate(); + bool equalFreq = freq_type == FREQ_EQUAL; // Free Rate should be substituted by +G (+I not supported) bool hasGamma = rate->getGammaShape() != 0.0 || rate->isFreeRate(); // MrBayes's Binary Model is 'F81-like'. - out << "using MrBayes model F81" << (hasGamma ? "+G" : "") << "]" << endl; + out << "using MrBayes model F81" << (hasGamma ? "+G" : "") << (equalFreq ? "+FQ" : "") << "]" << endl; if (rate->isFreeRate() || rate->getPInvar() > 0.0) { out << " [+I modifier ignored, not supported by MrBayes for binary data]" << endl; outWarning("MrBayes does not support Invariable Sites with Binary Data! +I has been ignored!"); @@ -100,6 +101,6 @@ void ModelBIN::printMrBayesModelText(ofstream& out, string partition, string cha out << ";" << endl; - if (freq_type == FREQ_EQUAL) + if (equalFreq) out << " prset applyto=(" << partition << ") statefreqpr=fixed(equal);" << endl; } diff --git a/model/modeldna.cpp b/model/modeldna.cpp index 191988106..f85fc8ed6 100644 --- a/model/modeldna.cpp +++ b/model/modeldna.cpp @@ -614,11 +614,10 @@ void ModelDNA::printMrBayesModelText(ofstream& out, string partition, string cha modelName.append("+I"); } - out << "using MrBayes model " << modelName << "]" << endl; + out << "using MrBayes model " << modelName << (equalFreq ? "+FQ" : "") << "]" << endl; // NucModel = 4By4: DNA Nucleotides (4 Options, A, C, G, T) out << " lset applyto=(" << partition << ") nucmodel=4by4 nst=" << nst << " rates=" << rateStr << ";" << endl; - if (!equalFreq) return; - - out << " prset applyto=(" << partition << ") statefreqpr=fixed(equal);" << endl; + if (equalFreq) + out << " prset applyto=(" << partition << ") statefreqpr=fixed(equal);" << endl; } From 83bb250cd1185ff042b181a9a105dc0dd5ff8046 Mon Sep 17 00:00:00 2001 From: Integer Limit <103940576+IntegerLimit@users.noreply.github.com> Date: Sat, 10 May 2025 12:45:55 +1000 Subject: [PATCH 09/15] Fix Console Log Indentation --- main/phyloanalysis.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main/phyloanalysis.cpp b/main/phyloanalysis.cpp index 38e34739d..cc2f9f0ef 100644 --- a/main/phyloanalysis.cpp +++ b/main/phyloanalysis.cpp @@ -1314,7 +1314,7 @@ void printOutfilesInfo(Params ¶ms, IQTree &tree) { } if (params.mr_bayes_output) { - cout << " MrBayes block written to:" << params.out_prefix << ".mr_bayes.nex" << endl; + cout << " MrBayes block written to: " << params.out_prefix << ".mr_bayes.nex" << endl; } cout << endl; From 92e7916ae2b37b83ba380530349f07cdd02b4425 Mon Sep 17 00:00:00 2001 From: Integer Limit <103940576+IntegerLimit@users.noreply.github.com> Date: Thu, 22 May 2025 23:15:12 +1000 Subject: [PATCH 10/15] Fix Variable Names --- alignment/alignment.cpp | 6 +++--- main/phyloanalysis.cpp | 28 +++++++++++++------------- model/modelbin.cpp | 10 +++++----- model/modelcodon.cpp | 20 +++++++++---------- model/modeldna.cpp | 42 +++++++++++++++++++-------------------- model/modelmorphology.cpp | 6 +++--- model/modelprotein.cpp | 36 ++++++++++++++++----------------- utils/tools.cpp | 14 ++++++------- utils/tools.h | 2 +- 9 files changed, 82 insertions(+), 82 deletions(-) diff --git a/alignment/alignment.cpp b/alignment/alignment.cpp index f5fba4f00..7f048dc8b 100644 --- a/alignment/alignment.cpp +++ b/alignment/alignment.cpp @@ -1881,9 +1881,9 @@ void Alignment::initCodon(char *gene_code_id) { } catch (string &str) { outError("Wrong genetic code ", gene_code_id); } - auto codeMap = getGeneticCodeMap(); - auto found = codeMap.left.find(transl_table); - if (found == codeMap.left.end()) { + auto code_map = getGeneticCodeMap(); + auto found = code_map.left.find(transl_table); + if (found == code_map.left.end()) { outError("Wrong genetic code ", gene_code_id); return; } diff --git a/main/phyloanalysis.cpp b/main/phyloanalysis.cpp index cc2f9f0ef..768031468 100644 --- a/main/phyloanalysis.cpp +++ b/main/phyloanalysis.cpp @@ -3282,12 +3282,11 @@ void printMrBayesBlockFile(Params ¶ms, IQTree* &iqtree) { out.exceptions(ios::failbit | ios::badbit); out.open(filename); - string provide = "basic models"; - if (iqtree->isSuperTree()) provide = "basic partition structure and models"; - // Write Warning out << "#nexus" << endl << endl - <<"[This MrBayes Block Declaration provides the " << provide << " from the IQTree Run.]" << endl + <<"[This MrBayes Block Declaration provides the basic " + << (iqtree->isSuperTree() ? "partition structure and models" : "models") + << " from the IQTree Run.]" << endl << "[Note that MrBayes does not support a large collection of models, so defaults of 'nst=6' for DNA and 'wag' for Protein will be used if a model that does not exist in MrBayes is used.]" << endl << "[Furthermore, the Model Parameter '+R' will be replaced by '+G+I'.]" << endl << "[This should be used as a Template Only.]" << endl << endl; @@ -3307,9 +3306,9 @@ void printMrBayesBlockFile(Params ¶ms, IQTree* &iqtree) { return; } - auto superTree = (PhyloSuperTree*) iqtree; - auto saln = (SuperAlignment*) superTree->aln; - auto size = superTree->size(); + auto stree = (PhyloSuperTree*) iqtree; + auto saln = (SuperAlignment*) stree->aln; + auto size = stree->size(); for (int part = 0; part < size; part++) { string name = saln->partitions[part]->name; @@ -3337,11 +3336,12 @@ void printMrBayesBlockFile(Params ¶ms, IQTree* &iqtree) { // Partition-Specific Model Information for (int part = 0; part < size; part++) { - PhyloTree* currentTree = superTree->at(part); + PhyloTree* curr_tree = stree->at(part); - out << " [Subset #" << part + 1 << ": IQTree inferred model " << currentTree->getModelName() << ", "; - currentTree->getModel()->printMrBayesModelText(out, - convertIntToString(part + 1), saln->partitions[part]->name); + out << " [Subset #" << part + 1 << ": IQTree inferred model " << curr_tree->getModelName() << ", "; + curr_tree->getModel()->printMrBayesModelText(out, + convertIntToString(part + 1), + saln->partitions[part]->name); out << endl; } @@ -3877,9 +3877,9 @@ void runTreeReconstruction(Params ¶ms, IQTree* &iqtree) { } if (iqtree->isSuperTree()) { - auto superTree = (PhyloSuperTree*) iqtree; - superTree->computeBranchLengths(); - superTree->printBestPartitionParams((string(params.out_prefix) + ".best_model.nex").c_str()); + auto stree = (PhyloSuperTree*) iqtree; + stree->computeBranchLengths(); + stree->printBestPartitionParams((string(params.out_prefix) + ".best_model.nex").c_str()); } if (params.mr_bayes_output) { cout << endl << "Writing MrBayes Block Files..." << endl; diff --git a/model/modelbin.cpp b/model/modelbin.cpp index 3524cd1fc..8ec5fc506 100644 --- a/model/modelbin.cpp +++ b/model/modelbin.cpp @@ -79,13 +79,13 @@ string ModelBIN::getNameParams(bool show_fixed_params) { void ModelBIN::printMrBayesModelText(ofstream& out, string partition, string charset) { RateHeterogeneity* rate = phylo_tree->getRate(); - bool equalFreq = freq_type == FREQ_EQUAL; + bool equal_freq = freq_type == FREQ_EQUAL; // Free Rate should be substituted by +G (+I not supported) - bool hasGamma = rate->getGammaShape() != 0.0 || rate->isFreeRate(); + bool has_gamma = rate->getGammaShape() != 0.0 || rate->isFreeRate(); // MrBayes's Binary Model is 'F81-like'. - out << "using MrBayes model F81" << (hasGamma ? "+G" : "") << (equalFreq ? "+FQ" : "") << "]" << endl; + out << "using MrBayes model F81" << (has_gamma ? "+G" : "") << (equal_freq ? "+FQ" : "") << "]" << endl; if (rate->isFreeRate() || rate->getPInvar() > 0.0) { out << " [+I modifier ignored, not supported by MrBayes for binary data]" << endl; outWarning("MrBayes does not support Invariable Sites with Binary Data! +I has been ignored!"); @@ -93,7 +93,7 @@ void ModelBIN::printMrBayesModelText(ofstream& out, string partition, string cha // Lset Parameters out << " lset applyto=(" << partition << ") rates="; - if (hasGamma) { + if (has_gamma) { // Rate Categories + Gamma out << "gamma"; } else @@ -101,6 +101,6 @@ void ModelBIN::printMrBayesModelText(ofstream& out, string partition, string cha out << ";" << endl; - if (equalFreq) + if (equal_freq) out << " prset applyto=(" << partition << ") statefreqpr=fixed(equal);" << endl; } diff --git a/model/modelcodon.cpp b/model/modelcodon.cpp index 63f855dd8..d9858df7d 100644 --- a/model/modelcodon.cpp +++ b/model/modelcodon.cpp @@ -1187,35 +1187,35 @@ void ModelCodon::printMrBayesModelText(ofstream& out, string partition, string c // GTR codon is not available in IQTREE int nst = fix_kappa ? 1 : 2; int code = phylo_tree->aln->getGeneticCodeId(); - string mrBayesCode = getMrBayesGeneticCode(code); - bool codeNotSupported = mrBayesCode.empty(); + string mr_bayes_code = getMrBayesGeneticCode(code); + bool code_not_supported = mr_bayes_code.empty(); RateHeterogeneity* rate = phylo_tree->getRate(); - string modelName = fix_kappa ? "JC" : "HKY"; + string model_name = fix_kappa ? "JC" : "HKY"; // If this model is a Empirical / Semi-Empirical Model if (num_params == 0 || name.find('_') != string::npos) { nst = 6; - modelName = "GTR"; + model_name = "GTR"; } - out << "using MrBayes model " << modelName << "]" << endl; + out << "using MrBayes model " << model_name << "]" << endl; if (rate->isFreeRate() || rate->getGammaShape() > 0.0 || rate->getPInvar() > 0.0) { out << " [Rate modifiers (+I, +G, +R) ignored, not supported by MrBayes codon models]" << endl; outWarning("MrBayes Codon Models do not support any Heterogenity Rate Modifiers! (+I, +G, +R) They have been ignored!"); } - if (codeNotSupported) { + if (code_not_supported) { outWarning("MrBayes Does Not Support Codon Code " + convertIntToString(code) + "! Defaulting to Universal (Code 1)."); - mrBayesCode = "universal"; + mr_bayes_code = "universal"; } - out << " [IQTree genetic code " << code << ", using MrBayes genetic code " << mrBayesCode << "]" << endl; - if (codeNotSupported) + out << " [IQTree genetic code " << code << ", using MrBayes genetic code " << mr_bayes_code << "]" << endl; + if (code_not_supported) out << " [Genetic code " << code << " is not supported by MrBayes, defaulting to universal (code 1)]" << endl; - out << " lset applyto=(" << partition << ") nucmodel=codon omegavar=equal nst=" << nst << " code=" << mrBayesCode << ";" << endl; + out << " lset applyto=(" << partition << ") nucmodel=codon omegavar=equal nst=" << nst << " code=" << mr_bayes_code << ";" << endl; } void ModelCodon::writeInfo(ostream &out) { diff --git a/model/modeldna.cpp b/model/modeldna.cpp index f85fc8ed6..375c63fdd 100644 --- a/model/modeldna.cpp +++ b/model/modeldna.cpp @@ -566,9 +566,9 @@ void ModelDNA::setVariables(double *variables) { } void ModelDNA::printMrBayesModelText(ofstream& out, string partition, string charset) { - bool equalFreq = freq_type == FREQ_EQUAL; + bool equal_freq = freq_type == FREQ_EQUAL; short nst = 6; - string modelName = "GTR"; + string model_name = "GTR"; RateHeterogeneity* rate = phylo_tree->getRate(); // Find NST value (1 for JC/JC69/F81, 2 for K80/K2P/HKY/HKY85, 6 for SYM/GTR) @@ -578,46 +578,46 @@ void ModelDNA::printMrBayesModelText(ofstream& out, string partition, string cha // if all equal if (param_spec.find_first_not_of(param_spec[0]) == string::npos) { nst = 1; - modelName = "JC"; + model_name = "JC"; // if A-G=C-T and everything else is equal to first part of dna code } else if (param_spec[1] == param_spec[4] && param_spec[0] == param_spec[2] && param_spec[0] == param_spec[3] && param_spec[0] == param_spec[5]) { nst = 2; - modelName = "HKY"; + model_name = "HKY"; } } else if (!name.empty()) { // Check the name of the model if (strcmp(name.c_str(), "JC") == 0 || strcmp(name.c_str(), "JC69") == 0 || strcmp(name.c_str(), "F81") == 0) { nst = 1; - modelName = "JC"; + model_name = "JC"; } else if (strcmp(name.c_str(), "K80") == 0 || strcmp(name.c_str(), "K2P") == 0 || strcmp(name.c_str(), "HKY") == 0 || strcmp(name.c_str(), "HKY85") == 0) { nst = 2; - modelName = "HKY"; + model_name = "HKY"; } } // RHAS Specification // Free Rate should be substituted by +G+I - bool hasGamma = rate->getGammaShape() != 0.0 || rate->isFreeRate(); - bool hasInvariable = rate->getPInvar() != 0.0 || rate->isFreeRate(); - string rateStr = "equal"; - if (hasGamma) { - if (hasInvariable) { - rateStr = "invgamma"; - modelName.append("+G+I"); + bool has_gamma = rate->getGammaShape() != 0.0 || rate->isFreeRate(); + bool has_invariable = rate->getPInvar() != 0.0 || rate->isFreeRate(); + string rate_str = "equal"; + if (has_gamma) { + if (has_invariable) { + rate_str = "invgamma"; + model_name.append("+G+I"); } else { - rateStr = "gamma"; - modelName.append("+G"); + rate_str = "gamma"; + model_name.append("+G"); } - } else if (hasInvariable) { - rateStr = "propinv"; - modelName.append("+I"); + } else if (has_invariable) { + rate_str = "propinv"; + model_name.append("+I"); } - out << "using MrBayes model " << modelName << (equalFreq ? "+FQ" : "") << "]" << endl; + out << "using MrBayes model " << model_name << (equal_freq ? "+FQ" : "") << "]" << endl; // NucModel = 4By4: DNA Nucleotides (4 Options, A, C, G, T) - out << " lset applyto=(" << partition << ") nucmodel=4by4 nst=" << nst << " rates=" << rateStr << ";" << endl; + out << " lset applyto=(" << partition << ") nucmodel=4by4 nst=" << nst << " rates=" << rate_str << ";" << endl; - if (equalFreq) + if (equal_freq) out << " prset applyto=(" << partition << ") statefreqpr=fixed(equal);" << endl; } diff --git a/model/modelmorphology.cpp b/model/modelmorphology.cpp index 63cf8bc1b..d3ce0d7a6 100644 --- a/model/modelmorphology.cpp +++ b/model/modelmorphology.cpp @@ -176,11 +176,11 @@ void ModelMorphology::printMrBayesModelText(ofstream& out, string partition, str RateHeterogeneity* rate = phylo_tree->getRate(); // Free Rate should be substituted by +G (+I not supported) - bool hasGamma = rate->getGammaShape() != 0.0 || rate->isFreeRate(); + bool has_gamma = rate->getGammaShape() != 0.0 || rate->isFreeRate(); bool ordered = strcmp(name.c_str(), "ORDERED") == 0; // MrBayes' morph model is 'JC-like' - out << "using MrBayes model " << (ordered ? "ORDERED" : "JC") << (hasGamma ? "+G" : "") << "]" << endl; + out << "using MrBayes model " << (ordered ? "ORDERED" : "JC") << (has_gamma ? "+G" : "") << "]" << endl; // Warnings @@ -204,7 +204,7 @@ void ModelMorphology::printMrBayesModelText(ofstream& out, string partition, str // Lset Parameters out << " lset applyto=(" << partition << ") rates="; - if (hasGamma) { + if (has_gamma) { // Rate Categories + Gamma out << "gamma"; } else diff --git a/model/modelprotein.cpp b/model/modelprotein.cpp index 4ed4a0bb2..6688ba5e2 100644 --- a/model/modelprotein.cpp +++ b/model/modelprotein.cpp @@ -1168,44 +1168,44 @@ void ModelProtein::printMrBayesModelText(ofstream& out, string partition, string RateHeterogeneity* rate = phylo_tree->getRate(); // Get MrBayes Model - auto aaModelMap = getIqTreeToMrBayesAAModels(); - auto iter = aaModelMap.find(name); - string mappedModel = "gtr"; + auto aa_model_map = getIqTreeToMrBayesAAModels(); + auto iter = aa_model_map.find(name); + string mapped_model = "gtr"; // If model is in map, set mappedModel to the value - if (iter != aaModelMap.end()) - mappedModel = iter->second; + if (iter != aa_model_map.end()) + mapped_model = iter->second; - out << "using MrBayes model " << mappedModel; + out << "using MrBayes model " << mapped_model; // RHAS Specification // Free Rate should be substituted by +G+I - bool hasGamma = rate->getGammaShape() != 0.0 || rate->isFreeRate(); - bool hasInvariable = rate->getPInvar() != 0.0 || rate->isFreeRate(); - string rateStr = "equal"; - if (hasGamma) { - if (hasInvariable) { - rateStr = "invgamma"; + bool has_gamma = rate->getGammaShape() != 0.0 || rate->isFreeRate(); + bool has_invariable = rate->getPInvar() != 0.0 || rate->isFreeRate(); + string rate_str = "equal"; + if (has_gamma) { + if (has_invariable) { + rate_str = "invgamma"; out << "+G+I"; } else { - rateStr = "gamma"; + rate_str = "gamma"; out << "+G"; } - } else if (hasInvariable) { - rateStr = "propinv"; + } else if (has_invariable) { + rate_str = "propinv"; out << "+I"; } out << "]" << endl; // Lset Parameters - out << " lset applyto=(" << partition << ") nucmodel=protein rates=" << rateStr << ";" << endl; + out << " lset applyto=(" << partition << ") nucmodel=protein rates=" << rate_str << ";" << endl; - out << " prset applyto=(" << partition << ")" << " aamodelpr=fixed(" << mappedModel << ")"; + out << " prset applyto=(" << partition << ")" << " aamodelpr=fixed(" << mapped_model << ")"; // GTR Customization - if (strcmp(mappedModel.c_str(), "gtr") == 0) { + if (strcmp(mapped_model.c_str(), "gtr") == 0) { // add rate matrix and state frequencies (mandatory for setting gtr values) out << " aarevmatpr="; diff --git a/utils/tools.cpp b/utils/tools.cpp index c9d6e0ed9..1892f38b9 100644 --- a/utils/tools.cpp +++ b/utils/tools.cpp @@ -8236,15 +8236,15 @@ string getOutputNameWithExt(const InputType& format, const string& output_filepa } } -double minValueCheckMrBayes(double origValue) { - if (origValue < 0.01) { +double minValueCheckMrBayes(double orig_value) { + if (orig_value < 0.01) { outWarning("MrBayes does not support values < 0.01! Using 0.01 instead..."); return 0.01; } - return origValue; + return orig_value; } -const unordered_map iqTreeToMrBayesAAModels = { +const unordered_map iqtree_to_mr_bayes_aa_models = { {"Poisson", "poisson"}, {"JTT", "jones"}, {"Dayhoff", "dayhoff"}, @@ -8259,15 +8259,15 @@ const unordered_map iqTreeToMrBayesAAModels = { }; // Anything outside of index 10 (Code No. 11) is invalid, leave that as empty string -const string indexedMrBayesGeneticCodes[25] = {"universal", "vertmt", "yeast", "mycoplasma", "invermt", +const string indexed_mr_bayes_genetic_codes[25] = {"universal", "vertmt", "yeast", "mycoplasma", "invermt", "ciliate", "", "", "echinoderm", "euplotid", "universal"}; unordered_map getIqTreeToMrBayesAAModels() { - return iqTreeToMrBayesAAModels; + return iqtree_to_mr_bayes_aa_models; } string getMrBayesGeneticCode(int geneticCodeId) { if (geneticCodeId == 0) return ""; - return indexedMrBayesGeneticCodes[geneticCodeId - 1]; + return indexed_mr_bayes_genetic_codes[geneticCodeId - 1]; } diff --git a/utils/tools.h b/utils/tools.h index e5928c661..4d7194c02 100644 --- a/utils/tools.h +++ b/utils/tools.h @@ -3820,7 +3820,7 @@ string getOutputNameWithExt(const InputType& format, const string& output_filepa /** * ensures a number, to be inputted into MrBayes, is larger than the minimum value for MrBayes (0.01) */ -double minValueCheckMrBayes(double origValue); +double minValueCheckMrBayes(double orig_value); /** From 2d40bffc22666dabdec20b1dbe8a11b043151a37 Mon Sep 17 00:00:00 2001 From: thomaskf Date: Tue, 1 Jul 2025 10:08:53 +1000 Subject: [PATCH 11/15] For exporting protein model to MrBayes, disable the output of the GTR parameters and handle +FQ frequency --- main/phyloanalysis.cpp | 6 +++--- model/modelbin.cpp | 2 +- model/modeldna.cpp | 2 +- model/modelprotein.cpp | 9 +++++++++ 4 files changed, 14 insertions(+), 5 deletions(-) diff --git a/main/phyloanalysis.cpp b/main/phyloanalysis.cpp index 323f7a86c..6095b56d8 100644 --- a/main/phyloanalysis.cpp +++ b/main/phyloanalysis.cpp @@ -3379,11 +3379,11 @@ void printMrBayesBlockFile(Params ¶ms, IQTree* &iqtree) { // Partition Type Settings if (params.partition_type != TOPO_UNLINKED) { - out << "unlink statefreq=(all) revmat=(all) shape=(all) pinvar=(all) tratio=(all);" << endl; + out << " unlink statefreq=(all) revmat=(all) shape=(all) pinvar=(all) tratio=(all);" << endl; if (params.partition_type != BRLEN_FIX) { - out << "prset applyto=(all) ratepr=variable;" << endl; + out << " prset applyto=(all) ratepr=variable;" << endl; if (params.partition_type != BRLEN_SCALE) { - out << "unlink brlens=(all);" << endl; + out << " unlink brlens=(all);" << endl; } } } diff --git a/model/modelbin.cpp b/model/modelbin.cpp index 8ec5fc506..b394e6ab1 100644 --- a/model/modelbin.cpp +++ b/model/modelbin.cpp @@ -79,7 +79,7 @@ string ModelBIN::getNameParams(bool show_fixed_params) { void ModelBIN::printMrBayesModelText(ofstream& out, string partition, string charset) { RateHeterogeneity* rate = phylo_tree->getRate(); - bool equal_freq = freq_type == FREQ_EQUAL; + bool equal_freq = (freq_type == FREQ_EQUAL); // Free Rate should be substituted by +G (+I not supported) bool has_gamma = rate->getGammaShape() != 0.0 || rate->isFreeRate(); diff --git a/model/modeldna.cpp b/model/modeldna.cpp index 375c63fdd..3c44d0020 100644 --- a/model/modeldna.cpp +++ b/model/modeldna.cpp @@ -566,7 +566,7 @@ void ModelDNA::setVariables(double *variables) { } void ModelDNA::printMrBayesModelText(ofstream& out, string partition, string charset) { - bool equal_freq = freq_type == FREQ_EQUAL; + bool equal_freq = (freq_type == FREQ_EQUAL); short nst = 6; string model_name = "GTR"; RateHeterogeneity* rate = phylo_tree->getRate(); diff --git a/model/modelprotein.cpp b/model/modelprotein.cpp index 6688ba5e2..93958b2af 100644 --- a/model/modelprotein.cpp +++ b/model/modelprotein.cpp @@ -1166,6 +1166,7 @@ void ModelProtein::computeTipLikelihood(PML::StateType state, double *state_lk) void ModelProtein::printMrBayesModelText(ofstream& out, string partition, string charset) { RateHeterogeneity* rate = phylo_tree->getRate(); + bool equal_freq = (freq_type == FREQ_EQUAL); // Get MrBayes Model auto aa_model_map = getIqTreeToMrBayesAAModels(); @@ -1178,6 +1179,9 @@ void ModelProtein::printMrBayesModelText(ofstream& out, string partition, string out << "using MrBayes model " << mapped_model; + if (equal_freq) + out << "+FQ"; + // RHAS Specification // Free Rate should be substituted by +G+I bool has_gamma = rate->getGammaShape() != 0.0 || rate->isFreeRate(); @@ -1204,6 +1208,10 @@ void ModelProtein::printMrBayesModelText(ofstream& out, string partition, string out << " prset applyto=(" << partition << ")" << " aamodelpr=fixed(" << mapped_model << ")"; + if (equal_freq) + out << " statefreqpr=fixed(equal)"; + + /* // GTR Customization if (strcmp(mapped_model.c_str(), "gtr") == 0) { // add rate matrix and state frequencies (mandatory for setting gtr values) @@ -1230,6 +1238,7 @@ void ModelProtein::printMrBayesModelText(ofstream& out, string partition, string } out << ")"; } + */ out << ";"; } From f1be77779a0de3036f45297d349fc1f2d036fc4a Mon Sep 17 00:00:00 2001 From: Integer Limit <103940576+IntegerLimit@users.noreply.github.com> Date: Mon, 15 Sep 2025 12:30:44 +1000 Subject: [PATCH 12/15] Comment: Update Fallback Model for Protein --- main/phyloanalysis.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main/phyloanalysis.cpp b/main/phyloanalysis.cpp index fdf7f6586..83fe563b0 100644 --- a/main/phyloanalysis.cpp +++ b/main/phyloanalysis.cpp @@ -3323,7 +3323,7 @@ void printMrBayesBlockFile(Params ¶ms, IQTree* &iqtree) { <<"[This MrBayes Block Declaration provides the basic " << (iqtree->isSuperTree() ? "partition structure and models" : "models") << " from the IQTree Run.]" << endl - << "[Note that MrBayes does not support a large collection of models, so defaults of 'nst=6' for DNA and 'wag' for Protein will be used if a model that does not exist in MrBayes is used.]" << endl + << "[Note that MrBayes does not support a large collection of models, so defaults of 'nst=6' for DNA and 'gtr' for Protein will be used if a model that does not exist in MrBayes is used.]" << endl << "[Furthermore, the Model Parameter '+R' will be replaced by '+G+I'.]" << endl << "[This should be used as a Template Only.]" << endl << endl; From 50ff3bc5370a11ae3d6060073d38a9c39600f515 Mon Sep 17 00:00:00 2001 From: Integer Limit <103940576+IntegerLimit@users.noreply.github.com> Date: Mon, 15 Sep 2025 12:32:55 +1000 Subject: [PATCH 13/15] Protein: Fully Remove GTR Rate Matrix Logic The process is still reversible due to git logs. --- model/modelprotein.cpp | 31 +------------------------------ 1 file changed, 1 insertion(+), 30 deletions(-) diff --git a/model/modelprotein.cpp b/model/modelprotein.cpp index 93958b2af..cabe976e2 100644 --- a/model/modelprotein.cpp +++ b/model/modelprotein.cpp @@ -1207,39 +1207,10 @@ void ModelProtein::printMrBayesModelText(ofstream& out, string partition, string out << " lset applyto=(" << partition << ") nucmodel=protein rates=" << rate_str << ";" << endl; out << " prset applyto=(" << partition << ")" << " aamodelpr=fixed(" << mapped_model << ")"; - + if (equal_freq) out << " statefreqpr=fixed(equal)"; - /* - // GTR Customization - if (strcmp(mapped_model.c_str(), "gtr") == 0) { - // add rate matrix and state frequencies (mandatory for setting gtr values) - out << " aarevmatpr="; - - // if matrix is GTR20, use dirichlet, else use fixed - if (strcmp(name.c_str(), "GTR20") == 0) - out << "dirichlet("; - else - out << "fixed("; - - for (int i = 0; i < getNumRateEntries(); ++i) { - if (i != 0) out << ", "; - out << minValueCheckMrBayes(rates[i]); - } - out << ")"; - - // Frequency type is never equal to FREQ_EQUAL, even with Poisson - // Frequency is also auto-set if we use a model defined by MrBayes - out << " statefreqpr=dirichlet("; - for (int i = 0; i < num_states; ++i) { - if (i != 0) out << ", "; - out << minValueCheckMrBayes(state_freq[i]); - } - out << ")"; - } - */ - out << ";"; } From f462794b0afd9d847140c7445801185a567a3cd2 Mon Sep 17 00:00:00 2001 From: Integer Limit <103940576+IntegerLimit@users.noreply.github.com> Date: Mon, 15 Sep 2025 12:40:14 +1000 Subject: [PATCH 14/15] Protein: Fix Missing Endline --- model/modelprotein.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/model/modelprotein.cpp b/model/modelprotein.cpp index cabe976e2..4fc7b0907 100644 --- a/model/modelprotein.cpp +++ b/model/modelprotein.cpp @@ -1211,6 +1211,6 @@ void ModelProtein::printMrBayesModelText(ofstream& out, string partition, string if (equal_freq) out << " statefreqpr=fixed(equal)"; - out << ";"; + out << ";" << endl; } From df6e521a012360898f47f393889236512e0b0014 Mon Sep 17 00:00:00 2001 From: Integer Limit <103940576+IntegerLimit@users.noreply.github.com> Date: Mon, 15 Sep 2025 12:44:23 +1000 Subject: [PATCH 15/15] Update Help Comment: Model is Always Outputted --- utils/tools.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/utils/tools.cpp b/utils/tools.cpp index 7e3939941..f9bb1313b 100644 --- a/utils/tools.cpp +++ b/utils/tools.cpp @@ -6588,9 +6588,6 @@ void usage_iqtree(char* argv[], bool full_command) { // << " -stats Output some statistics about branch lengths" << endl // << " -comp Compare tree with each in the input trees" << endl; << " -mrbayes Outputs a Mr Bayes block file, to use as a template for future analysis" << endl - << " Will not output if incompatible data types are used" << endl - << " (MrBayes only supports DNA, Codon, Protein, Binary and Morphological data)" << endl - << " There will be no substitution model data output for Lie Markov and Mixture models!" << endl << endl; if (full_command) {