From 1b9b02852473e2a78b9da80ab781355352a69e2e Mon Sep 17 00:00:00 2001 From: Integer Limit <103940576+IntegerLimit@users.noreply.github.com> Date: Fri, 5 Jul 2024 11:16:28 +1000 Subject: [PATCH 01/12] Protein Model --- main/phyloanalysis.cpp | 2 + tree/phylotree.cpp | 101 ++++++++++++++++++++++++++++++++++------- utils/tools.cpp | 20 ++++++++ utils/tools.h | 6 +++ 4 files changed, 113 insertions(+), 16 deletions(-) diff --git a/main/phyloanalysis.cpp b/main/phyloanalysis.cpp index 48719d28a..bfa98f922 100644 --- a/main/phyloanalysis.cpp +++ b/main/phyloanalysis.cpp @@ -3111,8 +3111,10 @@ void runTreeReconstruction(Params ¶ms, IQTree* &iqtree) { superTree->printBestPartitionParams((string(params.out_prefix) + ".best_model.nex").c_str()); } if (params.mr_bayes_output) { + cout << endl << "Writing MrBayes Block Files..." << endl; iqtree->printMrBayesBlock((string(params.out_prefix) + ".mr_bayes_scheme.nex").c_str(), false); iqtree->printMrBayesBlock((string(params.out_prefix) + ".mr_bayes_model.nex").c_str(), true); + cout << endl; } cout << "BEST SCORE FOUND : " << iqtree->getCurScore() << endl; diff --git a/tree/phylotree.cpp b/tree/phylotree.cpp index 8b1f6bce8..524c04581 100644 --- a/tree/phylotree.cpp +++ b/tree/phylotree.cpp @@ -6254,8 +6254,9 @@ void PhyloTree::doNNI_simple(NNIMove &move) { */ string getMrBayesRHASName(RateHeterogeneity* rate) { - bool hasGamma = rate->getGammaShape() != 0.0; - bool hasInvariable = rate->getPInvar() != 0.0; + // Free Rate should be substituted by +G+I + bool hasGamma = rate->getGammaShape() != 0.0 || rate->isFreeRate(); + bool hasInvariable = rate->getPInvar() != 0.0 || rate->isFreeRate(); if (hasGamma) { if (hasInvariable) return "invgamma"; @@ -6271,9 +6272,9 @@ void printMrBayesRHASPrior(PhyloTree* originalTree, RateHeterogeneity* rate, str bool hasI = rate->getPInvar() > 0.0; // Freerate (+R) - // Get replacement gamma shape + // Get replacement Gamma Shape + Invariable Sites if (rate->isFreeRate()) { - originalTree ->printMrBayesFreeRateReplacement(rate, charset, out); + originalTree->printMrBayesFreeRateReplacement(rate, charset, out); } // Gamma Distribution (+G/+R) @@ -6361,6 +6362,71 @@ void printMrBayesModelTextDNA(PhyloTree* originalTree, PhyloTree* tree, ofstream out << ");" << endl; delete[] rateMat; +}; + +void printMrBayesModelTextProtein(PhyloTree* originalTree, PhyloTree* tree, ofstream &out, string &partition, string &charset, bool inclParams) +{ + // Lset Parameters + out << " lset applyto=(" << partition << ") nucmodel=protein rates=" << getMrBayesRHASName(tree->getRate()) << ";" << endl; + + out << " prset applyto=(" << partition << ")"; + + // Model + ModelSubst* model = tree->getModel(); + + // use this instead of getName, getName returns +F, +FU, etc. + string modelName = model->name; + + auto aaModelMap = getIqTreeToMrBayesAAModels(); + auto iter = aaModelMap.find(modelName); + string mappedModel = "gtr"; + + // If model is in map, set mappedModel to the value + if (iter != aaModelMap.end()) + mappedModel = iter->second; + + out << " aamodelpr=fixed(" << mappedModel << ")"; + + if (strcmp(mappedModel.c_str(), "gtr") == 0) { + // add rate matrix and state frequencies (mandatory for setting gtr values) + int numRateEntries = model->getNumRateEntries(); + auto* rateMat = new double[numRateEntries]; + model->getRateMatrix(rateMat); + + out << " aarevmatpr="; + + // if matrix is GTR20, use dirichlet, else use fixed + if (strcmp(modelName.c_str(), "GTR20") == 0) + out << "dirichlet("; + else + out << "fixed("; + + for (int i = 0; i < numRateEntries; ++i) { + if (i != 0) out << ", "; + out << rateMat[i]; + } + out << ")"; + + // Frequency type is never equal to FREQ_EQUAL, even with Poisson + // Frequency is also auto-set if we use a model defined by MrBayes + out << " statefreqpr=dirichlet("; + auto* freq = new double[model->num_states]; + model->getStateFrequency(freq); + for (int i = 0; i < model->num_states; ++i) { + if (i != 0) out << ", "; + out << freq[i]; + } + out << ")"; + } + + // if not to include the parameters (for Protein, simply +I, +G, +R) + if (!inclParams) { + out << ";"; + return; + } + + printMrBayesRHASPrior(originalTree, tree->getRate(), charset, out); + out << ";"; } /* Public Supplementary Functions (Used by PhyloSuperTree) */ @@ -6394,6 +6460,7 @@ void printMrBayesModelText(PhyloTree* originalTree, PhyloTree* tree, ofstream& o case SEQ_CODON: break; case SEQ_PROTEIN: + printMrBayesModelTextProtein(originalTree, tree, out, partition, charset, inclParams); break; case SEQ_BINARY: break; @@ -6408,26 +6475,28 @@ void printMrBayesModelText(PhyloTree* originalTree, PhyloTree* tree, ofstream& o /* Protected Overrided Function */ void PhyloTree::printMrBayesFreeRateReplacement(RateHeterogeneity *rate, string &charset, ofstream &out) { - bool hasInvar = rate->getPInvar() > 0.0; - string key = hasInvar ? "RateGammaInvar" : "RateGamma"; - double gamma_shape = 0.0; double p_invar = 0.0; - checkpoint->startStruct(key); + checkpoint->startStruct("RateGammaInvar"); + // Safety Check + if (!checkpoint->hasKey("gamma_shape") || !checkpoint->hasKey("p_invar")) { + outWarning("Could not find replacement for Freerate Distribution in MrBayes Block!"); + checkpoint->endStruct(); + return; + } + + outWarning("FreeRate Distributions (+R) are not available in MrBayes. It will be replaced by +G+I."); + + // Always replace +R with +G+I CKP_RESTORE(gamma_shape); - if (hasInvar) - CKP_RESTORE(p_invar); + CKP_RESTORE(p_invar); checkpoint->endStruct(); - // Sanity Check - if (gamma_shape > 0.0) - out << " shapepr=fixed(" << gamma_shape << ")"; - - if (p_invar > 0.0) - out << " pinvarpr=fixed(" << p_invar << ")"; + out << " shapepr=fixed(" << gamma_shape << ")"; + out << " pinvarpr=fixed(" << p_invar << ")"; } /* Main Function */ diff --git a/utils/tools.cpp b/utils/tools.cpp index db21f7917..5ee56e382 100644 --- a/utils/tools.cpp +++ b/utils/tools.cpp @@ -7910,3 +7910,23 @@ string getOutputNameWithExt(const InputType& format, const string& output_filepa return output_filepath + ".phy"; } } + +// Cached Map +unordered_map iqTreeToMrBayesAAModels; + +unordered_map getIqTreeToMrBayesAAModels() { + if (!iqTreeToMrBayesAAModels.empty()) return iqTreeToMrBayesAAModels; + + iqTreeToMrBayesAAModels["Poisson"] = "poisson"; + iqTreeToMrBayesAAModels["JTT"] = "jones"; + iqTreeToMrBayesAAModels["Dayhoff"] = "dayhoff"; + iqTreeToMrBayesAAModels["mtREV"] = "mtrev"; + iqTreeToMrBayesAAModels["mtMAM"] = "mtmam"; + iqTreeToMrBayesAAModels["WAG"] = "wag"; + iqTreeToMrBayesAAModels["rtREV"] = "rtrev"; + iqTreeToMrBayesAAModels["cpREV"] = "cprev"; + iqTreeToMrBayesAAModels["VT"] = "vt"; + iqTreeToMrBayesAAModels["Blosum62"] = "blosum"; + iqTreeToMrBayesAAModels["LG"] = "lg"; + return iqTreeToMrBayesAAModels; +} diff --git a/utils/tools.h b/utils/tools.h index 75e538027..ec49d1b13 100644 --- a/utils/tools.h +++ b/utils/tools.h @@ -3733,4 +3733,10 @@ double frob_norm (double m[], int n, double scale=1.0); */ string getOutputNameWithExt(const InputType& format, const string& output_filepath); +/** + * get a map of iqtree amino acid/protein substitution models to MrBayes amino acid/protein substitution models.
+ * models which are not supported by mrbayes are not included. GTR20 is assumed as default. + */ +unordered_map getIqTreeToMrBayesAAModels(); + #endif From 6aac6b844de7b2b2935e3912eae730a15bef7688 Mon Sep 17 00:00:00 2001 From: Integer Limit <103940576+IntegerLimit@users.noreply.github.com> Date: Fri, 5 Jul 2024 12:42:31 +1000 Subject: [PATCH 02/12] Morphological Models Support --- tree/phylosupertree.cpp | 4 +- tree/phylosupertree.h | 2 +- tree/phylotree.cpp | 91 ++++++++++++++++++++++++++++++++--------- tree/phylotree.h | 3 +- 4 files changed, 76 insertions(+), 24 deletions(-) diff --git a/tree/phylosupertree.cpp b/tree/phylosupertree.cpp index 9d0213051..8b9057d3a 100644 --- a/tree/phylosupertree.cpp +++ b/tree/phylosupertree.cpp @@ -1565,11 +1565,11 @@ void PhyloSuperTree::printBestPartitionParams(const char *filename) } } -void PhyloSuperTree::printMrBayesFreeRateReplacement(RateHeterogeneity *rate, string &charset, ofstream &out) { +void PhyloSuperTree::printMrBayesFreeRateReplacement(RateHeterogeneity *rate, string &charset, ofstream &out, bool inclInvariable) { // Call PhyloTree's function inside the correct checkpoint struct checkpoint->startStruct("PartitionModelPlen"); checkpoint->startStruct(charset); - PhyloTree::printMrBayesFreeRateReplacement(rate, charset, out); + PhyloTree::printMrBayesFreeRateReplacement(rate, charset, out, inclInvariable); checkpoint->endStruct(); checkpoint->endStruct(); } diff --git a/tree/phylosupertree.h b/tree/phylosupertree.h index f2304fcab..ccc430d21 100644 --- a/tree/phylosupertree.h +++ b/tree/phylosupertree.h @@ -488,7 +488,7 @@ class PhyloSuperTree : public IQTree, public vector * @param charset the (original) charset of the current partition. An empty string if not a partitioned tree * @param out the ofstream to print to */ - virtual void printMrBayesFreeRateReplacement(RateHeterogeneity* rate, string &charset, ofstream &out); + virtual void printMrBayesFreeRateReplacement(RateHeterogeneity* rate, string &charset, ofstream &out, bool inclInvariable = true); /** True when mixed codon with other data type */ bool rescale_codon_brlen; diff --git a/tree/phylotree.cpp b/tree/phylotree.cpp index 524c04581..a4f5d16a7 100644 --- a/tree/phylotree.cpp +++ b/tree/phylotree.cpp @@ -6249,28 +6249,34 @@ void PhyloTree::doNNI_simple(NNIMove &move) { /* Private Supplementary Logic Functions */ /** - * Find Rate Value (propinv for +I, gamma for +G/+R, invgamma for +I+G/+I+R)
- * Default is Equal (+E) + * Find Gamma Rate Categories & Rate Value (propinv for +I, gamma for +G/+R, invgamma for +I+G/+I+R)
+ * Default is Equal (+E)
*/ -string getMrBayesRHASName(RateHeterogeneity* rate) +void printMrBayesRHASInfo(RateHeterogeneity* rate, ofstream &out, bool inclInvariable = true) { + out << " rates="; + // Free Rate should be substituted by +G+I bool hasGamma = rate->getGammaShape() != 0.0 || rate->isFreeRate(); bool hasInvariable = rate->getPInvar() != 0.0 || rate->isFreeRate(); if (hasGamma) { - if (hasInvariable) - return "invgamma"; - return "gamma"; + if (hasInvariable && inclInvariable) + out << "invgamma"; + else + out << "gamma"; } - if (hasInvariable) - return "propinv"; - return "equal"; + if (hasInvariable && inclInvariable) + out << "propinv"; + else + out << "equal"; + + // Rate Categories + if (hasGamma) + out << " ngammacat=" << rate->getNRate(); } -void printMrBayesRHASPrior(PhyloTree* originalTree, RateHeterogeneity* rate, string &charset, ofstream &out) +void printMrBayesRHASPrior(PhyloTree* originalTree, RateHeterogeneity* rate, string &charset, ofstream &out, bool inclInvariable = true) { - bool hasI = rate->getPInvar() > 0.0; - // Freerate (+R) // Get replacement Gamma Shape + Invariable Sites if (rate->isFreeRate()) { @@ -6284,7 +6290,7 @@ void printMrBayesRHASPrior(PhyloTree* originalTree, RateHeterogeneity* rate, str // Invariable Sites (+I) // Dirichlet is not available here, use fixed - if (hasI) + if (rate->getPInvar() > 0.0 && inclInvariable) out << " pinvarpr=fixed(" << rate->getPInvar() << ")"; } @@ -6293,7 +6299,6 @@ void printMrBayesModelTextDNA(PhyloTree* originalTree, PhyloTree* tree, ofstream ModelSubst* model = tree->getModel(); bool equalFreq = model->freq_type == FREQ_EQUAL; short nst = 6; - string rateMod = getMrBayesRHASName(tree->getRate()); // Find NST value (1 for JC/JC69/F81, 2 for K80/K2P/HKY/HKY85, 6 for SYM/GTR) // NST is set to 6 by default (above), so check for JC/JC69/F81 and K80/K2P/HKY/HKY85 @@ -6316,7 +6321,10 @@ void printMrBayesModelTextDNA(PhyloTree* originalTree, PhyloTree* tree, ofstream } // NucModel = 4By4: DNA Nucleotides (4 Options, A, C, G, T) - out << " lset applyto=(" << partition << ") nucmodel=4by4 nst=" << nst << " rates=" << rateMod << ";" << endl; + out << " lset applyto=(" << partition << ") nucmodel=4by4 nst=" << nst; + + printMrBayesRHASInfo(tree->getRate(), out); + out << ";" << endl; // Priors (apart from Fixed Freq) if (!inclParams) { @@ -6367,7 +6375,9 @@ void printMrBayesModelTextDNA(PhyloTree* originalTree, PhyloTree* tree, ofstream void printMrBayesModelTextProtein(PhyloTree* originalTree, PhyloTree* tree, ofstream &out, string &partition, string &charset, bool inclParams) { // Lset Parameters - out << " lset applyto=(" << partition << ") nucmodel=protein rates=" << getMrBayesRHASName(tree->getRate()) << ";" << endl; + out << " lset applyto=(" << partition << ") nucmodel=protein"; + printMrBayesRHASInfo(tree->getRate(), out); + out << ";" << endl; out << " prset applyto=(" << partition << ")"; @@ -6426,7 +6436,45 @@ void printMrBayesModelTextProtein(PhyloTree* originalTree, PhyloTree* tree, ofst } printMrBayesRHASPrior(originalTree, tree->getRate(), charset, out); - out << ";"; + out << ";" << endl; +} + +void printMrBayesModelTextMorphological(PhyloTree* originalTree, PhyloTree* tree, ofstream &out, string &partition, string &charset, bool inclParams) +{ + outWarning("MrBayes only supports Morphological Data with states from {0-9}!"); + outWarning("Morphological Data with states {A-Z} may cause errors!"); + outWarning("Use the Morphological Model in MrBayes with Caution!"); + + out << "[MrBayes only supports Morphological Data with states from {0-9}!]" << endl + << "[Morphological Data with states {A-Z} may cause errors!]" << endl + << "[Use the Morphological Model in MrBayes with Caution!]" << endl; + + // MrBayes does not support Invariable Modifier for Morph data + if (tree->getRate()->isFreeRate() || tree->getRate()->getPInvar() > 0.0) { + outWarning("MrBayes does not support Invariable Sites with Morphological Data! +I has been ignored!"); + out << "[MrBayes does not support Invariable Sites with Morphological Data! +I has been ignored!]" << endl; + } + // MrBayes does not support State Frequency for Morph data + if (tree->getModel()->getFreqType() != FREQ_EQUAL) { + outWarning("MrBayes does not support non-equal frequencies for Morphological Data! Frequencies are left as the default! (All equal)"); + out << "[MrBayes does not support non-equal frequencies for Morphological Data! Frequencies are left as the default! (All equal)]" << endl; + } + + // Lset Parameters + out << " lset applyto=(" << partition << ")"; + printMrBayesRHASInfo(tree->getRate(), out, false); + out << ";" << endl; + + // ctype (ordered or not) + if (strcmp(tree->getModel()->name.c_str(), "ORDERED") == 0) + out << " ctype ordered;" << endl; + + if (!inclParams) return; + + // Prset Parameters + out << " prset applyto=(" << partition << ")"; + printMrBayesRHASPrior(originalTree, tree->getRate(), charset, out, false); + out << ";" << endl; } /* Public Supplementary Functions (Used by PhyloSuperTree) */ @@ -6465,6 +6513,7 @@ void printMrBayesModelText(PhyloTree* originalTree, PhyloTree* tree, ofstream& o case SEQ_BINARY: break; case SEQ_MORPH: + printMrBayesModelTextMorphological(originalTree, tree, out, partition, charset, inclParams); break; default: outWarning("MrBayes Block output not supported for sequence type " + getSeqTypeName(tree->aln->seq_type)); @@ -6474,7 +6523,7 @@ void printMrBayesModelText(PhyloTree* originalTree, PhyloTree* tree, ofstream& o } /* Protected Overrided Function */ -void PhyloTree::printMrBayesFreeRateReplacement(RateHeterogeneity *rate, string &charset, ofstream &out) { +void PhyloTree::printMrBayesFreeRateReplacement(RateHeterogeneity *rate, string &charset, ofstream &out, bool inclInvariable) { double gamma_shape = 0.0; double p_invar = 0.0; @@ -6491,12 +6540,14 @@ void PhyloTree::printMrBayesFreeRateReplacement(RateHeterogeneity *rate, string // Always replace +R with +G+I CKP_RESTORE(gamma_shape); - CKP_RESTORE(p_invar); + if (inclInvariable) + CKP_RESTORE(p_invar); checkpoint->endStruct(); out << " shapepr=fixed(" << gamma_shape << ")"; - out << " pinvarpr=fixed(" << p_invar << ")"; + if (inclInvariable) + out << " pinvarpr=fixed(" << p_invar << ")"; } /* Main Function */ diff --git a/tree/phylotree.h b/tree/phylotree.h index f1f2029e0..666dcaf48 100644 --- a/tree/phylotree.h +++ b/tree/phylotree.h @@ -2276,8 +2276,9 @@ class PhyloTree : public MTree, public Optimization, public CheckpointFactory { * @param rate the heterogeneity rate * @param charset the (original) charset of the current partition. An empty string if not a partitioned tree * @param out the ofstream to print to + * @param inclInvariable whether to include invariable sites. defaults to true */ - virtual void printMrBayesFreeRateReplacement(RateHeterogeneity* rate, string &charset, ofstream &out); + virtual void printMrBayesFreeRateReplacement(RateHeterogeneity* rate, string &charset, ofstream &out, bool inclInvariable = true); protected: From 20554e6cfc206907d26be2be2bbdd9a4f9ece21c Mon Sep 17 00:00:00 2001 From: Integer Limit <103940576+IntegerLimit@users.noreply.github.com> Date: Fri, 5 Jul 2024 15:57:21 +1000 Subject: [PATCH 03/12] Cleanup Move Model Specific Functions to each model class Move other functions from phylotree and phylosupertree to phyloanalysis --- main/phyloanalysis.cpp | 84 +++++++++- model/modeldna.cpp | 104 ++++++++++++- model/modeldna.h | 12 +- model/modelmorphology.cpp | 51 ++++++ model/modelmorphology.h | 12 ++ model/modelprotein.cpp | 95 ++++++++++++ model/modelprotein.h | 12 ++ model/modelsubst.cpp | 40 +++++ model/modelsubst.h | 22 ++- tree/phylosupertree.cpp | 75 ++------- tree/phylosupertree.h | 15 -- tree/phylotree.cpp | 317 +------------------------------------- tree/phylotree.h | 34 ---- utils/tools.cpp | 5 + utils/tools.h | 5 + 15 files changed, 447 insertions(+), 436 deletions(-) diff --git a/main/phyloanalysis.cpp b/main/phyloanalysis.cpp index bfa98f922..3d00ca14c 100644 --- a/main/phyloanalysis.cpp +++ b/main/phyloanalysis.cpp @@ -2602,6 +2602,86 @@ void printTrees(vector trees, Params ¶ms, string suffix) { treesOut.close(); } +void printMrBayesBlockFile(const char* filename, IQTree* &iqtree, bool inclParams) { + ofstream out; + try { + out.exceptions(ios::failbit | ios::badbit); + out.open(filename); + + // Write Warning + out << "#nexus" << endl << endl + <<"[This MrBayes Block Declaration provides the retrieved values from the IQTree Run.]" << endl + << "[Note that MrBayes does not support a large collection of models, so defaults of 'nst=6' for DNA and 'wag' for Protein will be used if a model that does not exist in MrBayes is selected.]" << endl + << "[Furthermore, the Model Parameter '+R' will be replaced by '+G'.]" << endl + << "[This should be used as a Template Only.]" << endl << endl; + + // Begin File, Print Charsets + out << "begin mrbayes;" << endl; + } catch (ios::failure &) { + outError(ERR_WRITE_OUTPUT, filename); + } + + if (!iqtree->isSuperTree()) { + // Set Outgroup (if available) + if (!iqtree->rooted) out << " outgroup " << iqtree->root->name << ";" << endl << endl; + + out << endl; + iqtree->getModel()->printMrBayesModelText(iqtree->getRate(), out, "all", "", false, inclParams); + out << endl; + + out << "end;" << endl; + out.close(); + return; + } + + auto superTree = (PhyloSuperTree*) iqtree; + auto saln = (SuperAlignment*) superTree->aln; + auto size = superTree->size(); + + for (int part = 0; part < size; part++) { + string name = saln->partitions[part]->name; + replace(name.begin(), name.end(), '+', '_'); + out << " charset " << name << " = "; + if (!saln->partitions[part]->aln_file.empty()) out << saln->partitions[part]->aln_file << ": "; + /*if (saln->partitions[part]->seq_type == SEQ_CODON) + out << "CODON, ";*/ + if (!saln->partitions[part]->sequence_type.empty()) + out << saln->partitions[part]->sequence_type << ", "; + string pos = saln->partitions[part]->position_spec; + replace(pos.begin(), pos.end(), ',' , ' '); + out << pos << ";" << endl; + } + + // Create Partition + out << endl << " partition iqtree = " << size << ": "; + for (int part = 0; part < size; part++) { + string name = saln->partitions[part]->name; + replace(name.begin(), name.end(), '+', '_'); + out << name; + if (part != size - 1) out << ", "; + else out << ";" << endl; + } + + // Set Partition for Use + out << " set partition = iqtree;" << endl; + + // Set Outgroup (if available) + if (!superTree->rooted) out << " outgroup " << superTree->root->name << ";" << endl << endl; + + // Partition-Specific Model Information + for (int part = 0; part < size; part++) { + PhyloTree* currentTree = superTree->at(part); + + // MrBayes Partitions are 1-indexed + out << endl; + currentTree->getModel()->printMrBayesModelText(currentTree->getRate(), out, + convertIntToString(part + 1), saln->partitions[part]->name, true, inclParams); + out << endl; + } + out << "end;" << endl; + out.close(); +} + /************************************************************ * MAIN TREE RECONSTRUCTION ***********************************************************/ @@ -3112,8 +3192,8 @@ void runTreeReconstruction(Params ¶ms, IQTree* &iqtree) { } if (params.mr_bayes_output) { cout << endl << "Writing MrBayes Block Files..." << endl; - iqtree->printMrBayesBlock((string(params.out_prefix) + ".mr_bayes_scheme.nex").c_str(), false); - iqtree->printMrBayesBlock((string(params.out_prefix) + ".mr_bayes_model.nex").c_str(), true); + printMrBayesBlockFile((string(params.out_prefix) + ".mr_bayes_scheme.nex").c_str(), iqtree, false); + printMrBayesBlockFile((string(params.out_prefix) + ".mr_bayes_model.nex").c_str(), iqtree, true); cout << endl; } diff --git a/model/modeldna.cpp b/model/modeldna.cpp index f2cbc0be0..c798eb577 100644 --- a/model/modeldna.cpp +++ b/model/modeldna.cpp @@ -565,6 +565,106 @@ void ModelDNA::setVariables(double *variables) { // } } -string ModelDNA::getModelDNACode() { - return param_spec; +void ModelDNA::printMrBayesModelText(RateHeterogeneity* rate, ofstream& out, string partition, string charset, bool isSuperTree, bool inclParams) { + bool equalFreq = freq_type == FREQ_EQUAL; + short nst = 6; + + // Find NST value (1 for JC/JC69/F81, 2 for K80/K2P/HKY/HKY85, 6 for SYM/GTR) + // NST is set to 6 by default (above), so check for JC/JC69/F81 and K80/K2P/HKY/HKY85 + // If it returns a valid dna code, we can extract information from there (needed for user-defined models) + if (!param_spec.empty()) { + // if all equal + if (param_spec.find_first_not_of(param_spec[0]) == string::npos) { + nst = 1; + // if A-G=C-T and everything else is equal to first part of dna code + } else if (param_spec[1] == param_spec[4] && + param_spec[0] == param_spec[2] && param_spec[0] == param_spec[3] && param_spec[0] == param_spec[5]) { + nst = 2; + } + } else if (!name.empty()) { + // Check the name of the model + if (strcmp(name.c_str(), "JC") == 0 || strcmp(name.c_str(), "JC69") == 0 || strcmp(name.c_str(), "F81") == 0) { + nst = 1; + } else if (strcmp(name.c_str(), "K80") == 0 || strcmp(name.c_str(), "K2P") == 0 || strcmp(name.c_str(), "HKY") == 0 || strcmp(name.c_str(), "HKY85") == 0) { + nst = 2; + } + } + + // NucModel = 4By4: DNA Nucleotides (4 Options, A, C, G, T) + out << " lset applyto=(" << partition << ") nucmodel=4by4 nst=" << nst << " rates="; + + // RHAS Specification + // Free Rate should be substituted by +G+I + bool hasGamma = rate->getGammaShape() != 0.0 || rate->isFreeRate(); + bool hasInvariable = rate->getPInvar() != 0.0 || rate->isFreeRate(); + if (hasGamma) { + if (hasInvariable) + out << "invgamma"; + else + out << "gamma"; + } + if (hasInvariable) + out << "propinv"; + else + out << "equal"; + + // Rate Categories + if (hasGamma) + out << " ngammacat=" << rate->getNRate(); + + out << ";" << endl; + + // Priors (apart from Fixed Freq) + if (!inclParams) { + // If not include other params, simply set fixed frequency and return + if (!equalFreq) return; + + out << " prset applyto=(" << partition << ") statefreqpr=fixed(equal);" << endl; + return; + } + + out << " prset applyto=(" << partition << ")"; + + // Freerate (+R) + // Get replacement Gamma Shape + Invariable Sites + if (rate->isFreeRate()) { + printMrBayesFreeRateReplacement(isSuperTree, charset, out); + } + + // Gamma Distribution (+G/+R) + // Dirichlet is not available here, use fixed + if (rate->getGammaShape() > 0.0) + out << " shapepr=fixed(" << rate->getGammaShape() << ")"; + + // Invariable Sites (+I) + // Dirichlet is not available here, use fixed + if (rate->getPInvar() > 0.0) + out << " pinvarpr=fixed(" << rate->getPInvar() << ")"; + + // Frequency of Nucleotides (+F) + if (equalFreq) + out << " statefreqpr=fixed(equal)"; + else { + // use getStateFrequency instead of state_freq to ensure they sum to 1 + auto* freq = new double[num_states]; + getStateFrequency(freq); + out << " statefreqpr=dirichlet("; + for (int i = 0; i < num_states; ++i) { + if (i != 0) out << ", "; + out << freq[i]; + } + out << ")"; + + delete[] freq; + } + + // Reversible Rate Matrix + out << " revmatpr=dirichlet("; + for (int i = 0; i < getNumRateEntries(); ++i) { + if (i != 0) out << ", "; + out << rates[i]; + } + + // Close revmatpr + prset + out << ");" << endl; } diff --git a/model/modeldna.h b/model/modeldna.h index 3f9bd7a61..0c252c178 100644 --- a/model/modeldna.h +++ b/model/modeldna.h @@ -115,10 +115,16 @@ class ModelDNA : public ModelMarkov virtual void computeTipLikelihood(PML::StateType state, double *state_lk); /** - * Get the Model DNA 'code', in form 'abcdef', used with ModelDNA model - * Returns empty string by default (this is not a dna specific model) + * Print the model information in a format that can be accepted by MrBayes, using lset and prset.
+ * By default, it simply prints a warning to the log and to the stream, stating that this model is not supported by MrBayes. + * @param rate the rate information + * @param out the ofstream to print the result to + * @param partition the partition to apply lset and prset to + * @param charset the current partition's charset. Useful for getting information from the checkpoint file + * @param isSuperTree whether the tree is a super tree. Useful for retrieving information from the checkpoint file, which has different locations for PhyloTree and PhyloSuperTree + * @param inclParams whether to include IQTree optimized parameters for the model */ - virtual string getModelDNACode(); + virtual void printMrBayesModelText(RateHeterogeneity* rate, ofstream& out, string partition, string charset, bool isSuperTree, bool inclParams); protected: diff --git a/model/modelmorphology.cpp b/model/modelmorphology.cpp index a4df995be..d16c84bb4 100644 --- a/model/modelmorphology.cpp +++ b/model/modelmorphology.cpp @@ -150,3 +150,54 @@ void ModelMorphology::writeInfo(ostream &out) { out << endl; } } + +void ModelMorphology::printMrBayesModelText(RateHeterogeneity* rate, ofstream& out, string partition, string charset, bool isSuperTree, bool inclParams) { + warnLogStream("MrBayes only supports Morphological Data with states from {0-9}!", out); + warnLogStream("Morphological Data with states {A-Z} may cause errors!", out); + warnLogStream("Use the Morphological Model in MrBayes with Caution!", out); + + // MrBayes does not support Invariable Modifier for Morph data + if (rate->isFreeRate() || rate->getPInvar() > 0.0) { + warnLogStream("MrBayes does not support Invariable Sites with Morphological Data! +I has been ignored!", out); + } + // MrBayes does not support State Frequency for Morph data + if (freq_type != FREQ_EQUAL) { + warnLogStream("MrBayes does not support non-equal frequencies for Morphological Data! Frequencies are left as the default! (All equal)", out); + } + + // Lset Parameters + out << " lset applyto=(" << partition << ") rates="; + + // Free Rate should be substituted by +G (+I not supported) + bool hasGamma = rate->getGammaShape() != 0.0 || rate->isFreeRate(); + if (hasGamma) { + // Rate Categories + Gamma + out << "gamma ngammacat=" << rate->getNRate(); + } + else + out << "equal"; + + out << ";" << endl; + + // ctype (ordered or not) + if (strcmp(name.c_str(), "ORDERED") == 0) + out << " ctype ordered;" << endl; + + if (!inclParams) return; + + // Prset Parameters + out << " prset applyto=(" << partition << ")"; + + // Freerate (+R) + // Get replacement Gamma Shape + if (rate->isFreeRate()) { + printMrBayesFreeRateReplacement(rate, charset, out, false); + } + + // Gamma Distribution (+G/+R) + // Dirichlet is not available here, use fixed + if (rate->getGammaShape() > 0.0) + out << " shapepr=fixed(" << rate->getGammaShape() << ")"; + + out << ";" << endl; +} diff --git a/model/modelmorphology.h b/model/modelmorphology.h index e9d271dff..f598103bd 100644 --- a/model/modelmorphology.h +++ b/model/modelmorphology.h @@ -78,6 +78,18 @@ class ModelMorphology: public ModelMarkov { virtual void readRates(istream &in) noexcept(false); virtual ~ModelMorphology(); + + /** + * Print the model information in a format that can be accepted by MrBayes, using lset and prset.
+ * By default, it simply prints a warning to the log and to the stream, stating that this model is not supported by MrBayes. + * @param rate the rate information + * @param out the ofstream to print the result to + * @param partition the partition to apply lset and prset to + * @param charset the current partition's charset. Useful for getting information from the checkpoint file + * @param isSuperTree whether the tree is a super tree. Useful for retrieving information from the checkpoint file, which has different locations for PhyloTree and PhyloSuperTree + * @param inclParams whether to include IQTree optimized parameters for the model + */ + virtual void printMrBayesModelText(RateHeterogeneity* rate, ofstream& out, string partition, string charset, bool isSuperTree, bool inclParams); }; #endif /* MODELMORPHOLOGY_H_ */ diff --git a/model/modelprotein.cpp b/model/modelprotein.cpp index 4e5d7196e..854e901d7 100644 --- a/model/modelprotein.cpp +++ b/model/modelprotein.cpp @@ -1162,3 +1162,98 @@ void ModelProtein::computeTipLikelihood(PML::StateType state, double *state_lk) state_lk[ambi_aa[cstate*2+1]] = 1.0; } +void ModelProtein::printMrBayesModelText(RateHeterogeneity* rate, ofstream& out, string partition, string charset, bool isSuperTree, bool inclParams) { + // Lset Parameters + out << " lset applyto=(" << partition << ") nucmodel=protein rates="; + + // RHAS Specification + // Free Rate should be substituted by +G+I + bool hasGamma = rate->getGammaShape() != 0.0 || rate->isFreeRate(); + bool hasInvariable = rate->getPInvar() != 0.0 || rate->isFreeRate(); + if (hasGamma) { + if (hasInvariable) + out << "invgamma"; + else + out << "gamma"; + } + if (hasInvariable) + out << "propinv"; + else + out << "equal"; + + // Rate Categories + if (hasGamma) + out << " ngammacat=" << rate->getNRate(); + + out << ";" << endl; + + out << " prset applyto=(" << partition << ")"; + + // Get MrBayes Model + auto aaModelMap = getIqTreeToMrBayesAAModels(); + auto iter = aaModelMap.find(name); + string mappedModel = "gtr"; + + // If model is in map, set mappedModel to the value + if (iter != aaModelMap.end()) + mappedModel = iter->second; + + out << " aamodelpr=fixed(" << mappedModel << ")"; + + // GTR Customization + if (strcmp(mappedModel.c_str(), "gtr") == 0) { + // add rate matrix and state frequencies (mandatory for setting gtr values) + out << " aarevmatpr="; + + // if matrix is GTR20, use dirichlet, else use fixed + if (strcmp(name.c_str(), "GTR20") == 0) + out << "dirichlet("; + else + out << "fixed("; + + for (int i = 0; i < getNumRateEntries(); ++i) { + if (i != 0) out << ", "; + out << rates[i]; + } + out << ")"; + + // Frequency type is never equal to FREQ_EQUAL, even with Poisson + // Frequency is also auto-set if we use a model defined by MrBayes + // use getStateFrequency instead of state_freq to ensure they sum to 1 + out << " statefreqpr=dirichlet("; + auto* freq = new double[num_states]; + getStateFrequency(freq); + for (int i = 0; i < num_states; ++i) { + if (i != 0) out << ", "; + out << freq[i]; + } + out << ")"; + + delete[] freq; + } + + // if not to include the parameters (for Protein, simply +I, +G, +R) + if (!inclParams) { + out << ";"; + return; + } + + // Freerate (+R) + // Get replacement Gamma Shape + Invariable Sites + if (rate->isFreeRate()) { + printMrBayesFreeRateReplacement(isSuperTree, charset, out); + } + + // Gamma Distribution (+G/+R) + // Dirichlet is not available here, use fixed + if (rate->getGammaShape() > 0.0) + out << " shapepr=fixed(" << rate->getGammaShape() << ")"; + + // Invariable Sites (+I) + // Dirichlet is not available here, use fixed + if (rate->getPInvar() > 0.0) + out << " pinvarpr=fixed(" << rate->getPInvar() << ")"; + + out << ";" << endl; +} + diff --git a/model/modelprotein.h b/model/modelprotein.h index e6680eb4a..6dd1f69e6 100644 --- a/model/modelprotein.h +++ b/model/modelprotein.h @@ -79,6 +79,18 @@ class ModelProtein : public ModelMarkov */ virtual void computeTipLikelihood(PML::StateType state, double *state_lk); + /** + * Print the model information in a format that can be accepted by MrBayes, using lset and prset.
+ * By default, it simply prints a warning to the log and to the stream, stating that this model is not supported by MrBayes. + * @param rate the rate information + * @param out the ofstream to print the result to + * @param partition the partition to apply lset and prset to + * @param charset the current partition's charset. Useful for getting information from the checkpoint file + * @param isSuperTree whether the tree is a super tree. Useful for retrieving information from the checkpoint file, which has different locations for PhyloTree and PhyloSuperTree + * @param inclParams whether to include IQTree optimized parameters for the model + */ + virtual void printMrBayesModelText(RateHeterogeneity* rate, ofstream& out, string partition, string charset, bool isSuperTree, bool inclParams); + private: ModelsBlock *models_block; diff --git a/model/modelsubst.cpp b/model/modelsubst.cpp index 77363edf5..4b52ebdbd 100644 --- a/model/modelsubst.cpp +++ b/model/modelsubst.cpp @@ -210,6 +210,46 @@ double *ModelSubst::newTransMatrix() { return new double[num_states * num_states]; } +void ModelSubst::printMrBayesFreeRateReplacement(bool isSuperTree, string &charset, ofstream &out, bool inclInvariable) +{ + double gamma_shape = 0.0; + double p_invar = 0.0; + + if (isSuperTree) { + checkpoint->startStruct("PartitionModelPlen"); + checkpoint->startStruct(charset); + } + checkpoint->startStruct("RateGammaInvar"); + + // Safety Check + if (!checkpoint->hasKey("gamma_shape") || !checkpoint->hasKey("p_invar")) { + outWarning("Could not find replacement for Freerate Distribution in MrBayes Block!"); + checkpoint->endStruct(); + return; + } + + outWarning("FreeRate Distributions (+R) are not available in MrBayes. It will be replaced by +G+I."); + + // Always replace +R with +G+I + CKP_RESTORE(gamma_shape); + if (inclInvariable) + CKP_RESTORE(p_invar); + + checkpoint->endStruct(); + if (isSuperTree) { + checkpoint->endStruct(); + checkpoint->endStruct(); + } + + out << " shapepr=fixed(" << gamma_shape << ")"; + if (inclInvariable) + out << " pinvarpr=fixed(" << p_invar << ")"; +} + +void ModelSubst::printMrBayesModelText(RateHeterogeneity* rate, ofstream& out, string partition, string charset, bool isSuperTree, bool inclParams) { + warnLogStream("MrBayes Block Output is not supported by this model of name " + full_name + "!", out); +} + ModelSubst::~ModelSubst() { // mem space pointing to target model and thus avoid double free here diff --git a/model/modelsubst.h b/model/modelsubst.h index 07e7444cf..370737418 100644 --- a/model/modelsubst.h +++ b/model/modelsubst.h @@ -17,6 +17,7 @@ #include "utils/optimization.h" #include "utils/checkpoint.h" #include "phylo-yaml/statespace.h" +#include "model/rateheterogeneity.h" using namespace std; @@ -393,10 +394,25 @@ class ModelSubst: public Optimization, public CheckpointFactory virtual ModelSubst *getMutationModel() { return this; } /** - * Get the Model DNA 'code', in form 'abcdef', used with ModelDNA model - * Returns empty string by default (this is not a dna specific model) + * Prints the replacement prior settings, in MrBayes, for +R or +R+I. + * @param isSuperTree whether the tree is a super tree + * @param charset the (original) charset of the current partition. An empty string if not a partitioned tree + * @param out the ofstream to print to + * @param inclInvariable whether to include invariable sites. defaults to true */ - virtual string getModelDNACode() { return ""; } + void printMrBayesFreeRateReplacement(bool isSuperTree, string &charset, ofstream &out, bool inclInvariable = true); + + /** + * Print the model information in a format that can be accepted by MrBayes, using lset and prset.
+ * By default, it simply prints a warning to the log and to the stream, stating that this model is not supported by MrBayes. + * @param rate the rate information + * @param out the ofstream to print the result to + * @param partition the partition to apply lset and prset to + * @param charset the current partition's charset. Useful for getting information from the checkpoint file + * @param isSuperTree whether the tree is a super tree. Useful for retrieving information from the checkpoint file, which has different locations for PhyloTree and PhyloSuperTree + * @param inclParams whether to include IQTree optimized parameters for the model + */ + virtual void printMrBayesModelText(RateHeterogeneity* rate, ofstream& out, string partition, string charset, bool isSuperTree, bool inclParams); /***************************************************** Checkpointing facility diff --git a/tree/phylosupertree.cpp b/tree/phylosupertree.cpp index 8b9057d3a..55df485f5 100644 --- a/tree/phylosupertree.cpp +++ b/tree/phylosupertree.cpp @@ -1520,23 +1520,6 @@ void PhyloSuperTree::writeBranches(ostream &out) { } } -void printCharsets(SuperAlignment* saln, size_t size, ofstream &out) -{ - for (int part = 0; part < size; part++) { - string name = saln->partitions[part]->name; - replace(name.begin(), name.end(), '+', '_'); - out << " charset " << name << " = "; - if (!saln->partitions[part]->aln_file.empty()) out << saln->partitions[part]->aln_file << ": "; - /*if (saln->partitions[part]->seq_type == SEQ_CODON) - out << "CODON, ";*/ - if (!saln->partitions[part]->sequence_type.empty()) - out << saln->partitions[part]->sequence_type << ", "; - string pos = saln->partitions[part]->position_spec; - replace(pos.begin(), pos.end(), ',' , ' '); - out << pos << ";" << endl; - } -} - void PhyloSuperTree::printBestPartitionParams(const char *filename) { try { @@ -1547,7 +1530,19 @@ void PhyloSuperTree::printBestPartitionParams(const char *filename) << "begin sets;" << endl; auto *saln = (SuperAlignment*)aln; - printCharsets(saln, size(), out); + for (int part = 0; part < size(); part++) { + string name = saln->partitions[part]->name; + replace(name.begin(), name.end(), '+', '_'); + out << " charset " << name << " = "; + if (!saln->partitions[part]->aln_file.empty()) out << saln->partitions[part]->aln_file << ": "; + /*if (saln->partitions[part]->seq_type == SEQ_CODON) + out << "CODON, ";*/ + if (!saln->partitions[part]->sequence_type.empty()) + out << saln->partitions[part]->sequence_type << ", "; + string pos = saln->partitions[part]->position_spec; + replace(pos.begin(), pos.end(), ',' , ' '); + out << pos << ";" << endl; + } out << " charpartition mymodels =" << endl; for (int part = 0; part < size(); part++) { @@ -1563,46 +1558,4 @@ void PhyloSuperTree::printBestPartitionParams(const char *filename) } catch (ios::failure &) { outError(ERR_WRITE_OUTPUT, filename); } -} - -void PhyloSuperTree::printMrBayesFreeRateReplacement(RateHeterogeneity *rate, string &charset, ofstream &out, bool inclInvariable) { - // Call PhyloTree's function inside the correct checkpoint struct - checkpoint->startStruct("PartitionModelPlen"); - checkpoint->startStruct(charset); - PhyloTree::printMrBayesFreeRateReplacement(rate, charset, out, inclInvariable); - checkpoint->endStruct(); - checkpoint->endStruct(); -} - -void PhyloSuperTree::printMrBayesBlock(const char *filename, bool inclParams) -{ - ofstream out = startMrBayesBlock(filename); - auto *saln = (SuperAlignment*)aln; - printCharsets(saln, size(), out); - out << endl; - - // Create Partition - size_type listSize = size(); - out << " partition iqtree = " << listSize << ": "; - for (int part = 0; part < listSize; part++) { - string name = saln->partitions[part]->name; - replace(name.begin(), name.end(), '+', '_'); - out << name; - if (part != listSize - 1) out << ", "; - else out << ";" << endl; - } - - // Set Partition for Use - out << " set partition = iqtree;" << endl; - - // Set Outgroup (if available) - if (!rooted) out << " outgroup " << root->name << ";" << endl << endl; - - // Partition-Specific Model Information - for (int part = 0; part < listSize; part++) { - // MrBayes Partitions are 1-indexed - printMrBayesModelText(this, at(part), out, convertIntToString(part + 1), saln->partitions[part]->name, inclParams); - } - out << "end;" << endl; - out.close(); -} +} \ No newline at end of file diff --git a/tree/phylosupertree.h b/tree/phylosupertree.h index ccc430d21..7f56f5e64 100644 --- a/tree/phylosupertree.h +++ b/tree/phylosupertree.h @@ -475,21 +475,6 @@ class PhyloSuperTree : public IQTree, public vector */ void printBestPartitionParams(const char *filename); - /** - print mr bayes block with partition information and best model parameters - @param filename output file name - @param inclParams whether to include iqtree params - */ - virtual void printMrBayesBlock(const char *filename, bool inclParams); - - /** - * Prints the replacement prior settings for +R or +R+I. - * @param rate the heterogeneity rate - * @param charset the (original) charset of the current partition. An empty string if not a partitioned tree - * @param out the ofstream to print to - */ - virtual void printMrBayesFreeRateReplacement(RateHeterogeneity* rate, string &charset, ofstream &out, bool inclInvariable = true); - /** True when mixed codon with other data type */ bool rescale_codon_brlen; diff --git a/tree/phylotree.cpp b/tree/phylotree.cpp index a4f5d16a7..64169c319 100644 --- a/tree/phylotree.cpp +++ b/tree/phylotree.cpp @@ -6244,319 +6244,4 @@ void PhyloTree::doNNI_simple(NNIMove &move) { FOR_NEIGHBOR_IT(nei21->node, node2, it) *(nei21->split) += *((*it)->split); } -} - -/* Private Supplementary Logic Functions */ - -/** - * Find Gamma Rate Categories & Rate Value (propinv for +I, gamma for +G/+R, invgamma for +I+G/+I+R)
- * Default is Equal (+E)
- */ -void printMrBayesRHASInfo(RateHeterogeneity* rate, ofstream &out, bool inclInvariable = true) -{ - out << " rates="; - - // Free Rate should be substituted by +G+I - bool hasGamma = rate->getGammaShape() != 0.0 || rate->isFreeRate(); - bool hasInvariable = rate->getPInvar() != 0.0 || rate->isFreeRate(); - if (hasGamma) { - if (hasInvariable && inclInvariable) - out << "invgamma"; - else - out << "gamma"; - } - if (hasInvariable && inclInvariable) - out << "propinv"; - else - out << "equal"; - - // Rate Categories - if (hasGamma) - out << " ngammacat=" << rate->getNRate(); -} - -void printMrBayesRHASPrior(PhyloTree* originalTree, RateHeterogeneity* rate, string &charset, ofstream &out, bool inclInvariable = true) -{ - // Freerate (+R) - // Get replacement Gamma Shape + Invariable Sites - if (rate->isFreeRate()) { - originalTree->printMrBayesFreeRateReplacement(rate, charset, out); - } - - // Gamma Distribution (+G/+R) - // Dirichlet is not available here, use fixed - if (rate->getGammaShape() > 0.0) - out << " shapepr=fixed(" << rate->getGammaShape() << ")"; - - // Invariable Sites (+I) - // Dirichlet is not available here, use fixed - if (rate->getPInvar() > 0.0 && inclInvariable) - out << " pinvarpr=fixed(" << rate->getPInvar() << ")"; -} - -void printMrBayesModelTextDNA(PhyloTree* originalTree, PhyloTree* tree, ofstream &out, string &partition, string &charset, bool inclParams) -{ - ModelSubst* model = tree->getModel(); - bool equalFreq = model->freq_type == FREQ_EQUAL; - short nst = 6; - - // Find NST value (1 for JC/JC69/F81, 2 for K80/K2P/HKY/HKY85, 6 for SYM/GTR) - // NST is set to 6 by default (above), so check for JC/JC69/F81 and K80/K2P/HKY/HKY85 - // If it returns a valid dna code, we can extract information from there (needed for user-defined models) - string dnaCode = model->getModelDNACode(); - string modelStr = model->getName(); - if (!dnaCode.empty()) { - // if all equal - if (dnaCode.find_first_not_of(dnaCode[0]) == string::npos) - nst = 1; - // if A-G=C-T and everything else is equal to first part of dna code - else if (dnaCode[1] == dnaCode[4] && dnaCode[0] == dnaCode[2] && dnaCode[0] == dnaCode[3] && dnaCode[0] == dnaCode[5]) - nst = 2; - } else if (!modelStr.empty()) { - // Check the name of the model - if (strcmp(modelStr.c_str(), "JC") == 0 || strcmp(modelStr.c_str(), "JC69") == 0 || strcmp(modelStr.c_str(), "F81") == 0) - nst = 1; - else if (strcmp(modelStr.c_str(), "K80") == 0 || strcmp(modelStr.c_str(), "K2P") == 0 || strcmp(modelStr.c_str(), "HKY") == 0 || strcmp(modelStr.c_str(), "HKY85") == 0) - nst = 2; - } - - // NucModel = 4By4: DNA Nucleotides (4 Options, A, C, G, T) - out << " lset applyto=(" << partition << ") nucmodel=4by4 nst=" << nst; - - printMrBayesRHASInfo(tree->getRate(), out); - out << ";" << endl; - - // Priors (apart from Fixed Freq) - if (!inclParams) { - // If not include other params, simply set fixed frequency and return - if (!equalFreq) return; - - out << " prset applyto=(" << partition << ") statefreqpr=fixed(equal);" << endl; - return; - } - - out << " prset applyto=(" << partition << ")"; - - printMrBayesRHASPrior(originalTree, tree->getRate(), charset, out); - - // Frequency of Nucleotides (+F) - if (equalFreq) - out << " statefreqpr=fixed(equal)"; - else { - auto* freq = new double[model->num_states]; - model->getStateFrequency(freq); - out << " statefreqpr=dirichlet("; - for (int i = 0; i < model->num_states; ++i) { - if (i != 0) out << ", "; - out << freq[i]; - } - out << ")"; - - delete[] freq; - } - - // Reversible Rate Matrix - int numRateEntries = model->getNumRateEntries(); - auto* rateMat = new double[numRateEntries]; - model->getRateMatrix(rateMat); - - out << " revmatpr=dirichlet("; - for (int i = 0; i < numRateEntries; ++i) { - if (i != 0) out << ", "; - out << rateMat[i]; - } - - // Close revmatpr + prset - out << ");" << endl; - - delete[] rateMat; -}; - -void printMrBayesModelTextProtein(PhyloTree* originalTree, PhyloTree* tree, ofstream &out, string &partition, string &charset, bool inclParams) -{ - // Lset Parameters - out << " lset applyto=(" << partition << ") nucmodel=protein"; - printMrBayesRHASInfo(tree->getRate(), out); - out << ";" << endl; - - out << " prset applyto=(" << partition << ")"; - - // Model - ModelSubst* model = tree->getModel(); - - // use this instead of getName, getName returns +F, +FU, etc. - string modelName = model->name; - - auto aaModelMap = getIqTreeToMrBayesAAModels(); - auto iter = aaModelMap.find(modelName); - string mappedModel = "gtr"; - - // If model is in map, set mappedModel to the value - if (iter != aaModelMap.end()) - mappedModel = iter->second; - - out << " aamodelpr=fixed(" << mappedModel << ")"; - - if (strcmp(mappedModel.c_str(), "gtr") == 0) { - // add rate matrix and state frequencies (mandatory for setting gtr values) - int numRateEntries = model->getNumRateEntries(); - auto* rateMat = new double[numRateEntries]; - model->getRateMatrix(rateMat); - - out << " aarevmatpr="; - - // if matrix is GTR20, use dirichlet, else use fixed - if (strcmp(modelName.c_str(), "GTR20") == 0) - out << "dirichlet("; - else - out << "fixed("; - - for (int i = 0; i < numRateEntries; ++i) { - if (i != 0) out << ", "; - out << rateMat[i]; - } - out << ")"; - - // Frequency type is never equal to FREQ_EQUAL, even with Poisson - // Frequency is also auto-set if we use a model defined by MrBayes - out << " statefreqpr=dirichlet("; - auto* freq = new double[model->num_states]; - model->getStateFrequency(freq); - for (int i = 0; i < model->num_states; ++i) { - if (i != 0) out << ", "; - out << freq[i]; - } - out << ")"; - } - - // if not to include the parameters (for Protein, simply +I, +G, +R) - if (!inclParams) { - out << ";"; - return; - } - - printMrBayesRHASPrior(originalTree, tree->getRate(), charset, out); - out << ";" << endl; -} - -void printMrBayesModelTextMorphological(PhyloTree* originalTree, PhyloTree* tree, ofstream &out, string &partition, string &charset, bool inclParams) -{ - outWarning("MrBayes only supports Morphological Data with states from {0-9}!"); - outWarning("Morphological Data with states {A-Z} may cause errors!"); - outWarning("Use the Morphological Model in MrBayes with Caution!"); - - out << "[MrBayes only supports Morphological Data with states from {0-9}!]" << endl - << "[Morphological Data with states {A-Z} may cause errors!]" << endl - << "[Use the Morphological Model in MrBayes with Caution!]" << endl; - - // MrBayes does not support Invariable Modifier for Morph data - if (tree->getRate()->isFreeRate() || tree->getRate()->getPInvar() > 0.0) { - outWarning("MrBayes does not support Invariable Sites with Morphological Data! +I has been ignored!"); - out << "[MrBayes does not support Invariable Sites with Morphological Data! +I has been ignored!]" << endl; - } - // MrBayes does not support State Frequency for Morph data - if (tree->getModel()->getFreqType() != FREQ_EQUAL) { - outWarning("MrBayes does not support non-equal frequencies for Morphological Data! Frequencies are left as the default! (All equal)"); - out << "[MrBayes does not support non-equal frequencies for Morphological Data! Frequencies are left as the default! (All equal)]" << endl; - } - - // Lset Parameters - out << " lset applyto=(" << partition << ")"; - printMrBayesRHASInfo(tree->getRate(), out, false); - out << ";" << endl; - - // ctype (ordered or not) - if (strcmp(tree->getModel()->name.c_str(), "ORDERED") == 0) - out << " ctype ordered;" << endl; - - if (!inclParams) return; - - // Prset Parameters - out << " prset applyto=(" << partition << ")"; - printMrBayesRHASPrior(originalTree, tree->getRate(), charset, out, false); - out << ";" << endl; -} - -/* Public Supplementary Functions (Used by PhyloSuperTree) */ -ofstream startMrBayesBlock(const char *filename) { - ofstream out; - try { - out.exceptions(ios::failbit | ios::badbit); - out.open(filename); - - // Write Warning - out << "#nexus" << endl << endl - <<"[This MrBayes Block Declaration provides the retrieved values from the IQTree Run.]" << endl - << "[Note that MrBayes does not support a large collection of models, so defaults of 'nst=6' for DNA and 'wag' for Protein will be used if a model that does not exist in MrBayes is selected.]" << endl - << "[Furthermore, the Model Parameter '+R' will be replaced by '+G'.]" << endl - << "[This should be used as a Template Only.]" << endl << endl; - - // Begin File, Print Charsets - out << "begin mrbayes;" << endl; - } catch (ios::failure &) { - outError(ERR_WRITE_OUTPUT, filename); - } - return out; -} - -void printMrBayesModelText(PhyloTree* originalTree, PhyloTree* tree, ofstream& out, string partition, string charset, bool inclParams) -{ - switch(tree->aln->seq_type) { - case SEQ_DNA: - printMrBayesModelTextDNA(originalTree, tree, out, partition, charset, inclParams); - break; - case SEQ_CODON: - break; - case SEQ_PROTEIN: - printMrBayesModelTextProtein(originalTree, tree, out, partition, charset, inclParams); - break; - case SEQ_BINARY: - break; - case SEQ_MORPH: - printMrBayesModelTextMorphological(originalTree, tree, out, partition, charset, inclParams); - break; - default: - outWarning("MrBayes Block output not supported for sequence type " + getSeqTypeName(tree->aln->seq_type)); - return; - } - out << endl; // Add extra endline after model text for readability -} - -/* Protected Overrided Function */ -void PhyloTree::printMrBayesFreeRateReplacement(RateHeterogeneity *rate, string &charset, ofstream &out, bool inclInvariable) { - double gamma_shape = 0.0; - double p_invar = 0.0; - - checkpoint->startStruct("RateGammaInvar"); - - // Safety Check - if (!checkpoint->hasKey("gamma_shape") || !checkpoint->hasKey("p_invar")) { - outWarning("Could not find replacement for Freerate Distribution in MrBayes Block!"); - checkpoint->endStruct(); - return; - } - - outWarning("FreeRate Distributions (+R) are not available in MrBayes. It will be replaced by +G+I."); - - // Always replace +R with +G+I - CKP_RESTORE(gamma_shape); - if (inclInvariable) - CKP_RESTORE(p_invar); - - checkpoint->endStruct(); - - out << " shapepr=fixed(" << gamma_shape << ")"; - if (inclInvariable) - out << " pinvarpr=fixed(" << p_invar << ")"; -} - -/* Main Function */ -void PhyloTree::printMrBayesBlock(const char *filename, bool inclParams) { - ofstream out = startMrBayesBlock(filename); - // Set Outgroup (if available) - if (!rooted) out << " outgroup " << root->name << ";" << endl << endl; - - printMrBayesModelText(this, this, out, "all", "", inclParams); - out << "end;" << endl; - out.close(); -} +} \ No newline at end of file diff --git a/tree/phylotree.h b/tree/phylotree.h index 666dcaf48..b8a19f986 100644 --- a/tree/phylotree.h +++ b/tree/phylotree.h @@ -2264,22 +2264,6 @@ class PhyloTree : public MTree, public Optimization, public CheckpointFactory { */ const string& getDistanceFileWritten() const; - /** - print MrBayes block with partition information, and best model parameters (if inclParams is true) - @param filename output file name - @param inclParams whether to include iqtree params - */ - virtual void printMrBayesBlock(const char *filename, bool inclParams); - - /** - * Prints the replacement prior settings for +R or +R+I. - * @param rate the heterogeneity rate - * @param charset the (original) charset of the current partition. An empty string if not a partitioned tree - * @param out the ofstream to print to - * @param inclInvariable whether to include invariable sites. defaults to true - */ - virtual void printMrBayesFreeRateReplacement(RateHeterogeneity* rate, string &charset, ofstream &out, bool inclInvariable = true); - protected: @@ -2527,22 +2511,4 @@ class PhyloTree : public MTree, public Optimization, public CheckpointFactory { void doneProgress(); }; -/** - * print MrBayes formatted text, of model information (and parameters), to an ofstream. - * @param originalTree the original tree, of which is called to retrieve values in checkpoint file (to allow for overriding) - * @param tree tree to print info from - * @param out stream to print to - * @param partition the partition to 'apply' to (all to apply to all partitions) - * @param charset the (original) charset of the current partition. Used to retrieve values in checkpoint file. Input an empty string if this is not a partitioned tree. - * @param inclParams whether to include the parameters of the model - */ -void printMrBayesModelText(PhyloTree* originalTree, PhyloTree* tree, ofstream& out, string partition, string charset, bool inclParams); - -/** - * Start a MrBayes Block, as well as printing a warning. - * @param filename output file name - * @return ofstream for other functions to print to - */ -ofstream startMrBayesBlock(const char *filename); - #endif diff --git a/utils/tools.cpp b/utils/tools.cpp index 5ee56e382..3b6a130c3 100644 --- a/utils/tools.cpp +++ b/utils/tools.cpp @@ -7911,6 +7911,11 @@ string getOutputNameWithExt(const InputType& format, const string& output_filepa } } +void warnLogStream(string warn, ofstream &out) { + outWarning(warn); + out << "[" << warn << "]" << endl; +} + // Cached Map unordered_map iqTreeToMrBayesAAModels; diff --git a/utils/tools.h b/utils/tools.h index ec49d1b13..cb4243e8d 100644 --- a/utils/tools.h +++ b/utils/tools.h @@ -3733,6 +3733,11 @@ double frob_norm (double m[], int n, double scale=1.0); */ string getOutputNameWithExt(const InputType& format, const string& output_filepath); +/** + * Prints a warning message to the log and to the ofstream, in a NEXUS format. + */ +void warnLogStream(string warn, ofstream &out); + /** * get a map of iqtree amino acid/protein substitution models to MrBayes amino acid/protein substitution models.
* models which are not supported by mrbayes are not included. GTR20 is assumed as default. From 3494f24c9aea05ef550299f2b25672a6d369d7cf Mon Sep 17 00:00:00 2001 From: Integer Limit <103940576+IntegerLimit@users.noreply.github.com> Date: Fri, 5 Jul 2024 15:59:19 +1000 Subject: [PATCH 04/12] Cleanup Imports --- tree/phylosupertree.cpp | 2 -- tree/phylotree.cpp | 1 - 2 files changed, 3 deletions(-) diff --git a/tree/phylosupertree.cpp b/tree/phylosupertree.cpp index 55df485f5..e7e5991f4 100644 --- a/tree/phylosupertree.cpp +++ b/tree/phylosupertree.cpp @@ -20,10 +20,8 @@ #include "phylosupertree.h" #include "alignment/superalignment.h" #include "alignment/superalignmentpairwise.h" -#include "main/phylotesting.h" #include "model/partitionmodel.h" #include "utils/MPIHelper.h" -#include "utils/tools.h" PhyloSuperTree::PhyloSuperTree() : IQTree() diff --git a/tree/phylotree.cpp b/tree/phylotree.cpp index 64169c319..3d00e0d75 100644 --- a/tree/phylotree.cpp +++ b/tree/phylotree.cpp @@ -37,7 +37,6 @@ #include "model/modelmixture.h" #include "phylonodemixlen.h" #include "phylotreemixlen.h" -#include "main/phylotesting.h" const int LH_MIN_CONST = 1; From 8d1abdbc0e0c58073304144e8f6f268050f67832 Mon Sep 17 00:00:00 2001 From: Integer Limit <103940576+IntegerLimit@users.noreply.github.com> Date: Fri, 5 Jul 2024 16:09:37 +1000 Subject: [PATCH 05/12] Binary Model Support --- main/phyloanalysis.cpp | 5 ++-- model/modelbin.cpp | 60 ++++++++++++++++++++++++++++++++++++++++++ model/modelbin.h | 12 +++++++++ model/modeldna.cpp | 1 + model/modelprotein.cpp | 3 ++- 5 files changed, 78 insertions(+), 3 deletions(-) diff --git a/main/phyloanalysis.cpp b/main/phyloanalysis.cpp index 3d00ca14c..a916f72c6 100644 --- a/main/phyloanalysis.cpp +++ b/main/phyloanalysis.cpp @@ -2655,12 +2655,13 @@ void printMrBayesBlockFile(const char* filename, IQTree* &iqtree, bool inclParam // Create Partition out << endl << " partition iqtree = " << size << ": "; for (int part = 0; part < size; part++) { + if (part != 0) out << ", "; + string name = saln->partitions[part]->name; replace(name.begin(), name.end(), '+', '_'); out << name; - if (part != size - 1) out << ", "; - else out << ";" << endl; } + out << ";" << endl; // Set Partition for Use out << " set partition = iqtree;" << endl; diff --git a/model/modelbin.cpp b/model/modelbin.cpp index e8291e00a..54429c183 100644 --- a/model/modelbin.cpp +++ b/model/modelbin.cpp @@ -76,3 +76,63 @@ string ModelBIN::getNameParams(bool show_fixed_params) { } return retname.str(); } + +void ModelBIN::printMrBayesModelText(RateHeterogeneity* rate, ofstream& out, string partition, string charset, bool isSuperTree, bool inclParams) { + // MrBayes does not support Invariable Modifier for Binary data + if (rate->isFreeRate() || rate->getPInvar() > 0.0) { + warnLogStream("MrBayes does not support Invariable Sites with Morphological Data! +I has been ignored!", out); + } + + // Lset Parameters + out << " lset applyto=(" << partition << ") rates="; + + // Free Rate should be substituted by +G (+I not supported) + bool hasGamma = rate->getGammaShape() != 0.0 || rate->isFreeRate(); + if (hasGamma) { + // Rate Categories + Gamma + out << "gamma ngammacat=" << rate->getNRate(); + } + else + out << "equal"; + + out << ";" << endl; + + if (!inclParams) { + if (freq_type == FREQ_EQUAL) out << " prset applyto(" << partition << ") statefreqpr=fixed(equal);" << endl; + return; + } + + // Prset Parameters + out << " prset applyto=(" << partition << ")"; + + // Freerate (+R) + // Get replacement Gamma Shape + if (rate->isFreeRate()) { + printMrBayesFreeRateReplacement(rate, charset, out, false); + } + + // Gamma Distribution (+G/+R) + // Dirichlet is not available here, use fixed + if (rate->getGammaShape() > 0.0) + out << " shapepr=fixed(" << rate->getGammaShape() << ")"; + + // State Frequencies + if (freq_type == FREQ_EQUAL) + out << " statefreqpr=fixed(equal);" << endl; + else { + // use getStateFrequency instead of state_freq to ensure they sum to 1 + auto* freq = new double[num_states]; + getStateFrequency(freq); + + out << " statefreqpr=dirichlet("; + for (int i = 0; i < num_states; ++i) { + if (i != 0) out << ", "; + out << freq[i]; + } + out << ")"; + + delete[] freq; + } + + out << ";" << endl; +} diff --git a/model/modelbin.h b/model/modelbin.h index 9c3ab6cf2..b2c257d9e 100644 --- a/model/modelbin.h +++ b/model/modelbin.h @@ -55,6 +55,18 @@ class ModelBIN : public ModelMarkov */ virtual void startCheckpoint(); + /** + * Print the model information in a format that can be accepted by MrBayes, using lset and prset.
+ * By default, it simply prints a warning to the log and to the stream, stating that this model is not supported by MrBayes. + * @param rate the rate information + * @param out the ofstream to print the result to + * @param partition the partition to apply lset and prset to + * @param charset the current partition's charset. Useful for getting information from the checkpoint file + * @param isSuperTree whether the tree is a super tree. Useful for retrieving information from the checkpoint file, which has different locations for PhyloTree and PhyloSuperTree + * @param inclParams whether to include IQTree optimized parameters for the model + */ + virtual void printMrBayesModelText(RateHeterogeneity* rate, ofstream& out, string partition, string charset, bool isSuperTree, bool inclParams); + }; #endif diff --git a/model/modeldna.cpp b/model/modeldna.cpp index c798eb577..87b4038bb 100644 --- a/model/modeldna.cpp +++ b/model/modeldna.cpp @@ -648,6 +648,7 @@ void ModelDNA::printMrBayesModelText(RateHeterogeneity* rate, ofstream& out, str // use getStateFrequency instead of state_freq to ensure they sum to 1 auto* freq = new double[num_states]; getStateFrequency(freq); + out << " statefreqpr=dirichlet("; for (int i = 0; i < num_states; ++i) { if (i != 0) out << ", "; diff --git a/model/modelprotein.cpp b/model/modelprotein.cpp index 854e901d7..9385a2ada 100644 --- a/model/modelprotein.cpp +++ b/model/modelprotein.cpp @@ -1220,9 +1220,10 @@ void ModelProtein::printMrBayesModelText(RateHeterogeneity* rate, ofstream& out, // Frequency type is never equal to FREQ_EQUAL, even with Poisson // Frequency is also auto-set if we use a model defined by MrBayes // use getStateFrequency instead of state_freq to ensure they sum to 1 - out << " statefreqpr=dirichlet("; auto* freq = new double[num_states]; getStateFrequency(freq); + + out << " statefreqpr=dirichlet("; for (int i = 0; i < num_states; ++i) { if (i != 0) out << ", "; out << freq[i]; From e8e3e94a1f5caaf1334ac87a83851dece4521dcb Mon Sep 17 00:00:00 2001 From: Integer Limit <103940576+IntegerLimit@users.noreply.github.com> Date: Fri, 5 Jul 2024 16:21:22 +1000 Subject: [PATCH 06/12] Misc Cleanup Misc Cleanup --- tree/phylosupertree.cpp | 15 +++++++-------- tree/phylosupertree.h | 4 +++- tree/phylotree.cpp | 4 +++- 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/tree/phylosupertree.cpp b/tree/phylosupertree.cpp index e7e5991f4..ff3884c17 100644 --- a/tree/phylosupertree.cpp +++ b/tree/phylosupertree.cpp @@ -20,6 +20,7 @@ #include "phylosupertree.h" #include "alignment/superalignment.h" #include "alignment/superalignmentpairwise.h" +#include "main/phylotesting.h" #include "model/partitionmodel.h" #include "utils/MPIHelper.h" @@ -1518,17 +1519,16 @@ void PhyloSuperTree::writeBranches(ostream &out) { } } -void PhyloSuperTree::printBestPartitionParams(const char *filename) -{ +void PhyloSuperTree::printBestPartitionParams(const char *filename) { try { ofstream out; out.exceptions(ios::failbit | ios::badbit); out.open(filename); out << "#nexus" << endl << "begin sets;" << endl; - auto *saln = (SuperAlignment*)aln; - - for (int part = 0; part < size(); part++) { + int part; + SuperAlignment *saln = (SuperAlignment*)aln; + for (part = 0; part < size(); part++) { string name = saln->partitions[part]->name; replace(name.begin(), name.end(), '+', '_'); out << " charset " << name << " = "; @@ -1541,9 +1541,8 @@ void PhyloSuperTree::printBestPartitionParams(const char *filename) replace(pos.begin(), pos.end(), ',' , ' '); out << pos << ";" << endl; } - out << " charpartition mymodels =" << endl; - for (int part = 0; part < size(); part++) { + for (part = 0; part < size(); part++) { string name = saln->partitions[part]->name; replace(name.begin(), name.end(), '+', '_'); if (part > 0) out << "," << endl; @@ -1556,4 +1555,4 @@ void PhyloSuperTree::printBestPartitionParams(const char *filename) } catch (ios::failure &) { outError(ERR_WRITE_OUTPUT, filename); } -} \ No newline at end of file +} diff --git a/tree/phylosupertree.h b/tree/phylosupertree.h index 7f56f5e64..60dbdcb9a 100644 --- a/tree/phylosupertree.h +++ b/tree/phylosupertree.h @@ -475,10 +475,12 @@ class PhyloSuperTree : public IQTree, public vector */ void printBestPartitionParams(const char *filename); + /** True when mixed codon with other data type */ bool rescale_codon_brlen; - + int totalNNIs, evalNNIs; + }; #endif diff --git a/tree/phylotree.cpp b/tree/phylotree.cpp index 3d00e0d75..b6bfcb595 100644 --- a/tree/phylotree.cpp +++ b/tree/phylotree.cpp @@ -38,6 +38,7 @@ #include "phylonodemixlen.h" #include "phylotreemixlen.h" + const int LH_MIN_CONST = 1; //const static int BINARY_SCALE = floor(log2(1/SCALING_THRESHOLD)); @@ -6243,4 +6244,5 @@ void PhyloTree::doNNI_simple(NNIMove &move) { FOR_NEIGHBOR_IT(nei21->node, node2, it) *(nei21->split) += *((*it)->split); } -} \ No newline at end of file +} + From 8c0be8941d1cd21cd71ffec3aea2f8951a52d5d0 Mon Sep 17 00:00:00 2001 From: Integer Limit <103940576+IntegerLimit@users.noreply.github.com> Date: Fri, 5 Jul 2024 17:07:30 +1000 Subject: [PATCH 07/12] Output Files Readability, Default Warning & Help Message --- main/phyloanalysis.cpp | 9 ++++----- model/modelsubst.cpp | 3 ++- utils/tools.cpp | 1 + 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/main/phyloanalysis.cpp b/main/phyloanalysis.cpp index a916f72c6..b368eb061 100644 --- a/main/phyloanalysis.cpp +++ b/main/phyloanalysis.cpp @@ -2625,11 +2625,10 @@ void printMrBayesBlockFile(const char* filename, IQTree* &iqtree, bool inclParam // Set Outgroup (if available) if (!iqtree->rooted) out << " outgroup " << iqtree->root->name << ";" << endl << endl; - out << endl; + out << " [Using Model '" << iqtree->getModelName() << "']" << endl; iqtree->getModel()->printMrBayesModelText(iqtree->getRate(), out, "all", "", false, inclParams); - out << endl; - out << "end;" << endl; + out << endl << "end;" << endl; out.close(); return; } @@ -2674,9 +2673,9 @@ void printMrBayesBlockFile(const char* filename, IQTree* &iqtree, bool inclParam PhyloTree* currentTree = superTree->at(part); // MrBayes Partitions are 1-indexed - out << endl; + out << " [Partition No. " << convertIntToString(part + 1) << ", Using Model '" << currentTree->getModelName() << "']" << endl; currentTree->getModel()->printMrBayesModelText(currentTree->getRate(), out, - convertIntToString(part + 1), saln->partitions[part]->name, true, inclParams); + convertIntToString(part + 1), saln->partitions[part]->name, true, inclParams); out << endl; } out << "end;" << endl; diff --git a/model/modelsubst.cpp b/model/modelsubst.cpp index 4b52ebdbd..612f24ff6 100644 --- a/model/modelsubst.cpp +++ b/model/modelsubst.cpp @@ -247,7 +247,8 @@ void ModelSubst::printMrBayesFreeRateReplacement(bool isSuperTree, string &chars } void ModelSubst::printMrBayesModelText(RateHeterogeneity* rate, ofstream& out, string partition, string charset, bool isSuperTree, bool inclParams) { - warnLogStream("MrBayes Block Output is not supported by this model of name " + full_name + "!", out); + out << " [MrBayes Block Output is not supported by this model!]" << endl; + outWarning("MrBayes Block Output is not supported by this model of name '" + name + "'!"); } ModelSubst::~ModelSubst() diff --git a/utils/tools.cpp b/utils/tools.cpp index 3b6a130c3..729053201 100644 --- a/utils/tools.cpp +++ b/utils/tools.cpp @@ -6317,6 +6317,7 @@ void usage_iqtree(char* argv[], bool full_command) { << " -mrbayes Outputs a Mr Bayes block file, to use as a template for future analysis" << endl << " Will not output if incompatible data types are used" << endl << " (MrBayes only supports DNA, Codon, Protein, Binary and Morphological data)" << endl + << " There will be no substitution model data output for Lie Markov and Mixture models!" << endl << endl; if (full_command) { From 350bd759a1be5be374bb17391226a36cd296d6ac Mon Sep 17 00:00:00 2001 From: Integer Limit <103940576+IntegerLimit@users.noreply.github.com> Date: Fri, 5 Jul 2024 17:42:07 +1000 Subject: [PATCH 08/12] Fix Edge Case: Importing Values < 0.01 into MrBayes --- model/modelbin.cpp | 10 ++-------- model/modeldna.cpp | 14 ++++---------- model/modelmorphology.cpp | 2 +- model/modelprotein.cpp | 14 ++++---------- model/modelsubst.cpp | 4 ++-- utils/tools.cpp | 8 ++++++++ utils/tools.h | 6 ++++++ 7 files changed, 27 insertions(+), 31 deletions(-) diff --git a/model/modelbin.cpp b/model/modelbin.cpp index 54429c183..b40969dd9 100644 --- a/model/modelbin.cpp +++ b/model/modelbin.cpp @@ -114,24 +114,18 @@ void ModelBIN::printMrBayesModelText(RateHeterogeneity* rate, ofstream& out, str // Gamma Distribution (+G/+R) // Dirichlet is not available here, use fixed if (rate->getGammaShape() > 0.0) - out << " shapepr=fixed(" << rate->getGammaShape() << ")"; + out << " shapepr=fixed(" << minValueCheckMrBayes(rate->getGammaShape()) << ")"; // State Frequencies if (freq_type == FREQ_EQUAL) out << " statefreqpr=fixed(equal);" << endl; else { - // use getStateFrequency instead of state_freq to ensure they sum to 1 - auto* freq = new double[num_states]; - getStateFrequency(freq); - out << " statefreqpr=dirichlet("; for (int i = 0; i < num_states; ++i) { if (i != 0) out << ", "; - out << freq[i]; + out << minValueCheckMrBayes(state_freq[i]); } out << ")"; - - delete[] freq; } out << ";" << endl; diff --git a/model/modeldna.cpp b/model/modeldna.cpp index 87b4038bb..d2faaec76 100644 --- a/model/modeldna.cpp +++ b/model/modeldna.cpp @@ -634,36 +634,30 @@ void ModelDNA::printMrBayesModelText(RateHeterogeneity* rate, ofstream& out, str // Gamma Distribution (+G/+R) // Dirichlet is not available here, use fixed if (rate->getGammaShape() > 0.0) - out << " shapepr=fixed(" << rate->getGammaShape() << ")"; + out << " shapepr=fixed(" << minValueCheckMrBayes(rate->getGammaShape()) << ")"; // Invariable Sites (+I) // Dirichlet is not available here, use fixed if (rate->getPInvar() > 0.0) - out << " pinvarpr=fixed(" << rate->getPInvar() << ")"; + out << " pinvarpr=fixed(" << minValueCheckMrBayes(rate->getPInvar()) << ")"; // Frequency of Nucleotides (+F) if (equalFreq) out << " statefreqpr=fixed(equal)"; else { - // use getStateFrequency instead of state_freq to ensure they sum to 1 - auto* freq = new double[num_states]; - getStateFrequency(freq); - out << " statefreqpr=dirichlet("; for (int i = 0; i < num_states; ++i) { if (i != 0) out << ", "; - out << freq[i]; + out << minValueCheckMrBayes(state_freq[i]); } out << ")"; - - delete[] freq; } // Reversible Rate Matrix out << " revmatpr=dirichlet("; for (int i = 0; i < getNumRateEntries(); ++i) { if (i != 0) out << ", "; - out << rates[i]; + out << minValueCheckMrBayes(rates[i]); } // Close revmatpr + prset diff --git a/model/modelmorphology.cpp b/model/modelmorphology.cpp index d16c84bb4..4f92423a8 100644 --- a/model/modelmorphology.cpp +++ b/model/modelmorphology.cpp @@ -197,7 +197,7 @@ void ModelMorphology::printMrBayesModelText(RateHeterogeneity* rate, ofstream& o // Gamma Distribution (+G/+R) // Dirichlet is not available here, use fixed if (rate->getGammaShape() > 0.0) - out << " shapepr=fixed(" << rate->getGammaShape() << ")"; + out << " shapepr=fixed(" << minValueCheckMrBayes(rate->getGammaShape()) << ")"; out << ";" << endl; } diff --git a/model/modelprotein.cpp b/model/modelprotein.cpp index 9385a2ada..cca61dab1 100644 --- a/model/modelprotein.cpp +++ b/model/modelprotein.cpp @@ -1213,24 +1213,18 @@ void ModelProtein::printMrBayesModelText(RateHeterogeneity* rate, ofstream& out, for (int i = 0; i < getNumRateEntries(); ++i) { if (i != 0) out << ", "; - out << rates[i]; + out << minValueCheckMrBayes(rates[i]); } out << ")"; // Frequency type is never equal to FREQ_EQUAL, even with Poisson // Frequency is also auto-set if we use a model defined by MrBayes - // use getStateFrequency instead of state_freq to ensure they sum to 1 - auto* freq = new double[num_states]; - getStateFrequency(freq); - out << " statefreqpr=dirichlet("; for (int i = 0; i < num_states; ++i) { if (i != 0) out << ", "; - out << freq[i]; + out << minValueCheckMrBayes(state_freq[i]); } out << ")"; - - delete[] freq; } // if not to include the parameters (for Protein, simply +I, +G, +R) @@ -1248,12 +1242,12 @@ void ModelProtein::printMrBayesModelText(RateHeterogeneity* rate, ofstream& out, // Gamma Distribution (+G/+R) // Dirichlet is not available here, use fixed if (rate->getGammaShape() > 0.0) - out << " shapepr=fixed(" << rate->getGammaShape() << ")"; + out << " shapepr=fixed(" << minValueCheckMrBayes(rate->getGammaShape()) << ")"; // Invariable Sites (+I) // Dirichlet is not available here, use fixed if (rate->getPInvar() > 0.0) - out << " pinvarpr=fixed(" << rate->getPInvar() << ")"; + out << " pinvarpr=fixed(" << minValueCheckMrBayes(rate->getPInvar()) << ")"; out << ";" << endl; } diff --git a/model/modelsubst.cpp b/model/modelsubst.cpp index 612f24ff6..5c5675bea 100644 --- a/model/modelsubst.cpp +++ b/model/modelsubst.cpp @@ -241,9 +241,9 @@ void ModelSubst::printMrBayesFreeRateReplacement(bool isSuperTree, string &chars checkpoint->endStruct(); } - out << " shapepr=fixed(" << gamma_shape << ")"; + out << " shapepr=fixed(" << minValueCheckMrBayes(gamma_shape) << ")"; if (inclInvariable) - out << " pinvarpr=fixed(" << p_invar << ")"; + out << " pinvarpr=fixed(" << minValueCheckMrBayes(p_invar) << ")"; } void ModelSubst::printMrBayesModelText(RateHeterogeneity* rate, ofstream& out, string partition, string charset, bool isSuperTree, bool inclParams) { diff --git a/utils/tools.cpp b/utils/tools.cpp index 729053201..d0081931b 100644 --- a/utils/tools.cpp +++ b/utils/tools.cpp @@ -7917,6 +7917,14 @@ void warnLogStream(string warn, ofstream &out) { out << "[" << warn << "]" << endl; } +double minValueCheckMrBayes(double origValue) { + if (origValue < 0.01) { + outWarning("MrBayes does not support values < 0.01! Using 0.01 instead..."); + return 0.01; + } + return origValue; +} + // Cached Map unordered_map iqTreeToMrBayesAAModels; diff --git a/utils/tools.h b/utils/tools.h index cb4243e8d..b83cd8846 100644 --- a/utils/tools.h +++ b/utils/tools.h @@ -3738,6 +3738,12 @@ string getOutputNameWithExt(const InputType& format, const string& output_filepa */ void warnLogStream(string warn, ofstream &out); +/** + * ensures a number, to be inputted into MrBayes, is larger than the minimum value for MrBayes (0.01) + */ +double minValueCheckMrBayes(double origValue); + + /** * get a map of iqtree amino acid/protein substitution models to MrBayes amino acid/protein substitution models.
* models which are not supported by mrbayes are not included. GTR20 is assumed as default. From f3d086d6d67bfdcb0fb9cd8f61fce64c8664688f Mon Sep 17 00:00:00 2001 From: Integer Limit <103940576+IntegerLimit@users.noreply.github.com> Date: Fri, 5 Jul 2024 17:47:08 +1000 Subject: [PATCH 09/12] Fix Edge Case: Extra Characters in Charset --- main/phyloanalysis.cpp | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/main/phyloanalysis.cpp b/main/phyloanalysis.cpp index b368eb061..7dd7475d5 100644 --- a/main/phyloanalysis.cpp +++ b/main/phyloanalysis.cpp @@ -2641,11 +2641,7 @@ void printMrBayesBlockFile(const char* filename, IQTree* &iqtree, bool inclParam string name = saln->partitions[part]->name; replace(name.begin(), name.end(), '+', '_'); out << " charset " << name << " = "; - if (!saln->partitions[part]->aln_file.empty()) out << saln->partitions[part]->aln_file << ": "; - /*if (saln->partitions[part]->seq_type == SEQ_CODON) - out << "CODON, ";*/ - if (!saln->partitions[part]->sequence_type.empty()) - out << saln->partitions[part]->sequence_type << ", "; + string pos = saln->partitions[part]->position_spec; replace(pos.begin(), pos.end(), ',' , ' '); out << pos << ";" << endl; From 170c7a5d470d46a7cc4e727320a59b791de76a1b Mon Sep 17 00:00:00 2001 From: Integer Limit <103940576+IntegerLimit@users.noreply.github.com> Date: Fri, 5 Jul 2024 17:52:45 +1000 Subject: [PATCH 10/12] Fix +G+I or +R Inputs --- model/modelbin.cpp | 3 +-- model/modeldna.cpp | 3 +-- model/modelmorphology.cpp | 3 +-- model/modelprotein.cpp | 3 +-- 4 files changed, 4 insertions(+), 8 deletions(-) diff --git a/model/modelbin.cpp b/model/modelbin.cpp index b40969dd9..7632ee98f 100644 --- a/model/modelbin.cpp +++ b/model/modelbin.cpp @@ -91,8 +91,7 @@ void ModelBIN::printMrBayesModelText(RateHeterogeneity* rate, ofstream& out, str if (hasGamma) { // Rate Categories + Gamma out << "gamma ngammacat=" << rate->getNRate(); - } - else + } else out << "equal"; out << ";" << endl; diff --git a/model/modeldna.cpp b/model/modeldna.cpp index d2faaec76..4a36e16be 100644 --- a/model/modeldna.cpp +++ b/model/modeldna.cpp @@ -602,8 +602,7 @@ void ModelDNA::printMrBayesModelText(RateHeterogeneity* rate, ofstream& out, str out << "invgamma"; else out << "gamma"; - } - if (hasInvariable) + } else if (hasInvariable) out << "propinv"; else out << "equal"; diff --git a/model/modelmorphology.cpp b/model/modelmorphology.cpp index 4f92423a8..78c46f3bd 100644 --- a/model/modelmorphology.cpp +++ b/model/modelmorphology.cpp @@ -173,8 +173,7 @@ void ModelMorphology::printMrBayesModelText(RateHeterogeneity* rate, ofstream& o if (hasGamma) { // Rate Categories + Gamma out << "gamma ngammacat=" << rate->getNRate(); - } - else + } else out << "equal"; out << ";" << endl; diff --git a/model/modelprotein.cpp b/model/modelprotein.cpp index cca61dab1..24f8a99ff 100644 --- a/model/modelprotein.cpp +++ b/model/modelprotein.cpp @@ -1175,8 +1175,7 @@ void ModelProtein::printMrBayesModelText(RateHeterogeneity* rate, ofstream& out, out << "invgamma"; else out << "gamma"; - } - if (hasInvariable) + } else if (hasInvariable) out << "propinv"; else out << "equal"; From c1650dcce83e74c33be6fb6fa2f14a2ed16c32e0 Mon Sep 17 00:00:00 2001 From: Integer Limit <103940576+IntegerLimit@users.noreply.github.com> Date: Fri, 5 Jul 2024 18:16:00 +1000 Subject: [PATCH 11/12] Fix Issues with Binary Model --- model/modelbin.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/model/modelbin.cpp b/model/modelbin.cpp index 7632ee98f..262cbea80 100644 --- a/model/modelbin.cpp +++ b/model/modelbin.cpp @@ -80,7 +80,7 @@ string ModelBIN::getNameParams(bool show_fixed_params) { void ModelBIN::printMrBayesModelText(RateHeterogeneity* rate, ofstream& out, string partition, string charset, bool isSuperTree, bool inclParams) { // MrBayes does not support Invariable Modifier for Binary data if (rate->isFreeRate() || rate->getPInvar() > 0.0) { - warnLogStream("MrBayes does not support Invariable Sites with Morphological Data! +I has been ignored!", out); + warnLogStream("MrBayes does not support Invariable Sites with Binary Data! +I has been ignored!", out); } // Lset Parameters @@ -97,7 +97,7 @@ void ModelBIN::printMrBayesModelText(RateHeterogeneity* rate, ofstream& out, str out << ";" << endl; if (!inclParams) { - if (freq_type == FREQ_EQUAL) out << " prset applyto(" << partition << ") statefreqpr=fixed(equal);" << endl; + if (freq_type == FREQ_EQUAL) out << " prset applyto=(" << partition << ") statefreqpr=fixed(equal);" << endl; return; } @@ -117,7 +117,7 @@ void ModelBIN::printMrBayesModelText(RateHeterogeneity* rate, ofstream& out, str // State Frequencies if (freq_type == FREQ_EQUAL) - out << " statefreqpr=fixed(equal);" << endl; + out << " statefreqpr=fixed(equal)"; else { out << " statefreqpr=dirichlet("; for (int i = 0; i < num_states; ++i) { From e1bce20948821b443cb17828ef26c76faf5c9727 Mon Sep 17 00:00:00 2001 From: Integer Limit <103940576+IntegerLimit@users.noreply.github.com> Date: Fri, 5 Jul 2024 18:35:59 +1000 Subject: [PATCH 12/12] Fix Issues with Morphology Model --- model/modelmorphology.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/model/modelmorphology.cpp b/model/modelmorphology.cpp index 78c46f3bd..ddda51874 100644 --- a/model/modelmorphology.cpp +++ b/model/modelmorphology.cpp @@ -154,7 +154,7 @@ void ModelMorphology::writeInfo(ostream &out) { void ModelMorphology::printMrBayesModelText(RateHeterogeneity* rate, ofstream& out, string partition, string charset, bool isSuperTree, bool inclParams) { warnLogStream("MrBayes only supports Morphological Data with states from {0-9}!", out); warnLogStream("Morphological Data with states {A-Z} may cause errors!", out); - warnLogStream("Use the Morphological Model in MrBayes with Caution!", out); + warnLogStream("Use Morphological Models in MrBayes with Caution!", out); // MrBayes does not support Invariable Modifier for Morph data if (rate->isFreeRate() || rate->getPInvar() > 0.0) { @@ -180,9 +180,10 @@ void ModelMorphology::printMrBayesModelText(RateHeterogeneity* rate, ofstream& o // ctype (ordered or not) if (strcmp(name.c_str(), "ORDERED") == 0) - out << " ctype ordered;" << endl; + out << " ctype ordered: " << charset << ";" << endl; - if (!inclParams) return; + // Since morphological doesn't have state frequencies, if there are no parameters to prset, return + if (!inclParams || !rate->isFreeRate() && rate->getGammaShape() <= 0.0) return; // Prset Parameters out << " prset applyto=(" << partition << ")";