From e8cda8268b4363e1832ca3cac53d3e75df9b95a9 Mon Sep 17 00:00:00 2001 From: Integer Limit <103940576+IntegerLimit@users.noreply.github.com> Date: Mon, 8 Jul 2024 16:44:29 +1000 Subject: [PATCH 1/6] Codon Model --- alignment/alignment.cpp | 66 +++++++++++++++++++++++++-------------- alignment/alignment.h | 6 ++++ main/phyloanalysis.cpp | 4 +-- model/modelbin.cpp | 4 ++- model/modelbin.h | 2 +- model/modelcodon.cpp | 66 +++++++++++++++++++++++++++++++++++++++ model/modelcodon.h | 11 +++++++ model/modeldna.cpp | 3 +- model/modeldna.h | 3 +- model/modelmorphology.cpp | 4 ++- model/modelmorphology.h | 3 +- model/modelprotein.cpp | 4 ++- model/modelprotein.h | 3 +- model/modelsubst.cpp | 2 +- model/modelsubst.h | 11 +++++++ utils/tools.cpp | 38 +++++++++++++--------- utils/tools.h | 6 ++++ 17 files changed, 184 insertions(+), 52 deletions(-) diff --git a/alignment/alignment.cpp b/alignment/alignment.cpp index 16e5b7d88..495631f5f 100644 --- a/alignment/alignment.cpp +++ b/alignment/alignment.cpp @@ -23,6 +23,7 @@ #include #ifdef USE_BOOST +#include #include #endif @@ -62,6 +63,8 @@ char genetic_code23[] = "KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSS char genetic_code24[] = "KNKNTTTTSSKSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSSWCWCLFLF"; // Pterobranchia mitochondrial char genetic_code25[] = "KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSSGCWCLFLF"; // Candidate Division SR1 and Gracilibacteria +boost::bimap genetic_code_map; + Alignment::Alignment() : vector() { @@ -1603,7 +1606,32 @@ void Alignment::convertStateStr(string &str, SeqType seq_type) { (*it) = convertState(*it, seq_type); } */ - + +boost::bimap getGeneticCodeMap() { + if (!genetic_code_map.empty()) return genetic_code_map; + + genetic_code_map.insert({1, genetic_code1}); + genetic_code_map.insert({2, genetic_code2}); + genetic_code_map.insert({3, genetic_code3}); + genetic_code_map.insert({4, genetic_code4}); + genetic_code_map.insert({5, genetic_code5}); + genetic_code_map.insert({6, genetic_code6}); + genetic_code_map.insert({9, genetic_code9}); + genetic_code_map.insert({10, genetic_code10}); + genetic_code_map.insert({11, genetic_code11}); + genetic_code_map.insert({12, genetic_code12}); + genetic_code_map.insert({13, genetic_code13}); + genetic_code_map.insert({14, genetic_code14}); + genetic_code_map.insert({16, genetic_code16}); + genetic_code_map.insert({21, genetic_code21}); + genetic_code_map.insert({22, genetic_code22}); + genetic_code_map.insert({23, genetic_code23}); + genetic_code_map.insert({24, genetic_code24}); + genetic_code_map.insert({25, genetic_code25}); + + return genetic_code_map; +} + void Alignment::initCodon(char *gene_code_id) { // build index from 64 codons to non-stop codons int transl_table = 1; @@ -1613,30 +1641,13 @@ void Alignment::initCodon(char *gene_code_id) { } catch (string &str) { outError("Wrong genetic code ", gene_code_id); } - switch (transl_table) { - case 1: genetic_code = genetic_code1; break; - case 2: genetic_code = genetic_code2; break; - case 3: genetic_code = genetic_code3; break; - case 4: genetic_code = genetic_code4; break; - case 5: genetic_code = genetic_code5; break; - case 6: genetic_code = genetic_code6; break; - case 9: genetic_code = genetic_code9; break; - case 10: genetic_code = genetic_code10; break; - case 11: genetic_code = genetic_code11; break; - case 12: genetic_code = genetic_code12; break; - case 13: genetic_code = genetic_code13; break; - case 14: genetic_code = genetic_code14; break; - case 15: genetic_code = genetic_code15; break; - case 16: genetic_code = genetic_code16; break; - case 21: genetic_code = genetic_code21; break; - case 22: genetic_code = genetic_code22; break; - case 23: genetic_code = genetic_code23; break; - case 24: genetic_code = genetic_code24; break; - case 25: genetic_code = genetic_code25; break; - default: + auto codeMap = getGeneticCodeMap(); + auto found = codeMap.left.find(transl_table); + if (found == codeMap.left.end()) { outError("Wrong genetic code ", gene_code_id); - break; + return; } + genetic_code = found->second; } else { genetic_code = genetic_code1; } @@ -1669,6 +1680,15 @@ void Alignment::initCodon(char *gene_code_id) { // cout << "num_states = " << num_states << endl; } +int Alignment::getGeneticCodeId() { + if (seq_type != SEQ_CODON || genetic_code == nullptr) return 0; + + auto code_map = getGeneticCodeMap(); + auto found = code_map.right.find(genetic_code); + if (found == code_map.right.end()) return 0; + return found->second; +} + int getMorphStates(StrVector &sequences) { char maxstate = 0; for (StrVector::iterator it = sequences.begin(); it != sequences.end(); it++) diff --git a/alignment/alignment.h b/alignment/alignment.h index 934591a80..bf7d6d800 100644 --- a/alignment/alignment.h +++ b/alignment/alignment.h @@ -1010,6 +1010,12 @@ class Alignment : public vector, public CharSet, public StateSpace { */ void extractMapleFile(const std::string& aln_name, const InputType& format); + /** + * Get the numerical id of the genetic code + * @return id the genetic code id, or 0 if not a codon type + */ + int getGeneticCodeId(); + protected: diff --git a/main/phyloanalysis.cpp b/main/phyloanalysis.cpp index 7dd7475d5..d5eeee528 100644 --- a/main/phyloanalysis.cpp +++ b/main/phyloanalysis.cpp @@ -2626,7 +2626,7 @@ void printMrBayesBlockFile(const char* filename, IQTree* &iqtree, bool inclParam if (!iqtree->rooted) out << " outgroup " << iqtree->root->name << ";" << endl << endl; out << " [Using Model '" << iqtree->getModelName() << "']" << endl; - iqtree->getModel()->printMrBayesModelText(iqtree->getRate(), out, "all", "", false, inclParams); + iqtree->getModel()->printMrBayesModelText(out, "all", "", false, inclParams); out << endl << "end;" << endl; out.close(); @@ -2670,7 +2670,7 @@ void printMrBayesBlockFile(const char* filename, IQTree* &iqtree, bool inclParam // MrBayes Partitions are 1-indexed out << " [Partition No. " << convertIntToString(part + 1) << ", Using Model '" << currentTree->getModelName() << "']" << endl; - currentTree->getModel()->printMrBayesModelText(currentTree->getRate(), out, + currentTree->getModel()->printMrBayesModelText(out, convertIntToString(part + 1), saln->partitions[part]->name, true, inclParams); out << endl; } diff --git a/model/modelbin.cpp b/model/modelbin.cpp index 262cbea80..e0100fc26 100644 --- a/model/modelbin.cpp +++ b/model/modelbin.cpp @@ -77,7 +77,9 @@ string ModelBIN::getNameParams(bool show_fixed_params) { return retname.str(); } -void ModelBIN::printMrBayesModelText(RateHeterogeneity* rate, ofstream& out, string partition, string charset, bool isSuperTree, bool inclParams) { +void ModelBIN::printMrBayesModelText(ofstream& out, string partition, string charset, bool isSuperTree, bool inclParams) { + RateHeterogeneity* rate = phylo_tree->getRate(); + // MrBayes does not support Invariable Modifier for Binary data if (rate->isFreeRate() || rate->getPInvar() > 0.0) { warnLogStream("MrBayes does not support Invariable Sites with Binary Data! +I has been ignored!", out); diff --git a/model/modelbin.h b/model/modelbin.h index b2c257d9e..b6a5024ec 100644 --- a/model/modelbin.h +++ b/model/modelbin.h @@ -65,7 +65,7 @@ class ModelBIN : public ModelMarkov * @param isSuperTree whether the tree is a super tree. Useful for retrieving information from the checkpoint file, which has different locations for PhyloTree and PhyloSuperTree * @param inclParams whether to include IQTree optimized parameters for the model */ - virtual void printMrBayesModelText(RateHeterogeneity* rate, ofstream& out, string partition, string charset, bool isSuperTree, bool inclParams); + virtual void printMrBayesModelText(ofstream& out, string partition, string charset, bool isSuperTree, bool inclParams); }; diff --git a/model/modelcodon.cpp b/model/modelcodon.cpp index d39c0390a..e20971a22 100644 --- a/model/modelcodon.cpp +++ b/model/modelcodon.cpp @@ -1142,6 +1142,72 @@ double ModelCodon::optimizeParameters(double gradient_epsilon) { return score; } +void ModelCodon::printMrBayesModelText(ofstream& out, string partition, string charset, bool isSuperTree, bool inclParams) { + // NST should be 1 if fix_kappa is true (no ts/tv ratio), else it should be 2 + int nst = fix_kappa ? 1 : 2; + int code = phylo_tree->aln->getGeneticCodeId(); + string mrBayesCode = getMrBayesGeneticCode(code); + RateHeterogeneity* rate = phylo_tree->getRate(); + + if (rate->isFreeRate() || rate->getGammaShape() > 0.0 || rate->getPInvar() > 0.0) { + warnLogStream("MrBayes Codon Models do not support any Heterogenity Rate Modifiers! (+I, +G, +R) They have been ignored!", out); + } + + if (mrBayesCode.empty()) { + warnLogStream("MrBayes Does Not Support Codon Code " + convertIntToString(code) + "! Defaulting to Universal (Code 1).", out); + mrBayesCode = "universal"; + } + + if (freq_type == FREQ_USER_DEFINED || name.find('_') != string::npos) { // If this model is a Empirical / Semi-Empirical Model + warnLogStream("MrBayes does not support Empirical Codon Models. State Frequency will still be set, but no rate matrix will be set.", out); + } + + out << " lset applyto=(" << partition << ") omegavar=equal nst=" << nst << " code=" << mrBayesCode << ";" << endl; + + if (!inclParams) return; + + out << " prset applyto=(" << partition << ") statefreqpr=dirichlet("; + + // State Frequency Prior + bool isFirst = true; + for (int i = 0; i < num_states; i++) { + if (phylo_tree->aln->isStopCodon(i)) continue; + + if (!isFirst) out << ", "; + else isFirst = false; + + out << state_freq[i]; + } + + out << ")"; + + /* + * TS/TV Ratio (Kappa) + * Ratio Should be: + * `beta(kappa, 1)` when `codon_kappa_style` is `CK_ONE_KAPPA` (kappa here represents the ts/tv rate ratio) + * `beta(kappa, 1)` when `codon_kappa_style` is `CK_ONE_KAPPA_TS` (kappa here represents the transition rate) + * `beta(1, kappa)` when `codon_kappa_style` is `CK_ONE_KAPPA_TV` (kappa here represents the transversion rate) + * `beta(kappa, kappa2)` when `codon_kappa_style` is `CK_TWO_KAPPA` (kappa here represents the transition rate, and kappa2 represents the transversion rate) + */ + if (!fix_kappa) { + double v1 = codon_kappa_style == CK_ONE_KAPPA_TV ? 1 : kappa; + double v2 = 1; + if (codon_kappa_style == CK_ONE_KAPPA_TV) + v2 = kappa; + else if (codon_kappa_style == CK_TWO_KAPPA) + v2 = kappa2; + + out << " tratiopr=beta(" << v1 << ", " << v2 << ")"; + } + + // DN/DS Ratio (Omega) + // Ratio is set to `dirichlet(omega, 1)` + if (!fix_omega) { + out << " omegapr=dirichlet(" << omega << ", 1)"; + } + + out << ";" << endl; +} void ModelCodon::writeInfo(ostream &out) { if (name.find('_') == string::npos) diff --git a/model/modelcodon.h b/model/modelcodon.h index 5b95c46df..a745f49c8 100644 --- a/model/modelcodon.h +++ b/model/modelcodon.h @@ -210,6 +210,17 @@ class ModelCodon: public ModelMarkov { */ virtual bool getVariables(double *variables); + /** + * Print the model information in a format that can be accepted by MrBayes, using lset and prset.
+ * By default, it simply prints a warning to the log and to the stream, stating that this model is not supported by MrBayes. + * @param out the ofstream to print the result to + * @param partition the partition to apply lset and prset to + * @param charset the current partition's charset. Useful for getting information from the checkpoint file + * @param isSuperTree whether the tree is a super tree. Useful for retrieving information from the checkpoint file, which has different locations for PhyloTree and PhyloSuperTree + * @param inclParams whether to include IQTree optimized parameters for the model + */ + virtual void printMrBayesModelText(ofstream& out, string partition, string charset, bool isSuperTree, bool inclParams); + }; #endif /* MODELCODON_H_ */ diff --git a/model/modeldna.cpp b/model/modeldna.cpp index 4a36e16be..5a4aae840 100644 --- a/model/modeldna.cpp +++ b/model/modeldna.cpp @@ -565,9 +565,10 @@ void ModelDNA::setVariables(double *variables) { // } } -void ModelDNA::printMrBayesModelText(RateHeterogeneity* rate, ofstream& out, string partition, string charset, bool isSuperTree, bool inclParams) { +void ModelDNA::printMrBayesModelText(ofstream& out, string partition, string charset, bool isSuperTree, bool inclParams) { bool equalFreq = freq_type == FREQ_EQUAL; short nst = 6; + RateHeterogeneity* rate = phylo_tree->getRate(); // Find NST value (1 for JC/JC69/F81, 2 for K80/K2P/HKY/HKY85, 6 for SYM/GTR) // NST is set to 6 by default (above), so check for JC/JC69/F81 and K80/K2P/HKY/HKY85 diff --git a/model/modeldna.h b/model/modeldna.h index 0c252c178..98fb70f1a 100644 --- a/model/modeldna.h +++ b/model/modeldna.h @@ -117,14 +117,13 @@ class ModelDNA : public ModelMarkov /** * Print the model information in a format that can be accepted by MrBayes, using lset and prset.
* By default, it simply prints a warning to the log and to the stream, stating that this model is not supported by MrBayes. - * @param rate the rate information * @param out the ofstream to print the result to * @param partition the partition to apply lset and prset to * @param charset the current partition's charset. Useful for getting information from the checkpoint file * @param isSuperTree whether the tree is a super tree. Useful for retrieving information from the checkpoint file, which has different locations for PhyloTree and PhyloSuperTree * @param inclParams whether to include IQTree optimized parameters for the model */ - virtual void printMrBayesModelText(RateHeterogeneity* rate, ofstream& out, string partition, string charset, bool isSuperTree, bool inclParams); + virtual void printMrBayesModelText(ofstream& out, string partition, string charset, bool isSuperTree, bool inclParams); protected: diff --git a/model/modelmorphology.cpp b/model/modelmorphology.cpp index ddda51874..8312a6439 100644 --- a/model/modelmorphology.cpp +++ b/model/modelmorphology.cpp @@ -151,11 +151,13 @@ void ModelMorphology::writeInfo(ostream &out) { } } -void ModelMorphology::printMrBayesModelText(RateHeterogeneity* rate, ofstream& out, string partition, string charset, bool isSuperTree, bool inclParams) { +void ModelMorphology::printMrBayesModelText(ofstream& out, string partition, string charset, bool isSuperTree, bool inclParams) { warnLogStream("MrBayes only supports Morphological Data with states from {0-9}!", out); warnLogStream("Morphological Data with states {A-Z} may cause errors!", out); warnLogStream("Use Morphological Models in MrBayes with Caution!", out); + RateHeterogeneity* rate = phylo_tree->getRate(); + // MrBayes does not support Invariable Modifier for Morph data if (rate->isFreeRate() || rate->getPInvar() > 0.0) { warnLogStream("MrBayes does not support Invariable Sites with Morphological Data! +I has been ignored!", out); diff --git a/model/modelmorphology.h b/model/modelmorphology.h index f598103bd..6367b1282 100644 --- a/model/modelmorphology.h +++ b/model/modelmorphology.h @@ -82,14 +82,13 @@ class ModelMorphology: public ModelMarkov { /** * Print the model information in a format that can be accepted by MrBayes, using lset and prset.
* By default, it simply prints a warning to the log and to the stream, stating that this model is not supported by MrBayes. - * @param rate the rate information * @param out the ofstream to print the result to * @param partition the partition to apply lset and prset to * @param charset the current partition's charset. Useful for getting information from the checkpoint file * @param isSuperTree whether the tree is a super tree. Useful for retrieving information from the checkpoint file, which has different locations for PhyloTree and PhyloSuperTree * @param inclParams whether to include IQTree optimized parameters for the model */ - virtual void printMrBayesModelText(RateHeterogeneity* rate, ofstream& out, string partition, string charset, bool isSuperTree, bool inclParams); + virtual void printMrBayesModelText(ofstream& out, string partition, string charset, bool isSuperTree, bool inclParams); }; #endif /* MODELMORPHOLOGY_H_ */ diff --git a/model/modelprotein.cpp b/model/modelprotein.cpp index 24f8a99ff..558e938e3 100644 --- a/model/modelprotein.cpp +++ b/model/modelprotein.cpp @@ -1162,10 +1162,12 @@ void ModelProtein::computeTipLikelihood(PML::StateType state, double *state_lk) state_lk[ambi_aa[cstate*2+1]] = 1.0; } -void ModelProtein::printMrBayesModelText(RateHeterogeneity* rate, ofstream& out, string partition, string charset, bool isSuperTree, bool inclParams) { +void ModelProtein::printMrBayesModelText(ofstream& out, string partition, string charset, bool isSuperTree, bool inclParams) { // Lset Parameters out << " lset applyto=(" << partition << ") nucmodel=protein rates="; + RateHeterogeneity* rate = phylo_tree->getRate(); + // RHAS Specification // Free Rate should be substituted by +G+I bool hasGamma = rate->getGammaShape() != 0.0 || rate->isFreeRate(); diff --git a/model/modelprotein.h b/model/modelprotein.h index 6dd1f69e6..b24cb7c1d 100644 --- a/model/modelprotein.h +++ b/model/modelprotein.h @@ -82,14 +82,13 @@ class ModelProtein : public ModelMarkov /** * Print the model information in a format that can be accepted by MrBayes, using lset and prset.
* By default, it simply prints a warning to the log and to the stream, stating that this model is not supported by MrBayes. - * @param rate the rate information * @param out the ofstream to print the result to * @param partition the partition to apply lset and prset to * @param charset the current partition's charset. Useful for getting information from the checkpoint file * @param isSuperTree whether the tree is a super tree. Useful for retrieving information from the checkpoint file, which has different locations for PhyloTree and PhyloSuperTree * @param inclParams whether to include IQTree optimized parameters for the model */ - virtual void printMrBayesModelText(RateHeterogeneity* rate, ofstream& out, string partition, string charset, bool isSuperTree, bool inclParams); + virtual void printMrBayesModelText(ofstream& out, string partition, string charset, bool isSuperTree, bool inclParams); private: ModelsBlock *models_block; diff --git a/model/modelsubst.cpp b/model/modelsubst.cpp index 5c5675bea..fe4fc8ada 100644 --- a/model/modelsubst.cpp +++ b/model/modelsubst.cpp @@ -246,7 +246,7 @@ void ModelSubst::printMrBayesFreeRateReplacement(bool isSuperTree, string &chars out << " pinvarpr=fixed(" << minValueCheckMrBayes(p_invar) << ")"; } -void ModelSubst::printMrBayesModelText(RateHeterogeneity* rate, ofstream& out, string partition, string charset, bool isSuperTree, bool inclParams) { +void ModelSubst::printMrBayesModelText(ofstream& out, string partition, string charset, bool isSuperTree, bool inclParams) { out << " [MrBayes Block Output is not supported by this model!]" << endl; outWarning("MrBayes Block Output is not supported by this model of name '" + name + "'!"); } diff --git a/model/modelsubst.h b/model/modelsubst.h index 370737418..d48ca180d 100644 --- a/model/modelsubst.h +++ b/model/modelsubst.h @@ -414,6 +414,17 @@ class ModelSubst: public Optimization, public CheckpointFactory */ virtual void printMrBayesModelText(RateHeterogeneity* rate, ofstream& out, string partition, string charset, bool isSuperTree, bool inclParams); + /** + * Print the model information in a format that can be accepted by MrBayes, using lset and prset.
+ * By default, it simply prints a warning to the log and to the stream, stating that this model is not supported by MrBayes. + * @param out the ofstream to print the result to + * @param partition the partition to apply lset and prset to + * @param charset the current partition's charset. Useful for getting information from the checkpoint file + * @param isSuperTree whether the tree is a super tree. Useful for retrieving information from the checkpoint file, which has different locations for PhyloTree and PhyloSuperTree + * @param inclParams whether to include IQTree optimized parameters for the model + */ + virtual void printMrBayesModelText(ofstream& out, string partition, string charset, bool isSuperTree, bool inclParams); + /***************************************************** Checkpointing facility *****************************************************/ diff --git a/utils/tools.cpp b/utils/tools.cpp index d0081931b..937c92752 100644 --- a/utils/tools.cpp +++ b/utils/tools.cpp @@ -7925,22 +7925,30 @@ double minValueCheckMrBayes(double origValue) { return origValue; } -// Cached Map -unordered_map iqTreeToMrBayesAAModels; +const unordered_map iqTreeToMrBayesAAModels = { + {"Poisson", "poisson"}, + {"JTT", "jones"}, + {"Dayhoff", "dayhoff"}, + {"mtREV", "mtrev"}, + {"mtMAM", "mtmam"}, + {"WAG", "wag"}, + {"rtREV", "rtrev"}, + {"cpREV", "cprev"}, + {"VT", "vt"}, + {"Blosum62", "blosum"}, + {"LG", "lg"}, +}; + +// Anything outside of index 10 (Code No. 11) is invalid, leave that as empty string +const string indexedMrBayesGeneticCodes[25] = {"universal", "vertmt", "yeast", "mycoplasma", "invermt", + "ciliate", "", "", "echinoderm", "euplotid", "universal"}; unordered_map getIqTreeToMrBayesAAModels() { - if (!iqTreeToMrBayesAAModels.empty()) return iqTreeToMrBayesAAModels; - - iqTreeToMrBayesAAModels["Poisson"] = "poisson"; - iqTreeToMrBayesAAModels["JTT"] = "jones"; - iqTreeToMrBayesAAModels["Dayhoff"] = "dayhoff"; - iqTreeToMrBayesAAModels["mtREV"] = "mtrev"; - iqTreeToMrBayesAAModels["mtMAM"] = "mtmam"; - iqTreeToMrBayesAAModels["WAG"] = "wag"; - iqTreeToMrBayesAAModels["rtREV"] = "rtrev"; - iqTreeToMrBayesAAModels["cpREV"] = "cprev"; - iqTreeToMrBayesAAModels["VT"] = "vt"; - iqTreeToMrBayesAAModels["Blosum62"] = "blosum"; - iqTreeToMrBayesAAModels["LG"] = "lg"; return iqTreeToMrBayesAAModels; } + +string getMrBayesGeneticCode(int geneticCodeId) { + if (geneticCodeId == 0) return ""; + + return indexedMrBayesGeneticCodes[geneticCodeId - 1]; +} diff --git a/utils/tools.h b/utils/tools.h index b83cd8846..94c05ea08 100644 --- a/utils/tools.h +++ b/utils/tools.h @@ -3750,4 +3750,10 @@ double minValueCheckMrBayes(double origValue); */ unordered_map getIqTreeToMrBayesAAModels(); +/** + * get the MrBayes equivalent of a genetic code, given the id of the code. Returns and empty string if that code + * is not supported in MrBayes. + */ +string getMrBayesGeneticCode(int geneticCodeId); + #endif From 5148187f3ec097f3a44fc7505f8c517df81337e1 Mon Sep 17 00:00:00 2001 From: Integer Limit <103940576+IntegerLimit@users.noreply.github.com> Date: Mon, 8 Jul 2024 16:50:29 +1000 Subject: [PATCH 2/6] Fix Compiler Error due to Merge Conflicts --- model/modelsubst.h | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/model/modelsubst.h b/model/modelsubst.h index d48ca180d..71bc0a14f 100644 --- a/model/modelsubst.h +++ b/model/modelsubst.h @@ -402,18 +402,6 @@ class ModelSubst: public Optimization, public CheckpointFactory */ void printMrBayesFreeRateReplacement(bool isSuperTree, string &charset, ofstream &out, bool inclInvariable = true); - /** - * Print the model information in a format that can be accepted by MrBayes, using lset and prset.
- * By default, it simply prints a warning to the log and to the stream, stating that this model is not supported by MrBayes. - * @param rate the rate information - * @param out the ofstream to print the result to - * @param partition the partition to apply lset and prset to - * @param charset the current partition's charset. Useful for getting information from the checkpoint file - * @param isSuperTree whether the tree is a super tree. Useful for retrieving information from the checkpoint file, which has different locations for PhyloTree and PhyloSuperTree - * @param inclParams whether to include IQTree optimized parameters for the model - */ - virtual void printMrBayesModelText(RateHeterogeneity* rate, ofstream& out, string partition, string charset, bool isSuperTree, bool inclParams); - /** * Print the model information in a format that can be accepted by MrBayes, using lset and prset.
* By default, it simply prints a warning to the log and to the stream, stating that this model is not supported by MrBayes. From a73632c84ccef9bf8e0b74e11b4407a457741d1e Mon Sep 17 00:00:00 2001 From: Integer Limit <103940576+IntegerLimit@users.noreply.github.com> Date: Mon, 8 Jul 2024 16:51:36 +1000 Subject: [PATCH 3/6] Fix Codon Model `NucModel` Parameter --- model/modelcodon.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/model/modelcodon.cpp b/model/modelcodon.cpp index e20971a22..0741c70f8 100644 --- a/model/modelcodon.cpp +++ b/model/modelcodon.cpp @@ -1162,7 +1162,7 @@ void ModelCodon::printMrBayesModelText(ofstream& out, string partition, string c warnLogStream("MrBayes does not support Empirical Codon Models. State Frequency will still be set, but no rate matrix will be set.", out); } - out << " lset applyto=(" << partition << ") omegavar=equal nst=" << nst << " code=" << mrBayesCode << ";" << endl; + out << " lset applyto=(" << partition << ") nucmodel=codon omegavar=equal nst=" << nst << " code=" << mrBayesCode << ";" << endl; if (!inclParams) return; From 748fd7d3be4ce9f9c1ffd2e8dd1999ab4ebe5d0f Mon Sep 17 00:00:00 2001 From: Integer Limit <103940576+IntegerLimit@users.noreply.github.com> Date: Mon, 8 Jul 2024 17:11:31 +1000 Subject: [PATCH 4/6] Fix Empirical Warning + Indentation of Warnings --- model/modelcodon.cpp | 4 ++-- utils/tools.cpp | 4 ++-- utils/tools.h | 5 ++++- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/model/modelcodon.cpp b/model/modelcodon.cpp index 0741c70f8..f95190743 100644 --- a/model/modelcodon.cpp +++ b/model/modelcodon.cpp @@ -1158,8 +1158,8 @@ void ModelCodon::printMrBayesModelText(ofstream& out, string partition, string c mrBayesCode = "universal"; } - if (freq_type == FREQ_USER_DEFINED || name.find('_') != string::npos) { // If this model is a Empirical / Semi-Empirical Model - warnLogStream("MrBayes does not support Empirical Codon Models. State Frequency will still be set, but no rate matrix will be set.", out); + if (num_params == 0 || name.find('_') != string::npos) { // If this model is a Empirical / Semi-Empirical Model + warnLogStream("MrBayes does not support Empirical or Semi-Empirical Codon Models. State Frequency will still be set, but no rate matrix will be set.", out); } out << " lset applyto=(" << partition << ") nucmodel=codon omegavar=equal nst=" << nst << " code=" << mrBayesCode << ";" << endl; diff --git a/utils/tools.cpp b/utils/tools.cpp index 937c92752..af5865b9a 100644 --- a/utils/tools.cpp +++ b/utils/tools.cpp @@ -7912,9 +7912,9 @@ string getOutputNameWithExt(const InputType& format, const string& output_filepa } } -void warnLogStream(string warn, ofstream &out) { +void warnLogStream(string warn, ofstream &out, string indentation) { outWarning(warn); - out << "[" << warn << "]" << endl; + out << indentation << "[" << warn << "]" << endl; } double minValueCheckMrBayes(double origValue) { diff --git a/utils/tools.h b/utils/tools.h index 94c05ea08..77675a54f 100644 --- a/utils/tools.h +++ b/utils/tools.h @@ -3735,8 +3735,11 @@ string getOutputNameWithExt(const InputType& format, const string& output_filepa /** * Prints a warning message to the log and to the ofstream, in a NEXUS format. + * @param warn the message to print + * @param out the ofstream to print to + * @param indentation optional indentation included only in ofstream. defaults to two spaces */ -void warnLogStream(string warn, ofstream &out); +void warnLogStream(string warn, ofstream &out, string indentation = " "); /** * ensures a number, to be inputted into MrBayes, is larger than the minimum value for MrBayes (0.01) From 8dc9742a713fd9120ceb4aebc43073175007a6ce Mon Sep 17 00:00:00 2001 From: Integer Limit <103940576+IntegerLimit@users.noreply.github.com> Date: Mon, 8 Jul 2024 17:27:45 +1000 Subject: [PATCH 5/6] Improve Start-Of-File Warnings --- main/phyloanalysis.cpp | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/main/phyloanalysis.cpp b/main/phyloanalysis.cpp index d5eeee528..b1addbcf8 100644 --- a/main/phyloanalysis.cpp +++ b/main/phyloanalysis.cpp @@ -2608,11 +2608,21 @@ void printMrBayesBlockFile(const char* filename, IQTree* &iqtree, bool inclParam out.exceptions(ios::failbit | ios::badbit); out.open(filename); + string provide = "basic models"; + if (inclParams) provide = "optimized values"; + else if (iqtree->isSuperTree()) provide = "basic partition structure and models"; + // Write Warning out << "#nexus" << endl << endl - <<"[This MrBayes Block Declaration provides the retrieved values from the IQTree Run.]" << endl - << "[Note that MrBayes does not support a large collection of models, so defaults of 'nst=6' for DNA and 'wag' for Protein will be used if a model that does not exist in MrBayes is selected.]" << endl - << "[Furthermore, the Model Parameter '+R' will be replaced by '+G'.]" << endl + <<"[This MrBayes Block Declaration provides the " << provide << " from the IQTree Run.]" << endl + << "[Note that MrBayes does not support a large collection of models, so defaults of 'nst=6' for DNA and 'wag' for Protein will be used if a model that does not exist in MrBayes is used.]" << endl; + + if (inclParams) + out << "[However, for those cases, there will still be a rate matrix provided.]" << endl + << "[For DNA, this will still mean the rates may vary outside the restrictions of the model.]" << endl + << "[For Protein, this is essentially a perfect replacement.]" << endl; + + out << "[Furthermore, the Model Parameter '+R' will be replaced by '+G'.]" << endl << "[This should be used as a Template Only.]" << endl << endl; // Begin File, Print Charsets From 17424e28c2277c265e7baf32733c18601ca91fb3 Mon Sep 17 00:00:00 2001 From: Integer Limit <103940576+IntegerLimit@users.noreply.github.com> Date: Mon, 8 Jul 2024 17:39:00 +1000 Subject: [PATCH 6/6] Fix Indentation in alignment.cpp --- alignment/alignment.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/alignment/alignment.cpp b/alignment/alignment.cpp index 495631f5f..ee1e9fc76 100644 --- a/alignment/alignment.cpp +++ b/alignment/alignment.cpp @@ -1641,13 +1641,13 @@ void Alignment::initCodon(char *gene_code_id) { } catch (string &str) { outError("Wrong genetic code ", gene_code_id); } - auto codeMap = getGeneticCodeMap(); - auto found = codeMap.left.find(transl_table); + auto codeMap = getGeneticCodeMap(); + auto found = codeMap.left.find(transl_table); if (found == codeMap.left.end()) { outError("Wrong genetic code ", gene_code_id); - return; + return; } - genetic_code = found->second; + genetic_code = found->second; } else { genetic_code = genetic_code1; }