diff --git a/.gitignore b/.gitignore index fe0d324c..926678ab 100644 --- a/.gitignore +++ b/.gitignore @@ -14,6 +14,7 @@ tags test_scripts/iqtree_test_cmds.txt pllrepo/ build/ +cmake-build-debug/ /Default-clang .idea/ .idea/codeStyleSettings.xml diff --git a/alignment/alignment.cpp b/alignment/alignment.cpp index e66beff3..c157c0d2 100644 --- a/alignment/alignment.cpp +++ b/alignment/alignment.cpp @@ -23,6 +23,7 @@ #include #ifdef USE_BOOST +#include #include #endif @@ -62,6 +63,8 @@ char genetic_code23[] = "KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSS char genetic_code24[] = "KNKNTTTTSSKSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSSWCWCLFLF"; // Pterobranchia mitochondrial char genetic_code25[] = "KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSSGCWCLFLF"; // Candidate Division SR1 and Gracilibacteria +boost::bimap genetic_code_map; + Alignment::Alignment() : vector() { @@ -1909,7 +1912,32 @@ void Alignment::convertStateStr(string &str, SeqType seq_type) { (*it) = convertState(*it, seq_type); } */ - + +boost::bimap getGeneticCodeMap() { + if (!genetic_code_map.empty()) return genetic_code_map; + + genetic_code_map.insert({1, genetic_code1}); + genetic_code_map.insert({2, genetic_code2}); + genetic_code_map.insert({3, genetic_code3}); + genetic_code_map.insert({4, genetic_code4}); + genetic_code_map.insert({5, genetic_code5}); + genetic_code_map.insert({6, genetic_code6}); + genetic_code_map.insert({9, genetic_code9}); + genetic_code_map.insert({10, genetic_code10}); + genetic_code_map.insert({11, genetic_code11}); + genetic_code_map.insert({12, genetic_code12}); + genetic_code_map.insert({13, genetic_code13}); + genetic_code_map.insert({14, genetic_code14}); + genetic_code_map.insert({16, genetic_code16}); + genetic_code_map.insert({21, genetic_code21}); + genetic_code_map.insert({22, genetic_code22}); + genetic_code_map.insert({23, genetic_code23}); + genetic_code_map.insert({24, genetic_code24}); + genetic_code_map.insert({25, genetic_code25}); + + return genetic_code_map; +} + void Alignment::initCodon(char *gene_code_id) { // build index from 64 codons to non-stop codons int transl_table = 1; @@ -1919,30 +1947,13 @@ void Alignment::initCodon(char *gene_code_id) { } catch (string &str) { outError("Wrong genetic code ", gene_code_id); } - switch (transl_table) { - case 1: genetic_code = genetic_code1; break; - case 2: genetic_code = genetic_code2; break; - case 3: genetic_code = genetic_code3; break; - case 4: genetic_code = genetic_code4; break; - case 5: genetic_code = genetic_code5; break; - case 6: genetic_code = genetic_code6; break; - case 9: genetic_code = genetic_code9; break; - case 10: genetic_code = genetic_code10; break; - case 11: genetic_code = genetic_code11; break; - case 12: genetic_code = genetic_code12; break; - case 13: genetic_code = genetic_code13; break; - case 14: genetic_code = genetic_code14; break; - case 15: genetic_code = genetic_code15; break; - case 16: genetic_code = genetic_code16; break; - case 21: genetic_code = genetic_code21; break; - case 22: genetic_code = genetic_code22; break; - case 23: genetic_code = genetic_code23; break; - case 24: genetic_code = genetic_code24; break; - case 25: genetic_code = genetic_code25; break; - default: + auto code_map = getGeneticCodeMap(); + auto found = code_map.left.find(transl_table); + if (found == code_map.left.end()) { outError("Wrong genetic code ", gene_code_id); - break; + return; } + genetic_code = found->second; } else { genetic_code = genetic_code1; } @@ -1975,6 +1986,15 @@ void Alignment::initCodon(char *gene_code_id) { // cout << "num_states = " << num_states << endl; } +int Alignment::getGeneticCodeId() { + if (seq_type != SEQ_CODON || genetic_code == nullptr) return 0; + + auto code_map = getGeneticCodeMap(); + auto found = code_map.right.find(genetic_code); + if (found == code_map.right.end()) return 0; + return found->second; +} + int getMorphStates(StrVector &sequences) { char maxstate = 0; for (StrVector::iterator it = sequences.begin(); it != sequences.end(); it++) diff --git a/alignment/alignment.h b/alignment/alignment.h index cb86d39c..2dd4e2a7 100644 --- a/alignment/alignment.h +++ b/alignment/alignment.h @@ -1059,6 +1059,12 @@ class Alignment : public vector, public CharSet, public StateSpace { */ void extractMapleFile(const std::string& aln_name, const InputType& format); + /** + * Get the numerical id of the genetic code + * @return id the genetic code id, or 0 if not a codon type + */ + int getGeneticCodeId(); + protected: diff --git a/main/phyloanalysis.cpp b/main/phyloanalysis.cpp index 1ee7a05c..83fe563b 100644 --- a/main/phyloanalysis.cpp +++ b/main/phyloanalysis.cpp @@ -1330,6 +1330,10 @@ void printOutfilesInfo(Params ¶ms, IQTree &tree) { if (params.optimize_linked_gtr) { cout << " GTRPMIX nex file: " << params.out_prefix << ".GTRPMIX.nex" << endl; } + + if (params.mr_bayes_output) { + cout << " MrBayes block written to: " << params.out_prefix << ".mr_bayes.nex" << endl; + } cout << endl; } @@ -3307,6 +3311,91 @@ SuperNeighbor* findRootedNeighbour(SuperNeighbor* super_root, int part_id) { return nullptr; } +void printMrBayesBlockFile(Params ¶ms, IQTree* &iqtree) { + ofstream out; + string filename = string(params.out_prefix) + ".mr_bayes.nex"; + try { + out.exceptions(ios::failbit | ios::badbit); + out.open(filename); + + // Write Warning + out << "#nexus" << endl << endl + <<"[This MrBayes Block Declaration provides the basic " + << (iqtree->isSuperTree() ? "partition structure and models" : "models") + << " from the IQTree Run.]" << endl + << "[Note that MrBayes does not support a large collection of models, so defaults of 'nst=6' for DNA and 'gtr' for Protein will be used if a model that does not exist in MrBayes is used.]" << endl + << "[Furthermore, the Model Parameter '+R' will be replaced by '+G+I'.]" << endl + << "[This should be used as a Template Only.]" << endl << endl; + + // Begin File, Print Charsets + out << "begin mrbayes;" << endl; + } catch (ios::failure &) { + outError(ERR_WRITE_OUTPUT, filename); + } + + if (!iqtree->isSuperTree()) { + out << " [IQTree inferred model " << iqtree->getModelName() << ", "; + iqtree->getModel()->printMrBayesModelText(out, "all", ""); + + out << endl << "end;" << endl; + out.close(); + return; + } + + auto stree = (PhyloSuperTree*) iqtree; + auto saln = (SuperAlignment*) stree->aln; + auto size = stree->size(); + + for (int part = 0; part < size; part++) { + string name = saln->partitions[part]->name; + replace(name.begin(), name.end(), '+', '_'); + out << " charset " << name << " = "; + + string pos = saln->partitions[part]->position_spec; + replace(pos.begin(), pos.end(), ',' , ' '); + out << pos << ";" << endl; + } + + // Create Partition + out << endl << " partition iqtree = " << size << ": "; + for (int part = 0; part < size; part++) { + if (part != 0) out << ", "; + + string name = saln->partitions[part]->name; + replace(name.begin(), name.end(), '+', '_'); + out << name; + } + out << ";" << endl; + + // Set Partition for Use + out << " set partition = iqtree;" << endl << endl; + + // Partition-Specific Model Information + for (int part = 0; part < size; part++) { + PhyloTree* curr_tree = stree->at(part); + + out << " [Subset #" << part + 1 << ": IQTree inferred model " << curr_tree->getModelName() << ", "; + curr_tree->getModel()->printMrBayesModelText(out, + convertIntToString(part + 1), + saln->partitions[part]->name); + out << endl; + } + + // Partition Type Settings + if (params.partition_type != TOPO_UNLINKED) { + out << " unlink statefreq=(all) revmat=(all) shape=(all) pinvar=(all) tratio=(all);" << endl; + if (params.partition_type != BRLEN_FIX) { + out << " prset applyto=(all) ratepr=variable;" << endl; + if (params.partition_type != BRLEN_SCALE) { + out << " unlink brlens=(all);" << endl; + } + } + } + + out << "end;" << endl; + out.close(); +} + /************************************************************ * MAIN TREE RECONSTRUCTION ***********************************************************/ @@ -3824,8 +3913,14 @@ void runTreeReconstruction(Params ¶ms, IQTree* &iqtree) { } if (iqtree->isSuperTree()) { - ((PhyloSuperTree*) iqtree)->computeBranchLengths(); - ((PhyloSuperTree*) iqtree)->printBestPartitionParams((string(params.out_prefix) + ".best_model.nex").c_str()); + auto stree = (PhyloSuperTree*) iqtree; + stree->computeBranchLengths(); + stree->printBestPartitionParams((string(params.out_prefix) + ".best_model.nex").c_str()); + } + if (params.mr_bayes_output) { + cout << endl << "Writing MrBayes Block Files..." << endl; + printMrBayesBlockFile(params, iqtree); + cout << endl; } cout << "BEST SCORE FOUND : " << iqtree->getCurScore() << endl; diff --git a/main/phylotesting.h b/main/phylotesting.h index 8c0c99f1..1a25caaa 100644 --- a/main/phylotesting.h +++ b/main/phylotesting.h @@ -811,6 +811,13 @@ string convertSeqTypeToSeqTypeName(SeqType seq_type); string detectSeqTypeName(string model_name); +/** + * get string name from a SeqType object + * @param seq_type input sequence type + * @return name + */ +string getSeqTypeName(SeqType seq_type); + /****************************************************/ /* Q MATRICES NESTING CHECK */ /****************************************************/ diff --git a/model/modelbin.cpp b/model/modelbin.cpp index e8291e00..b394e6ab 100644 --- a/model/modelbin.cpp +++ b/model/modelbin.cpp @@ -76,3 +76,31 @@ string ModelBIN::getNameParams(bool show_fixed_params) { } return retname.str(); } + +void ModelBIN::printMrBayesModelText(ofstream& out, string partition, string charset) { + RateHeterogeneity* rate = phylo_tree->getRate(); + bool equal_freq = (freq_type == FREQ_EQUAL); + + // Free Rate should be substituted by +G (+I not supported) + bool has_gamma = rate->getGammaShape() != 0.0 || rate->isFreeRate(); + + // MrBayes's Binary Model is 'F81-like'. + out << "using MrBayes model F81" << (has_gamma ? "+G" : "") << (equal_freq ? "+FQ" : "") << "]" << endl; + if (rate->isFreeRate() || rate->getPInvar() > 0.0) { + out << " [+I modifier ignored, not supported by MrBayes for binary data]" << endl; + outWarning("MrBayes does not support Invariable Sites with Binary Data! +I has been ignored!"); + } + + // Lset Parameters + out << " lset applyto=(" << partition << ") rates="; + if (has_gamma) { + // Rate Categories + Gamma + out << "gamma"; + } else + out << "equal"; + + out << ";" << endl; + + if (equal_freq) + out << " prset applyto=(" << partition << ") statefreqpr=fixed(equal);" << endl; +} diff --git a/model/modelbin.h b/model/modelbin.h index 9c3ab6cf..a03d059e 100644 --- a/model/modelbin.h +++ b/model/modelbin.h @@ -55,6 +55,14 @@ class ModelBIN : public ModelMarkov */ virtual void startCheckpoint(); + /** + * Print the model information in a format that can be accepted by MrBayes, using lset and prset.
+ * By default, it simply prints a warning to the log and to the stream, stating that this model is not supported by MrBayes. + * @param out the ofstream to print the result to + * @param partition the partition to apply lset and prset to + * @param charset the current partition's charset. + */ + virtual void printMrBayesModelText(ofstream& out, string partition, string charset); }; #endif diff --git a/model/modelcodon.cpp b/model/modelcodon.cpp index f24a1889..d9858df7 100644 --- a/model/modelcodon.cpp +++ b/model/modelcodon.cpp @@ -1182,6 +1182,41 @@ double ModelCodon::optimizeParameters(double gradient_epsilon) { return score; } +void ModelCodon::printMrBayesModelText(ofstream& out, string partition, string charset) { + // NST should be 1 if fix_kappa is true (no ts/tv ratio), else it should be 2 + // GTR codon is not available in IQTREE + int nst = fix_kappa ? 1 : 2; + int code = phylo_tree->aln->getGeneticCodeId(); + string mr_bayes_code = getMrBayesGeneticCode(code); + bool code_not_supported = mr_bayes_code.empty(); + RateHeterogeneity* rate = phylo_tree->getRate(); + + string model_name = fix_kappa ? "JC" : "HKY"; + + // If this model is a Empirical / Semi-Empirical Model + if (num_params == 0 || name.find('_') != string::npos) { + nst = 6; + model_name = "GTR"; + } + + out << "using MrBayes model " << model_name << "]" << endl; + + if (rate->isFreeRate() || rate->getGammaShape() > 0.0 || rate->getPInvar() > 0.0) { + out << " [Rate modifiers (+I, +G, +R) ignored, not supported by MrBayes codon models]" << endl; + outWarning("MrBayes Codon Models do not support any Heterogenity Rate Modifiers! (+I, +G, +R) They have been ignored!"); + } + + if (code_not_supported) { + outWarning("MrBayes Does Not Support Codon Code " + convertIntToString(code) + "! Defaulting to Universal (Code 1)."); + mr_bayes_code = "universal"; + } + + out << " [IQTree genetic code " << code << ", using MrBayes genetic code " << mr_bayes_code << "]" << endl; + if (code_not_supported) + out << " [Genetic code " << code << " is not supported by MrBayes, defaulting to universal (code 1)]" << endl; + + out << " lset applyto=(" << partition << ") nucmodel=codon omegavar=equal nst=" << nst << " code=" << mr_bayes_code << ";" << endl; +} void ModelCodon::writeInfo(ostream &out) { if (name.find('_') == string::npos) diff --git a/model/modelcodon.h b/model/modelcodon.h index ec85c70d..921dcbce 100644 --- a/model/modelcodon.h +++ b/model/modelcodon.h @@ -222,6 +222,15 @@ class ModelCodon: public ModelMarkov { */ virtual bool getVariables(double *variables); + /** + * Print the model information in a format that can be accepted by MrBayes, using lset and prset.
+ * By default, it simply prints a warning to the log and to the stream, stating that this model is not supported by MrBayes. + * @param out the ofstream to print the result to + * @param partition the partition to apply lset and prset to + * @param charset the current partition's charset. + */ + virtual void printMrBayesModelText(ofstream& out, string partition, string charset); + }; #endif /* MODELCODON_H_ */ diff --git a/model/modeldna.cpp b/model/modeldna.cpp index c10317c2..3c44d002 100644 --- a/model/modeldna.cpp +++ b/model/modeldna.cpp @@ -564,3 +564,60 @@ void ModelDNA::setVariables(double *variables) { // j++; // } } + +void ModelDNA::printMrBayesModelText(ofstream& out, string partition, string charset) { + bool equal_freq = (freq_type == FREQ_EQUAL); + short nst = 6; + string model_name = "GTR"; + RateHeterogeneity* rate = phylo_tree->getRate(); + + // Find NST value (1 for JC/JC69/F81, 2 for K80/K2P/HKY/HKY85, 6 for SYM/GTR) + // NST is set to 6 by default (above), so check for JC/JC69/F81 and K80/K2P/HKY/HKY85 + // If it returns a valid dna code, we can extract information from there (needed for user-defined models) + if (!param_spec.empty()) { + // if all equal + if (param_spec.find_first_not_of(param_spec[0]) == string::npos) { + nst = 1; + model_name = "JC"; + // if A-G=C-T and everything else is equal to first part of dna code + } else if (param_spec[1] == param_spec[4] && + param_spec[0] == param_spec[2] && param_spec[0] == param_spec[3] && param_spec[0] == param_spec[5]) { + nst = 2; + model_name = "HKY"; + } + } else if (!name.empty()) { + // Check the name of the model + if (strcmp(name.c_str(), "JC") == 0 || strcmp(name.c_str(), "JC69") == 0 || strcmp(name.c_str(), "F81") == 0) { + nst = 1; + model_name = "JC"; + } else if (strcmp(name.c_str(), "K80") == 0 || strcmp(name.c_str(), "K2P") == 0 || strcmp(name.c_str(), "HKY") == 0 || strcmp(name.c_str(), "HKY85") == 0) { + nst = 2; + model_name = "HKY"; + } + } + + // RHAS Specification + // Free Rate should be substituted by +G+I + bool has_gamma = rate->getGammaShape() != 0.0 || rate->isFreeRate(); + bool has_invariable = rate->getPInvar() != 0.0 || rate->isFreeRate(); + string rate_str = "equal"; + if (has_gamma) { + if (has_invariable) { + rate_str = "invgamma"; + model_name.append("+G+I"); + } else { + rate_str = "gamma"; + model_name.append("+G"); + } + } else if (has_invariable) { + rate_str = "propinv"; + model_name.append("+I"); + } + + out << "using MrBayes model " << model_name << (equal_freq ? "+FQ" : "") << "]" << endl; + // NucModel = 4By4: DNA Nucleotides (4 Options, A, C, G, T) + out << " lset applyto=(" << partition << ") nucmodel=4by4 nst=" << nst << " rates=" << rate_str << ";" << endl; + + if (equal_freq) + out << " prset applyto=(" << partition << ") statefreqpr=fixed(equal);" << endl; +} diff --git a/model/modeldna.h b/model/modeldna.h index 6694edde..51659386 100644 --- a/model/modeldna.h +++ b/model/modeldna.h @@ -114,6 +114,15 @@ class ModelDNA : public ModelMarkov */ virtual void computeTipLikelihood(PML::StateType state, double *state_lk); + /** + * Print the model information in a format that can be accepted by MrBayes, using lset and prset.
+ * By default, it simply prints a warning to the log and to the stream, stating that this model is not supported by MrBayes. + * @param out the ofstream to print the result to + * @param partition the partition to apply lset and prset to + * @param charset the current partition's charset. + */ + virtual void printMrBayesModelText(ofstream& out, string partition, string charset); + protected: /** diff --git a/model/modelmorphology.cpp b/model/modelmorphology.cpp index 9ab0316d..0c68fa5c 100644 --- a/model/modelmorphology.cpp +++ b/model/modelmorphology.cpp @@ -173,3 +173,48 @@ void ModelMorphology::writeInfo(ostream &out) { out << endl; } } + +void ModelMorphology::printMrBayesModelText(ofstream& out, string partition, string charset){ + RateHeterogeneity* rate = phylo_tree->getRate(); + + // Free Rate should be substituted by +G (+I not supported) + bool has_gamma = rate->getGammaShape() != 0.0 || rate->isFreeRate(); + bool ordered = strcmp(name.c_str(), "ORDERED") == 0; + + // MrBayes' morph model is 'JC-like' + out << "using MrBayes model " << (ordered ? "ORDERED" : "JC") << (has_gamma ? "+G" : "") << "]" << endl; + + // Warnings + + // General Morph Data Warnings + out << " [Morphological data in MrBayes is only supported with states from {0-9}]" << endl; + out << " [Morphological data containing states {A-Z] may cause errors]" << endl; + outWarning("Use morphological data in MrBayes with caution; only states from {0-9} are supported!"); + + // Unsupported +I + if (rate->isFreeRate() || rate->getPInvar() > 0.0) { + out << " [+I modifier ignored, not supported by MrBayes for morphological data]" << endl; + outWarning("MrBayes does not support Invariable Sites with Morphological Data! +I has been ignored!"); + } + + // Unsupported Freq + if (freq_type != FREQ_EQUAL) { + out << " [Equal frequencies forced, not supported by MrBayes for morphological data]" << endl; + outWarning("MrBayes does not support non-equal frequencies for Morphological Data! Equal frequencies forced!"); + } + + // Lset Parameters + out << " lset applyto=(" << partition << ") rates="; + + if (has_gamma) { + // Rate Categories + Gamma + out << "gamma"; + } else + out << "equal"; + + out << ";" << endl; + + // ctype (ordered or not) + if (ordered) + out << " ctype ordered: " << charset << ";" << endl; +} diff --git a/model/modelmorphology.h b/model/modelmorphology.h index 44b2889d..7f05be10 100644 --- a/model/modelmorphology.h +++ b/model/modelmorphology.h @@ -83,6 +83,15 @@ class ModelMorphology: public ModelMarkov { virtual void readRates(istream &in) noexcept(false); virtual ~ModelMorphology(); + + /** + * Print the model information in a format that can be accepted by MrBayes, using lset and prset.
+ * By default, it simply prints a warning to the log and to the stream, stating that this model is not supported by MrBayes. + * @param out the ofstream to print the result to + * @param partition the partition to apply lset and prset to + * @param charset the current partition's charset. + */ + virtual void printMrBayesModelText(ofstream& out, string partition, string charset); }; #endif /* MODELMORPHOLOGY_H_ */ diff --git a/model/modelprotein.cpp b/model/modelprotein.cpp index 0b965fdd..4fc7b090 100644 --- a/model/modelprotein.cpp +++ b/model/modelprotein.cpp @@ -1164,3 +1164,53 @@ void ModelProtein::computeTipLikelihood(PML::StateType state, double *state_lk) state_lk[ambi_aa[cstate*2+1]] = 1.0; } +void ModelProtein::printMrBayesModelText(ofstream& out, string partition, string charset) { + RateHeterogeneity* rate = phylo_tree->getRate(); + bool equal_freq = (freq_type == FREQ_EQUAL); + + // Get MrBayes Model + auto aa_model_map = getIqTreeToMrBayesAAModels(); + auto iter = aa_model_map.find(name); + string mapped_model = "gtr"; + + // If model is in map, set mappedModel to the value + if (iter != aa_model_map.end()) + mapped_model = iter->second; + + out << "using MrBayes model " << mapped_model; + + if (equal_freq) + out << "+FQ"; + + // RHAS Specification + // Free Rate should be substituted by +G+I + bool has_gamma = rate->getGammaShape() != 0.0 || rate->isFreeRate(); + bool has_invariable = rate->getPInvar() != 0.0 || rate->isFreeRate(); + string rate_str = "equal"; + if (has_gamma) { + if (has_invariable) { + rate_str = "invgamma"; + out << "+G+I"; + } + else { + rate_str = "gamma"; + out << "+G"; + } + } else if (has_invariable) { + rate_str = "propinv"; + out << "+I"; + } + + out << "]" << endl; + + // Lset Parameters + out << " lset applyto=(" << partition << ") nucmodel=protein rates=" << rate_str << ";" << endl; + + out << " prset applyto=(" << partition << ")" << " aamodelpr=fixed(" << mapped_model << ")"; + + if (equal_freq) + out << " statefreqpr=fixed(equal)"; + + out << ";" << endl; +} + diff --git a/model/modelprotein.h b/model/modelprotein.h index e6680eb4..dc578257 100644 --- a/model/modelprotein.h +++ b/model/modelprotein.h @@ -79,6 +79,15 @@ class ModelProtein : public ModelMarkov */ virtual void computeTipLikelihood(PML::StateType state, double *state_lk); + /** + * Print the model information in a format that can be accepted by MrBayes, using lset and prset.
+ * By default, it simply prints a warning to the log and to the stream, stating that this model is not supported by MrBayes. + * @param out the ofstream to print the result to + * @param partition the partition to apply lset and prset to + * @param charset the current partition's charset. + */ + virtual void printMrBayesModelText(ofstream& out, string partition, string charset); + private: ModelsBlock *models_block; diff --git a/model/modelsubst.cpp b/model/modelsubst.cpp index be09a168..5e45d724 100644 --- a/model/modelsubst.cpp +++ b/model/modelsubst.cpp @@ -212,6 +212,14 @@ double *ModelSubst::newTransMatrix() { return new double[num_states * num_states]; } +void ModelSubst::printMrBayesModelText(ofstream& out, string partition, string charset) { + out << "using MrBayes model GTR+G+I]" << endl; + out << " [Model not supported by MrBayes, defaulting to GTR+G+I (DNA)]" << endl; + outWarning("MrBayes output is not supported by model " + name + ", defaulting to GTR+G+I (DNA)!"); + + out << " lset applyto=(" << partition << ") nucmodel=4by4 nst=" << 6 << " rates=" << "invgamma" << ";" << endl; +} + ModelSubst::~ModelSubst() { // mem space pointing to target model and thus avoid double free here diff --git a/model/modelsubst.h b/model/modelsubst.h index 662dc0a0..33d014b2 100644 --- a/model/modelsubst.h +++ b/model/modelsubst.h @@ -395,6 +395,15 @@ class ModelSubst: public Optimization, public CheckpointFactory */ virtual ModelSubst *getMutationModel() { return this; } + /** + * Print the model information in a format that can be accepted by MrBayes, using lset and prset.
+ * By default, it simply prints a warning to the log and to the stream, stating that this model is not supported by MrBayes. + * @param out the ofstream to print the result to + * @param partition the partition to apply lset and prset to + * @param charset the current partition's charset. + */ + virtual void printMrBayesModelText(ofstream& out, string partition, string charset); + /***************************************************** Checkpointing facility *****************************************************/ diff --git a/utils/tools.cpp b/utils/tools.cpp index 5458d42b..f9bb1313 100644 --- a/utils/tools.cpp +++ b/utils/tools.cpp @@ -1620,6 +1620,7 @@ void parseArg(int argc, char *argv[], Params ¶ms) { params.include_pre_mutations = false; params.mutation_file = ""; params.site_starting_index = 0; + params.mr_bayes_output = false; // ----------- SPRTA ---------- params.compute_SPRTA = false; @@ -5924,7 +5925,10 @@ void parseArg(int argc, char *argv[], Params ¶ms) { params.make_consistent = true; continue; } - + if (strcmp(argv[cnt], "-mrbayes") == 0) { + params.mr_bayes_output = true; + continue; + } if (argv[cnt][0] == '-') { string err = "Invalid \""; err += argv[cnt]; @@ -6065,6 +6069,10 @@ void parseArg(int argc, char *argv[], Params ¶ms) { if (params.model_name.find("LINK") != string::npos || params.model_name.find("MERGE") != string::npos) if (params.partition_merge == MERGE_NONE) params.partition_merge = MERGE_RCLUSTERF; + + // Set MrBayes Block Output if -mset mrbayes + if (params.model_set == "mrbayes") + params.mr_bayes_output = true; if (params.alisim_active && !params.aln_file && !params.user_file && !params.partition_file && params.tree_gen == NONE) outError("A tree filepath is a mandatory input to execute AliSim when neither Inference mode nor Random mode (generating a random tree) is inactive. Use -t ; or Activate the inference mode by -s ; or Activate Random mode by -t RANDOM{,} where is yh, u, cat, bal, bd{,} stands for Yule-Harding, Uniform, Caterpillar, Balanced, Birth-Death model respectively."); @@ -6278,7 +6286,7 @@ void usage_iqtree(char* argv[], bool full_command) { // << " -pll Use phylogenetic likelihood library (PLL) (default: off)" << endl << " --ninit NUM Number of initial parsimony trees (default: 100)" << endl << " --ntop NUM Number of top initial trees (default: 20)" << endl - << " --nbest NUM Number of best trees retained during search (defaut: 5)" << endl + << " --nbest NUM Number of best trees retained during search (default: 5)" << endl << " -n NUM Fix number of iterations to stop (default: OFF)" << endl << " --nstop NUM Number of unsuccessful iterations to stop (default: 100)" << endl << " --perturb NUM Perturbation strength for randomized NNI (default: 0.5)" << endl @@ -6338,6 +6346,8 @@ void usage_iqtree(char* argv[], bool full_command) { << " -m ...+LMSS Additionally test strand-symmetric models" << endl << " --mset STRING Restrict search to models supported by other programs" << endl << " (raxml, phyml, mrbayes, beast1 or beast2)" << endl + << " If 'mrbayes' is selected, will output a MrBayes" << endl + << " Block File if Data Type is supported." << endl << " --mset STR,... Comma-separated model list (e.g. -mset WAG,LG,JTT)" << endl << " --msub STRING Amino-acid model source" << endl << " (nuclear, mitochondrial, chloroplast or viral)" << endl @@ -6577,8 +6587,7 @@ void usage_iqtree(char* argv[], bool full_command) { // << " -d Calculate the distance matrix inferred from tree" << endl // << " -stats Output some statistics about branch lengths" << endl // << " -comp Compare tree with each in the input trees" << endl; - - + << " -mrbayes Outputs a Mr Bayes block file, to use as a template for future analysis" << endl << endl; if (full_command) { @@ -8768,3 +8777,39 @@ string getOutputNameWithExt(const InputType& format, const string& output_filepa return output_filepath + ".phy"; } } + +double minValueCheckMrBayes(double orig_value) { + if (orig_value < 0.01) { + outWarning("MrBayes does not support values < 0.01! Using 0.01 instead..."); + return 0.01; + } + return orig_value; +} + +const unordered_map iqtree_to_mr_bayes_aa_models = { + {"Poisson", "poisson"}, + {"JTT", "jones"}, + {"Dayhoff", "dayhoff"}, + {"mtREV", "mtrev"}, + {"mtMAM", "mtmam"}, + {"WAG", "wag"}, + {"rtREV", "rtrev"}, + {"cpREV", "cprev"}, + {"VT", "vt"}, + {"Blosum62", "blosum"}, + {"LG", "lg"}, +}; + +// Anything outside of index 10 (Code No. 11) is invalid, leave that as empty string +const string indexed_mr_bayes_genetic_codes[25] = {"universal", "vertmt", "yeast", "mycoplasma", "invermt", + "ciliate", "", "", "echinoderm", "euplotid", "universal"}; + +unordered_map getIqTreeToMrBayesAAModels() { + return iqtree_to_mr_bayes_aa_models; +} + +string getMrBayesGeneticCode(int geneticCodeId) { + if (geneticCodeId == 0) return ""; + + return indexed_mr_bayes_genetic_codes[geneticCodeId - 1]; +} diff --git a/utils/tools.h b/utils/tools.h index fc5fa687..8cf15dcb 100644 --- a/utils/tools.h +++ b/utils/tools.h @@ -2860,6 +2860,11 @@ class Params { * site starting index (for predefined mutations in AliSim) */ int site_starting_index; + + /** + * Whether to output a MrBayes Block File + */ + bool mr_bayes_output; /** * input tree string (instead of a file) @@ -3825,5 +3830,22 @@ double frob_norm (double m[], int n, double scale=1.0); */ string getOutputNameWithExt(const InputType& format, const string& output_filepath); +/** + * ensures a number, to be inputted into MrBayes, is larger than the minimum value for MrBayes (0.01) + */ +double minValueCheckMrBayes(double orig_value); + + +/** + * get a map of iqtree amino acid/protein substitution models to MrBayes amino acid/protein substitution models.
+ * models which are not supported by mrbayes are not included. GTR20 is assumed as default. + */ +unordered_map getIqTreeToMrBayesAAModels(); + +/** + * get the MrBayes equivalent of a genetic code, given the id of the code. Returns and empty string if that code + * is not supported in MrBayes. + */ +string getMrBayesGeneticCode(int geneticCodeId); #endif