From 67d62b3c2e44a2cac0d6c305bb2240b847d913fa Mon Sep 17 00:00:00 2001 From: StefanFlaumberg Date: Fri, 27 Jun 2025 20:06:24 +0300 Subject: [PATCH] Fix the checkAbsentStates functions --- alignment/alignment.cpp | 42 ++++++++++++++++++++---------------- alignment/alignment.h | 5 ++--- alignment/superalignment.cpp | 8 +++---- alignment/superalignment.h | 5 ++--- main/phyloanalysis.cpp | 18 ++-------------- 5 files changed, 32 insertions(+), 46 deletions(-) diff --git a/alignment/alignment.cpp b/alignment/alignment.cpp index d02ccdd9c..d42e4eee3 100644 --- a/alignment/alignment.cpp +++ b/alignment/alignment.cpp @@ -123,33 +123,37 @@ double chi2prob (int deg, double chi2) } /* chi2prob */ -int Alignment::checkAbsentStates(string msg) { - double *state_freq = new double[num_states]; - computeStateFreq(state_freq); - string absent_states, rare_states; - int count = 0; - // Skip check for PoMo. +void Alignment::checkAbsentStates(string msg) { + // skip checking for PoMo if (seq_type == SEQ_POMO) - return 0; - for (int i = 0; i < num_states; i++) - if (state_freq[i] == 0.0) { + return; + string absent_states, rare_states; + int absent_cnt = 0; + double *state_freqs = new double[num_states]; + computeStateFreq(state_freqs); + for (int x = 0; x < num_states; ++x) { + if (state_freqs[x] == 0.0) { if (!absent_states.empty()) absent_states += ", "; - absent_states += convertStateBackStr(i); - count++; - } else if (state_freq[i] <= Params::getInstance().min_state_freq) { + absent_states += convertStateBackStr(x); + absent_cnt++; + } else if (state_freqs[x] <= Params::getInstance().min_state_freq) { if (!rare_states.empty()) rare_states += ", "; - rare_states += convertStateBackStr(i); + rare_states += convertStateBackStr(x); } - if (count >= num_states-1 && Params::getInstance().fixed_branch_length != BRLEN_FIX) - outError("Only one state is observed in " + msg); + } + delete [] state_freqs; + if (absent_cnt == num_states) + outError("Only gaps observed in " + msg); + if (absent_cnt == num_states - 1) + outWarning("Only one state observed in " + msg); + if (absent_cnt > 0) + outWarning(convertIntToString(absent_cnt) + " states (see below) not observed in " + msg); if (!absent_states.empty()) - cout << "NOTE: State(s) " << absent_states << " not present in " << msg << " and thus removed from Markov process to prevent numerical problems" << endl; + outWarning("State(s) " + absent_states + " not present in " + msg + " and may cause numerical problems"); if (!rare_states.empty()) - cout << "WARNING: States(s) " << rare_states << " rarely appear in " << msg << " and may cause numerical problems" << endl; - delete[] state_freq; - return count; + outWarning("State(s) " + rare_states + " rarely appear in " + msg + " and may cause numerical problems"); } void Alignment::checkSeqName() { diff --git a/alignment/alignment.h b/alignment/alignment.h index 520bb9e9c..d18fe463e 100644 --- a/alignment/alignment.h +++ b/alignment/alignment.h @@ -484,11 +484,10 @@ class Alignment : public vector, public CharSet, public StateSpace { int getMaxSeqNameLength(); /* - check if some state is absent, which may cause numerical issues + check if some states are absent, which may cause numerical issues @param msg additional message into the warning - @return number of absent states in the alignment */ - virtual int checkAbsentStates(string msg); + virtual void checkAbsentStates(string msg); /** check proper and undupplicated sequence names diff --git a/alignment/superalignment.cpp b/alignment/superalignment.cpp index fe827d2b9..d0781f05a 100644 --- a/alignment/superalignment.cpp +++ b/alignment/superalignment.cpp @@ -1148,11 +1148,9 @@ Alignment *SuperAlignment::removeIdenticalSeq(string not_remove, bool keep_two, return aln; } -int SuperAlignment::checkAbsentStates(string msg) { - int count = 0; - for (auto it = partitions.begin(); it != partitions.end(); ++it) - count += (*it)->checkAbsentStates("partition " + convertIntToString((it-partitions.begin())+1)); - return count; +void SuperAlignment::checkAbsentStates(string msg) { + for (vector::iterator it = partitions.begin(); it != partitions.end(); ++it) + (*it)->checkAbsentStates("partition " + convertIntToString((it-partitions.begin())+1)); } /* diff --git a/alignment/superalignment.h b/alignment/superalignment.h index 4b607cd06..0aea608ba 100644 --- a/alignment/superalignment.h +++ b/alignment/superalignment.h @@ -171,11 +171,10 @@ class SuperAlignment : public Alignment /* - check if some state is absent, which may cause numerical issues + check if some states are absent, which may cause numerical issues @param msg additional message into the warning - @return number of absent states in the alignment */ - virtual int checkAbsentStates(string msg); + virtual void checkAbsentStates(string msg); /** Quit if some sequences contain only gaps or missing data diff --git a/main/phyloanalysis.cpp b/main/phyloanalysis.cpp index 91083d14c..49000f41e 100644 --- a/main/phyloanalysis.cpp +++ b/main/phyloanalysis.cpp @@ -3403,22 +3403,8 @@ bool isTreeMixture(Params& params) { void runTreeReconstruction(Params ¶ms, IQTree* &iqtree) { - // string dist_file; - // params.startCPUTime = getCPUTime(); - // params.start_real_time = getRealTime(); - - int absent_states = 0; - if (iqtree->isSuperTree()) { - PhyloSuperTree *stree = (PhyloSuperTree*)iqtree; - for (auto i = stree->begin(); i != stree->end(); i++) - absent_states += (*i)->aln->checkAbsentStates("partition " + (*i)->aln->name); - } else { - absent_states = iqtree->aln->checkAbsentStates("alignment"); - } - if (absent_states > 0) { - cout << "NOTE: " << absent_states << " states (see above) are not present and thus removed from Markov process to prevent numerical problems" << endl; - } - + iqtree->aln->checkAbsentStates("alignment"); + // Make sure that no partial likelihood of IQ-TREE is initialized when PLL is used to save memory if (params.pll) { iqtree->deleteAllPartialLh();