Permalink
Fetching contributors…
Cannot retrieve contributors at this time
executable file 20956 lines (19944 sloc) 801 KB
#include <strstream>
#include <fstream.h>
#include <iomanip.h>
#include <unistd.h>
#include <stdio.h>
#include <set>
#include "nexusdefs.h"
#include "xnexus.h"
#include "nexustoken.h"
#include "nexus.h"
#include "taxablock.h"
#include "assumptionsblock.h"
#include "treesblock.h"
#include "discretedatum.h"
#include "discretematrix.h"
#include "charactersblock.h"
#include "charactersblock2.h"
#include "gport.h"
#include "profile.h"
#include "nodeiterator.h"
#include "setreader.h"
#include "treeorder.h"
#include "treedrawer.h"
#include "ntree.h"
#include "stree.h"
#include "containingtree.h"
#include "quartet.h"
#include "version.h"
#include <gsl/gsl_sf_gamma.h>
#include "TreeLib.h"
#include "gtree.h"
#include "treereader.h"
#include "treewriter.h"
#include <time.h>
#include <map>
#include <limits>
#include "brownie.h"
#include <gsl/gsl_math.h>
#include <gsl/gsl_rng.h>
#include <gsl/gsl_block.h>
#include <gsl/gsl_vector.h>
#include <gsl/gsl_matrix.h>
#include <gsl/gsl_rng.h>
#include <gsl/gsl_linalg.h>
#include <gsl/gsl_randist.h>
#include <gsl/gsl_blas.h>
#include <gsl/gsl_cdf.h>
#include <gsl/gsl_min.h>
#include <gsl/gsl_multimin.h>
#include <gsl/gsl_permutation.h>
#include <gsl/gsl_combination.h>
#include <gsl/gsl_statistics.h>
#include <gsl/gsl_statistics_double.h>
#include <gsl/gsl_statistics_int.h>
#include <gsl/gsl_sort.h>
#include <gsl/gsl_sort_vector.h>
#include <gsl/gsl_sf_gamma.h>
#include <gsl/gsl_eigen.h>
#include <gsl/gsl_complex_math.h>
#include <gsl/gsl_complex.h>
#include <gsl/gsl_version.h>
#include <gsl/gsl_sf_exp.h>
#include <gsl/gsl_errno.h>
#include <gsl/gsl_spline.h>
#include "optimizationfn.h"
#include "cdfvectorholder.h"
#include <sstream>
#include <iostream>
#include "superdouble.h"
//took out this section since GTP is built in
//extern "C" {
//#include "gtp.c"
// static long gtp(node n);
// static long strongDup(node n);
// static long numStrongDup(node n);
// static void initgTree(node n, Hash h);
// static void makeTipNameHashHelper(node n);
// Hash makeTipNameHash(node root);
// static void initAncArray(node n);
// double ReturnScore(char *buffer, int unrooted);
// void printUsage(void);
//}
using std::string;
using std::ostringstream;
gsl_rng * r; /* global generator */
/**
* Brownie
* Copyright Brian O'Meara, 2006
* http://wwww.brianomeara.info/brownie
* Released under the GNU Public License
* There is no warranty. However, please let
* me know about any bugs, and I'll try to
* squash them.
*/
/**
* This file is a heavily modified version of the
* basicccmdline.cpp file that comes with the
* Nexus class library.
* Brian O'Meara. February 2006.
*/
/**
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @class BROWNIE
* @file brownie.h
* @file brownie.cpp
* @variable command_maxlen [static int] maximum length of a command entered
*/
/**
* @constructor
*
* Initializes the id data member to "BROWNIE" and calls the FactoryDefaults
* member function to perform the remaining initializations.
*/
BROWNIE::BROWNIE()
{
id = "BROWNIE";
FactoryDefaults();
}
/**
* @destructor
*
* Closes logf or echof if it is open.
*/
BROWNIE::~BROWNIE()
{
if( logf_open )
logf.close();
if (echof_open) {
echof<<"end;";
echof.close();
}
}
/**
* @method CharLabelToNumber [int:protected]
* @param s [nxsstring] the character label to be translated to character number
*
* The code here is identical to the base class version (simply returns 0),
* so the code here should either be modified or this derived version
* eliminated altogether. Under what circumstances would you need to
* modify the default code, you ask? This function should be modified
* to something meaningful if this derived class needs to construct and
* run a SetReader object to read a set involving characters. The SetReader
* object may need to use this function to look up a character label
* encountered in the set. A class that overrides this method should
* return the character index in the range [1..nchar]; i.e., add one to the
* 0-offset index.
*/
int BROWNIE::CharLabelToNumber( nxsstring /*s*/ )
{
return 0;
}
/**
* @method EnteringBlock [virtual void:public]
* @param blockName [nxsstring] the name of the block just entered
*
* Called by the Nexus object when a block named blockName is entered.
* Allows program to notify user of progress in parsing the NEXUS file.
* Virtual function that overrides the pure virtual function in the
* base class Nexus.
*/
void BROWNIE::EnteringBlock( nxsstring blockName )
{
message = "Reading ";
message += blockName;
message += " block...";
PrintMessage();
}
/**
* @method ExitingBlock [virtual void:public]
* @param blockName [nxsstring] the name of the block just exited
*
* Called by the Nexus object when exiting a block named blockName.
* Allows program to notify user of progress in parsing the NEXUS file.
* Virtual function that overrides the pure virtual function in the
* base class Nexus.
*/
void BROWNIE::ExitingBlock( nxsstring /*blockName*/ )
{
}
/**
* @method FactoryDefaults [void:protected]
*
* Sets all data members to their factory default settings:
* <table>
* <tr><th align="left">Variable <th> <th align="left"> Initial Value
* <tr><td> inf_open <td>= <td> false
* <tr><td> logf_open <td>= <td> false
* <tr><td> quit_now <td>= <td> false
* <tr><td> message <td>= <td> ""
* <tr><td> next_command[0] <td>= <td> '\0'
* <tr><td> trees <td>= <td> NULL
* <tr><td> taxa <td>= <td> NULL
* <tr><td> assumptions <td>= <td> NULL
* <tr><td> characters <td>= <td> NULL
* </table>
*/
void BROWNIE::FactoryDefaults()
{
inf_open = false;
logf_open = false;
echof_open=false;
quit_now = false;
quit_onerr = false;
message = "";
next_command[0] = '\0';
gslseedtoprint=time(NULL);
gslseed=gslseedtoprint;
gsl_rng_set(r,gslseed);
trees = NULL;
taxa = NULL;
assumptions = NULL;
characters = NULL;
discretecharacters = NULL;
discretecharloaded = false;
chosenchar=1;
discretechosenchar=0; //starts at index=0;
ratematfixedvector.push_back(0);
ratematassignvector.push_back(0);
freerateletterstring="";
negbounceparam=-1;
nonnegvariables=true;
gsl_vector *userstatefreqvector=gsl_vector_calloc(1);
gsl_matrix *optimaldiscretecharQmatrix=gsl_matrix_calloc(1,1);
gsl_vector *optimaldiscretecharstatefreq=gsl_vector_calloc(1);
gsl_matrix *currentdiscretecharQmatrix=gsl_matrix_calloc(1,1);
gsl_vector *currentdiscretecharstatefreq=gsl_vector_calloc(1);
discretechosenmodel=1;
bestdiscretelikelihood=GSL_POSINF;
optimizationalgorithm=1;
discretechosenstatefreqmodel=3;
geneevolutionchosenstatefreqmodel=4;
geneEvolutionSamplingType=0;
geneEvolutionChosenModel=1;
allchar=false;
globalstates=false;
variablecharonly=false;
numbercharstates=0;
localnumbercharstates=0;
numberoffreeparameters=0;
numberoffreerates=0;
numberoffreefreqs=0;
chosentree=1;
tipvariancetype=0;
progressbartotal=0;
progressbarcount=0;
progressbarprinted=0;
debugmode=false;
maxiterations=1000;
stoppingprecision=1e-7;
confidenceLnLdistance=2.0;
confidenceprecision=1e-5;
stepswithoutprintinglimit=1000;
randomstarts=15;
treefilename="besttrees.tre";
useCOAL=false;
useMS=false;
msbasereps=100000;
contourBrlenToExport=2;
contourMaxRecursions=20;
contourstartingnumbersteps=15;
contourstartingwidth=10;
exportalltrees=true;
COALaicmode=4;
markedmultiplier=5.0;
brlensigma=1.0;
numbrlenadjustments=20;
badgtpcount=0;
stepsize=.1;
npercent=0.95;
detailedoutput=false;
redobad=false;
giveupfactor=100;
outputallatonce=true;
chosenmodel=1;
gsl_vector *optimalvaluescontinuouschar = gsl_vector_calloc(1);
gsl_matrix *optimalVCV = gsl_matrix_calloc(1,1);
//optimalvalueslabels.clear();
gsl_vector *optimalTraitMeans = gsl_vector_calloc(1);
globalchosentaxset="ALL";
citationarray[0]=true;
maxnumspecies=100;
minnumspecies=1;
minsamplesperspecies=3;
maxstartstops=10;
rearrlimit=-1;
steepest=false;
exhaustive=false;
status=true;
jackknifesearch=false;
nreps=5;
jreps=100;
pctdelete=1.0/3.0;
jackrep=0;
showtries=false;
structwt=0.5;
triplettoohigh=false;
gtptoohigh=false;
infinitescore=false;
tripletdistthreshold=0.2; //Sets how often to use NJ tree distances for starting assignments (higher number=more often) and how often to use triplet support
pthreshold=1;
chosensubsampling=2.0;
movefreqvector.push_back(0.80); //initial values of the movefreqvector; set up so first try doing swaps and rerootings, then reassignments
movefreqvector.push_back(0.01);
movefreqvector.push_back(0.01);
movefreqvector.push_back(0.01);
movefreqvector.push_back(0.17);
movefreqvector.push_back(0.0); //do no brlen optimization
sppnumfixed=false;
unrooted=0; //by default, use rooted gene trees
bestscore=GSL_POSINF;
bestscorelocal=GSL_POSINF;
for (int arrayloc=1;arrayloc<20;arrayloc++) {
citationarray[arrayloc]=false;
}
for (int state=0;state<=(maxModelCategoryStates-1);state++) {
staterestrictionvector.push_back(state);
timeslicetimes.push_back(GSL_POSINF);
timeslicemodels.push_back(0);
}
//timeslicetimes[maxModelCategoryStates]=GSL_POSINF; //Use the BMC and BMS code; the size of this vector is the maximum number of time intervals minus 1, and the max num of time intervals is the max number of models
//timeslicemodels[maxModelCategoryStates]=0; //Since we use BMC and BMS code, the size of this vector is the maximum number of model categories
CDFvectorholder bob;
CDFvector=bob.Initialize();
}
/**
* @method FileExists [bool:protected]
* @param fn [const char*] the name of the file to check
*
* Returns true if file named fn already exists,
* false otherwise.
*/
bool BROWNIE::FileExists( const char* fn )
{
bool exists = false;
// if( access( fn, 0 ) == 0 )
// exists = true;
FILE* fp = fopen( fn, "r" );
if( fp != NULL ) {
fclose(fp);
exists = true;
}
return exists;
}
/**
* @method GetFileName [nxsstring:protected]
* @param token [NexusToken&] the token used to read from in
* @throws XNexus
*
* Called whenever a file name needs to be read from either
* the command line or a file. Expects next token to be "="
* followed by the token representing the file name. Call
* this function after, say, the keyword "file" has been
* read in the following LOG command:
* <pre>
* log file=doofus.txt start replace;
* </pre>
* Note that this function will read only "=doofus.txt "
* leaving "start replace;" in the stream for reading
* at a later time.
*/
nxsstring BROWNIE::GetFileName( NexusToken& token )
{
// Eat the equals sign
//
token.GetNextToken();
if( !token.Equals("=") ) {
errormsg = "Expecting an equals sign, but found ";
errormsg += token.GetToken();
errormsg += " instead";
throw XNexus( errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn() );
}
// Now get the filename itself
//
token.GetNextToken();
return token.GetToken();
}
gsl_rng * BROWNIE::ReturnR()
{
return r;
}
void BROWNIE::ProgressBar(int total)
{
if (total>0) { //so to start it, give a value of total >0 (# reps); after that, feed it ProgressBar(0);
progressbartotal=total;
cout<<"\nProgress:\n0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%\n|"<<flush;
//cout<<"|....|....|....|....|....|....|....|....|....|....|"
}
else {
progressbarcount++;
double sampleratio=(1.0*progressbarcount)/(1.0*progressbartotal); // convert to floating point division
double printratio=progressbarprinted/80.0;
//cout<<sampleratio<<" "<<printratio<<endl;
while (sampleratio>printratio) {
cout<<"*"<<flush;
progressbarprinted++;
printratio=(1.0*progressbarprinted)/80.0;
}
if (progressbarcount==progressbartotal) { //stop it, reinitialize
cout<<"|\n\n"<<flush;
progressbarcount=0;
progressbartotal=0;
progressbarprinted=0;
}
}
}
/**
* @method GetNumber [nxsstring:protected]
* @param token [NexusToken&] the token used to read from in
* @throws XNexus
*
* Called whenever a number needs to be read from either
* the command line or a file. Expects next token to be "="
* followed by the token representing the number. Call
* this function after, say, the keyword "tree" has been
* read in the following LOG command:
* <pre>
* choose tree=5 start replace;
* </pre>
* Note that this function will read only "=5 "
* leaving "start replace;" in the stream for reading
* at a later time.
*/
nxsstring BROWNIE::GetNumber( NexusToken& token )
{
// Eat the equals sign
//
token.GetNextToken();
if( !token.Equals("=") ) {
errormsg = "Expecting an equals sign, but found ";
errormsg += token.GetToken();
errormsg += " instead";
throw XNexus( errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn() );
}
// Now get the number itself
//
token.GetNextToken();
return token.GetToken();
}
/**
* @method GetNumberOnly [nxsstring:protected]
* @param token [NexusToken&] the token used to read from in
* @throws XNexus
*
* Called whenever a number needs to be read from either
* the command line or a file. Does not expext an equals
*/
nxsstring BROWNIE::GetNumberOnly( NexusToken& token )
{
token.GetNextToken();
return token.GetToken();
}
/**
* @method HandleEndblock [void:protected]
* @param token [NexusToken&] the token used to read from in
* @throws XNexus
*
* Called when the END or ENDBLOCK command needs to be parsed
* from within the BROWNIE block. Basically just checks to make
* sure the next token in the data file is a semicolon.
*/
void BROWNIE::HandleEndblock( NexusToken& token )
{
// get the semicolon following END or ENDBLOCK token
//
token.GetNextToken();
if( !token.Equals(";") ) {
errormsg = "Expecting ';' to terminate the END or ENDBLOCK command, but found ";
errormsg += token.GetToken();
errormsg += " instead";
throw XNexus( errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn() );
}
}
/**
* @method HandleExecuteCmdLine
* used to execute a datafile from the command line when starting the program
*/
void BROWNIE::HandleExecuteCmdLine(nxsstring fn)
{
if( FileExists( fn.c_str() ) )
{
cerr << endl;
cerr << "Opening " << fn << "..." << endl;
ifstream inf( fn.c_str(), ios::binary | ios::in );
inf_open = true;
NexusToken ftoken(inf);
inf_open = true;
ifstream intreefile;
intreefile.open(fn.c_str(),ios::in);
if (!intrees.ReadTrees(intreefile))
{
message="No trees read from file\n";
PrintMessage();
}
intreefile.close();
try {
Execute( ftoken );
}
catch( XNexus x )
{
NexusError( errormsg, x.pos, x.line, x.col );
Reset();
}
assumptions->MakeTaxsetAll();
// if( !taxa->IsEmpty() ) {
// cerr << " TAXA block found" << endl;
// if( logf_open )
// taxa->Report(logf);
// }
if( inf_open )
inf.close();
inf_open = false;
// if( !trees->IsEmpty() ) {
// cerr << " TREES block found" << endl;
// if( logf_open )
// trees->Report(logf);
//need to call this here in case not already set by reading Brownie block
if(!characters->IsEmpty() ) {
if (characters->GetDataType()==6) {
continuouscharacters=characters;
//cout<<"Found continuous characters\n";
}
else {
discretecharacters=characters;
numbercharstates=discretecharacters->GetMaxObsNumStates();
localnumbercharstates=numbercharstates;
discretecharloaded=true;
//cout<<"Found discrete characters\n";
}
}
if(!characters2->IsEmpty() ) {
if (characters2->GetDataType()==6) {
continuouscharacters=characters2;
//cout<<"Found continuous characters\n";
}
else {
discretecharacters=characters2;
numbercharstates=discretecharacters->GetMaxObsNumStates();
localnumbercharstates=numbercharstates;
discretecharloaded=true;
//cout<<"Found discrete characters\n";
}
}
// }
// cerr << " ASSUMPTIONS block found" << endl;
// if( logf_open )
// assumptions->Report(logf);
// }
// if( !characters->IsEmpty() ) {
// cerr << " CHARACTERS block found" << endl;
// if( logf_open )
// characters->Report(logf);
// }
}
else
{
cerr << endl;
cerr << "Oops! Could not find specified file: " << fn << endl;
}
}
/**
* @method HandleExecute [void:protected]
* @param token [NexusToken&] the token used to read from in
* @throws XNexus
*
* Handles everything after the EXecute keyword and the terminating
* semicolon. Flushes all blocks before executing file specified,
* and no warning is given of this
*/
void BROWNIE::HandleExecute( NexusToken& token )
{
// Issuing the EXECUTE command from within a file is a no-no
//
if( inf_open ) {
errormsg = "Cannot issue execute command from within a BROWNIE block";
throw XNexus( errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn() );
}
// Get the file name to execute
//
token.GetNextToken();
if( token.Equals("?") ) {
message="Usage: Exe <file-name>\n\n";
message+="Executes nexus-formatted input file.\nThis file may contain continuous character data, trees, and Brownie blocks.\nThe file name, if put in single quotes, may contain the path to the file.\n\n";
PrintMessage();
token.GetNextToken(); //to remove the remaining token
}
else {
nxsstring fn = token.GetToken();
// get the semicolon terminating the EXECUTE command
//
token.GetNextToken();
if( !token.Equals(";") ) {
errormsg = "Expecting ';' to terminate the EXECUTE command, but found ";
errormsg += token.GetToken();
errormsg += " instead";
throw XNexus( errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn() );
}
// Before going through with this, make sure we're not going to overwrite
// any stored blocks
bool stuff_stored = !taxa->IsEmpty();
stuff_stored = ( stuff_stored || !trees->IsEmpty() );
stuff_stored = ( stuff_stored || !assumptions->IsEmpty() );
stuff_stored = ( stuff_stored || !characters->IsEmpty() );
if( stuff_stored && UserSaysOk( "Ok to delete?", "Data has already been read and stored" ) )
PurgeBlocks();
else if( stuff_stored ) {
message = "\nExecute command aborted.";
PrintMessage();
return;
}
if( FileExists( fn.c_str() ) )
{
cerr << endl;
cerr << "Opening " << fn << "..." << endl;
ifstream inf( fn.c_str(), ios::binary | ios::in );
inf_open = true;
ifstream intreefile;
intreefile.open(fn.c_str(),ios::in);
if (!intrees.ReadTrees(intreefile))
{
message="No trees read from file\n";
PrintMessage();
}
intreefile.close();
NexusToken ftoken(inf);
try {
Execute( ftoken );
}
catch( XNexus x )
{
NexusError( errormsg, x.pos, x.line, x.col );
Reset();
}
//need to call this here in case not already set by reading Brownie block
assumptions->MakeTaxsetAll();
if(!characters->IsEmpty() ) {
if (characters->GetDataType()==6) {
continuouscharacters=characters;
//cout<<"Found continuous characters\n";
}
else {
discretecharacters=characters;
numbercharstates=discretecharacters->GetMaxObsNumStates();
localnumbercharstates=numbercharstates;
discretecharloaded=true;
//cout<<"Found discrete characters\n";
}
}
if(!characters2->IsEmpty() ) {
if (characters2->GetDataType()==6) {
continuouscharacters=characters2;
//cout<<"Found continuous characters\n";
}
else {
discretecharacters=characters2;
numbercharstates=discretecharacters->GetMaxObsNumStates();
localnumbercharstates=numbercharstates;
discretecharloaded=true;
//cout<<"Found discrete characters\n";
}
}
// if( !taxa->IsEmpty() ) {
// cerr << " TAXA block found" << endl;
// if( logf_open )
// taxa->Report(logf);
// }
if( inf_open )
inf.close();
inf_open = false;
// if( !trees->IsEmpty() ) {
// cerr << " TREES block found" << endl;
// if( logf_open )
// trees->Report(logf);
// }
// if( !assumptions->IsEmpty() ) {
// cerr << " ASSUMPTIONS block found" << endl;
// if( logf_open )
// assumptions->Report(logf);
// }
// if( !characters->IsEmpty() ) {
// cerr << " CHARACTERS block found" << endl;
// if( logf_open )
// characters->Report(logf);
// }
}
else
{
cerr << endl;
cerr << "Oops! Could not find specified file: " << fn << endl;
}
}
}
/**
* @method HandleHelp [void:protected]
* @param token [NexusToken&] the token used to read from in
* @throws XNexus
*
* Called when the HELP command needs to be parsed
* from within the BROWNIE block.
*/
void BROWNIE::HandleHelp( NexusToken& token )
{
// Retrieve all tokens for this command, stopping only in the event
// of a semicolon or an unrecognized keyword
//
for(;;)
{
token.GetNextToken();
if( token.Equals(";") ) {
break;
}
else {
errormsg = "Unexpected keyword (";
errormsg += token.GetToken();
errormsg += ") encountered reading HELP command.\n\nTry typing \"";
errormsg +=token.GetToken();
errormsg +=" ?\" instead [without the quotes] if you want help for the command.";
throw XNexus( errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn() );
}
}
message = "\nGeneral commands:";
message += "\n help -> shows this message";
message += "\n exe -> executes nexus file";
message += "\n log -> log output";
message += "\n echo -> copies your commands into a batch file";
//message += "\n gettree -> loads tree file";
message += "\n blocks -> reports on blocks currently stored";
message += "\n showtree -> displays currently loaded tree(s)";
message += "\n choose -> chooses tree or char for analysis";
message += "\n taxset -> stores a taxset";
message += "\n citation -> outputs list of relevant papers for your analyses";
message += "\n tipvalues -> return list of tip values";
message += "\n quit -> terminates application";
message += "\n\nCharacter evolution:";
message += "\n ratetest -> does censored rate test (original Brownie function)";
message += "\n vcv -> outputs a variance-covariance matrix";
message += "\n (discrete) -> implements discrete character models and reconstructions";
message += "\n [tipvariance] -> allows program to deal with variance in taxon means";
message += "\n model -> sets model of continuous character evolution (OU, BM, etc)";
message += "\n (continuous) -> gets score for chosen taxset for chosen model";
message += "\n [export] -> exports a tree and data in deprecated Pagel format";
message += "\n (simulate) -> simulate discrete or continuous character matrices";
message += "\n loss -> estimate rates of binary character loss on branches";
message += "\n\nSpecies delimitation and tree search:";
message += "\n hs -> perform a heuristic search";
//message += "\n [jackknife] -> perform a jackknife search";
//message += "\n [exhaustive] -> perform an exhaustive search";
message += "\n compare -> compare triplet overlap for coalescent trees";
message += "\n assign -> assign samples to species";
message += "\n accuracy -> compute accuracy of reconstruction";
message += "\n\nNumerical optimization settings:";
message += "\n set -> sets options";
message += "\n numopt -> sets parameters for numerical optimization functions";
message += "\n\nMiscellaneous:";
message += "\n orderbytree -> reorders a datamatrix by order of taxa in a tree";
message += "\n printedgelength -> prints branch lengths";
message += "\n (partitionededge)-> outputs all trees one NNI move away for NNIBS analysis";
message += "\n\nIn development:";
message += "\n [nast]";
message += "\n [timeslice]";
message += "\n [speciationtransform]";
message += "\n [debug]";
message += "\n [Garland]";
message += "\n\nType \"commandname ?\" [without the quotes]\nfor help on any command.\n\n*** IMPORTANT ***\nCommands in brackets (\"[]\") should not be used for published results yet\nCommands in parentheses (\"()\") should be used after checking with Brian O'Meara (omeara.brian@gmail.com) -- they may reflect things in review which might change pending reviewers' comments, for example";
PrintMessage();
}
/**
* @method HandleBlocks [void:protected]
* @param token [NexusToken&] the token used to read from in
* @throws XNexus
*
* Called when the Blocks command needs to be parsed
* from within the BROWNIE block.
*/
void BROWNIE::HandleBlocks( NexusToken& token )
{
bool finishexecuting=true;
// Retrieve all tokens for this command, stopping only in the event
// of a semicolon or an unrecognized keyword
//
for(;;)
{
token.GetNextToken();
if( token.Equals(";") ) {
break;
}
else if ( token.Equals("?") ) {
message="Usage: Blocks\n\n";
message+="Reports status of loaded nexus blocks\n";
PrintMessage();
finishexecuting=false;
}
else {
errormsg = "Unexpected keyword (";
errormsg += token.GetToken();
errormsg += ") encountered reading Blocks command";
throw XNexus( errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn() );
}
}
if (finishexecuting) {
message = "\nNexus blocks currently stored:";
PrintMessage();
if( !taxa->IsEmpty() ) {
cerr << "\n TAXA block found" << endl;
taxa->Report(cerr);
if( logf_open )
taxa->Report(logf);
}
if( !trees->IsEmpty() ) {
cerr << "\n TREES block found" << endl;
trees->Report(cerr);
if( logf_open )
trees->Report(logf);
}
if( !assumptions->IsEmpty() ) {
cerr << "\n ASSUMPTIONS block found" << endl;
assumptions->Report(cerr);
if( logf_open )
assumptions->Report(logf);
}
if( !characters->IsEmpty() ) {
cerr << "\n CHARACTERS block found" << endl;
characters->Report(cerr);
if( logf_open )
characters->Report(logf);
}
if( !characters2->IsEmpty() ) {
cerr << "\n CHARACTERS2 block found" << endl;
characters2->Report(cerr);
if( logf_open )
characters2->Report(logf);
}
}
}
/**
* @method HandleDebug [void:protected]
* @param token [NexusToken&] the token used to read from in
* @throws XNexus
*
*/
void BROWNIE::HandleDebug( NexusToken& token )
{
for(;;)
{
token.GetNextToken();
if( token.Equals(";") ) {
debugmode=true;
break;
}
else {
errormsg = "Unexpected keyword (";
errormsg += token.GetToken();
errormsg += ") encountered reading Debug command";
throw XNexus( errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn() );
}
}
message="Now entering debug mode\n";
PrintMessage();
}
/**
* @method HandleNoQuitOnErr [void:protected]
* @param token [NexusToken&] the token used to read from in
* @throws XNexus
*
*/
void BROWNIE::HandleNoQuitOnErr( NexusToken& token )
{
for(;;)
{
token.GetNextToken();
if( token.Equals(";") ) {
quit_onerr=false;
break;
}
else {
errormsg = "Unexpected keyword (";
errormsg += token.GetToken();
errormsg += ") encountered reading NoQuitOnErr command";
throw XNexus( errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn() );
}
}
message="Will not quit on error\n";
PrintMessage();
}
/**
* @method HandleNoQuitOnErr [void:protected]
* @param token [NexusToken&] the token used to read from in
* @throws XNexus
*
*/
void BROWNIE::HandleQuitOnErr( NexusToken& token )
{
for(;;)
{
token.GetNextToken();
if( token.Equals(";") ) {
quit_onerr=true;
break;
}
else {
errormsg = "Unexpected keyword (";
errormsg += token.GetToken();
errormsg += ") encountered reading QuitOnErr command";
throw XNexus( errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn() );
}
}
message="Will instantly quit on error\n";
PrintMessage();
}
double BROWNIE::AIC(double neglnL, int K) {
double AIC = 2.0 * (1.0*K+neglnL);
return AIC;
}
double BROWNIE::AICc(double neglnL, int K, int N) {
double AICc=GSL_POSINF;
if ((N-K-1)>0) {
AICc= 2.0 * (1.0*K+neglnL) + 2.0*K*(K+1.0)/(1.0*(N-K-1.0));
}
return AICc;
}
/**
* @method HandleGettrees [void:protected]
* @param token [NexusToken&] the token used to read from in
* @throws XNexus
*
* Called when the GETTREES command needs to be parsed
* from within the BROWNIE block.
*
* If there are trees already in the executed Nexus block, no need
* to run this.
*/
void BROWNIE::HandleGettrees( NexusToken& token )
{
if( !trees->IsEmpty() ) {
if( UserSaysOk( "Ok to delete?", "Trees have already been read and stored" ) ) {
Detach( trees );
delete trees;
trees = new TreesBlock(*taxa);
Add( trees );
}
else {
message = "\nGetTrees command aborted.";
PrintMessage();
return;
}
}
nxsstring fn;
for(;;)
{
token.GetNextToken();
if( token.Equals(";") ) {
break;
}
else if( token.Abbreviation("File") ) {
fn=GetFileName(token);
break;
}
else {
fn=token.GetToken();
}
}
//input stream
ifstream intreefile;
intreefile.open(fn.c_str(),ios::in);
if (!intrees.ReadTrees(intreefile))
{
errormsg="Failed to read trees";
throw XNexus (errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn() );
}
intreefile.close();
}
/**
* @method HandleShowtree [void:protected]
* @param token [NexusToken&] the token used to read from in
* @throws XNexus
*
* Called when the SHOWTREE command needs to be parsed
* from within the BROWNIE block.
*/
void BROWNIE::HandleShowtree( NexusToken& token )
{
nxsstring numbernexus;
bool finishexecuting=true;
for(;;)
{
token.GetNextToken();
if( token.Equals(";") ) {
if (finishexecuting) {
intrees.ShowTrees(cout);
}
break;
}
else if( token.Abbreviation("?") ) {
message="Usage: ShowTree\nDisplays all loaded trees. No option yet for just displaying the chosen tree.";
PrintMessage();
finishexecuting=false;
}
}
}
/**
* @method HandleHeuristicSearch [void:protected]
* @param token [NexusToken&] the token used to read from in
* @throws XNexus
*
*/
void BROWNIE::HandleHeuristicSearch( NexusToken& token )
{
citationarray[3]=true;
nxsstring numbernexus;
bool finishexecuting=true;
for(;;)
{
token.GetNextToken();
if( token.Equals(";") ) {
if (finishexecuting) {
if(exhaustive) {
DoExhaustiveSearch();
}
else if (!jackknifesearch) {
DoHeuristicSearch();
}
else {
ofstream jacktreef;
nxsstring jacktreename=treefilename;
jacktreename+=".jack.tre";
jacktreef.open(jacktreename.c_str());
jacktreef<<"#nexus\nbegin trees;\n"; //change these couts to jacktreef
jacktreef.close();
for (jackrep=1;jackrep<=jreps;jackrep++) { //a jackknife search consists of several heuristic searches, with the weights of several input trees set to zero
//Each gene tree's weight is used with the jackknife proportion to get a deletion vector; using the weight ensures that the genes have the same expectation of weighted representation in the final set (think of combining bootstrap samples for one gene with regular samples for another).
//NOTE: Need to deal with tree weights properly in GetGTPScoreNew; have not added anything to GetTripletScore; need to deal with how tree weights are used to calculate when trees come from different genes [perhaps add a new weight vector?]
//Use this in place of the original for ..intrees to only deal with selected trees
// for (int i=0;i<intrees.GetNumTrees();i++) {
// bool usethistree=true;
// if (jackknifesearch) {
// if (jackknifevector[i]==0) {
// usethistree=false;
// }
// }
// if (usethistree) {
// STUFF originally just under the intrees for loop
// }
// }
DoHeuristicSearch();
jacktreef.open(jacktreename.c_str(), ios::out | ios::app);
jacktreef<<jackknifetreestooutput;
jacktreef.close();
}
jacktreef.open(jacktreename.c_str(), ios::out | ios::app);
jackknifesearch=false;
jacktreef<<"end;";
jacktreef.close();
}
}
break;
}
else if( token.Abbreviation("?") ) {
if (!jackknifesearch) {
message="Usage: HSearch [options]\nDoes a heuristic search ";
}
else {
message="Usage: Jackknife [options]\nDoes a jackknife search, deleting input trees ";
}
message+="Available options:\n\n";
message+="Keyword ---- Option type --------------------------- Current setting --";
if (jackknifesearch) {
message+="\nJReps <integer-value> ";
message+=jreps;
message+="\nPctDelete <real-value> ";
message+=pctdelete;
}
message+="\nNReps <integer-value> ";
message+=nreps;
//message+="\nTimeLimit <integer-value>|None None";
//message+="\nClock Wall|CPU Wall";
message+="\nRearrLimit <integer-value>|None ";
if (rearrlimit<0) {
message+="None";
}
else {
message+=rearrlimit;
}
//message+="\nAssignFixed No|Yes Yes";
//message+="\nSppNumFixed No|Yes Yes";
message+="\nMaxNumSpp <integer-value> ";
message+=maxnumspecies;
message+="\nMinNumSpp <integer-value> ";
message+=minnumspecies;
message+="\nMinSamp <integer-value> ";
message+=minsamplesperspecies;
message+="\nStructWt <double> ";
message+=structwt;
message+="\nPThreshold <double> ";
message+=pthreshold;
message+="\nSubsample <double> ";
message+=chosensubsampling;
message+="\nMoveFreq (number number number number number [number]) (";
for (int i=0;i<6;i++) {
message+=movefreqvector[i];
message+=" ";
}
message+=")";
message+="\nFile <file-name> ";
message+=treefilename;
//message+="\nRecordSearch No|Yes No";
//message+="\nSearchFile <file-name> ";
//message+="\nStatus No|Yes Yes";
// message+="\nSteepest No|Yes No";
//message+="\nExhaustive No|Yes No";
message+="\nShowTries No|Yes ";
if (showtries) {
message+="Yes";
}
else {
message+="No";
}
message+="\nCOAL: No|Yes ";
if (useCOAL) {
message+="Yes";
}
else {
message+="No";
}
message+="\nMS: No|Yes ";
if (useMS) {
message+="Yes";
}
else {
message+="No";
}
message+="\nAIC_mode 0|1|2|3|4 ";
message+=COALaicmode;
message+="\nGridWidth <double> ";
message+=contourstartingwidth;
message+="\nGridSize <integer-value> ";
message+=contourstartingnumbersteps;
message+="\nMaxRecursions <integer-value> ";
message+=contourMaxRecursions;
message+="\nBranch_export 0|1|2 ";
message+=contourBrlenToExport;
message+="\n\nNReps: Number of random starting species trees to use";
//message+="\nTimeLimit: Limit search to X seconds";
//message+="\nClock: Count seconds for time limit using actual elapsed time ('Wall'->Clock on a wall) or CPU time"; //NOte to self: see discussion online of time() fn and clock() fn in C++
message+="\nRearrLimit: Limit search to X rearrangements for each nrep";
//message+="\nAssignFixed: Assignment of gene samples to species is not optimized during a search";
//message+="\nSppNumFixed: The total number of species is not optimized during the search (if set to No, then AssignFixed is also set to No)";
message+="\nMaxNumSpp: The maximum number of species to split the samples into (only relevant if SppNumFixed==No)";
message+="\nMinSamp: The minimum number of samples per species (NOT the number of species)";
message+="\nMoveFreq: Sets the relative proportion of times to try\n\t1) Species tree branch swaps\n\t2) Moving samples from one species to another\n\t3) Increasing the number of species\n\t4) Decreasing the number of species\n\t5) Attempt to reroot the species tree\n If these don't sum to one, the program will automatically correct this.";
//message+="\nRecordSearch: Output each step to a file: assignments go to a .txt file, and trees go to a .tre file";
//message+="\nSearchFile: If RecordSearch==Yes, the prefix to use for the output files.";
// message+="\nStatus: Output status of search to screen (and a log file, if open).";
// message+="\nSteepest: Whether to look at all rearrangements and then take the best one or just take the first better one.";
message+="\nSubsample: How extensively to try taxon reassignments on leaf splits. \n\tA value of 1 means try all of the possible reassignments, \n\ta value of 2 means try the square root of all the possible assignments,\n\t3 means the cube root, etc. A higher number means a faster but less effective search.\n\tThe program won't let you try fewer than 10 assignments on average.";
message+="\nMS: Use Hudson's program MS to simulate gene trees to estimate probabilities. MAKE SURE MS IS INSTALLED SOMEWHERE ACCESSIBLE TO BROWNIE.";
message+="\nCOAL: Use Degnan's program COAL to optimize the likelihood of the species delimitation and tree rather than the semiparametric penalty function. MAKE SURE COAL IS INSTALLED SOMEWHERE ACCESSIBLE TO BROWNIE.";
message+="\nAIC_mode: When using COAL, use the 0: likelihood as the penalty term, 1: AIC value (k=number of species), 2: AICc with n=number of genes, 3: AICc with n=number of samples, 4: AICc with n=(number of genes) * (number of samples)";
message+="\nGridWidth, GridSize, MaxRecursions all affect grid search";
message+="\nBranch_export: if set to 0, returns a single estimate of the best branch lengths on the species tree. If set to 1, returns a table of equally-good branch lengths. If set to 2, returns a table of the best and neighboring branch lengths, suitable for doing a contour plot of score versus branch lengths";
PrintMessage();
finishexecuting=false;
}
else if(token.Abbreviation("NReps")) {
nxsstring numbernexus;
numbernexus = GetNumber(token);
nreps=atoi( numbernexus.c_str() ); //convert to int
}
else if(token.Abbreviation("MAXNumspp")) {
nxsstring numbernexus;
numbernexus = GetNumber(token);
maxnumspecies=atoi( numbernexus.c_str() ); //convert to int
message="There must be no more than ";
message+=maxnumspecies;
message+=" species total.";
PrintMessage();
}
else if(token.Abbreviation("MINSamp")) {
nxsstring numbernexus;
numbernexus = GetNumber(token);
minsamplesperspecies=GSL_MIN(atoi( numbernexus.c_str() ),taxa->GetNumTaxonLabels()); //convert to int
message="All species must now have at least ";
message+=minsamplesperspecies;
message+=" samples in them.";
PrintMessage();
}
else if(token.Abbreviation("REArrlimit")) {
nxsstring numbernexus;
message="Rearrangement limit set to ";
numbernexus = GetFileName(token);
if (numbernexus[0]=='n' || numbernexus[0]=='N') {
rearrlimit=-1;
message+="None";
}
else {
rearrlimit=atoi(numbernexus.c_str() );
message+=rearrlimit;
}
PrintMessage();
}
else if( token.Abbreviation("STEepest") ) {
nxsstring yesnoreplace=GetFileName(token);
if (yesnoreplace[0] == 'n' || yesnoreplace[0] == 'N') {
steepest=false;
}
else {
steepest=true;
cout<<"%%%%%%%%%%%%%% Warning: steepest hasn't been tested everywhere %%%%%%%%%%%%%%%%%%%"<<endl;
}
}
else if( token.Abbreviation("EXhaustive") ) {
nxsstring yesnoreplace=GetFileName(token);
if (yesnoreplace[0] == 'n' || yesnoreplace[0] == 'N') {
exhaustive=false;
}
else {
exhaustive=true;
message="Wow, an exhaustive search.";
PrintMessage();
}
}
else if( token.Abbreviation("SHOwtries") ) {
nxsstring yesnoreplace=GetFileName(token);
if (yesnoreplace[0] == 'n' || yesnoreplace[0] == 'N') {
showtries=false;
}
else {
showtries=true;
}
}
else if( token.Abbreviation("COAL") ) {
nxsstring yesnoreplace=GetFileName(token);
if (yesnoreplace[0] == 'n' || yesnoreplace[0] == 'N') {
useCOAL=false;
}
else {
useCOAL=true;
useMS=false;
message="This will use COAL for the search";
PrintMessage();
}
}
else if( token.Abbreviation("MS") ) {
nxsstring yesnoreplace=GetFileName(token);
if (yesnoreplace[0] == 'n' || yesnoreplace[0] == 'N') {
useMS=false;
}
else {
useMS=true;
useCOAL=false;
message="This will use ms for the search";
PrintMessage();
if (movefreqvector[5]==0) {
movefreqvector[0]=0.1;
movefreqvector[1]=0.1;
movefreqvector[2]=0.1;
movefreqvector[3]=0.1;
movefreqvector[4]=0.1;
movefreqvector[5]=0.5;
message="Now allocating 50% of moves to branch length changes";
PrintMessage();
}
}
}
else if (token.Abbreviation("AIC_mode")) {
nxsstring numbernexus;
numbernexus = GetNumber(token);
int AICmoderaw=atoi( numbernexus.c_str() ); //convert to int
if (AICmoderaw==0) {
message="Using likelihood as penalty function";
COALaicmode=AICmoderaw;
PrintMessage();
}
else if (AICmoderaw==1) {
message="Using AIC as penalty function";
COALaicmode=AICmoderaw;
PrintMessage();
}
else if (AICmoderaw==2) {
message="Using AICc as penalty function, with n=number of genes";
COALaicmode=AICmoderaw;
PrintMessage();
}
else if (AICmoderaw==3) {
message="Using AICc as penalty function, with n=number of samples";
COALaicmode=AICmoderaw;
PrintMessage();
}
else if (AICmoderaw==4) {
message="Using AICc as penalty function, with n=(number of genes) * (number of samples)";
COALaicmode=AICmoderaw;
PrintMessage();
}
else {
errormsg="You entered an unrecognized option for AIC_mode: should be an integer 0-4, but you entered ";
errormsg+=AICmoderaw;
throw XNexus( errormsg);
}
}
else if (token.Abbreviation("BRANch_export")) {
nxsstring numbernexus;
numbernexus = GetNumber(token);
int contourBrlenToExportraw=atoi( numbernexus.c_str() ); //convert to int
if (contourBrlenToExportraw==0) {
message="Just export a single branch length for each best species topology";
contourBrlenToExport=contourBrlenToExportraw;
PrintMessage();
}
else if (contourBrlenToExportraw==1) {
message="Export a table of equally-good branch lengths for each best species topology";
contourBrlenToExport=contourBrlenToExportraw;
PrintMessage();
}
else if (contourBrlenToExportraw==2) {
message="Export a table from a grid of possible branch lengths, including suboptimal ones (good for plotting contours)";
contourBrlenToExport=contourBrlenToExportraw;
PrintMessage();
}
else {
errormsg="You entered an unrecognized option for Branch_export: should be an integer 0-2, but you entered ";
errormsg+=contourBrlenToExportraw;
throw XNexus( errormsg);
}
}
else if(token.Abbreviation("MAXRecursions")) {
nxsstring numbernexus;
numbernexus = GetFileName(token);
int contourMaxRecursionsraw=atoi(numbernexus.c_str() );
if (contourMaxRecursionsraw>0) {
message="Max recursions per grid search set to ";
contourMaxRecursions=contourMaxRecursionsraw;
message+=contourMaxRecursions;
PrintMessage();
}
else {
errormsg="You entered an inappropriate option for MaxRecursions: should be an integer >0, but you entered ";
errormsg+=contourMaxRecursionsraw;
throw XNexus( errormsg);
}
}
else if(token.Abbreviation("GRIDSize")) {
nxsstring numbernexus;
numbernexus = GetFileName(token);
int contourstartingnumberstepsraw=atoi(numbernexus.c_str() );
if (contourstartingnumberstepsraw>0) {
message="Number of steps on each axis in grid search set to ";
if (GSL_IS_ODD (contourstartingnumberstepsraw)==0) {
contourstartingnumberstepsraw++;
}
contourstartingnumbersteps=1.0*contourstartingnumberstepsraw; //is actually a double
message+=contourstartingnumbersteps;
message+=" even numbers increased to next odd number (so the current best value is the center of the grid)";
PrintMessage();
}
else {
errormsg="You entered an inappropriate option for GridSize: should be an integer >0, but you entered ";
errormsg+=contourstartingnumberstepsraw;
throw XNexus( errormsg);
}
}
else if(token.Abbreviation("GRIDWidth")) {
nxsstring numbernexus;
numbernexus = GetFileName(token);
double contourstartingwidthraw=atof(numbernexus.c_str() );
if (contourstartingwidthraw>0) {
message="Starting maximum branch length for each branch in grid search set to ";
contourstartingwidth=contourstartingwidthraw;
message+=contourstartingwidth;
message+=" times the best length";
PrintMessage();
}
else {
errormsg="You entered an inappropriate option for GridWidth: should be a number >0, but you entered ";
errormsg+=contourstartingwidthraw;
throw XNexus( errormsg);
}
}
else if( token.Abbreviation("MoveFreq") ) {
token.GetNextToken();
token.GetNextToken(); //eat the equals sign
vector<double> temporarymovefreqvector;
if (!token.Equals("(")) {
errormsg="Expecting next token to be a \'(\' but instead got ";
errormsg+=token.GetToken();
throw XNexus( errormsg);
}
int inputcount=0;
while (!token.Equals(")")) {
nxsstring numbernexus;
numbernexus=GetNumberOnly(token);
if (numbernexus!=")") {
temporarymovefreqvector.push_back(atof( numbernexus.c_str() ));
inputcount++;
}
else {
break;
}
}
if (inputcount!=5 && inputcount!=6) {
errormsg="You should have entered five (or six) frequencies, you entered ";
errormsg+=inputcount;
throw XNexus( errormsg);
}
else {
double sumoffreqs=temporarymovefreqvector[0]+temporarymovefreqvector[1]+temporarymovefreqvector[2]+temporarymovefreqvector[3]+temporarymovefreqvector[4];
if (inputcount==6) {
sumoffreqs+=temporarymovefreqvector[5];
}
else {
temporarymovefreqvector.push_back(0);
}
movefreqvector.clear();
for (int i=0; i<temporarymovefreqvector.size(); i++) {
movefreqvector[i]=(temporarymovefreqvector[i])/sumoffreqs;
}
}
}
else if(token.Abbreviation("MINNumspp") || token.Abbreviation("MINUMSP") ) { //to deal with typos
nxsstring numbernexus;
numbernexus = GetNumber(token);
minnumspecies=atoi( numbernexus.c_str() ); //convert to int
message="There must be at least ";
message+=minnumspecies;
message+=" species total.";
PrintMessage();
}
else if(token.Abbreviation("FIlename")) {
treefilename=GetFileName(token);
}
else if(token.Abbreviation("STRuctwt")) {
nxsstring numbernexus;
numbernexus = GetNumber(token);
double proposedwt=atof( numbernexus.c_str() ); //convert to double
if ((proposedwt<0) || (proposedwt>1)) {
errormsg="Error: The weight for population substructure must be between 0 and 1 (inclusive)";
throw XNexus( errormsg);
}
else {
structwt=proposedwt;
message="You have chosen to have the gene duplication cost to make up ";
double gtpwt=(1-structwt)*100;
message+=gtpwt;
message+="% and the substructure cost to make up the remaining ";
message+=100-gtpwt;
message+="%.";
PrintMessage();
}
}
else if(token.Abbreviation("PThreshold")) {
nxsstring numbernexus;
numbernexus = GetNumber(token);
double proposedthresh=atof( numbernexus.c_str() ); //convert to double
if ((proposedthresh<0) || (proposedthresh>1)) {
errormsg="Error: The p-value threshold must be between 0 and 1 (inclusive)";
throw XNexus( errormsg);
}
else {
pthreshold=proposedthresh;
message="You have chosen to have structure p values > ";
message+=pthreshold;
message+=" ignored";
PrintMessage();
}
}
else if(token.Abbreviation("SUBsample")) {
nxsstring numbernexus;
numbernexus = GetNumber(token);
double proposedsubsample=atof( numbernexus.c_str() ); //convert to double
if (proposedsubsample<1) {
errormsg="Error: The subsample threshold must be greater than one";
throw XNexus( errormsg);
}
else {
chosensubsampling=proposedsubsample;
message="You have chosen a split leaf subsample value of ";
message+=chosensubsampling;
PrintMessage();
}
}
}
}
vector<double> BROWNIE::GetCombinedScore(ContainingTree *SpeciesTreePtr)
{
vector<double> scorevector;
bool calculatescore=true;
bool infinitescore=false;
if (useCOAL) {
int maxspecies=0;
int currentnumberofspecies=0;
float neglnlikelihood=GSL_POSINF;
for (int i=0;i<convertsamplestospecies.size();i++) {
maxspecies=GSL_MAX(maxspecies,convertsamplestospecies[i]);
}
vector<int> speciesvector(maxspecies,0);
for (int i=0;i<convertsamplestospecies.size();i++) {
speciesvector[(convertsamplestospecies[i])-1]++;
}
for (int i=0;i<speciesvector.size();i++) {
if (speciesvector[i]<minsamplesperspecies) {
calculatescore=false;
}
}
if (calculatescore && maxspecies==1) { //COAL can't calculate probability of gene tree on species tree with one species, so use Harding 1971 equation 5.3 to do it manually
neglnlikelihood=0;
for (int chosentreenum=0; chosentreenum<trees->GetNumTrees(); chosentreenum++) { //loop over all the gene trees
Tree CurrentGeneTreeTreeFmt=intrees.GetIthTree(chosentreenum);
CurrentGeneTreeTreeFmt.Update(); //sets weights
NodeIterator <Node> n (CurrentGeneTreeTreeFmt.GetRoot()); //starts at leaves and works down
double probability=1;
cur = n.begin();
while (cur) {
if (!(cur->IsLeaf())) {
int Q=(cur->GetChild())->GetWeight();
int R=((cur->GetChild())->GetSibling())->GetWeight();
int N=R+Q;
probability*=2*(gsl_sf_fact(R))*(gsl_sf_fact(Q))/((N-1.0)*gsl_sf_fact(N)); //in Harding's equation, we calculate [Pl{Q] and Pl[R] when we are at the child nodes
//cout<<"probability now "<<probability<<" with Q="<<Q<<" and R="<<R<<endl;
}
//else {
// cout<<"Leaf "<<cur->GetLabel()<<endl;
//}
cur = n.next();
}
neglnlikelihood+=-1.0*log(probability);
//cout<<"neglnlikelihood now "<<neglnlikelihood<<endl<<endl;
}
}
else if (calculatescore && maxspecies>1) {
//export the species tree
nxsstring coalspecies="coalspecies.txt";
ofstream coalspeciesstream;
coalspeciesstream.open( coalspecies.c_str() );
ContainingTree CurrentTree=*SpeciesTreePtr;
CurrentTree.FindAndSetRoot();
CurrentTree.Update();
CurrentTree.GetNodeDepths();
// if(logf_open) {
// logf<<"Now trying with species tree: ";
// CurrentTree.Write(logf);
// }
//CurrentTree.Draw(cout);
NodeIterator <Node> n (CurrentTree.GetRoot());
cur = n.begin();
while (cur) {
if (cur->IsLeaf()) {
currentnumberofspecies++;
//nxsstring newlabel=PipeLeafFinalSpeciesTree();
//cur->SetLabel(newlabel);
}
else {
cur->SetLabel("");
}
cur = n.next();
}
(CurrentTree).Write(coalspeciesstream);
coalspeciesstream<<endl;
coalspeciesstream.close();
//export the gene trees
nxsstring coalgenes="coalgenes.txt";
ofstream coalgenesstream;
coalgenesstream.open( coalgenes.c_str());
for (int chosentreenum=0; chosentreenum<trees->GetNumTrees(); chosentreenum++) { //loop over all the gene trees
Tree CurrentGeneTreeTreeFmt=intrees.GetIthTree(chosentreenum);
NodeIterator <Node> n (CurrentGeneTreeTreeFmt.GetRoot());
cur = n.begin();
while (cur) {
if (cur->IsLeaf()) {
int SampleNumber=taxa->FindTaxon(cur->GetLabel());
nxsstring newlabel="taxon";
newlabel+=convertsamplestospecies[SampleNumber];
newlabel+="-";
newlabel+=SampleNumber;
cur->SetLabel(newlabel);
//cout<<"current label is "<<cur->GetLabel()<<" perhaps with quotes"<<endl;
}
else {
cur->SetLabel("");
}
cur = n.next();
}
(CurrentGeneTreeTreeFmt).WriteNoQuote(coalgenesstream);
coalgenesstream<<endl;
}
coalgenesstream.close();
//export the infile
nxsstring coalinfile="infile";
ofstream coalinfilestream;
coalinfilestream.open(coalinfile.c_str() );
coalinfilestream<<"begin coal;"<<endl;
coalinfilestream<<"nstaxa ";
coalinfilestream<<currentnumberofspecies<<";"<<endl;
coalinfilestream<<"ngtaxa";
nxsstring speciesnames="taxa names = ";
for (int i=1; i<=currentnumberofspecies; i++) {
int samplecount=0;
for (int j=0; j<=convertsamplestospecies.size();j++) {
if (convertsamplestospecies[j]==i) {
samplecount++;
}
}
speciesnames+=" taxon";
speciesnames+=i;
coalinfilestream<<" "<<samplecount;
}
coalinfilestream<<";"<<endl;
coalinfilestream<<speciesnames<<";"<<endl;
coalinfilestream<<"intra yes;"<<endl;
coalinfilestream<<"gene tree file=coalgenes.txt;"<<endl;
coalinfilestream<<"species tree file=coalspecies.txt;"<<endl;
coalinfilestream<<"nstrees 1;"<<endl;
coalinfilestream<<"ngtrees "<<trees->GetNumTrees()<<";"<<endl;
nxsstring coaloutputfile="coalout.txt";
coalinfilestream<<"outfile = "<<coaloutputfile<<" / probs ;"<<endl;
coalinfilestream<<"end;"<<endl;
coalinfilestream.close();
//run coal
//cout<<"Starting to call coal"<<endl;
int coalreturncode=-1;
if (currentnumberofspecies>0) { //just to head off a weird error
int returncodefailurecount=-1;
while (coalreturncode!=0) {
returncodefailurecount++;
coalreturncode=system("coal > /dev/null");
if (returncodefailurecount>100) {
errormsg="Had over 100 failures of COAL";
throw XNexus( errormsg);
break;
}
}
}
//cout<<"Done calling coal, now parsing"<<endl;
//cout <<"coal returncode is "<<coalreturncode<<endl;
if (coalreturncode==0) {
int returncount=0;
neglnlikelihood=0;
ifstream coalin;
coalin.open( coaloutputfile.c_str(), ios::binary | ios::in );
if (coalin) {
char inputitem [COMMAND_MAXLEN];
coalin>>inputitem;
bool nextisprob=true; //since the input file alternates gene_tree_number SPACES prob
while (!coalin.eof()) {
coalin>>inputitem;
if (nextisprob) {
double probability=atof(inputitem);
if (probability==0 || probability !=probability) { //test for it not being a number or for it being nan
neglnlikelihood=GSL_POSINF;
break;
}
neglnlikelihood+=-1.0*log(probability);
returncount++;
//cout<<"prob is "<<inputitem<<endl;
nextisprob=false;
}
else {
nextisprob=true;
}
}
}
coalin.close();
if (returncount!=trees->GetNumTrees()) {
neglnlikelihood=GSL_POSINF; //THere's an error, we returned the wrong number of likelihood scores
}
}
//cout<<"Done parsing"<<endl<<endl;
//process the output
}
assert(neglnlikelihood>0);
double score=neglnlikelihood;
if (COALaicmode==0) {
//do nothing, we're just using raw lnL
}
else if (COALaicmode==1) {
score=AIC(neglnlikelihood,currentnumberofspecies);
}
else if (COALaicmode==2) {
score=AICc(neglnlikelihood,currentnumberofspecies,trees->GetNumTrees());
}
else if (COALaicmode==3) {
score=AICc(neglnlikelihood,currentnumberofspecies,taxa->GetNumTaxonLabels());
}
else if (COALaicmode==4) {
score=AICc(neglnlikelihood,currentnumberofspecies,(trees->GetNumTrees())*(taxa->GetNumTaxonLabels()));
}
scorevector.push_back(neglnlikelihood);
scorevector.push_back(0);
scorevector.push_back(0);
//cout<<"neglnlikelihood = "<<neglnlikelihood<<endl;
//if(logf_open) {
// logf<<"\t[neglnlikelihood = "<<neglnlikelihood<<" ]"<<endl;
//}
}
else if (useMS) {
double neglnlikelihood=0.0;
//ms $nsamples $ntrees -T -I 2 $nsamplessp1 $nsamplessp2 -ej $splittime 1 2
//echo "cat" | grep "\(cat\|dog\)"
//Convert species tree to MS format
ContainingTree CurrentTree=*SpeciesTreePtr;
CurrentTree.FindAndSetRoot();
CurrentTree.Update();
CurrentTree.GetNodeDepths();
int numspecies=CurrentTree.GetNumLeaves();
int ntax=taxa->GetNumTaxonLabels();
nxsstring msstring="ms ";
msstring+=ntax;
msstring+=" ";
msstring+=msbasereps; //number of trees to generate
msstring+=" -T -I ";
msstring+=numspecies;
msstring+=" ";
vector <int> samplesperspecies;
vector <nxsstring> labelswithinspecies;
double numberofpermutations=1;
int previoustotal=0;
for (int currentspecies=1; currentspecies<=numspecies; currentspecies++) {
int samplecountthisspecies=0;
nxsstring labelregex="\\(";
for (int currentsample=0; currentsample<ntax; currentsample++) {
if (convertsamplestospecies[currentsample]==currentspecies) {
samplecountthisspecies++;
}
}
msstring+=samplecountthisspecies;
samplesperspecies.push_back(samplecountthisspecies);
for (int i=1; i<=samplecountthisspecies; i++) {
if (i>1) {
labelregex+="\\|";
}
labelregex+=i+previoustotal;
}
labelregex+="\\):[0-9]*.[0-9]*"; //ms exports brlen, too
labelswithinspecies.push_back(labelregex);
previoustotal+=samplecountthisspecies;
numberofpermutations*=gsl_sf_fact(samplecountthisspecies);
msstring+=" ";
}
NodeIterator <Node> n (CurrentTree.GetRoot());
//loop once to get MS string
cur = n.begin();
while (cur) {
if (cur->IsLeaf()) {
int speciesnumber;
nxsstring specieslabel=cur->GetLabel();
string speciesstring=specieslabel.c_str();
size_t index = speciesstring.find('t');
speciesstring.erase(index,5); //erase "taxon"
cur->SetLabel(speciesstring);
}
else {
//assumes binary, ultrametric tree
if (cur->GetChild()!=NULL) { //will only really occur if the tree has just a root node
if ((cur->GetChild())->GetSibling()!=NULL) {
int childid=atoi(((cur->GetChild())->GetLabel()).c_str());
int sibid=atoi((((cur->GetChild())->GetSibling())->GetLabel()).c_str());
nxsstring newlabel="";
newlabel+=GSL_MIN(childid,sibid);
cur->SetLabel(newlabel);
double splittime=0;
NodePtr childnode=cur->GetChild();
while (childnode!=NULL) {
splittime+=childnode->GetEdgeLength();
childnode=childnode->GetChild();
}
msstring+="-ej ";
msstring+=splittime;
msstring+=" ";
msstring+=GSL_MAX(childid,sibid);
msstring+=" ";
msstring+=GSL_MIN(childid,sibid);
msstring+=" ";
}
}
}
cur = n.next();
}
//rather than looking for exact match, get probabilities of given topology with all possible permutations of labels of samples from a given species onto ms taxon numbers, then divide by number of such permutations
//loop over all the gene trees
for (int chosentreenum=0; chosentreenum<trees->GetNumTrees(); chosentreenum++) { //loop over all the gene trees
nxsstring grepstring="";
int numberintraspecificcherries=0;
Tree CurrentGeneTreeTreeFmt=intrees.GetIthTree(chosentreenum);
NodeIterator <Node> n (CurrentGeneTreeTreeFmt.GetRoot());
//traverse once to count intraspecific sister cherries
cur = n.begin();
while (cur) {
if (cur->IsLeaf()) {
NodePtr sister=cur->GetSibling();
if (sister!=NULL) {
if (sister->IsLeaf()) {
//cout<<cur->GetLabel()<<" with "<<sister->GetLabel()<<endl;
if (convertsamplestospecies[taxa->FindTaxon(cur->GetLabel())]==convertsamplestospecies[taxa->FindTaxon(sister->GetLabel())]) { //they have the same species (though node labels are changed, we do the one that's a child before its sib)
numberintraspecificcherries++;
}
}
}
}
cur = n.next();
}
//now get the grep lines
cur = n.begin();
while (cur) {
if (cur->IsLeaf()) {
cur->SetLabel(labelswithinspecies[-1+convertsamplestospecies[taxa->FindTaxon(cur->GetLabel())]]); //gets regex for tip labels
}
else {
nxsstring combinedstring="";
nxsstring leftchild=(cur->GetChild())->GetLabel();
nxsstring rightchild=((cur->GetChild())->GetSibling())->GetLabel();
nxsstring additionalregex="(\\(";
additionalregex+=leftchild;
additionalregex+=",";
additionalregex+=rightchild;
additionalregex+="\\|";
additionalregex+=rightchild;
additionalregex+=",";
additionalregex+=leftchild;
additionalregex+="\\))";
if (cur->GetAnc()!=NULL) { // not root
additionalregex+=":*[0-9]*.[0-9]*";
}
else { //originally, did this at each node, with the idea that from MS, you first filter out the lines that don't match a cherry, then filter from that filtered set, etc., thinking it would be faster than just using the regex at the root. Actually, just using the regex at the root was faster, so I do that.
grepstring+=" | grep -c \"";
grepstring+=additionalregex;
grepstring+="\"";
}
cur->SetLabel(additionalregex);
}
cur = n.next();
}
nxsstring finalsystemcall=msstring;
//cout<<msstring<<endl;
finalsystemcall+=grepstring;
nxsstring msinputfile="mscount.txt";
finalsystemcall+=" > ";
finalsystemcall+=msinputfile;
//system("rm mscount.txt");
int returncode=system(finalsystemcall.c_str());
if (debugmode) {
cout<<"return code is "<<returncode<<" (divided by 256, is "<<returncode/256<<")"<<endl;
}
//cout<<finalsystemcall<<endl;
/* int returnattempts=-1;
int returncode=-1;
while (returncode!=0) {
if (returnattempts>0) {
system("rm mscount.txt");
}
returncode=system(finalsystemcall.c_str());
returnattempts++;
if (returnattempts>5) {
message="Had over 5 failures of MS, calling\n\n";
message+=finalsystemcall;
message+="\n";
PrintMessage();
//throw XNexus( errormsg);
break;
}
if (returncode!=0) {
message="Warning: ms returned error code ";
message+=returncode;
PrintMessage();
}
}*/
// if (returncode==0) {
ifstream msin;
msin.open( msinputfile.c_str(), ios::binary | ios::in );
if (msin) {
char inputitem [COMMAND_MAXLEN];
msin>>inputitem;
double numbermatches=atof(inputitem);
if (numbermatches==0) {
infinitescore=true;
}
neglnlikelihood+=-1.0*(log(GSL_MAX(numbermatches,0.01))-log(msbasereps)-log((1.0*numberofpermutations)/(1.0*numberintraspecificcherries))); //numbermatches is an integer, but log(0) is infinite. Idea is to make this a really big but not infinite number. Probability of a gene tree is #of times it was observed (with all permutations of tip labels within species) divided by the number of trees returned, all divided by the number of possible permutations (since this gene tree is just one realization of that), correcting for the number of intraspecific cherries (otherwise, for example, with a single species with a gene with two samples, (1,2) might be the gene tree, but we say the number of perms=2, and so though we find ((1|2),(1|2)) in all the ms returns, we would divide this by 2, when the real probability is one.
// }
msin.close();
}
else {
message="\nWarning: got return code of ";
message+=returncode;
message+=" for ms string of \n\n\t";
message+=msstring;
message+="\n\nand grep string of \n\n";
message+=grepstring;
message+="\n\n";
neglnlikelihood=GSL_POSINF;
}
}
double score=neglnlikelihood;
if (COALaicmode==0) {
//do nothing, we're just using raw lnL
}
else if (COALaicmode==1) {
score=AIC(neglnlikelihood,numspecies);
}
else if (COALaicmode==2) {
score=AICc(neglnlikelihood,numspecies,trees->GetNumTrees());
}
else if (COALaicmode==3) {
score=AICc(neglnlikelihood,numspecies,taxa->GetNumTaxonLabels());
}
else if (COALaicmode==4) {
score=AICc(neglnlikelihood,numspecies,(trees->GetNumTrees())*(taxa->GetNumTaxonLabels()));
}
scorevector.push_back(neglnlikelihood);
scorevector.push_back(0);
scorevector.push_back(0);
}
else {
//We do this so we don't bother doing the GTP or triplet calculations if they're not needed.
triplettoohigh=false;
gtptoohigh=false;
double combinedscore=0;
double tripletscore=0;
double gtpscore=0;
if (minsamplesperspecies>1) {
int maxspecies=0;
for (int i=0;i<convertsamplestospecies.size();i++) {
maxspecies=GSL_MAX(maxspecies,convertsamplestospecies[i]);
}
vector<int> speciesvector(maxspecies,0);
for (int i=0;i<convertsamplestospecies.size();i++) {
speciesvector[(convertsamplestospecies[i])-1]++;
}
for (int i=0;i<speciesvector.size();i++) {
if (speciesvector[i]<minsamplesperspecies) {
calculatescore=false;
combinedscore=GSL_POSINF;
tripletscore=GSL_POSINF;
gtpscore=GSL_POSINF;
}
}
}
if (calculatescore) {
if (structwt>0) {
tripletscore=double(GetTripletScore(SpeciesTreePtr));
// (*SpeciesTreePtr).Write(cout);
// cout<<endl;
// for (int i=0;i<convertsamplestospecies.size();i++) {
// cout<<convertsamplestospecies[i]<<" ";
// }
// cout<<endl;
// cout<<"\ntriplet score = "<<newscore<<endl<<endl;
// combinedscore+=newscore;
}
if (structwt<1 && (triplettoohigh==false)) {
//combinedscore+=(1.0-structwt)*GetGTPScore(SpeciesTreePtr);
// cout<<"Mike gave "<<combinedscore/(1.0-structwt)<<"\nI gave "<<GetGTPScoreNew(SpeciesTreePtr)<<endl;
//cout<<"SpeciesTree"<<endl;
//(*SpeciesTreePtr).Write(cout);
//cout<<endl;
gtpscore=double(GetGTPScoreNew(SpeciesTreePtr));
}
combinedscore=((1.0-structwt)*gtpscore)+(structwt*tripletscore);
}
scorevector.push_back(combinedscore);
scorevector.push_back(gtpscore);
scorevector.push_back(tripletscore);
}
return scorevector;
}
//checks to make sure that the convertsamplestospecies vector is not missing assignments to any species (i.e, doesn't consist only of species 1, 3 and 4). If fix==true, it'll fix this. It will return true if check
bool BROWNIE::CheckConvertSamplesToSpeciesVector(bool fix)
{
int maxspecies=0;
bool goodshape=true;
int firstbadspecies=0;
for (int i=0;i<convertsamplestospecies.size();i++) {
maxspecies=GSL_MAX(maxspecies,convertsamplestospecies[i]);
}
vector<int> speciesvector(maxspecies,0);
for (int i=0;i<convertsamplestospecies.size();i++) {
speciesvector[(convertsamplestospecies[i])-1]++;
}
for (int i=0;i<speciesvector.size();i++) {
if (speciesvector[i]==0) {
goodshape=false;
firstbadspecies=i+1;
break;
}
}
if (!goodshape && fix) {
for (int i=0;i<convertsamplestospecies.size();i++) {
if (convertsamplestospecies[i]>firstbadspecies) {
convertsamplestospecies[i]=convertsamplestospecies[i]-1;
}
}
goodshape=CheckConvertSamplesToSpeciesVector(true);
}
return goodshape;
}
bool BROWNIE::CombineSpeciesWithTooFewSamples(bool fix) { //The idea of this is to fix the assignment vector if there are too few samples per species by combining the too small species with the next species and then fixing the resulting hole
bool goodsamplenumber=true;
if (minsamplesperspecies>1) {
int maxspecies=0;
for (int i=0;i<convertsamplestospecies.size();i++) {
maxspecies=GSL_MAX(maxspecies,convertsamplestospecies[i]);
}
vector<int> speciesvector(maxspecies,0);
for (int i=0;i<convertsamplestospecies.size();i++) {
speciesvector[(convertsamplestospecies[i])-1]++;
}
for (int i=0;i<speciesvector.size();i++) {
if (speciesvector[i]<minsamplesperspecies) {
goodsamplenumber=false;
if (fix) {
for (int j=0;j<convertsamplestospecies.size();j++) {
if (convertsamplestospecies[j]==i+1) {
if ((i+1)<maxspecies) { //so there's a species to the right to combine with.
convertsamplestospecies[j]=convertsamplestospecies[j]+1;
}
else {
convertsamplestospecies[j]=convertsamplestospecies[j]-1;
}
}
}
bool goodshape=CheckConvertSamplesToSpeciesVector(true);
goodsamplenumber=CombineSpeciesWithTooFewSamples(true);
}
}
}
}
return goodsamplenumber;
}
bool BROWNIE::CheckConvertSamplesToSpeciesVectorSpNum(int actualmaxspeciesnum)
{
int maxspecies=0;
bool goodshape=true;
int firstbadspecies=0;
for (int i=0;i<convertsamplestospecies.size();i++) {
maxspecies=GSL_MAX(maxspecies,convertsamplestospecies[i]);
}
if (maxspecies!=actualmaxspeciesnum) {
goodshape=false;
}
if (goodshape) {
vector<int> speciesvector(maxspecies,0);
for (int i=0;i<convertsamplestospecies.size();i++) {
speciesvector[(convertsamplestospecies[i])-1]++;
}
for (int i=0;i<speciesvector.size();i++) {
if (speciesvector[i]==0) {
goodshape=false;
firstbadspecies=i+1;
break;
}
}
}
return goodshape;
}
bool BROWNIE::CheckConvertSamplesToSpeciesTooManySpecies()
{
int observedmaxspecies=0;
bool goodshape=true;
for (int i=0;i<convertsamplestospecies.size();i++) {
observedmaxspecies=GSL_MAX(observedmaxspecies,convertsamplestospecies[i]);
}
if (observedmaxspecies>maxnumspecies) {
goodshape=false;
}
return goodshape;
}
bool BROWNIE::CheckConvertSamplesToSpeciesTooFewSpecies()
{
int observedmaxspecies=0;
bool goodshape=true;
for (int i=0;i<convertsamplestospecies.size();i++) {
observedmaxspecies=GSL_MAX(observedmaxspecies,convertsamplestospecies[i]);
}
if (observedmaxspecies<minnumspecies) {
goodshape=false;
}
return goodshape;
}
bool BROWNIE::MoveSamples(vector<int> Originalconvertsamplestospecies)
{
bool morereassignments=true;
int sampletomove=SamplesToMove.back();
int destination=SampleDestinations.back();
for(int i=0;i<(CladeVector[sampletomove]).size();i++) {
convertsamplestospecies[(CladeVector[sampletomove][i])]=destination;
}
SampleDestinations.pop_back();
if (SampleDestinations.size()==0) { //we've finished trying to assign all destinations
convertsamplestospecies.swap(Originalconvertsamplestospecies); //need to use the original vector; if this try results in a better vector, we'll throw out the Sample___ vectors anyway; if not, we'll go back to the Originalconvertsamplestospecies vector, so we have to make sure any moves are allowed under that vector
int nspecies=0;
for (int i=0;i<convertsamplestospecies.size();i++) {
nspecies=GSL_MAX(convertsamplestospecies[i],nspecies);
}
vector<int> TempSampleDestinations;
if (SamplesToMove.size()<2) { //we were already on the last sample, too
morereassignments=false;
}
else {
bool enoughdestinations=false;
while (!enoughdestinations && SamplesToMove.size()>1) {
SamplesToMove.pop_back(); //we're done trying to move that sample
SampleDestinations.clear();
for (int i=0;i<nspecies;i++) {
if(TestMoveSamples(SamplesToMove.back(),i+1)) {
TempSampleDestinations.push_back(i+1);
enoughdestinations=true;
}
}
}
morereassignments=enoughdestinations;
if(enoughdestinations) {
gsl_permutation * v = gsl_permutation_alloc (TempSampleDestinations.size());
gsl_permutation_init (v);
gsl_ran_shuffle (r, v->data, TempSampleDestinations.size(), sizeof(size_t));
for (int i=0; i<TempSampleDestinations.size(); i++) {
SampleDestinations.push_back(TempSampleDestinations[gsl_permutation_get (v,i)]);
}
gsl_permutation_free(v);
}
}
convertsamplestospecies.swap(Originalconvertsamplestospecies);
}
return morereassignments;
}
bool BROWNIE::TestMoveSamples(int Sample, int Destination)
{
bool validmove=true;
int maxspeciesorig=0;
for (int i=0; i<convertsamplestospecies.size();i++) {
maxspeciesorig=GSL_MAX(maxspeciesorig,convertsamplestospecies[i]);
}
vector<int> origconvertsamplestospecies=convertsamplestospecies;
for(int i=1;i<=(CladeVector[Sample]).size();i++) {
//convertsamplestospecies[(CladeVector.at(Sample)).at(i)]=Destination;
int convertsamplestospeciessize=convertsamplestospecies.size();
int cladevectorsamplesize=(CladeVector[Sample]).size();
int cladevectorsize=CladeVector.size();
if(cladevectorsamplesize>convertsamplestospeciessize || cladevectorsamplesize<0) {
nxsstring outputname=treefilename;
outputname+=".CladeVector.txt";
ofstream cladevector;
cladevector.open( outputname.c_str(), ios::out | ios::app );
for (int k=0;k<CladeVector.size();k++) {
cladevector<<endl;
for (int j=0;j<(CladeVector[k]).size();j++) {
cladevector<<" "<<CladeVector[k][j];
}
}
cladevector<<endl;
cladevector<<endl<<"convertsamplestospeciessize = "<<convertsamplestospeciessize<<endl;
cladevector<<"cladevectorsamplesize = "<<cladevectorsamplesize<<endl;
cladevector<<"cladevectorsize = "<<cladevectorsize<<endl;
cladevector<<"Sample = "<<Sample<<endl<<"Destination = "<<Destination<<endl<<"i = "<<i<<endl;
cladevector.close();
}
convertsamplestospecies[(CladeVector[Sample][i-1])]=Destination; // so it doesn't run if CladeVector[Sample].size()=0
}
validmove=CheckConvertSamplesToSpeciesVector(false);
if(validmove) {
if(convertsamplestospecies==origconvertsamplestospecies) {
validmove=false; //since this induces no change
}
}
if(validmove) {
int maxspeciesfinal=0;
for (int i=0; i<convertsamplestospecies.size();i++) {
maxspeciesfinal=GSL_MAX(maxspeciesfinal,convertsamplestospecies[i]);
}
if (maxspeciesfinal!=maxspeciesorig) {
validmove=false; //need to have same number of species at start and end
}
}
convertsamplestospecies.swap(origconvertsamplestospecies);
return validmove;
}
void BROWNIE::DoExhaustiveSearch()
{
message="This will do an exhaustive search for up to 6 species. This will take some time.";
PrintMessage();
bestscore=GSL_POSINF;
vector<double> nextscorevector;
double nextscore;
message="Now starting with 1 species";
PrintMessage();
ContainingTree CurrentTree;
CurrentTree.RandomTree(1);
CurrentTree.ConvertTaxonNamesToRandomTaxonNumbers();
convertsamplestospecies.clear();
for (int i=0;i<taxa->GetNumTaxonLabels();i++) {
convertsamplestospecies.push_back(1);
cout<<" 1";
}
if (useCOAL || useMS) {
CurrentTree.InitializeMissingBranchLengths();
}
nextscorevector=GetCombinedScore(&CurrentTree);
nextscore=nextscorevector[0];
bestscore=nextscore; //this must be the best score, as it's the first tree
//TotalScores.push_back(nextscorevector[0]);
//GTPScores.push_back(nextscorevector[1]);
//StructScores.push_back(nextscorevector[2]);
FormatAndStoreBestTree(&CurrentTree,nextscorevector);
message="Score: ";
message+=nextscore;
PrintMessage();
message="Now starting with 2 species";
ContainingTree TreeTwo;
int maxspecies=2;
TreeTwo.RandomTree(maxspecies);
TreeTwo.ConvertTaxonNamesToRandomTaxonNumbers();
bestscore=DoAllAssignments(bestscore,maxspecies,&TreeTwo);
message="Now starting with 3 species";
ContainingTree TreeThree;
maxspecies=3;
TreeThree.RandomTree(maxspecies);
TreeThree.ConvertTaxonNamesToRandomTaxonNumbers();
bestscore=DoAllAssignments(bestscore,maxspecies,&TreeThree);
cout<<endl<<"Best trees overall"<<endl<<endl;
for (int i=0; i<FormattedBestTrees.size(); i++) {
(FormattedBestTrees[i]).Update();
(FormattedBestTrees[i]).GetNodeDepths();
(FormattedBestTrees[i]).Draw(cout);
cout<<endl;
}
message="\n\n#nexus\nbegin trees;\n";
for (int i=0; i<FormattedBestTrees.size(); i++) {
message+="tree sptree";
message+=i+1;
message+=" = ";
if (unrooted==1) {
message+="[&U] ";
}
else {
message+="[&R] ";
}
message+=ReturnFinalSpeciesTree(FormattedBestTrees[i]);
message+="\n";
}
message+="end;\n\n";
PrintMessage();
message="Best score = ";
message+=bestscore;
PrintMessage();
}
double BROWNIE::DoAllAssignments(double bestscore, int maxspecies, ContainingTree *SpeciesTree )
{
vector<double> nextscorevector;
double nextscore;
vector <int> countpertaxon(taxa->GetNumTaxonLabels(),1);
for (int i=0;i<taxa->GetNumTaxonLabels();i++) {
convertsamplestospecies[i]=1;
cout<<" "<<convertsamplestospecies[i];
}
for (int assignmentrep=0;assignmentrep<pow(maxspecies,taxa->GetNumTaxonLabels());assignmentrep++) {
if (CheckConvertSamplesToSpeciesVectorSpNum(maxspecies)) { //we have the right number of species
nextscorevector=GetCombinedScore(SpeciesTree);
nextscore=nextscorevector[0];
cout<<" Score: "<<nextscore<<" Best: "<<GSL_MIN(nextscore,bestscore);
if (nextscore==bestscore) {
// TotalScores.push_back(nextscorevector[0]);
// GTPScores.push_back(nextscorevector[1]);
// StructScores.push_back(nextscorevector[2]);
FormatAndStoreBestTree(SpeciesTree,nextscorevector);
}
else if (nextscore<bestscore) {
bestscore=nextscore;
FormattedBestTrees.clear();
TotalScores.clear();
GTPScores.clear();
StructScores.clear();
BestConversions.clear();
ContourSearchDescription.clear();
//TotalScores.push_back(nextscorevector[0]);
//GTPScores.push_back(nextscorevector[1]);
//StructScores.push_back(nextscorevector[2]);
FormatAndStoreBestTree(SpeciesTree,nextscorevector);
}
}
else {
cout<<" Invalid assignment for "<<maxspecies<<" species";
}
cout<<endl;
// for (int i=0;i<countpertaxon.size();i++) {
//cout<<" "<<countpertaxon[i];
// }
//cout<<endl;
for (int i=0;i<countpertaxon.size();i++) {
countpertaxon[i]++;
// cout<<" maxspecies = "<<maxspecies<<" i = "<<i<<" pow = "<<pow(maxspecies,i)<<" countpertaxon = "<<countpertaxon[i];
if (countpertaxon[i]>pow(maxspecies,i)) {
// cout<<" must change ";
convertsamplestospecies[i]++;
countpertaxon[i]=1;
if (convertsamplestospecies[i]>maxspecies) {
convertsamplestospecies[i]=1;
}
}
cout<<" "<<convertsamplestospecies[i];
}
}
return bestscore;
}
void BROWNIE::DoHeuristicSearch()
{
int chosenmove=0;
if (jackknifesearch) {
message="\n---------- Now starting jackknife search replicate ";
message+=jackrep;
message+=" ----------";
PrintMessage();
int includedtrees=0;
while (includedtrees<2 || includedtrees==trees->GetNumTrees()) {
jackknifevector.clear();
geneidvector.clear();
double currenttotalwt=0;
int genenumber=1;
for (int curtreenum=0; curtreenum<trees->GetNumTrees(); curtreenum++) {
double newweight=trees->GetTreeWeight(curtreenum);
currenttotalwt+=newweight;
if (currenttotalwt>1) {
currenttotalwt=newweight;
genenumber++;
}
geneidvector.push_back(genenumber);
jackknifevector.push_back(gsl_ran_bernoulli(r, newweight*(1-pctdelete))); //we randomly select trees
if (jackknifevector.back()==1) {
includedtrees++;
}
}
}
message="---------- ";
message+=includedtrees;
message+=" out of ";
message+=trees->GetNumTrees();
message+=" input trees are included. ----------\n\n";
PrintMessage();
}
message="Creating initial neighbor-joining tree for samples, based on triplet overlap. Please be patient.\n\nNow getting distances...";
PrintMessage();
GetTaxonTaxonTripletDistances();
message="Computing tree...";
PrintMessage();
ContainingTree NJBrlenTree=ComputeTripletNJTree();
ContainingTree TripletSupportBrlenTree=NJBrlenTree;
TripletSupportBrlenTree.Update();
//message="Tree of samples with triplet support brlen: ";
//PrintMessage();
//TripletSupportBrlenTree.Draw(cout);
NodeIterator <Node> l (TripletSupportBrlenTree.GetRoot()); //goes from tips down
NodePtr currentnode=l.begin();
int nodecount=0;
while (currentnode) {
if (currentnode->IsLeaf()) {
int leafnumtrans=(currentnode->GetLeafNumber())-1;
currentnode->SetEdgeLength(CladeVectorTripletSupport[leafnumtrans]);
}
else if(currentnode!=TripletSupportBrlenTree.GetRoot()) {
int nodenumber=nodecount+TripletSupportBrlenTree.GetNumLeaves();
currentnode->SetEdgeLength(CladeVectorTripletSupport[nodenumber]);
nodecount++;
}
currentnode=l.next();
}
if( logf_open ) {
TripletSupportBrlenTree.Draw(logf);
logf << endl;
logf << "#nexus\nbegin trees;\ntree GuideTreeTripletSupportBrlen = ";
TripletSupportBrlenTree.Write(logf);
logf << "\ntree GuideTreeTripletSupportBrlen = ";
NJBrlenTree.Write(logf);
logf<<endl<<"end;"<<endl;
}
message="Proportion & type of moves:\n\t";
message+=movefreqvector[0];
message+="\tSubtree pruning and regrafting\n\t";
message+=movefreqvector[1];
message+="\tMove samples from one species to another\n\t";
message+=movefreqvector[2];
message+="\tIncrease the number of species\n\t";
message+=movefreqvector[3];
message+="\tDecrease the number of species\n\t";
message+=movefreqvector[4];
message+="\tReroot the species tree\n\t";
message+=movefreqvector[5];
message+="\tChange species tree branch lengths\n";
PrintMessage();
bestscore=GSL_POSINF;
vector<int> intialconvertsamplestospeciesvector=convertsamplestospecies;
double nextscore;
double nextgtpscore;
double nexttripletscore;
vector<double> nextscorevector;
nxsstring scoretype;
message="Now starting the search proper.\nA \">\" before a score indicates that calculation of that score was aborted once the score for that move exceeded the best local score\n";
if (!jackknifesearch) {
if (!useCOAL && !useMS) {
message+="\nRep\tMoves\t#Spp\tType\tQual\tCombScore\t GTP\t Struct\t Local\t Global\tNTrees\tRemaining";
}
else {
if (COALaicmode==0) {
message+="\nRep\tMoves\t#Spp\tType\tQual\t NegLnL\t Local\t Global\tNTrees\tRemaining";
}
else if (COALaicmode==1) {
message+="\nRep\tMoves\t#Spp\tType\tQual\t AIC\t Local\t Global\tNTrees\tRemaining";
}
else {
message+="\nRep\tMoves\t#Spp\tType\tQual\t AICc\t Local\t Global\tNTrees\tRemaining";
}
}
}
else {
message+="\nJackRep\tRep\tMoves\t#Spp\tType\tQual\tCombScore\t GTP\t Struct\t Local\t Global\tNTrees\tRemaining";
}
PrintMessage();
for (int replicate=1;replicate<=nreps;replicate++) {
convertsamplestospecies=intialconvertsamplestospeciesvector;
ContainingTree StartingTree;
//cout<<"Starting vector = "<<endl;
//convertsamplestospecies=intialconvertsamplestospeciesvector;
//for (int i=0;i<convertsamplestospecies.size();i++) {
// cout<<convertsamplestospecies[i]<<" ";
//}
//cout<<endl;
bool assignmentbasedontriplettree=true;
bool validassignment=false;
if (convertsamplestospecies.size()==0) {
double splitwhenappropriateprob=1.0; //See where this is used below (to do initial assignment). This can be dropped if the initial assignments have too many species
while (!validassignment) {
convertsamplestospecies=intialconvertsamplestospeciesvector;
if (assignmentbasedontriplettree) { //use the triplet tree to get assignments
if (gsl_ran_flat(r,0,1)<tripletdistthreshold) {
//New method, based on splitting on longest internal branches in starting nj tree. Basically, split on internal branches with longer than average lengths
int nsamples=taxa->GetNumTaxonLabels();
int numnontrivialclades=nsamples-2;
convertsamplestospecies.assign(nsamples,1);
if (debugmode) {
cout<<"assembling initial convertsamplestospeciesvector, using NJ tree distances"<<endl;
for (int k=0;k<convertsamplestospecies.size();k++) {
cout<<convertsamplestospecies[k]<<" ";
}
cout<<endl;
}
int currentspecies=2;
for (int currentpos=CladeVectorNJBrlen.size()-1;currentpos>=nsamples-1;currentpos--) { //start at the root, work up (based on other order on way down)
if (CladeVectorNJBrlen[currentpos]>meaninternalbrlen && (CladeVector[currentpos]).size()>=minsamplesperspecies && gsl_ran_bernoulli(r,splitwhenappropriateprob)==1) {
for(int j=0;j<(CladeVector[currentpos]).size();j++) {
convertsamplestospecies[(CladeVector[currentpos][j])]=currentspecies;
}
currentspecies++;
if (debugmode) {
for (int taxon=0; taxon<nsamples; taxon++) {
cout<<convertsamplestospecies[taxon]<<" "<<taxa->GetTaxonLabel(taxon)<<endl;
}
cout<<endl;
}
}
}
bool goodshape=CheckConvertSamplesToSpeciesVector(true);
goodshape=CombineSpeciesWithTooFewSamples(true);
if (debugmode) {
for (int taxon=0; taxon<nsamples; taxon++) {
cout<<convertsamplestospecies[taxon]<<" "<<taxa->GetTaxonLabel(taxon)<<endl;
}
cout<<endl;
}
goodshape=CheckConvertSamplesToSpeciesTooManySpecies();
if (!goodshape) {
splitwhenappropriateprob=splitwhenappropriateprob*0.9; //we had too many splits, so drop the chance of splitting
}
if (goodshape) {
goodshape=CheckConvertSamplesToSpeciesTooFewSpecies();
}
validassignment=goodshape;
/* //Old method for getting starting assignments: tended to split good clades too often, not pay attention to relative support
int nsamples=taxa->GetNumTaxonLabels();
int numnontrivialclades=nsamples-2;
convertsamplestospecies.assign(nsamples,1);
int currentpos=nsamples-1;
int currentspecies=2;
while (currentpos<CladeVector.size()) {
currentpos+=1+gsl_ran_binomial(r,.5,4);
if (currentpos<CladeVector.size()) {
for(int j=0;j<(CladeVector[currentpos]).size();j++) {
convertsamplestospecies[(CladeVector[currentpos][j])]=currentspecies;
}
}
currentspecies++;
}
bool goodshape=CheckConvertSamplesToSpeciesVector(true);
goodshape=CombineSpeciesWithTooFewSamples(true);
*/ //Old method for getting starting assignments
}
else { //Use triplet support distances
int nsamples=taxa->GetNumTaxonLabels();
int numnontrivialclades=nsamples-2;
convertsamplestospecies.assign(nsamples,1);
if (debugmode) {
cout<<"assembling initial convertsamplestospeciesvector, using triplet support distances"<<endl;
for (int k=0;k<convertsamplestospecies.size();k++) {
cout<<convertsamplestospecies[k]<<" ";
}
cout<<endl;
}
int currentspecies=2;
for (int currentpos=CladeVectorTripletSupport.size()-1;currentpos>=nsamples-1;currentpos--) { //start at the root, work up (based on other order on way down)
double supportvalue=CladeVectorTripletSupport[currentpos];
if ((supportvalue>gsl_ran_flat(r,0.6,1)) && (CladeVector[currentpos]).size()>=minsamplesperspecies) { //only split on branches with 60% support or more
for(int j=0;j<(CladeVector[currentpos]).size();j++) {
convertsamplestospecies[(CladeVector[currentpos][j])]=currentspecies;
}
currentspecies++;
if (debugmode) {
for (int taxon=0; taxon<nsamples; taxon++) {
cout<<convertsamplestospecies[taxon]<<" "<<taxa->GetTaxonLabel(taxon)<<endl;
}
cout<<endl;
}
}
}
bool goodshape=CheckConvertSamplesToSpeciesVector(true);
goodshape=CombineSpeciesWithTooFewSamples(true);
if (debugmode) {
for (int taxon=0; taxon<nsamples; taxon++) {
cout<<convertsamplestospecies[taxon]<<" "<<taxa->GetTaxonLabel(taxon)<<endl;
}
cout<<endl;
}
goodshape=CheckConvertSamplesToSpeciesTooManySpecies();
if (goodshape) {
goodshape=CheckConvertSamplesToSpeciesTooFewSpecies();
}
validassignment=goodshape;
}
}
else {
//message="You didn't do an intial assignment of taxa to species, so we'll do a random assignment";
//PrintMessage();
int ntax=taxa->GetNumTaxonLabels();
//cout<<"ntax is "<<ntax<<endl;
int samplesperspecies=GSL_MAX(minsamplesperspecies,1+gsl_ran_binomial(r,0.5,7)); // can change this; set now for on average 4.5 samples per species
convertsamplestospecies.clear();
vector <int> tempconvertsamplestospecies;
int speciesid=1;
int assignmentcount=0;
for (int i=0;i<ntax;i++) {
tempconvertsamplestospecies.push_back(speciesid);
assignmentcount++;
if (assignmentcount==samplesperspecies) {
assignmentcount=0;
speciesid++;
samplesperspecies=GSL_MAX(minsamplesperspecies,1+gsl_ran_binomial(r,0.5,7)); //currently set for an average of 4.5 samples per species
//cout<<"speciesid is "<<speciesid<<endl;
}
}
if (assignmentcount<minsamplesperspecies && assignmentcount>0) { //Means the last species has too few samples in it, so we'll merge it with a random earlier species
for (int i=0;i<tempconvertsamplestospecies.size();i++) {
if(tempconvertsamplestospecies[i]==speciesid) {
tempconvertsamplestospecies[i]=1+gsl_ran_binomial(r,0.5,speciesid-2);
}
}
}
gsl_permutation * c = gsl_permutation_alloc (tempconvertsamplestospecies.size());
gsl_permutation_init (c);
gsl_ran_shuffle (r, c->data, tempconvertsamplestospecies.size(), sizeof(size_t));
for (int i=0; i<tempconvertsamplestospecies.size(); i++) {
convertsamplestospecies.push_back(tempconvertsamplestospecies[gsl_permutation_get (c,i)]);
//cout<<tempconvertsamplestospecies[gsl_permutation_get (c,i)]<<endl;
}
gsl_permutation_free(c);
bool goodshape=CheckConvertSamplesToSpeciesTooManySpecies();
if (goodshape) {
goodshape=CheckConvertSamplesToSpeciesTooFewSpecies();
}
validassignment=goodshape;
}
}
}
int CurrentSppNum=0;
for (int vectorpos=0;vectorpos<convertsamplestospecies.size();vectorpos++) {
if (convertsamplestospecies[vectorpos]>CurrentSppNum) {
CurrentSppNum=convertsamplestospecies[vectorpos];
}
}
//cout<<"Starting vector = "<<endl;
//for (int i=0;i<convertsamplestospecies.size();i++) {
// cout<<convertsamplestospecies[i]<<" ";
//}
//cout<<endl;
bestscorelocal=GSL_POSINF;
vector<ContainingTree> BestTreesThisRep;
//ContainingTree BestTree;
StartingTree.RandomTree(CurrentSppNum);
StartingTree.ConvertTaxonNamesToRandomTaxonNumbers();
//cout<<"Starting tree"<<endl;
//StartingTree.Write(cout);
//cout<<endl;
//StartingTree.Draw(cout);
//cout<<endl;
//cout<<"Root "<<StartingTree.GetRoot()<<endl;
//cout<<"Root child = "<<(StartingTree.GetRoot())->GetChild()<<endl;
//cout<<"Root child is leaf? "<<((StartingTree.GetRoot())->GetChild())->IsLeaf()<<endl;
//cout<<"\nTree health\n";
//StartingTree.ReportTreeHealth();
//cout<<"\n";
// char* inputforgtp=OutputForGTP(&CurrentTree);
// cout<<OutputForGTP(&StartingTree)<<endl;
// bestscorelocal=ReturnScore(OutputForGTP(&StartingTree));
//cout<<"currentsppnum = "<<CurrentSppNum<<endl;
//cout<<"starting tree health:\n";
//StartingTree.ReportTreeHealth();
if (useCOAL || useMS) {
StartingTree.InitializeMissingBranchLengths();
}
//////////Copied from stuff below////////////////
//brlen optimization
StartingTree.FindAndSetRoot();
StartingTree.Update();
if (useCOAL && StartingTree.GetNumLeaves()>1) {
StartingTree.InitializeMissingBranchLengths();
for (int brlenrep=0; brlenrep<20*(numbrlenadjustments-1); brlenrep++) {
ContainingTree StartingTreeBrlenMod;
StartingTreeBrlenMod.SetRoot(StartingTree.CopyOfSubtree(StartingTree.GetRoot()));
StartingTreeBrlenMod.InitializeMissingBranchLengths();
StartingTreeBrlenMod.RandomlyModifySingleBranchLength(markedmultiplier,brlensigma);
vector<double> brlenscorevector=GetCombinedScore(&StartingTreeBrlenMod);
if (brlenscorevector[0]<=bestscorelocal) {
if (brlenscorevector[0]<bestscorelocal) {
brlenrep=0; //so we restart from the new optimum
bestscorelocal=brlenscorevector[0];
}
StartingTree.SetRoot(StartingTreeBrlenMod.CopyOfSubtree(StartingTreeBrlenMod.GetRoot()));
StartingTree.FindAndSetRoot();
StartingTree.Update();
}
}
}
//Contour search
if (useCOAL && StartingTree.GetNumLeaves()>1) {
vector<double> speciestreebranchlengthvector;
double startingwidth=contourstartingwidth; //start by looking at all brlen between pointestimate/startingwidth and startingwidth*pointestimated
double startingnumbersteps=contourstartingnumbersteps; //works best if odd
int maxrecursions=contourMaxRecursions;
int recursions=0;
int numberofedges=0;
bool donecontour=false;
ContainingTree StartingTreeBrlenMod;
while (!donecontour) {
recursions++;
vector<double> midpointvector;
vector<double> incrementwidths;
vector<double> currentvector;
speciestreebranchlengthvector.clear();
ContourSearchVector.clear();
StartingTree.FindAndSetRoot();
StartingTree.Update();
StartingTree.InitializeMissingBranchLengths();
StartingTreeBrlenMod.SetRoot(StartingTree.CopyOfSubtree(StartingTree.GetRoot()));
StartingTreeBrlenMod.Update();
StartingTreeBrlenMod.InitializeMissingBranchLengths();
vector<double> speciestreebranchlengthvector;
NodeIterator <Node> n (StartingTreeBrlenMod.GetRoot());
NodePtr currentnode = n.begin();
NodePtr rootnode=StartingTreeBrlenMod.GetRoot();
numberofedges=0;
while (currentnode)
{
if (currentnode!=rootnode) {
double edgelength=currentnode->GetEdgeLength();
if (gsl_isnan(edgelength)) {
edgelength=1.0;
}
speciestreebranchlengthvector.push_back(edgelength); //get midpoint edges
double basestep=exp(2.0*log(startingwidth)/(startingnumbersteps-1));
double smallestbrlen=edgelength*(pow(basestep,(0-startingnumbersteps+((startingnumbersteps+1.0)/2.0))));
midpointvector.push_back(edgelength);
currentvector.push_back(smallestbrlen); //start with minimum values, then move up
incrementwidths.push_back(basestep);
numberofedges++;
}
currentnode = n.next();
}
bool donegrid=false;
vector<int> increments;
increments.assign(numberofedges,0);
int origCOALaicmode=COALaicmode;
COALaicmode=0;
vector<double> startingscorevector=GetCombinedScore(&StartingTree);
double currentscore=startingscorevector[0];
while (!donegrid) {
//cout<<"Looping over grid tries"<<endl;
currentnode = n.begin();
int nodenumber=0;
while (currentnode)
{
if (currentnode!=rootnode) {
currentnode->SetEdgeLength(currentvector[nodenumber]);
nodenumber++;
}
currentnode = n.next();
}
vector<double> brlenscorevector=GetCombinedScore(&StartingTreeBrlenMod);
double brlenscore=brlenscorevector[0]-currentscore;
bool atmargin=false;
for (int i=0; i<numberofedges; i++) {
if ((increments[i]==0) || (increments[i]==startingnumbersteps-1)) {
atmargin=true;
}
}
if (atmargin) { //if we're bumping up against minimum branchlength, there's nowhere further to expand the grid
for (int i=0; i<numberofedges; i++) {
if (currentvector[i]==0) {
atmargin=false;
}
}
}
//cout<<endl;
if (atmargin) { //we're at a margin of the space; want to make sure that the region within two lnL is inside this region
if (brlenscore<2 && recursions<maxrecursions) { //our region is too small, since points 2 lnL units away from the max are outside the region
startingwidth*=1.5;
donegrid=true; //break the while(!donegrid) loop; since donecontour isn't done, reinitialize everything
}
}
vector<double> resultvector=currentvector;
resultvector.push_back(brlenscore);
ContourSearchVector.push_back(resultvector);
if (brlenscore<0 && recursions<=maxrecursions) {
currentscore=brlenscorevector[0];
StartingTree.SetRoot(StartingTreeBrlenMod.CopyOfSubtree(StartingTreeBrlenMod.GetRoot()));
StartingTree.FindAndSetRoot();
StartingTree.Update();
donegrid=true; //break the while(!donegrid) loop; since donecontour isn't done, reinitialize everything
if (showtries) {
cout<<"Better branch lengths found in grid search"<<endl;
}
}
increments[0]++;
if (!donegrid) {
for (int itemtoexamine=0; itemtoexamine<numberofedges; itemtoexamine++) {
if (increments[itemtoexamine]==startingnumbersteps) {
if (itemtoexamine<numberofedges-1) { //means there's room to the left
increments[itemtoexamine]=0;
increments[itemtoexamine+1]++;
}
else {
donegrid=true;
donecontour=true;
}
}
}
}
currentvector.clear();
for (int i=0; i<numberofedges; i++) {
double newbrlen=midpointvector[i]*(pow(incrementwidths[i],(increments[i]-startingnumbersteps+((startingnumbersteps+1)/2))));
currentvector.push_back(newbrlen);
}
}
COALaicmode=origCOALaicmode;
}
vector<double> totalbrlen;
totalbrlen.assign(numberofedges-1,0);
int numequaltrees=0;
for (int i=0; i<ContourSearchVector.size(); i++) {
if (ContourSearchVector[i][numberofedges]<=0) {
for (int j=0; j<numberofedges; j++) {
totalbrlen[j]+=ContourSearchVector[i][j];
numequaltrees++;
}
}
}
NodeIterator <Node> n (StartingTree.GetRoot());
NodePtr currentnode = n.begin();
NodePtr rootnode=StartingTree.GetRoot();
int edgenumber=0;
while (currentnode)
{
if (currentnode!=rootnode) {
//cout<<"new brlen = "<<totalbrlen[edgenumber]/(numequaltrees*1.0)<<endl;
currentnode->SetEdgeLength(totalbrlen[edgenumber]/(numequaltrees*1.0));
edgenumber++;
}
currentnode = n.next();
}
nextscorevector=GetCombinedScore(&StartingTree);
nextscore=nextscorevector[0];
}
//////////END Copied from stuff below////////////////
vector<double> bestscorelocalvector=GetCombinedScore(&StartingTree);
bestscorelocal=bestscorelocalvector[0];
if (bestscorelocal==bestscore) {
//cout<<"Before RawBestTrees.push_back(StartingTree);"<<endl;
//StartingTree.ReportTreeHealth();
RawBestTrees.push_back(StartingTree);
//TotalScores.push_back(bestscorelocalvector[0]);
//GTPScores.push_back(bestscorelocalvector[1]);
//StructScores.push_back(bestscorelocalvector[2]);
FormatAndStoreBestTree(&StartingTree,bestscorelocalvector);
scoretype="*G\t";
}
else if (bestscorelocal<bestscore) {
RawBestTrees.clear();
//cout<<"Before RawBestTrees.push_back(StartingTree);"<<endl;
//StartingTree.ReportTreeHealth();
RawBestTrees.push_back(StartingTree);
FormattedBestTrees.clear();
TotalScores.clear();
GTPScores.clear();
StructScores.clear();
BestConversions.clear();
ContourSearchDescription.clear();
// TotalScores.push_back(bestscorelocalvector[0]);
// GTPScores.push_back(bestscorelocalvector[1]);
// StructScores.push_back(bestscorelocalvector[2]);
FormatAndStoreBestTree(&StartingTree,bestscorelocalvector);
bestscore=bestscorelocal;
scoretype="=G\t";
}
else {
scoretype="*L\t";
}
//while (bestscorelocal<0) { //due to error in GTP
// bestscorelocal=ReturnScore(OutputForGTP(&StartingTree));
//}
//cout<<"Before BestTreesThisRep.push_back(StartingTree);"<<endl;
//StartingTree.ReportTreeHealth();
BestTreesThisRep.push_back(StartingTree);
//BestTree=StartingTree;
int movecount=0;
if (status) {
//cout<<"\n\n"<<OutputForGTP(&CurrentTree)<<"\n\n";
// cout<<"Starting tree: \n\n"; //Rewrite the draw function to allow output to a file
// CurrentTree.Draw(cout);
// cout<<"\nScore is "<<bestscore<<"\n";
}
// if (BestTrees.size()==0) {
// BestTrees.push_back(CurrentTree);
// }
bool improvement=true;
bool moreswaps=true;
bool morereassignments=true;
bool moreincreases=true;
bool moredecreases=true;
bool morererootings=true;
while (improvement && (rearrlimit<0 || movecount<rearrlimit)) {
//cout<<"\nimprovement, restarting\n";
improvement=false;
if (chosenmove!=6) { //only reset moves on topology change
bool moreswaps=true;
bool morereassignments=true;
bool moreincreases=true;
bool moredecreases=true;
bool morererootings=true;
}
// cout<<"moreswaps = "<<moreswaps<<" morereassignments = "<<morereassignments<<" moreincreases = "<<moreincreases<<" moredecreases = "<<moredecreases<<" morererootings = "<<morererootings<<endl;
assert(BestTreesThisRep.size()>0);
//for(int i=0;i<BestTreesThisRep.size();i++) {
// BestTreesThisRep[i].Write(cout);
// cout<<endl;
// }
// (BestTreesThisRep.back()).Draw(cout);
// (BestTreesThisRep.back()).ReportTreeHealth();
(BestTreesThisRep.back()).Update();
//(BestTreesThisRep.back()).ReportTreeHealth();
// cout<<"GetRoot: "<<(BestTreesThisRep.back()).GetRoot()<<endl;
assert(BestTreesThisRep.size()>0);
//cout<<"Before ContainingTree CurrentTree=BestTreesThisRep.back();"<<endl;
//(BestTreesThisRep.back()).ReportTreeHealth();
ContainingTree CurrentTree=BestTreesThisRep.back();
CurrentTree.Update();
CurrentTree.ResetBreakVector();
CurrentTree.UpdateCherries();
CurrentTree.SetLeafNumbers();
if ((sppnumfixed==true) || CurrentTree.GetNumLeaves()<=minnumspecies) {
moredecreases=false;
}
if ((sppnumfixed==true) || CurrentTree.GetNumLeaves()>=maxnumspecies) {
moreincreases=false;
}
if (movefreqvector[0]==0 || CurrentTree.GetNumLeaves()<3) {
moreswaps=false;
}
if (movefreqvector[1]==0 || CurrentTree.GetNumLeaves()==1) {
morereassignments=false;
}
if (movefreqvector[2]==0) {
moreincreases=false;
}
if (movefreqvector[3]==0 || CurrentTree.GetNumLeaves()==1) {
moredecreases=false;
}
if (movefreqvector[4]==0 || CurrentTree.GetNumLeaves()<3) {
morererootings=false;
}
//Get list of cherries to collapse; we do this at the start so that we try each cherry at random but only once.
vector<int> TempCherriesToMash;
vector<int> CherriesToMash;
for (int i=0;i<CurrentTree.GetNumCherries(); i++) {
TempCherriesToMash.push_back(i);
}
if (CurrentTree.GetNumCherries()>0) {
gsl_permutation * p = gsl_permutation_alloc (TempCherriesToMash.size());
gsl_permutation_init (p);
gsl_ran_shuffle (r, p->data, TempCherriesToMash.size(), sizeof(size_t));
for (int i=0; i<TempCherriesToMash.size(); i++) {
CherriesToMash.push_back(TempCherriesToMash[gsl_permutation_get (p,i)]);
}
gsl_permutation_free(p);
}
else {
moredecreases=false;
}
//Get list of nodes to reroot on
vector<int> NodesToReRootOn=CurrentTree.GetPotentialNewRoots();
if (NodesToReRootOn.size()==0) {
morererootings=false;
}
//cout<<"There are potentially "<<NodesToReRootOn.size()<<" new roots\n";
//Get list of leaves to split; we do this at the start so that we try each possible leaf (leaves with at least two samples) at random but only once.
vector<int> TempLeavesToSplit;
vector<int> LeavesToSplit;
for (int i=0;i<CurrentTree.GetNumLeaves(); i++) {
int numsamples=0;
for (int j=0;j<convertsamplestospecies.size();j++) {
if(convertsamplestospecies[j]==i+1) {
numsamples++;
}
}
if (numsamples>minsamplesperspecies) {
TempLeavesToSplit.push_back(i+1);
}
}
if (TempLeavesToSplit.size()>0) {
gsl_permutation * q = gsl_permutation_alloc (TempLeavesToSplit.size());
gsl_permutation_init (q);
gsl_ran_shuffle (r, q->data, TempLeavesToSplit.size(), sizeof(size_t));
for (int i=0; i<TempLeavesToSplit.size(); i++) {
LeavesToSplit.push_back(TempLeavesToSplit[gsl_permutation_get (q,i)]);
}
gsl_permutation_free(q);
if (showtries) {
cout<<"Made LeavesToSplitVector of size "<<LeavesToSplit.size()<<endl<<"contents: ";
for (int k=0;k<LeavesToSplit.size();k++) {
cout<<LeavesToSplit[k]<<"\t";
}
cout<<endl;
}
}
else {
moreincreases=false;
}
SamplesToMove.clear();
vector<int> TempSamplesToMove;
int maxsamplesperspecies=0;
vector<int> SamplesPerSpecies(1+CurrentTree.GetNumLeaves(),0); //so SamplesPerSpecies[0] is empty but then SamplesPerSpecies[X] is the number of samples for species X
for (int i=0;i<convertsamplestospecies.size();i++) {
SamplesPerSpecies[(convertsamplestospecies[i])]++;
maxsamplesperspecies=GSL_MAX(maxsamplesperspecies,SamplesPerSpecies[(convertsamplestospecies[i])]);
}
if (maxsamplesperspecies<=minsamplesperspecies || movefreqvector[1]==0) {
morereassignments=false;
}
else {
vector<int> TempSampleDestinations;
for (int i=0;i<CladeVector.size();i++) {
TempSamplesToMove.push_back(i);
}
gsl_permutation * u = gsl_permutation_alloc (TempSamplesToMove.size());
gsl_permutation_init (u);
gsl_ran_shuffle (r, u->data, TempSamplesToMove.size(), sizeof(size_t));
for (int i=0; i<TempSamplesToMove.size(); i++) {
SamplesToMove.push_back(TempSamplesToMove[gsl_permutation_get (u,i)]);
}
gsl_permutation_free(u);
SampleDestinations.clear();
bool enoughdestinations=false;
while (!enoughdestinations && SamplesToMove.size()>0) {
SampleDestinations.clear();
for (int i=0;i<CurrentTree.GetNumLeaves();i++) {
if(TestMoveSamples(SamplesToMove.back(),i+1)) {
TempSampleDestinations.push_back(i+1);
enoughdestinations=true;
}
}
if (!enoughdestinations) {
SamplesToMove.pop_back(); //we're done trying to move that sample
}
}
morereassignments=enoughdestinations;
if(enoughdestinations) {
gsl_permutation * v = gsl_permutation_alloc (TempSampleDestinations.size());
gsl_permutation_init (v);
gsl_ran_shuffle (r, v->data, TempSampleDestinations.size(), sizeof(size_t));
for (int i=0; i<TempSampleDestinations.size(); i++) {
SampleDestinations.push_back(TempSampleDestinations[gsl_permutation_get (v,i)]);
}
gsl_permutation_free(v);
}
}
//cout<<"moreswaps = "<<moreswaps<<" morereassignments = "<<morereassignments<<" moreincreases = "<<moreincreases<<" moredecreases = "<<moredecreases<<" morererootings = "<<morererootings<<endl;
if (movecount==0) {
message="";
if (jackknifesearch) {
message+=jackrep;
message+="\t";
}
message+=replicate;
message+="\t";
message+=movecount;
message+="\t";
message+=CurrentTree.GetNumLeaves();
message+="\t\t";
message+=scoretype;
char outputstring[9];
sprintf(outputstring,"%9.3f",bestscorelocal);
message+=outputstring;
if (!useCOAL && !useMS) {
message+="\t";
sprintf(outputstring,"%9.3f",bestscorelocalvector[1]);
message+=outputstring;
message+="\t";
sprintf(outputstring,"%9.3f",bestscorelocalvector[2]);
message+=outputstring;
}
message+="\t";
sprintf(outputstring,"%9.3f",bestscorelocal);
message+=outputstring;
message+="\t";
sprintf(outputstring,"%9.3f",GSL_MIN(bestscore,bestscorelocal));
message+=outputstring;
message+="\t";
message+=int(FormattedBestTrees.size());
if (moreswaps) {
message+="\ts";
}
else {
message+="\t_";
}
if (morereassignments) {
message+="a";
}
else {
message+="_";
}
if (moreincreases) {
message+="i";
}
else {
message+="_";
}
if (moredecreases) {
message+="d";
}
else {
message+="_";
}
if (morererootings) {
message+="r";
}
else {
message+="_";
}
if (movefreqvector[5]>0) {
message+="b";
}
if (status) {
PrintMessage();
}
}
while ((moreswaps || morereassignments || moreincreases || moredecreases || morererootings) && (rearrlimit<0 || movecount<rearrlimit)) {
//cout<<"while ((moreswaps || morereassignments || moreincreases || moredecreases || morererootings) && (rearrlimit<0 || movecount<rearrlimit)) {"<<endl;
bool somethinghappened=true;
//cout<<"just before ContainingTree NextTree=CurrentTree"<<endl;
CurrentTree.FindAndSetRoot();
CurrentTree.Update();
//CurrentTree.ReportTreeHealth();
ContainingTree NextTree=CurrentTree;
//cout<<"just after ContainingTree NextTree=CurrentTree"<<endl;
//cout<<"just before ContainingTree BrlenChangedTree"<<endl;
ContainingTree BrlenChangedTree;
//cout<<"just after ContainingTree BrlenChangedTree"<<endl;
// cout<<"\n\nOldTree\n"<<ReturnFinalSpeciesTree(CurrentTree)<<endl;
// for (int i=0;i<convertsamplestospecies.size();i++) {
// cout<<convertsamplestospecies[i]<<" ";
// }
// cout<<endl;
NextTree.UpdateCherries();
NextTree.Update();
NextTree.GetNodeDepths();
vector<int> Originalconvertsamplestospecies=convertsamplestospecies;
//decide chosen move
if (CurrentTree.GetNumLeaves()==1) {
moreswaps=false;
morereassignments=false;
moredecreases=false;
morererootings=false;
}
double randomvalue=double(gsl_ran_flat (r,0,1));
chosenmove=0;
nxsstring chosenmovestring="?";
vector<double> possiblemovefreqvector;
vector<int> possiblemovechoicevector;
vector<nxsstring> possiblemoveabbrevvector;
if (moreswaps) {
possiblemovefreqvector.push_back(movefreqvector[0]);
possiblemovechoicevector.push_back(1);
possiblemoveabbrevvector.push_back("s");
}
if (morereassignments) {
possiblemovefreqvector.push_back(movefreqvector[1]);
possiblemovechoicevector.push_back(2);
possiblemoveabbrevvector.push_back("a");
}
if (moreincreases) {
possiblemovefreqvector.push_back(movefreqvector[2]);
possiblemovechoicevector.push_back(3);
possiblemoveabbrevvector.push_back("i");
}
if (moredecreases) {
possiblemovefreqvector.push_back(movefreqvector[3]);
possiblemovechoicevector.push_back(4);
possiblemoveabbrevvector.push_back("d");
}
if (morererootings) {
possiblemovefreqvector.push_back(movefreqvector[4]);
possiblemovechoicevector.push_back(5);
possiblemoveabbrevvector.push_back("r");
}
possiblemovefreqvector.push_back(movefreqvector[5]); //the brlen optimization
if (movefreqvector[5]>0) {
possiblemovechoicevector.push_back(6);
possiblemoveabbrevvector.push_back("b");
}
double sumofpossiblemovefreqs=0;
for (int k=0; k<possiblemovefreqvector.size(); k++) {
sumofpossiblemovefreqs+=possiblemovefreqvector[k];
}
for (int k=0; k<possiblemovefreqvector.size(); k++) {
possiblemovefreqvector[k]=(possiblemovefreqvector[k])/sumofpossiblemovefreqs;
// cout<<"possiblemovefreqvector["<<k<<"] = "<<possiblemovefreqvector[k]<<"\tpossiblemovechoicevector["<<k<<"] = "<<possiblemovechoicevector[k]<<endl;
}
double runningtotal=0;
for (int k=0; k<possiblemovefreqvector.size(); k++) {
runningtotal+=possiblemovefreqvector[k];
// cout<<"randomvalue = "<<randomvalue<<" runningtotal = "<<runningtotal;
if (randomvalue<=runningtotal) {
chosenmove=possiblemovechoicevector[k];
chosenmovestring=possiblemoveabbrevvector[k];
//cout<<" chosenmove is "<<chosenmove;
break;
}
//cout<<endl;
}
//cout<<"randomvalue is "<<randomvalue<<" chosenmove is "<<chosenmove<<" moreswaps = "<<moreswaps<<" morereassignments = "<<morereassignments<<" moreincreases = "<<moreincreases<<" moredecreases = "<<moredecreases<<" morererootings = "<<morererootings<<endl;
movecount++;
// cout<<"convertsamplestospecies\n";
// for (int m=0;m<convertsamplestospecies.size();m++) {
// cout<<convertsamplestospecies[m]<<" ";
// }
// cout<<endl;
//cout<<"chosenmove = "<<chosenmove<<endl;
if (chosenmove==1) { //Try branch swap
if (showtries) {
cout<<"Trying branch swap"<<endl;
cout<<"Start tree = \n";
NextTree.Draw(cout);
}
NextTree.SetBreakVector(CurrentTree.GetBreakVector());
NextTree.SetAttachVector(CurrentTree.GetAttachVector());
NextTree.FindAndSetRoot();
NextTree.Update();
moreswaps=NextTree.NextSPR();
if (showtries) {
cout<<"Swap tree = \n";
NextTree.Draw(cout);
}
CurrentTree.SetBreakVector(NextTree.GetBreakVector()); //due to how the vectors are updated during a swap.
CurrentTree.SetAttachVector(NextTree.GetAttachVector());
if (useCOAL || useMS) {
NextTree.InitializeMissingBranchLengths();
}
nextscorevector=GetCombinedScore(&NextTree);
nextscore=nextscorevector[0];
}
else if(chosenmove==2) {
if (showtries) {
cout<<"Moving a sample from one species to another\n";
}
if (showtries) {
cout<<"Start assignment = (";
for (int i=0;i<(CladeVector[SamplesToMove.back()]).size();i++) {
cout<<" "<<CladeVector[SamplesToMove.back()][i];
}
cout<<" )\n";
for (int i=0; i<convertsamplestospecies.size();i++) {
cout<<convertsamplestospecies[i]<<" ";
}
cout<<"\n";
}
morereassignments=MoveSamples(Originalconvertsamplestospecies);
if (showtries) {
for (int i=0; i<convertsamplestospecies.size();i++) {
cout<<convertsamplestospecies[i]<<" ";
}
cout<<"\n";
}
if (useCOAL || useMS) {
NextTree.InitializeMissingBranchLengths();
}
nextscorevector=GetCombinedScore(&NextTree);
nextscore=nextscorevector[0];
}
else if(chosenmove==3) {
//cout<<"LeavesToSplitVect\n";
//for (int i=0;i<LeavesToSplit.size();i++) {
// cout<<" "<<LeavesToSplit[i];
// }
//increase the number of species
if (showtries) {
cout<<"LeavesToSplitVect\n";
cout<<"vector size is "<<LeavesToSplit.size()<<endl;
for (int i=0;i<LeavesToSplit.size();i++) {
cout<<" "<<LeavesToSplit[i];
}
cout<<"Splitting a leaf\n";
}
int ChosenLeaf=LeavesToSplit.back();
//cout<<"\nChosenLeaf = "<<ChosenLeaf<<endl;
LeavesToSplit.pop_back();
// cout<<"\nVector size now "<<LeavesToSplit.size();
if (LeavesToSplit.size()==0) {
moreincreases=false;
}
// cout<<"\nmoreincreases value = "<<moreincreases<<endl;
vector<int>changevector;
if (showtries) {
cout<<"Split leaf start tree = \n";
NextTree.Draw(cout);
}
changevector=NextTree.SplitLeaf(ChosenLeaf); //first element is split taxon, second element is new taxon
if (showtries) {
cout<<"Final tree = \n";
NextTree.Draw(cout);
}
if (useCOAL || useMS) {
NextTree.InitializeMissingBranchLengths();
}
//now try optimizing the new assignments (which descendant the samples go with) before actually getting the score
vector<int> samplestomove;
int sampletostay;
for (int i=0;i<convertsamplestospecies.size();i++) {
//cout<<convertsamplestospecies[i]<<" ";
if(convertsamplestospecies[i]==changevector[0]) {
samplestomove.push_back(i);
}
}
// cout<<endl;
sampletostay=samplestomove.back();
samplestomove.pop_back();
//so idea here is to try all combinations, with one sample fixed in the old species and the others allowed to be in either species
vector<int> Startingconvertsamplestospecies=convertsamplestospecies;
vector<int> Bestconvertsamplestospecies=convertsamplestospecies;
bool bestscorefound=false;
double bestscoreforcombination=GSL_POSINF;
vector<double> lastscorevector;
// cout<<"leaf split starting assignment"<<endl;
// for (int i=0;i<convertsamplestospecies.size();i++) {
// cout<<convertsamplestospecies[i]<<" ";
// }
// cout<<endl;
size_t j;
double numberofcomparisons=0;
for(int l=1;l<=samplestomove.size();l++){
numberofcomparisons+=gsl_sf_choose(samplestomove.size(),l);
if (showtries) {
cout<<"numberofcomparisons now "<<numberofcomparisons<<endl;
}
if (numberofcomparisons<1) {
cout<<"Number of comparisons was "<<numberofcomparisons<<" and samplestomove.size() was "<<samplestomove.size()<<endl;
}
assert(numberofcomparisons>0);
}
double probofacomb=(pow((1.0*numberofcomparisons),1.0/chosensubsampling))/(1.0*numberofcomparisons); //a way to reduce the search effort
if (probofacomb*numberofcomparisons<10) { //so that if we choose a ridiculous number we expect to do at least ten swaps
probofacomb=1;
}
else if (probofacomb!=probofacomb) {
if (showtries) {
cout<<"prob of a comb is "<<probofacomb<<" so we're adjusting it"<<endl;
}
probofacomb=GSL_MIN(100.0/numberofcomparisons,1);
}
if (showtries) {
cout<<"Prob of a comb is "<<probofacomb<<" expected number of assignments to examine is "<<probofacomb*numberofcomparisons<<endl;
}
int combinationmoves=0;
//while(bestscorefound==false) {
gsl_combination *c;
vector<int> LastTriedconvertsamplestospecies;
for (j=1;j<=samplestomove.size();j++) { //always move
c=gsl_combination_calloc(samplestomove.size(),j);
do
{
combinationmoves++;
if (gsl_ran_bernoulli(r,probofacomb)==1 || steepest || exhaustive) {
if (showtries) {
cout<<combinationmoves<<"/"<<numberofcomparisons<<" = ";
cout<<(1.0*combinationmoves)/(1.0*numberofcomparisons)<<endl;
//ProgressBar(0);
}
convertsamplestospecies=Startingconvertsamplestospecies;
for (int k=0;k<j;k++) {
convertsamplestospecies[samplestomove[int(gsl_combination_get(c,k))]]=changevector[1]; //assign this taxon to the new species
}
LastTriedconvertsamplestospecies=convertsamplestospecies;
// for (int m=0;m<convertsamplestospecies.size();m++) {
// cout<<convertsamplestospecies[m]<<" ";
// }
//for (int i=0;i<convertsamplestospecies.size();i++) {
// cout<<convertsamplestospecies[i]<<" ";
// }
// cout<<endl;
// for (int i=0;i<convertsamplestospecies.size();i++) {
// cout<<convertsamplestospecies[i]<<" ";
// }
// cout<<endl;
vector<double> newscoreforcombinationvector=GetCombinedScore(&NextTree);
double newscoreforcombination=newscoreforcombinationvector[0];
lastscorevector.swap(newscoreforcombinationvector);
if (showtries) {
cout<<"Score "<<newscoreforcombination<<" assign ( ";
for (int i=0; i<convertsamplestospecies.size();i++) {
cout<<convertsamplestospecies[i]<<" ";
}
cout<<" )"<<endl;
}
if ((newscoreforcombination<bestscoreforcombination) && (1==gsl_finite(newscoreforcombination))) { //the second condition makes sure it isn't nan or inf
if (showtries) {
cout<<" Above is BETTER"<<endl;
}
bestscoreforcombination=newscoreforcombination;
Bestconvertsamplestospecies=convertsamplestospecies;
if (!steepest && !exhaustive) {
bestscorefound=true;
}
}
// cout<<"\t"<<newscoreforcombination<<endl;
}
}
while ((gsl_combination_next (c) == GSL_SUCCESS) && (bestscorefound==false));
gsl_combination_free(c);
}
//gsl_combination_free(c); /moved up to stop leak
// }
convertsamplestospecies.swap(Bestconvertsamplestospecies);
// cout<<"Final assignment after split leaf"<<endl;
// for (int i=0;i<convertsamplestospecies.size();i++) {
// cout<<convertsamplestospecies[i]<<" ";
// }
// cout<<endl;
// cout<<"Bestscorefound = "<<bestscorefound<<endl;
//convertsamplestospecies=Originalconvertsamplestospecies;
if(isinf(bestscoreforcombination)==0) {//so the best score is NOT infinity
if (useCOAL || useMS) {
NextTree.InitializeMissingBranchLengths();
}
nextscorevector=GetCombinedScore(&NextTree);
nextscore=nextscorevector[0];
}
else {
convertsamplestospecies=LastTriedconvertsamplestospecies; //otherwise, an n-species tree is evaluated with the original convertsamplestospecies vector, which has n-1 species
nextscorevector.swap(lastscorevector);
nextscore=nextscorevector[0];
}
}
else if(chosenmove==4) { //reduce the number of species, if possible
//cout<<"\nCherry vector"<<endl;
//for (int i=0;i<CherriesToMash.size(); i++) {
// cout<<" "<<CherriesToMash[i];
//}
//cout<<endl;
if (showtries) {
cout<<"Collapsing a cherry\n";
}
int CherryToMash=CherriesToMash.back();
//cout<<"Cherry to mash = "<<CherryToMash<<endl;
CherriesToMash.pop_back(); //this is so we look at each cherry once
if (CherriesToMash.size()==0) {
moredecreases=false;
}
vector<int>changevector;
if (showtries) {
cout<<"Cherry collapse start tree = \n";
NextTree.Draw(cout);
}
changevector=NextTree.CollapseCherry(CherryToMash);
for (int i=0; i<convertsamplestospecies.size(); i++) {
if(convertsamplestospecies[i]==changevector[1]) {
convertsamplestospecies[i]=changevector[0];
}
if(convertsamplestospecies[i]>changevector[1]) {
convertsamplestospecies[i]--; //so if we have taxa 1-8, and delete taxon 6, taxon 7 becomes the new 6 and taxon 8 becomes the new 7
}
}
if (showtries) {
cout<<"Next tree = \n";
NextTree.Draw(cout);
}
if (useCOAL || useMS) {
NextTree.InitializeMissingBranchLengths();
}
nextscorevector=GetCombinedScore(&NextTree);
nextscore=nextscorevector[0];
}
else if (chosenmove==5) {
if (showtries) {
cout<<"Rerooting"<<endl;
}
int NodeToReRootOnNum=NodesToReRootOn.back();
NodesToReRootOn.pop_back(); //this is so we look at each potential position once
if (NodesToReRootOn.size()==0) {
morererootings=false;
}
vector<int>changevector;
if (showtries) {
cout<<"Rerooting start tree = \n";
NextTree.Draw(cout);
}
NextTree.ReRootTree(NextTree.SelectNodeToReRootOn(NodeToReRootOnNum));
if (showtries) {
cout<<"Next tree = \n";
NextTree.Draw(cout);
}
if (useCOAL || useMS) {
NextTree.InitializeMissingBranchLengths();
}
nextscorevector=GetCombinedScore(&NextTree);
nextscore=nextscorevector[0];
}
else if (chosenmove==6) {
if (showtries) {
cout<<"Doing branch length optimization\n";
}
NextTree.FindAndSetRoot();
NextTree.Update();
NextTree.InitializeMissingBranchLengths();
if (useCOAL) {
NextTree.RandomlyModifySingleBranchLength(markedmultiplier,brlensigma);
}
else if (useMS) {
if (0.2>gsl_ran_flat (r,0,1) || NextTree.GetNumLeaves()<3) {
NextTree.ModifyTotalBranchLength(brlensigma);
}
else {
NextTree.NodeSlideBranchLength(markedmultiplier);
}
}
else {
message+="Warning: attempting to do branch length estimation with a criterion that doesn't take it into account";
PrintMessage();
}
nextscorevector=GetCombinedScore(&NextTree);
nextscore=nextscorevector[0];
}
else {
somethinghappened=false;
movecount--;
}
if (somethinghappened) {
//cout<<"Something happened"<<endl;
/* if (NextTree.GetNumLeaves()==NextTree.GetNumInternals()) {
errormsg="Error: num leaves = num internals\nLast move chosen was";
errormsg+=chosenmove;
NextTree.ReportTreeHealth();
throw XNexus( errormsg);
}*/
assert(CheckConvertSamplesToSpeciesVector(false));
scoretype="\t";
bool modifiedscoretype=false;
NextTree.FindAndSetRoot();
NextTree.Update();
//brlen optimization
if (useCOAL && NextTree.GetNumLeaves()>1) { //only do this is there are at least two species (brlen doesn't matter for single species);
NextTree.InitializeMissingBranchLengths();
for (int brlenrep=0; brlenrep<numbrlenadjustments; brlenrep++) {
BrlenChangedTree.SetRoot(NextTree.CopyOfSubtree(NextTree.GetRoot()));
BrlenChangedTree.InitializeMissingBranchLengths();
BrlenChangedTree.RandomlyModifySingleBranchLength(markedmultiplier,brlensigma);
vector<double> brlenscorevector=GetCombinedScore(&BrlenChangedTree);
if (brlenscorevector[0]<=nextscore) {
if (brlenscorevector[0]<nextscore) {
brlenrep=0; //so we restart from the new optimum
nextscore=brlenscorevector[0];
nextscorevector[0]=brlenscorevector[0]; //other elements are the same
}
NextTree.SetRoot(BrlenChangedTree.CopyOfSubtree(BrlenChangedTree.GetRoot()));
NextTree.FindAndSetRoot();
NextTree.Update();
}
}
}
//Contour search
if (useCOAL && NextTree.GetNumLeaves()>1) {
vector<double> speciestreebranchlengthvector;
double startingwidth=contourstartingwidth; //start by looking at all brlen between pointestimate/startingwidth and startingwidth*pointestimated
double startingnumbersteps=contourstartingnumbersteps; //works best if odd
int maxrecursions=contourMaxRecursions;
int recursions=0;
int numberofedges=0;
bool donecontour=false;
while (!donecontour) {
recursions++;
//cout<<"donecontour"<<endl;
vector<double> midpointvector;
vector<double> incrementwidths;
vector<double> currentvector;
speciestreebranchlengthvector.clear();
ContourSearchVector.clear();
NextTree.FindAndSetRoot();
NextTree.Update();
NextTree.InitializeMissingBranchLengths();
BrlenChangedTree.SetRoot(NextTree.CopyOfSubtree(NextTree.GetRoot()));
BrlenChangedTree.Update();
BrlenChangedTree.InitializeMissingBranchLengths();
//BrlenChangedTree.ReportTreeHealth();
vector<double> speciestreebranchlengthvector;
NodeIterator <Node> n (BrlenChangedTree.GetRoot());
NodePtr currentnode = n.begin();
NodePtr rootnode=BrlenChangedTree.GetRoot();
numberofedges=0;
while (currentnode)
{
if (currentnode!=rootnode) {
double edgelength=currentnode->GetEdgeLength();
if (gsl_isnan(edgelength)) {
edgelength=1.0;
}
speciestreebranchlengthvector.push_back(edgelength); //get midpoint edges
double basestep=exp(2.0*log(startingwidth)/(startingnumbersteps-1));
//cout<<"basestep = "<<basestep<<endl;
//brlen if startingnumbersteps=5 and starting width is 4 is edgelength/4, edgelength/2, edgelength, edgelength*2, edgelength*4; aka 2^-2, 2^-1,
//double mindepth=GSL_MAX(edgelength*(1-startingwidth),0);
//double maxdepth=edgelength*(1+startingwidth);
//cout<<"mindepth = "<<mindepth<<" maxdepth = "<<maxdepth<<endl<<endl;
double smallestbrlen=edgelength*(pow(basestep,(0-startingnumbersteps+((startingnumbersteps+1.0)/2.0))));
midpointvector.push_back(edgelength);
currentvector.push_back(smallestbrlen); //start with minimum values, then move up
incrementwidths.push_back(basestep);
numberofedges++;
}
currentnode = n.next();
}
bool donegrid=false;
vector<int> increments;
increments.assign(numberofedges,0);
int origCOALaicmode=COALaicmode;
COALaicmode=0;
vector<double> startingscorevector=GetCombinedScore(&NextTree);
double currentscore=startingscorevector[0];
while (!donegrid) {
for (int i=0;i<currentvector.size();i++) {
//cout<<currentvector[i]<<" ";
}
//cout<<endl;
//cout<<"donegrid"<<endl;
currentnode = n.begin();
int nodenumber=0;
while (currentnode)
{
if (currentnode!=rootnode) {
currentnode->SetEdgeLength(currentvector[nodenumber]);
nodenumber++;
}
currentnode = n.next();
}
vector<double> brlenscorevector=GetCombinedScore(&BrlenChangedTree);
double brlenscore=brlenscorevector[0]-currentscore;
//cout<<"score "<<brlenscore;
bool atmargin=false;
for (int i=0; i<numberofedges; i++) {
if ((increments[i]==0) || (increments[i]==startingnumbersteps-1)) {
atmargin=true;
}
//cout<<" "<<currentvector[i];
}
if (atmargin) { //if we're bumping up against minimum branchlength, there's nowhere further to expand the grid
for (int i=0; i<numberofedges; i++) {
if (currentvector[i]==0) {
atmargin=false;
}
}
}
//cout<<endl;
if (atmargin) { //we're at a margin of the space; want to make sure that the region within two lnL is inside this region
if (brlenscore<2 && recursions<maxrecursions) { //our region is too small, since points 2 lnL units away from the max are outside the region
//message="Starting width of ";