Skip to content

Commit

Permalink
Added new distribution: LogUniform
Browse files Browse the repository at this point in the history
  • Loading branch information
adamallo committed Nov 13, 2015
1 parent 3b5424f commit b36f07a
Show file tree
Hide file tree
Showing 7 changed files with 60 additions and 13 deletions.
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ D_LIBS= -lgsl -lgslcblas -lsqlite3 -lmpfr

#Static libraries for MAC
_S_LIBS= libgsl.a libgslcblas.a libsqlite3.a libgmp.a libmpfr.a
LD_LIBRARY_PATH=/opt/local/lib
MS_LIBS=$(patsubst %,$(LD_LIBRARY_PATH)/%,$(_S_LIBS)) #BSD's LD needs the full path

#Static libraries for Linux
Expand Down
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -842,11 +842,11 @@
ignoreCount = "0"
continueAfterRunningActions = "No"
filePath = "src/sampling.c"
timestampString = "458058871.983623"
timestampString = "469147207.671261"
startingColumnNumber = "9223372036854775807"
endingColumnNumber = "9223372036854775807"
startingLineNumber = "138"
endingLineNumber = "138"
startingLineNumber = "140"
endingLineNumber = "140"
landmarkName = "sample_distr()"
landmarkType = "7">
</BreakpointContent>
Expand All @@ -858,11 +858,11 @@
ignoreCount = "0"
continueAfterRunningActions = "No"
filePath = "src/sampling.c"
timestampString = "465449327.617127"
timestampString = "469147207.671261"
startingColumnNumber = "9223372036854775807"
endingColumnNumber = "9223372036854775807"
startingLineNumber = "331"
endingLineNumber = "331"
startingLineNumber = "353"
endingLineNumber = "353"
landmarkName = "Print_Sampling()"
landmarkType = "7">
</BreakpointContent>
Expand Down Expand Up @@ -954,11 +954,11 @@
ignoreCount = "0"
continueAfterRunningActions = "No"
filePath = "src/sampling.c"
timestampString = "465448052.658499"
timestampString = "469147207.671261"
startingColumnNumber = "9223372036854775807"
endingColumnNumber = "9223372036854775807"
startingLineNumber = "265"
endingLineNumber = "265"
startingLineNumber = "287"
endingLineNumber = "287"
landmarkName = "ParseSampling()"
landmarkType = "7">
</BreakpointContent>
Expand Down Expand Up @@ -995,5 +995,21 @@
landmarkType = "7">
</BreakpointContent>
</BreakpointProxy>
<BreakpointProxy
BreakpointExtensionID = "Xcode.Breakpoint.FileBreakpoint">
<BreakpointContent
shouldBeEnabled = "Yes"
ignoreCount = "0"
continueAfterRunningActions = "No"
filePath = "src/sampling.c"
timestampString = "469149337.983703"
startingColumnNumber = "9223372036854775807"
endingColumnNumber = "9223372036854775807"
startingLineNumber = "158"
endingLineNumber = "158"
landmarkName = "sample_distr()"
landmarkType = "7">
</BreakpointContent>
</BreakpointProxy>
</Breakpoints>
</Bucket>
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,10 @@
argument = "-sb f:0.00001 -sp f:10000"
isEnabled = "NO">
</CommandLineArgument>
<CommandLineArgument
argument = "-i /Users/Diego/Desktop/newsim/SimPhy_configV1.conf"
isEnabled = "YES">
</CommandLineArgument>
<CommandLineArgument
argument = "-i /Users/Diego/Desktop/Sim-Phy_Debug/itrees.txt"
isEnabled = "NO">
Expand Down
2 changes: 1 addition & 1 deletion src/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -1512,7 +1512,7 @@ int main (int argc, char **argv)

void PrintUsage(void)
{
printf("\nUsage: ./SimPhy -[Parameter code] value(i|r|c|b|*) ...\n\nValue kinds\n\ti=integer\n\tr=real\n\tc=character string\n\tb=boolean(1 or 0)\n\t*=sampling notation\n\nPARAMETERS\n__________\n__________\n\n-R...→ Replicates\n_________________\n\n\t-RS i: Number of species tree replicates (study replicates).\n\t-RL *: Number of locus trees per species tree.\n\t-RG i: Number of gene trees per locus tree (Not for general usage).\n\n-G... → Genome-wide parameters (sampled for each species tree)\n______________________________________________________________\n\n\t-GB *: Duplication parameter (to use with LB)\n\t-GD *: Loss rate parameter (to use with LD).\n\t-GT *: Transfer parameter (to use with LT).\n\t-GG *: Gene conversion parameter (to use with LG).\n\t-GP *: Gene-by-lineage-specific parameter (to use with HG).\n\n-S... → Species tree\n____________________\n\n\t-S c: Fixed species tree (extended Newick format).\n\t-SR c: Nexus file with species trees.\n\t-SB *: Speciation rate (events/time unit).\n\t-SD *: Extinction rate (events/time unit).\n\t-ST *: Species tree height (time units).\n\t-SL *: Number of taxa.\n\t-SO *: Ratio between ingroup height and the branch from the root to the ingroup. If this parameter is not set the outgroup is not simulated.\n\t-SI *: Number of individuals per species.\n\t-SP *: Tree-wide effective population size.\n\t-SU *: Tree-wide substitution rate.\n\t-SG *: Tree-wide generation time.\n\n-L... → Locus tree\n__________________\n\n\t-L c: Locus tree (extended Newick format).\n\t-LR c: Nexus file containing locus trees.\n\t-LB *: Duplication rate (events/generation).\n\t-LD *: Loss rate (events/generation).\n\t-LT *: Horizontal gene transfer (HGT) rate (events/generation). 
\n\t-LG *: Gene conversion (GC) rate (events/generation).\n\t-LK b: Distance-dependent HGT/GC: Determines whether the sampling of receptors of genetic material depends (1) or not (0) on the evolutionary distance between candidates and donors.\n\t-LL i: Minimum number of locus tree leaves.\n\t-LS i: Minimum number of species represented by the locus tree leaves.\n\n-H...→ Substitution rate heterogeneity parameters\n_________________________________________________\n\n\t-HS *: Species-specific branch rate heterogeneity modifiers.\n\t-HL *: Gene-family-specific rate heterogeneity modifiers.\n\t-HH *:Gene-by-lineage-specific locus tree parameter (to use with the HG argument below).\n\t-HG *: Gene-by-lineage-specific rate heterogeneity modifiers.\n\n-C... → Global options\n______________________\n\n\t-CS i: Random number generator seed.\n\t-CE r: Precision of the Brent’s method for root-finding when sampling the multilocus coalescent. (Not for general usage)\n\n-I c: Input configuration file\n\n-V [0,6]: Verbosity. (Note: the bigger the verbosity the slower SimPhy becomes. 
Levels over 3 may only make sense for debugging or study the way SimPhy works).\n\n-O...→ Output\n_____________\n\n\t-O c: Common output prefix-name (for folder and names).\n\t-OT b: Determines whether the species and locus tree branches are written in number of generations (0) or time units (1).\n\t-OM b: Activates the tree mapping output.\n\t-OD b: Activates the SQLite database output.\n\t-OP b: Activates the logging of sampled options.\n\t-OC b: Activates the logging of original command line parameters and input configuration files.\n\t-OL b: Activates the output of trees with internal nodes labelled by its post-order id starting from 0.\n\t-ON b: Activates the output of the bounded locus subtrees file.\n\n\nSampling notation\n_________________\n\nNotation squeme= Distribution_code:parameter_1,parameter_2,...,parameter_n\nDistribution codes:\n\tF: fixed value\n\tU: Uniform\n\tN: Normal\n\tE: Exponential\n\tG: Gamma\n\tL: Lognormal\n\tSL: Lognormal * constant\nExample: N:1,1 (Normal with mean and sd equal 1)\n\nExample\n_______\n\nsimphy -sb f:0.000001 -ld f:0.0000005 -lb f:0.0000005 -lt f:0.0000005 -rs 100 -rl U:10,100 -rg 1 -o SimPhy_test -sp f:10000 -su f:0.00001 -sg f:1 -sl U:20,50 -st f:1000000 -om 1 -v 2 -od 1 -op 1 -oc 1 -on 1 -cs 22\n\n");
printf("\nUsage: ./SimPhy -[Parameter code] value(i|r|c|b|*) ...\n\nValue kinds\n\ti=integer\n\tr=real\n\tc=character string\n\tb=boolean(1 or 0)\n\t*=sampling notation\n\nPARAMETERS\n__________\n__________\n\n-R...→ Replicates\n_________________\n\n\t-RS i: Number of species tree replicates (study replicates).\n\t-RL *: Number of locus trees per species tree.\n\t-RG i: Number of gene trees per locus tree (Not for general usage).\n\n-G... → Genome-wide parameters (sampled for each species tree)\n______________________________________________________________\n\n\t-GB *: Duplication parameter (to use with LB)\n\t-GD *: Loss rate parameter (to use with LD).\n\t-GT *: Transfer parameter (to use with LT).\n\t-GG *: Gene conversion parameter (to use with LG).\n\t-GP *: Gene-by-lineage-specific parameter (to use with HG).\n\n-S... → Species tree\n____________________\n\n\t-S c: Fixed species tree (extended Newick format).\n\t-SR c: Nexus file with species trees.\n\t-SB *: Speciation rate (events/time unit).\n\t-SD *: Extinction rate (events/time unit).\n\t-ST *: Species tree height (time units).\n\t-SL *: Number of taxa.\n\t-SO *: Ratio between ingroup height and the branch from the root to the ingroup. If this parameter is not set the outgroup is not simulated.\n\t-SI *: Number of individuals per species.\n\t-SP *: Tree-wide effective population size.\n\t-SU *: Tree-wide substitution rate.\n\t-SG *: Tree-wide generation time.\n\n-L... → Locus tree\n__________________\n\n\t-L c: Locus tree (extended Newick format).\n\t-LR c: Nexus file containing locus trees.\n\t-LB *: Duplication rate (events/generation).\n\t-LD *: Loss rate (events/generation).\n\t-LT *: Horizontal gene transfer (HGT) rate (events/generation). 
\n\t-LG *: Gene conversion (GC) rate (events/generation).\n\t-LK b: Distance-dependent HGT/GC: Determines whether the sampling of receptors of genetic material depends (1) or not (0) on the evolutionary distance between candidates and donors.\n\t-LL i: Minimum number of locus tree leaves.\n\t-LS i: Minimum number of species represented by the locus tree leaves.\n\n-H...→ Substitution rate heterogeneity parameters\n_________________________________________________\n\n\t-HS *: Species-specific branch rate heterogeneity modifiers.\n\t-HL *: Gene-family-specific rate heterogeneity modifiers.\n\t-HH *:Gene-by-lineage-specific locus tree parameter (to use with the HG argument below).\n\t-HG *: Gene-by-lineage-specific rate heterogeneity modifiers.\n\n-C... → Global options\n______________________\n\n\t-CS i: Random number generator seed.\n\t-CE r: Precision of the Brent’s method for root-finding when sampling the multilocus coalescent. (Not for general usage)\n\n-I c: Input configuration file\n\n-V [0,6]: Verbosity. (Note: the bigger the verbosity the slower SimPhy becomes. 
Levels over 3 may only make sense for debugging or study the way SimPhy works).\n\n-O...→ Output\n_____________\n\n\t-O c: Common output prefix-name (for folder and names).\n\t-OT b: Determines whether the species and locus tree branches are written in number of generations (0) or time units (1).\n\t-OM b: Activates the tree mapping output.\n\t-OD b: Activates the SQLite database output.\n\t-OP b: Activates the logging of sampled options.\n\t-OC b: Activates the logging of original command line parameters and input configuration files.\n\t-OL b: Activates the output of trees with internal nodes labelled by its post-order id starting from 0.\n\t-ON b: Activates the output of the bounded locus subtrees file.\n\n\nSampling notation\n_________________\n\nNotation squeme= Distribution_code:parameter_1,parameter_2,...,parameter_n\nDistribution codes:\n\tF: fixed value\n\tU: Uniform\n\tN: Normal\n\tE: Exponential\n\tG: Gamma\n\tLN: Lognormal\n\tLU: LogUniform\n\tSL: Lognormal * constant\nExample: N:1,1 (Normal with mean and sd equal 1)\n\nExample\n_______\n\nsimphy -sb f:0.000001 -ld f:0.0000005 -lb f:0.0000005 -lt f:0.0000005 -rs 100 -rl U:10,100 -rg 1 -o SimPhy_test -sp f:10000 -su f:0.00001 -sg f:1 -sl U:20,50 -st f:1000000 -om 1 -v 2 -od 1 -op 1 -oc 1 -on 1 -cs 22\n\n");

fflush(stdout);

Expand Down
30 changes: 28 additions & 2 deletions src/sampling.c
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,8 @@ long int sample_distr(gsl_rng *r,int n_arg,...)
{
va_list ap;
int i;
double a=0;
double b=0;
sampling_unit *variable,**variables;
variables=calloc(n_arg, sizeof(sampling_unit*));
va_start(ap, n_arg);
Expand Down Expand Up @@ -150,6 +152,11 @@ long int sample_distr(gsl_rng *r,int n_arg,...)
case LOGNORMAL:
set_propsampling(variable, gsl_ran_lognormal(r,GetCastedDoubleUSU(variable->params[0], variable->params_type[0]),GetCastedDoubleUSU(variable->params[1], variable->params_type[1])));
break;
case LOGUNIFORM:
a=log(GetCastedDoubleUSU(variable->params[0], variable->params_type[0]));
b=log(GetCastedDoubleUSU(variable->params[1], variable->params_type[1]));
set_propsampling(variable, exp(a+(b-a)*gsl_rng_uniform(r)));
break;
case LOGNORMAL_MULT:
set_propsampling(variable,(gsl_ran_lognormal(r,GetCastedDoubleUSU(variable->params[0], variable->params_type[0]),GetCastedDoubleUSU(variable->params[1], variable->params_type[1]))*GetCastedDoubleUSU(variable->params[2], variable->params_type[2])));
break;
Expand Down Expand Up @@ -195,8 +202,23 @@ long int ParseSampling(char * p, sampling_unit * sample, const sampling_table sa
n_p=2;
break;
case 'L':
sample->distribution_code=LOGNORMAL;
n_p=2;
switch (toupper(*(p+1)))
{
case 'N':
sample->distribution_code=LOGNORMAL;
n_p=2;
++p;
break;
case 'U':
sample->distribution_code=LOGUNIFORM;
n_p=2;
++p;
break;

default:
return SETTINGS_ERROR;
break;
}
break;
case 'S':
switch (toupper(*(p+1)))
Expand Down Expand Up @@ -355,6 +377,10 @@ void Print_Sampling(sampling_unit *sample, char * buffer, const sampling_table s
sprintf(buffer,"LogN(");
n_param=2;
break;
case LOGUNIFORM:
sprintf(buffer,"LogU[");
n_param=2;
break;
case LOGNORMAL_MULT:
sprintf(buffer,"LogN(");
n_param=3;
Expand Down
2 changes: 1 addition & 1 deletion src/sampling.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ extern int MAX_IT;
* Normal distribution (2 parameters).
*******************************************************************************/

enum DISTRIBUTIONS {FIXED=0, UNIFORM=1, NORMAL=2, EXPONENTIAL=3, GAMMA=4, LOGNORMAL=5, LOGNORMAL_MULT=7};
/* Distribution codes carry explicit numeric values: LOGUNIFORM is 6, placed
 * between LOGNORMAL (5) and LOGNORMAL_MULT (7). */
enum DISTRIBUTIONS {FIXED=0, UNIFORM=1, NORMAL=2, EXPONENTIAL=3, GAMMA=4, LOGNORMAL=5,LOGUNIFORM=6, LOGNORMAL_MULT=7};

/**
* \enum TYPES
Expand Down

0 comments on commit b36f07a

Please sign in to comment.