Skip to content

Commit

Permalink
User-defined filename/path for expert config file (#37)
Browse files Browse the repository at this point in the history
Expert config filename/path can be user-defined:
* Add expert config option to Nextflow main.nf
* Add expert config option to ksrates CLI commands
* Add expert config argument in Python modules
* Adapt documentation to new expert config option
* Add "-e" as short option name for "--expert"
* Removing redundant Nextflow log lines
  • Loading branch information
Cecilia-Sensalari committed Mar 24, 2022
1 parent 621191f commit bfbb623
Show file tree
Hide file tree
Showing 16 changed files with 229 additions and 78 deletions.
12 changes: 11 additions & 1 deletion doc/source/configuration.rst
Expand Up @@ -201,7 +201,17 @@ The Nextflow configuration file is used to configure various settings for the *k
Expert configuration file
=========================

This is an optional configuration file that contains several \"expert\" parameters for fine-tuning the analysis or for development/debug purposes. The file has to be named `config_expert.txt` and is then automatically detected when launching *ksrates*. The following can be used as a template::
This is an optional configuration file that contains several \"expert\" parameters for fine-tuning the analysis or for development/debug purposes. The file can be provided on the command line through the ``--expert`` option. However, if the file has the default name ``config_expert.txt`` and is placed in the launching directory, it is detected automatically and the option can be omitted.

Syntax for the Nextflow pipeline::

nextflow run VIB-PSB/ksrates --config config_elaeis.txt --expert path/to/my_expert_config.txt
Syntax for single `ksrates` commands::

ksrates init config_elaeis.txt --expert path/to/my_expert_config.txt

The following can be used as a template::

[EXPERT PARAMETERS]
Expand Down
4 changes: 2 additions & 2 deletions ksrates/cluster_anchor_ks.py
Expand Up @@ -16,8 +16,8 @@
from ksrates.fc_cluster_anchors import subfolder
from ksrates.fc_rrt_correction import _ADJUSTMENT_TABLE

def cluster_anchor_ks(config_file, correction_table_file, path_anchorpoints_txt, path_multiplicons_txt, path_segments_txt, path_list_elements_txt, path_ks_anchor_file, path_multiplicon_pair_txt):
config = fcConf.Configuration(config_file)
def cluster_anchor_ks(config_file, expert_config_file, correction_table_file, path_anchorpoints_txt, path_multiplicons_txt, path_segments_txt, path_list_elements_txt, path_ks_anchor_file, path_multiplicon_pair_txt):
config = fcConf.Configuration(config_file, expert_config_file)
init_logging(f"Clustering anchorpoints Ks values to reconstruct recent WGD events", config.get_logging_level())
logging.info("Loading parameters and input files")

Expand Down
4 changes: 2 additions & 2 deletions ksrates/compute_peaks.py
Expand Up @@ -10,9 +10,9 @@
from ksrates.utils import init_logging


def compute_peaks(config_file, ortholog_pairs_file):
def compute_peaks(config_file, expert_config_file, ortholog_pairs_file):
# INPUT
config = fcConf.Configuration(config_file)
config = fcConf.Configuration(config_file, expert_config_file)
init_logging("Computing ortholog distribution peaks with related error", config.get_logging_level())
logging.info("Loading parameters and input files")

Expand Down
4 changes: 2 additions & 2 deletions ksrates/correct.py
Expand Up @@ -10,9 +10,9 @@
from ksrates.utils import init_logging


def correct(config_file, trios_file):
def correct(config_file, expert_config_file, trios_file):
# INPUT
config = fcConf.Configuration(config_file)
config = fcConf.Configuration(config_file, expert_config_file)
init_logging("Rate-adjustment of ortholog Ks distributions", config.get_logging_level())
logging.info("Loading parameters and input files")

Expand Down
4 changes: 2 additions & 2 deletions ksrates/exp_log_mixture.py
Expand Up @@ -15,9 +15,9 @@
from ksrates.fc_rrt_correction import _ADJUSTMENT_TABLE


def exp_log_mixture(config_file, paralog_tsv_file, correction_table_file):
def exp_log_mixture(config_file, expert_config_file, paralog_tsv_file, correction_table_file):
# INPUT
config = fcConf.Configuration(config_file)
config = fcConf.Configuration(config_file, expert_config_file)
init_logging("Exponential-Lognormal mixture model on Ks paranome", config.get_logging_level())
logging.info("Loading parameters and input files")

Expand Down
30 changes: 24 additions & 6 deletions ksrates/fc_configfile.py
Expand Up @@ -9,7 +9,7 @@

class Configuration:

def __init__(self, config_path):
def __init__(self, config_path, expert_config_file):
"""
Initializes the configuration file and the expert configuration file.
This latter is always named "config_expert.txt", the code looks for it
Expand All @@ -19,13 +19,31 @@ def __init__(self, config_path):
# Configuration file
self.config = configparser.ConfigParser()
self.config.read(config_path)

# Expert configuration file
if os.path.exists("config_expert.txt"):
self.expert_config = configparser.ConfigParser()
self.expert_config.read("config_expert.txt")
self.expert_config = configparser.ConfigParser()
# If there is no user-defined expert config file given through "--expert" in the command line,
# variable "expert_config_file" was set to an empty string in the CLI code block
if expert_config_file == "":
# If an expert config file with the default name "config_expert.txt" exists in the launching folder, fall back to it
if os.path.exists("config_expert.txt"):
self.expert_config.read("config_expert.txt")
else:
# Else set the variable to None (scripts will use default expert parameters)
self.expert_config = None
# Otherwise a user-defined expert config file was given through "--expert", and
# variable "expert_config_file" already holds that filename (so it is not an empty string)
else:
self.expert_config = None

# If the user-defined file exists, read it
if os.path.exists(expert_config_file):
self.expert_config.read(expert_config_file)
else:
# Else if the user-defined file doesn't exist (e.g. misspelled), print an error and exit
# (This case is actually already caught by the CLI option definition of "--expert")
logging.error(f"User-defined expert configuration file {expert_config_file} not found:")
logging.error("please check the input path after the '--expert' parameter in the command line and rerun the analysis")
sys.exit(1)


def _get_clean_dict(self, dict_like_string, parameter):
"""This method reads a dictionary-like field from the configuration file \\
Expand Down
4 changes: 2 additions & 2 deletions ksrates/lognormal_mixture.py
Expand Up @@ -14,9 +14,9 @@
from ksrates.fc_cluster_anchors import subfolder
from ksrates.fc_rrt_correction import _ADJUSTMENT_TABLE

def lognormal_mixture(config_file, paralog_tsv_file, anchors_ks_tsv_file, correction_table_file):
def lognormal_mixture(config_file, expert_config_file, paralog_tsv_file, anchors_ks_tsv_file, correction_table_file):
# INPUT
config = fcConf.Configuration(config_file)
config = fcConf.Configuration(config_file, expert_config_file)
init_logging(f"Lognormal mixture model on Ks distribution", config.get_logging_level())
logging.info("Loading parameters and input files")

Expand Down
18 changes: 9 additions & 9 deletions ksrates/paralogs_analyses.py
Expand Up @@ -7,9 +7,9 @@
from ksrates.lognormal_mixture import lognormal_mixture
import ksrates.fc_configfile as fcConf

def paralogs_analyses_methods(config_file, paranome_table, anchors_table, correction_table, anchorpoints, multiplicons, segments, list_elements, multiplicon_pairs):
def paralogs_analyses_methods(config_file, expert_config_file, paranome_table, anchors_table, correction_table, anchorpoints, multiplicons, segments, list_elements, multiplicon_pairs):
# INPUT
config = fcConf.Configuration(config_file)
config = fcConf.Configuration(config_file, expert_config_file)
logging.basicConfig(format='%(levelname)s\t%(message)s', level=config.get_logging_level(), stream=sys.stdout)

paranome = config.get_paranome()
Expand All @@ -18,27 +18,27 @@ def paralogs_analyses_methods(config_file, paranome_table, anchors_table, correc

if paranome and not colinearity:
# Only exp-log mixture model by default
exp_log_mixture(config_file, paranome_table, correction_table)
exp_log_mixture(config_file, expert_config_file, paranome_table, correction_table)
if extra_paralogs_analyses_methods:
logging.info(f"\n")
# Lognormal mixture model on paranome
lognormal_mixture(config_file, paranome_table, anchors_table, correction_table)
lognormal_mixture(config_file, expert_config_file, paranome_table, anchors_table, correction_table)

if colinearity and not paranome:
# Only anchor clustering by default
cluster_anchor_ks(config_file, correction_table, anchorpoints, multiplicons, segments, list_elements, anchors_table, multiplicon_pairs)
cluster_anchor_ks(config_file, expert_config_file, correction_table, anchorpoints, multiplicons, segments, list_elements, anchors_table, multiplicon_pairs)
if extra_paralogs_analyses_methods:
logging.info(f"\n")
# Lognormal mixture model on anchors
lognormal_mixture(config_file, paranome_table, anchors_table, correction_table)
lognormal_mixture(config_file, expert_config_file, paranome_table, anchors_table, correction_table)

if colinearity and paranome:
# Only anchor clustering by default
cluster_anchor_ks(config_file, correction_table, anchorpoints, multiplicons, segments, list_elements, anchors_table, multiplicon_pairs)
cluster_anchor_ks(config_file, expert_config_file, correction_table, anchorpoints, multiplicons, segments, list_elements, anchors_table, multiplicon_pairs)
if extra_paralogs_analyses_methods:
logging.info(f"\n")
# Exp-log mixture model on paranome
exp_log_mixture(config_file, paranome_table, correction_table)
exp_log_mixture(config_file, expert_config_file, paranome_table, correction_table)
logging.info(f"\n")
# Lognormal mixture model on both
lognormal_mixture(config_file, paranome_table, anchors_table, correction_table)
lognormal_mixture(config_file, expert_config_file, paranome_table, anchors_table, correction_table)
4 changes: 2 additions & 2 deletions ksrates/plot_orthologs.py
Expand Up @@ -15,9 +15,9 @@
matplotlib.use('Agg')


def plot_orthologs_distr(config_file, trios_file):
def plot_orthologs_distr(config_file, expert_config_file, trios_file):
# INPUT
config = fcConf.Configuration(config_file)
config = fcConf.Configuration(config_file, expert_config_file)
init_logging("Plotting ortholog distributions for all ortholog trios", config.get_logging_level())
logging.info("Loading parameters and input files")

Expand Down
4 changes: 2 additions & 2 deletions ksrates/plot_paralogs.py
Expand Up @@ -9,9 +9,9 @@
import ksrates.fc_configfile as fcConf
from ksrates.fc_rrt_correction import _ADJUSTMENT_TABLE

def plot_paralogs_distr(config_file, correction_table_file, paralog_tsv_file, anchors_ks_tsv_file):
def plot_paralogs_distr(config_file, expert_config_file, correction_table_file, paralog_tsv_file, anchors_ks_tsv_file):
# INPUT
config = fcConf.Configuration(config_file)
config = fcConf.Configuration(config_file, expert_config_file)
init_logging("Generating mixed paralog and ortholog distributions", config.get_logging_level())
logging.info("Loading parameters and input files")

Expand Down
4 changes: 2 additions & 2 deletions ksrates/plot_tree.py
Expand Up @@ -9,9 +9,9 @@
import pandas


def plot_tree_rates(config_file, correction_table_file, nextflow_flag):
def plot_tree_rates(config_file, expert_config_file, correction_table_file, nextflow_flag):
# INPUT
config = fcConf.Configuration(config_file)
config = fcConf.Configuration(config_file, expert_config_file)
init_logging("Generating PDF of input tree with branch length equal to Ks distances", config.get_logging_level())
logging.info("Loading parameters and input files")

Expand Down
4 changes: 2 additions & 2 deletions ksrates/setup_correction.py
Expand Up @@ -10,8 +10,8 @@
from ksrates.utils import init_logging


def setup_correction(config_file, nextflow_flag):
config = fcConf.Configuration(config_file)
def setup_correction(config_file, expert_config_file, nextflow_flag):
config = fcConf.Configuration(config_file, expert_config_file)

init_logging("Setting up the analysis from configuration file", config.get_logging_level())
logging.info("Loading parameters and input files")
Expand Down
4 changes: 2 additions & 2 deletions ksrates/wgd_orthologs.py
Expand Up @@ -7,12 +7,12 @@
from ksrates.utils import init_logging


def wgd_orthologs(config_file, species_one, species_two, n_threads):
def wgd_orthologs(config_file, expert_config_file, species_one, species_two, n_threads):
# INPUT
species_pair = sorted([species_one, species_two], key=str.casefold)
species1, species2 = species_pair[0], species_pair[1] # sorted!

config = fcConf.Configuration(config_file)
config = fcConf.Configuration(config_file, expert_config_file)
init_logging(f"Ortholog wgd analysis for species pair [{species1} - {species2}]", config.get_logging_level())

# Get parameters and FASTA files from configuration file
Expand Down
4 changes: 2 additions & 2 deletions ksrates/wgd_paralogs.py
Expand Up @@ -8,10 +8,10 @@
from ksrates.utils import init_logging


def wgd_paralogs(config_file, n_threads):
def wgd_paralogs(config_file, expert_config_file, n_threads):
# INPUT
# Get parameters and FASTA files from configuration file
config = fcConf.Configuration(config_file)
config = fcConf.Configuration(config_file, expert_config_file)
species = config.get_species()
init_logging(f"Paralog wgd analysis for species [{species}]", config.get_logging_level())

Expand Down

0 comments on commit bfbb623

Please sign in to comment.