User-defined filename/path for expert config file (#37)

Expert config filename/path can be user-defined:
* Add expert config option to Nextflow main.nf
* Add expert config option to ksrates CLI commands
* Add expert config argument in Python modules
* Adapt documentation to new expert config option
* Add "-e" as short option name for "--expert"
* Remove redundant Nextflow log lines
VIB-PSB · Mar 24, 2022 · bfbb623 · bfbb623
1 parent 621191f
commit bfbb623
Show file tree

Hide file tree

Showing 16 changed files with 229 additions and 78 deletions.
diff --git a/doc/source/configuration.rst b/doc/source/configuration.rst
@@ -201,7 +201,17 @@ The Nextflow configuration file is used to configure various settings for the *k
 Expert configuration file
 =========================
 
-This is an optional configuration file that contains several \"expert\" parameters for fine-tuning the analysis or for development/debug purposes. The file has to be named `config_expert.txt` and is then automatically detected when launching *ksrates*. The following can be used as a template::
+This is an optional configuration file that contains several \"expert\" parameters for fine-tuning the analysis or for development/debug purposes. The file can be provided on the command line through the ``--expert`` option. Alternatively, if the file is named ``config_expert.txt`` (the default name) and is placed in the launching directory, it is detected automatically and the ``--expert`` option can be omitted.
+
+Syntax for the Nextflow pipeline::
+
+        nextflow run VIB-PSB/ksrates --config config_elaeis.txt --expert path/to/my_expert_config.txt
+    
+Syntax for single `ksrates` commands::
+
+        ksrates init config_elaeis.txt --expert path/to/my_expert_config.txt 
+
+The following can be used as a template::
 
     [EXPERT PARAMETERS]
     

diff --git a/ksrates/cluster_anchor_ks.py b/ksrates/cluster_anchor_ks.py
@@ -16,8 +16,8 @@
 from ksrates.fc_cluster_anchors import subfolder
 from ksrates.fc_rrt_correction import _ADJUSTMENT_TABLE
 
-def cluster_anchor_ks(config_file, correction_table_file, path_anchorpoints_txt, path_multiplicons_txt, path_segments_txt, path_list_elements_txt, path_ks_anchor_file, path_multiplicon_pair_txt):
-    config = fcConf.Configuration(config_file)
+def cluster_anchor_ks(config_file, expert_config_file, correction_table_file, path_anchorpoints_txt, path_multiplicons_txt, path_segments_txt, path_list_elements_txt, path_ks_anchor_file, path_multiplicon_pair_txt):
+    config = fcConf.Configuration(config_file, expert_config_file)
     init_logging(f"Clustering anchorpoints Ks values to reconstruct recent WGD events", config.get_logging_level())
     logging.info("Loading parameters and input files")
 

diff --git a/ksrates/compute_peaks.py b/ksrates/compute_peaks.py
@@ -10,9 +10,9 @@
 from ksrates.utils import init_logging
 
 
-def compute_peaks(config_file, ortholog_pairs_file):
+def compute_peaks(config_file, expert_config_file, ortholog_pairs_file):
     # INPUT
-    config = fcConf.Configuration(config_file)
+    config = fcConf.Configuration(config_file, expert_config_file)
     init_logging("Computing ortholog distribution peaks with related error", config.get_logging_level())
     logging.info("Loading parameters and input files")
 

diff --git a/ksrates/correct.py b/ksrates/correct.py
@@ -10,9 +10,9 @@
 from ksrates.utils import init_logging
 
 
-def correct(config_file, trios_file):
+def correct(config_file, expert_config_file, trios_file):
     # INPUT
-    config = fcConf.Configuration(config_file)
+    config = fcConf.Configuration(config_file, expert_config_file)
     init_logging("Rate-adjustment of ortholog Ks distributions", config.get_logging_level())
     logging.info("Loading parameters and input files")
 

diff --git a/ksrates/exp_log_mixture.py b/ksrates/exp_log_mixture.py
@@ -15,9 +15,9 @@
 from ksrates.fc_rrt_correction import _ADJUSTMENT_TABLE
 
 
-def exp_log_mixture(config_file, paralog_tsv_file, correction_table_file):
+def exp_log_mixture(config_file, expert_config_file, paralog_tsv_file, correction_table_file):
   # INPUT
-  config = fcConf.Configuration(config_file)
+  config = fcConf.Configuration(config_file, expert_config_file)
   init_logging("Exponential-Lognormal mixture model on Ks paranome", config.get_logging_level())
   logging.info("Loading parameters and input files")
 

diff --git a/ksrates/fc_configfile.py b/ksrates/fc_configfile.py
@@ -9,7 +9,7 @@
 
 class Configuration:
 
-    def __init__(self, config_path):
+    def __init__(self, config_path, expert_config_file):
         """
         Initializes the configuration file and the expert configuration file.
         This latter is always named "config_expert.txt", the code looks for it
@@ -19,13 +19,31 @@ def __init__(self, config_path):
         # Configuration file
         self.config = configparser.ConfigParser()
         self.config.read(config_path)
+
         # Expert configuration file
-        if os.path.exists("config_expert.txt"):
-            self.expert_config = configparser.ConfigParser()
-            self.expert_config.read("config_expert.txt")
+        self.expert_config = configparser.ConfigParser()
+        # If there is no user-defined expert config file given through "--expert" in the command line,
+        # variable "expert_config_file" was set to an empty string in the CLI code block
+        if expert_config_file == "":
+            # If the launching folder contains an expert config file with the default name "config_expert.txt", fall back to it
+            if os.path.exists("config_expert.txt"):
+                self.expert_config.read("config_expert.txt")
+            else:
+                # Else set the variable to None (scripts will use default expert parameters)
+                self.expert_config = None
+        # Else if there is a user-defined expert config file given through "--expert",
+        # variable expert_config_file has already been set to such given filename (so, it's not an empty string)
         else:
-            self.expert_config = None
-
+            # If the user-defined file exists, read it
+            if os.path.exists(expert_config_file):
+                self.expert_config.read(expert_config_file)
+            else:
+                # Else if the user-defined file doesn't exist (e.g. misspelled), print an error and exit
+                # (This case is actually already caught by the CLI option definition of "--expert")
+                logging.error(f"User-defined expert configuration file {expert_config_file} not found:")
+                logging.error("please check the input path after the '--expert' parameter in the command line and rerun the analysis")
+                sys.exit(1)
+
 
     def _get_clean_dict(self, dict_like_string, parameter):
         """This method reads a dictionary-like field from the configuration file \\

diff --git a/ksrates/lognormal_mixture.py b/ksrates/lognormal_mixture.py
@@ -14,9 +14,9 @@
 from ksrates.fc_cluster_anchors import subfolder
 from ksrates.fc_rrt_correction import _ADJUSTMENT_TABLE
 
-def lognormal_mixture(config_file, paralog_tsv_file, anchors_ks_tsv_file, correction_table_file):
+def lognormal_mixture(config_file, expert_config_file, paralog_tsv_file, anchors_ks_tsv_file, correction_table_file):
     # INPUT
-    config = fcConf.Configuration(config_file)
+    config = fcConf.Configuration(config_file, expert_config_file)
     init_logging(f"Lognormal mixture model on Ks distribution", config.get_logging_level())
     logging.info("Loading parameters and input files")
 

diff --git a/ksrates/paralogs_analyses.py b/ksrates/paralogs_analyses.py
@@ -7,9 +7,9 @@
 from ksrates.lognormal_mixture import lognormal_mixture
 import ksrates.fc_configfile as fcConf
 
-def paralogs_analyses_methods(config_file, paranome_table, anchors_table, correction_table, anchorpoints, multiplicons, segments, list_elements, multiplicon_pairs):
+def paralogs_analyses_methods(config_file, expert_config_file, paranome_table, anchors_table, correction_table, anchorpoints, multiplicons, segments, list_elements, multiplicon_pairs):
   # INPUT
-  config = fcConf.Configuration(config_file)
+  config = fcConf.Configuration(config_file, expert_config_file)
   logging.basicConfig(format='%(levelname)s\t%(message)s', level=config.get_logging_level(), stream=sys.stdout)
 
   paranome = config.get_paranome()
@@ -18,27 +18,27 @@ def paralogs_analyses_methods(config_file, paranome_table, anchors_table, correc
 
   if paranome and not colinearity:
     # Only exp-log mixture model by default
-    exp_log_mixture(config_file, paranome_table, correction_table)
+    exp_log_mixture(config_file, expert_config_file, paranome_table, correction_table)
     if extra_paralogs_analyses_methods:
       logging.info(f"\n")
       # Lognormal mixture model on paranome
-      lognormal_mixture(config_file, paranome_table, anchors_table, correction_table) 
+      lognormal_mixture(config_file, expert_config_file, paranome_table, anchors_table, correction_table) 
 
   if colinearity and not paranome:
     # Only anchor clustering by default
-    cluster_anchor_ks(config_file, correction_table, anchorpoints, multiplicons, segments, list_elements, anchors_table, multiplicon_pairs)
+    cluster_anchor_ks(config_file, expert_config_file, correction_table, anchorpoints, multiplicons, segments, list_elements, anchors_table, multiplicon_pairs)
     if extra_paralogs_analyses_methods:
       logging.info(f"\n")
       # Lognormal mixture model on anchors
-      lognormal_mixture(config_file, paranome_table, anchors_table, correction_table)
+      lognormal_mixture(config_file, expert_config_file, paranome_table, anchors_table, correction_table)
 
   if colinearity and paranome:
     # Only anchor clustering by default
-    cluster_anchor_ks(config_file, correction_table, anchorpoints, multiplicons, segments, list_elements, anchors_table, multiplicon_pairs)
+    cluster_anchor_ks(config_file, expert_config_file, correction_table, anchorpoints, multiplicons, segments, list_elements, anchors_table, multiplicon_pairs)
     if extra_paralogs_analyses_methods:
       logging.info(f"\n")
       # Exp-log mixture model on paranome
-      exp_log_mixture(config_file, paranome_table, correction_table)
+      exp_log_mixture(config_file, expert_config_file, paranome_table, correction_table)
       logging.info(f"\n")
       # Lognormal mixture model on both
-      lognormal_mixture(config_file, paranome_table, anchors_table, correction_table)
+      lognormal_mixture(config_file, expert_config_file, paranome_table, anchors_table, correction_table)
diff --git a/ksrates/plot_orthologs.py b/ksrates/plot_orthologs.py
@@ -15,9 +15,9 @@
 matplotlib.use('Agg')
 
 
-def plot_orthologs_distr(config_file, trios_file):
+def plot_orthologs_distr(config_file, expert_config_file, trios_file):
     # INPUT
-    config = fcConf.Configuration(config_file)
+    config = fcConf.Configuration(config_file, expert_config_file)
     init_logging("Plotting ortholog distributions for all ortholog trios", config.get_logging_level())
     logging.info("Loading parameters and input files")
 

diff --git a/ksrates/plot_paralogs.py b/ksrates/plot_paralogs.py
@@ -9,9 +9,9 @@
 import ksrates.fc_configfile as fcConf
 from ksrates.fc_rrt_correction import _ADJUSTMENT_TABLE
 
-def plot_paralogs_distr(config_file, correction_table_file, paralog_tsv_file, anchors_ks_tsv_file):
+def plot_paralogs_distr(config_file, expert_config_file, correction_table_file, paralog_tsv_file, anchors_ks_tsv_file):
     # INPUT
-    config = fcConf.Configuration(config_file)
+    config = fcConf.Configuration(config_file, expert_config_file)
     init_logging("Generating mixed paralog and ortholog distributions", config.get_logging_level())
     logging.info("Loading parameters and input files")
 

diff --git a/ksrates/plot_tree.py b/ksrates/plot_tree.py
@@ -9,9 +9,9 @@
 import pandas
 
 
-def plot_tree_rates(config_file, correction_table_file, nextflow_flag):
+def plot_tree_rates(config_file, expert_config_file, correction_table_file, nextflow_flag):
     # INPUT
-    config = fcConf.Configuration(config_file)
+    config = fcConf.Configuration(config_file, expert_config_file)
     init_logging("Generating PDF of input tree with branch length equal to Ks distances", config.get_logging_level())
     logging.info("Loading parameters and input files")
 

diff --git a/ksrates/setup_correction.py b/ksrates/setup_correction.py
@@ -10,8 +10,8 @@
 from ksrates.utils import init_logging
 
 
-def setup_correction(config_file, nextflow_flag):
-    config = fcConf.Configuration(config_file)
+def setup_correction(config_file, expert_config_file, nextflow_flag):
+    config = fcConf.Configuration(config_file, expert_config_file)
 
     init_logging("Setting up the analysis from configuration file", config.get_logging_level())
     logging.info("Loading parameters and input files")

diff --git a/ksrates/wgd_orthologs.py b/ksrates/wgd_orthologs.py
@@ -7,12 +7,12 @@
 from ksrates.utils import init_logging
 
 
-def wgd_orthologs(config_file, species_one, species_two, n_threads):
+def wgd_orthologs(config_file, expert_config_file, species_one, species_two, n_threads):
     # INPUT
     species_pair = sorted([species_one, species_two], key=str.casefold)
     species1, species2 = species_pair[0], species_pair[1] # sorted!
 
-    config = fcConf.Configuration(config_file)
+    config = fcConf.Configuration(config_file, expert_config_file)
     init_logging(f"Ortholog wgd analysis for species pair [{species1} - {species2}]", config.get_logging_level())
 
     # Get parameters and FASTA files from configuration file

diff --git a/ksrates/wgd_paralogs.py b/ksrates/wgd_paralogs.py
@@ -8,10 +8,10 @@
 from ksrates.utils import init_logging
 
 
-def wgd_paralogs(config_file, n_threads):
+def wgd_paralogs(config_file, expert_config_file, n_threads):
     # INPUT
     # Get parameters and FASTA files from configuration file
-    config = fcConf.Configuration(config_file)
+    config = fcConf.Configuration(config_file, expert_config_file)
     species = config.get_species()
     init_logging(f"Paralog wgd analysis for species [{species}]", config.get_logging_level())