Skip to content

Commit

Permalink
one logger per module
Browse files Browse the repository at this point in the history
  • Loading branch information
julia326 committed Oct 4, 2016
1 parent 98b61b2 commit 0a637a2
Show file tree
Hide file tree
Showing 10 changed files with 58 additions and 28 deletions.
14 changes: 8 additions & 6 deletions pyensembl/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,11 @@
# any time we update the database schema, increment this version number
DATABASE_SCHEMA_VERSION = 2


logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)


class Database(object):
"""
Wrapper around sqlite3 database so that the rest of the
Expand All @@ -51,9 +56,6 @@ def __init__(self, gtf, install_string):
self.install_string = install_string
self._connection = None

self.logger = logging.getLogger()
self.logger.setLevel(logging.INFO)

def __eq__(self, other):
return (
other.__class__ is Database and
Expand Down Expand Up @@ -114,7 +116,7 @@ def _all_possible_indices(self, column_names):
# are not available in all releases of Ensembl (or
# other GTFs)
if column_name not in column_set:
logging.info(
logger.info(
"Skipping database index for {%s}",
", ".join(column_group))
skip = True
Expand Down Expand Up @@ -187,7 +189,7 @@ def create(self, overwrite=False):
str(self))

db_path = self.local_db_path()
print("Creating database: %s" % (db_path,))
logger.info("Creating database: %s", db_path)
df = self.gtf.dataframe()
all_index_groups = self._all_possible_indices(df.columns)

Expand Down Expand Up @@ -396,7 +398,7 @@ def run_sql_query(self, sql, required=False, query_params=[]):
cursor = self.connection.execute(sql, query_params)
except sqlite3.OperationalError as e:
error_message = e.message if hasattr(e, 'message') else str(e)
logging.warn(
logger.warn(
"Encountered error \"%s\" from query \"%s\" with parameters %s",
error_message,
sql,
Expand Down
8 changes: 6 additions & 2 deletions pyensembl/download_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@

import datacache


logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

CACHE_BASE_SUBDIR = "pyensembl"
CACHE_DIR_ENV_KEY = "PYENSEMBL_CACHE_DIR"

Expand Down Expand Up @@ -208,7 +212,7 @@ def _download_if_necessary(self, url, download_if_missing, overwrite):
cached_path = self.cached_path(url)
missing = not exists(cached_path)
if (missing or overwrite) and download_if_missing:
logging.info("Fetching %s from URL %s", cached_path, url)
logger.info("Fetching %s from URL %s", cached_path, url)
local_filename = split(cached_path)[1]
datacache.download._download(
filename=local_filename,
Expand Down Expand Up @@ -304,7 +308,7 @@ def delete_cached_files(self, prefixes=[], suffixes=[]):
any(filename.startswith(pre) for pre in prefixes))
if delete:
path = join(self.cache_directory_path, filename)
print("Deleting %s" % path)
logger.info("Deleting %s", path)
remove(path)

def delete_cache_directory(self):
Expand Down
9 changes: 7 additions & 2 deletions pyensembl/fasta.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,11 @@

from six import binary_type, PY3


logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)


def _parse_header_id(line):
"""
Pull the transcript or protein identifier from the header line
Expand Down Expand Up @@ -117,7 +122,7 @@ def _current_entry(self):
# entry of the file then put the last one in the dictionary
if self.current_id:
if len(self.current_lines) == 0:
logging.warn("No sequence data for '%s'" % self.current_id)
logger.warn("No sequence data for '%s'", self.current_id)
else:
sequence = b"".join(self.current_lines)
if PY3:
Expand All @@ -134,7 +139,7 @@ def _read_header(self, line):
self.current_id = _parse_header_id(line)

if len(self.current_id) == 0:
logging.warn("Unable to parse ID from header line: %s" % line)
logger.warn("Unable to parse ID from header line: %s", line)

self.current_lines = []
return previous_entry
Expand Down
4 changes: 0 additions & 4 deletions pyensembl/genome.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
"""

from __future__ import print_function, division, absolute_import
import logging
from os import remove
from os.path import exists

Expand Down Expand Up @@ -112,9 +111,6 @@ def __init__(
self.has_gtf = self._gtf_path_or_url is not None
self.has_transcript_fasta = self._transcript_fasta_path_or_url is not None
self.has_protein_fasta = self._protein_fasta_path_or_url is not None

self.logger = logging.getLogger()
self.logger.setLevel(logging.INFO)
self.memory_cache = MemoryCache()

self._init_lazy_fields()
Expand Down
8 changes: 7 additions & 1 deletion pyensembl/gtf.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
# limitations under the License.

from __future__ import print_function, division, absolute_import
import logging
from os.path import split, abspath, join, exists, splitext
import pandas as pd

Expand All @@ -22,6 +23,11 @@
from .normalization import normalize_chromosome, normalize_strand
from .memory_cache import MemoryCache


logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)


class GTF(object):
"""
Parse a GTF gene annotation file from a given local path.
Expand Down Expand Up @@ -131,7 +137,7 @@ def _load_full_dataframe_from_gtf(self):
"""
Parse this genome source's GTF file and load it as a Pandas DataFrame
"""
print("Reading GTF from %s" % self.gtf_path)
logger.info("Reading GTF from %s", self.gtf_path)
df = read_gtf_as_dataframe(
self.gtf_path,
column_converters={
Expand Down
11 changes: 8 additions & 3 deletions pyensembl/memory_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,11 @@

from .common import load_pickle, dump_pickle


logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)


class MemoryCache(object):
"""
In-memory and on-disk caching of long-running queries and computations.
Expand All @@ -44,7 +49,7 @@ def is_empty(self, filename):

def delete_file(self, path):
if exists(path):
logging.info("Deleting cached file %s" % path)
logger.info("Deleting cached file %s", path)
remove(path)

def remove_from_cache(self, key):
Expand All @@ -58,7 +63,7 @@ def clear_cached_objects(self):
self._memory_cache.clear()

def _read_csv(self, csv_path):
print("Reading Dataframe from %s" % csv_path)
logger.info("Reading Dataframe from %s", csv_path)
df = pd.read_csv(csv_path)
if 'seqname' in df:
# by default, Pandas will infer the type as int,
Expand All @@ -79,7 +84,7 @@ def _write_csv(self, df, csv_path, chunksize=10**5):
Number of rows to write at a time. Helps to limit memory
consumption while writing a CSV.
"""
print("Saving DataFrame to %s" % csv_path)
logger.info("Saving DataFrame to %s", csv_path)
df.to_csv(csv_path, index=False, chunksize=chunksize)

def cached_dataframe(self, csv_path, compute_fn):
Expand Down
19 changes: 12 additions & 7 deletions pyensembl/sequence_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,11 @@
)
from .fasta import parse_fasta_dictionary


logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)


class SequenceData(object):
"""
Container for reference nucleotide and amino acid sequences.
Expand Down Expand Up @@ -87,19 +92,19 @@ def _load_or_create_fasta_dictionary_pickle(self):
try:
self._fasta_dictionary = load_pickle(
self.fasta_dictionary_pickle_path)
logging.info(
"Loaded sequence dictionary from %s" % self.fasta_dictionary_pickle_path)
logger.info(
"Loaded sequence dictionary from %s", self.fasta_dictionary_pickle_path)
return
except (pickle.UnpicklingError, AttributeError):
# catch either an UnpicklingError or an AttributeError
# resulting from pickled objects refering to classes
# that no longer exists
logging.warn(
"Failed to load %s, attempting to read FASTA directly" % (
self.fasta_dictionary_pickle_path,))
logging.info("Parsing sequences from FASTA file at %s" % self.fasta_path)
logger.warn(
"Failed to load %s, attempting to read FASTA directly",
self.fasta_dictionary_pickle_path)
logger.info("Parsing sequences from FASTA file at %s", self.fasta_path)
self._fasta_dictionary = parse_fasta_dictionary(self.fasta_path)
logging.info("Saving sequence dictionary to %s" % self.fasta_dictionary_pickle_path)
logger.info("Saving sequence dictionary to %s", self.fasta_dictionary_pickle_path)
dump_pickle(self._fasta_dictionary, self.fasta_dictionary_pickle_path)

def index(self, overwrite=False):
Expand Down
10 changes: 8 additions & 2 deletions pyensembl/shell.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,16 @@

from __future__ import absolute_import
import argparse
import logging

from .ensembl_release import EnsemblRelease
from .genome import Genome


logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)


def run():
parser = argparse.ArgumentParser(usage=__doc__)
parser.add_argument(
Expand Down Expand Up @@ -134,11 +140,11 @@ def run():
EnsemblRelease(version, species=args.species))

if len(genomes) == 0:
print("ERROR: No genomes selected!\n")
logger.error("ERROR: No genomes selected!")
parser.print_help()

for genome in genomes:
print("-- Running '%s' for %s" % (args.action, genome))
logger.info("Running '%s' for %s", args.action, genome)
if args.action == "delete-all-files":
genome.download_cache.delete_cache_directory()
elif args.action == "delete-index-files":
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,4 @@ tinytimer>=0.0.0
six>=1.9.0
pylint>=1.4.4
gtfparse>=0.0.3
serializable
serializable
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@
"six>=1.9.0",
"gtfparse>=0.0.3",
"serializable",
"tinytimer",
],
long_description=readme,
packages=['pyensembl'],
Expand Down

0 comments on commit 0a637a2

Please sign in to comment.