Skip to content

Commit

Permalink
one logger per module
Browse files Browse the repository at this point in the history
  • Loading branch information
julia326 committed Oct 4, 2016
1 parent 98b61b2 commit 0a637a2
Show file tree
Hide file tree
Showing 10 changed files with 58 additions and 28 deletions.
14 changes: 8 additions & 6 deletions pyensembl/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,11 @@
# any time we update the database schema, increment this version number
DATABASE_SCHEMA_VERSION = 2


logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)


class Database(object):
"""
Wrapper around sqlite3 database so that the rest of the
Expand All @@ -51,9 +56,6 @@ def __init__(self, gtf, install_string):
self.install_string = install_string
self._connection = None

self.logger = logging.getLogger()
self.logger.setLevel(logging.INFO)

def __eq__(self, other):
return (
other.__class__ is Database and
Expand Down Expand Up @@ -114,7 +116,7 @@ def _all_possible_indices(self, column_names):
# are not available in all releases of Ensembl (or
# other GTFs)
if column_name not in column_set:
logging.info(
logger.info(
"Skipping database index for {%s}",
", ".join(column_group))
skip = True
Expand Down Expand Up @@ -187,7 +189,7 @@ def create(self, overwrite=False):
str(self))

db_path = self.local_db_path()
print("Creating database: %s" % (db_path,))
logger.info("Creating database: %s", db_path)
df = self.gtf.dataframe()
all_index_groups = self._all_possible_indices(df.columns)

Expand Down Expand Up @@ -396,7 +398,7 @@ def run_sql_query(self, sql, required=False, query_params=[]):
cursor = self.connection.execute(sql, query_params)
except sqlite3.OperationalError as e:
error_message = e.message if hasattr(e, 'message') else str(e)
logging.warn(
logger.warn(
"Encountered error \"%s\" from query \"%s\" with parameters %s",
error_message,
sql,
Expand Down
8 changes: 6 additions & 2 deletions pyensembl/download_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@

import datacache


logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

CACHE_BASE_SUBDIR = "pyensembl"
CACHE_DIR_ENV_KEY = "PYENSEMBL_CACHE_DIR"

Expand Down Expand Up @@ -208,7 +212,7 @@ def _download_if_necessary(self, url, download_if_missing, overwrite):
cached_path = self.cached_path(url)
missing = not exists(cached_path)
if (missing or overwrite) and download_if_missing:
logging.info("Fetching %s from URL %s", cached_path, url)
logger.info("Fetching %s from URL %s", cached_path, url)
local_filename = split(cached_path)[1]
datacache.download._download(
filename=local_filename,
Expand Down Expand Up @@ -304,7 +308,7 @@ def delete_cached_files(self, prefixes=[], suffixes=[]):
any(filename.startswith(pre) for pre in prefixes))
if delete:
path = join(self.cache_directory_path, filename)
print("Deleting %s" % path)
logger.info("Deleting %s", path)
remove(path)

def delete_cache_directory(self):
Expand Down
9 changes: 7 additions & 2 deletions pyensembl/fasta.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,11 @@

from six import binary_type, PY3


logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)


def _parse_header_id(line):
"""
Pull the transcript or protein identifier from the header line
Expand Down Expand Up @@ -117,7 +122,7 @@ def _current_entry(self):
# entry of the file then put the last one in the dictionary
if self.current_id:
if len(self.current_lines) == 0:
logging.warn("No sequence data for '%s'" % self.current_id)
logger.warn("No sequence data for '%s'", self.current_id)
else:
sequence = b"".join(self.current_lines)
if PY3:
Expand All @@ -134,7 +139,7 @@ def _read_header(self, line):
self.current_id = _parse_header_id(line)

if len(self.current_id) == 0:
logging.warn("Unable to parse ID from header line: %s" % line)
logger.warn("Unable to parse ID from header line: %s", line)

self.current_lines = []
return previous_entry
Expand Down
4 changes: 0 additions & 4 deletions pyensembl/genome.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
"""

from __future__ import print_function, division, absolute_import
import logging
from os import remove
from os.path import exists

Expand Down Expand Up @@ -112,9 +111,6 @@ def __init__(
self.has_gtf = self._gtf_path_or_url is not None
self.has_transcript_fasta = self._transcript_fasta_path_or_url is not None
self.has_protein_fasta = self._protein_fasta_path_or_url is not None

self.logger = logging.getLogger()
self.logger.setLevel(logging.INFO)
self.memory_cache = MemoryCache()

self._init_lazy_fields()
Expand Down
8 changes: 7 additions & 1 deletion pyensembl/gtf.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
# limitations under the License.

from __future__ import print_function, division, absolute_import
import logging
from os.path import split, abspath, join, exists, splitext
import pandas as pd

Expand All @@ -22,6 +23,11 @@
from .normalization import normalize_chromosome, normalize_strand
from .memory_cache import MemoryCache


logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)


class GTF(object):
"""
Parse a GTF gene annotation file from a given local path.
Expand Down Expand Up @@ -131,7 +137,7 @@ def _load_full_dataframe_from_gtf(self):
"""
Parse this genome source's GTF file and load it as a Pandas DataFrame
"""
print("Reading GTF from %s" % self.gtf_path)
logger.info("Reading GTF from %s", self.gtf_path)
df = read_gtf_as_dataframe(
self.gtf_path,
column_converters={
Expand Down
11 changes: 8 additions & 3 deletions pyensembl/memory_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,11 @@

from .common import load_pickle, dump_pickle


logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)


class MemoryCache(object):
"""
In-memory and on-disk caching of long-running queries and computations.
Expand All @@ -44,7 +49,7 @@ def is_empty(self, filename):

def delete_file(self, path):
if exists(path):
logging.info("Deleting cached file %s" % path)
logger.info("Deleting cached file %s", path)
remove(path)

def remove_from_cache(self, key):
Expand All @@ -58,7 +63,7 @@ def clear_cached_objects(self):
self._memory_cache.clear()

def _read_csv(self, csv_path):
print("Reading Dataframe from %s" % csv_path)
logger.info("Reading Dataframe from %s", csv_path)
df = pd.read_csv(csv_path)
if 'seqname' in df:
# by default, Pandas will infer the type as int,
Expand All @@ -79,7 +84,7 @@ def _write_csv(self, df, csv_path, chunksize=10**5):
Number of rows to write at a time. Helps to limit memory
consumption while writing a CSV.
"""
print("Saving DataFrame to %s" % csv_path)
logger.info("Saving DataFrame to %s", csv_path)
df.to_csv(csv_path, index=False, chunksize=chunksize)

def cached_dataframe(self, csv_path, compute_fn):
Expand Down
19 changes: 12 additions & 7 deletions pyensembl/sequence_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,11 @@
)
from .fasta import parse_fasta_dictionary


logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)


class SequenceData(object):
"""
Container for reference nucleotide and amino acid sequences.
Expand Down Expand Up @@ -87,19 +92,19 @@ def _load_or_create_fasta_dictionary_pickle(self):
try:
self._fasta_dictionary = load_pickle(
self.fasta_dictionary_pickle_path)
logging.info(
"Loaded sequence dictionary from %s" % self.fasta_dictionary_pickle_path)
logger.info(
"Loaded sequence dictionary from %s", self.fasta_dictionary_pickle_path)
return
except (pickle.UnpicklingError, AttributeError):
# catch either an UnpicklingError or an AttributeError
# resulting from pickled objects refering to classes
# that no longer exists
logging.warn(
"Failed to load %s, attempting to read FASTA directly" % (
self.fasta_dictionary_pickle_path,))
logging.info("Parsing sequences from FASTA file at %s" % self.fasta_path)
logger.warn(
"Failed to load %s, attempting to read FASTA directly",
self.fasta_dictionary_pickle_path)
logger.info("Parsing sequences from FASTA file at %s", self.fasta_path)
self._fasta_dictionary = parse_fasta_dictionary(self.fasta_path)
logging.info("Saving sequence dictionary to %s" % self.fasta_dictionary_pickle_path)
logger.info("Saving sequence dictionary to %s", self.fasta_dictionary_pickle_path)
dump_pickle(self._fasta_dictionary, self.fasta_dictionary_pickle_path)

def index(self, overwrite=False):
Expand Down
10 changes: 8 additions & 2 deletions pyensembl/shell.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,16 @@

from __future__ import absolute_import
import argparse
import logging

from .ensembl_release import EnsemblRelease
from .genome import Genome


logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)


def run():
parser = argparse.ArgumentParser(usage=__doc__)
parser.add_argument(
Expand Down Expand Up @@ -134,11 +140,11 @@ def run():
EnsemblRelease(version, species=args.species))

if len(genomes) == 0:
print("ERROR: No genomes selected!\n")
logger.error("ERROR: No genomes selected!")
parser.print_help()

for genome in genomes:
print("-- Running '%s' for %s" % (args.action, genome))
logger.info("Running '%s' for %s", args.action, genome)
if args.action == "delete-all-files":
genome.download_cache.delete_cache_directory()
elif args.action == "delete-index-files":
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,4 @@ tinytimer>=0.0.0
six>=1.9.0
pylint>=1.4.4
gtfparse>=0.0.3
serializable
serializable
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@
"six>=1.9.0",
"gtfparse>=0.0.3",
"serializable",
"tinytimer",
],
long_description=readme,
packages=['pyensembl'],
Expand Down

0 comments on commit 0a637a2

Please sign in to comment.