Skip to content

Commit

Permalink
Merge branch 'release/0.3.5'
Browse files Browse the repository at this point in the history
  • Loading branch information
abought committed May 27, 2022
2 parents d151504 + d6aec3a commit 7c818be
Show file tree
Hide file tree
Showing 5 changed files with 40 additions and 2 deletions.
2 changes: 1 addition & 1 deletion setup.py
Expand Up @@ -43,7 +43,7 @@
# For a discussion on single-sourcing the version across setup.py and the
# project code, see
# https://packaging.python.org/en/latest/single_source_version.html
version='0.3.4', # Required
version='0.3.5', # Required

# This is a one-line description or tagline of what your project does. This
# corresponds to the "Summary" metadata field:
Expand Down
17 changes: 17 additions & 0 deletions tests/test_parsers.py
Expand Up @@ -160,6 +160,23 @@ def test_parses_freq_from_freq(self):
p = special_parser(line)
assert p.alt_allele_freq == 0.25, "Parses frequency as is"

def test_pathological_chromosome_fails(self):
    # Regression guard: a badly specified input file can put a non-categorical value in the chromosome
    # column. Chromosomes are checked against a whitelist so that such rows are rejected by the parser
    # before they ever reach tabix (which otherwise eats all the RAM on the server).
    bad_row = '1\tchr1:722408:G:C\t722408\tc\tg\t0.9298\tchr1:722408'
    column_config = dict(
        alt_col=5, pos_col=3, ref_col=4, beta_col=None, chrom_col=2, pvalue_col=6,
        stderr_beta_col=None, is_neg_log_pvalue=None,
    )
    parse_line = parsers.GenericGwasLineParser(**column_config)

    # The message lists the allowed chromosomes in natural order
    expected_message = (
        "Chromosome 1:722408:G:C is not a valid option. Must be one of: '1 2 3 4 5 6 7 8 9 10 11 12 13 "
        "14 15 16 17 18 19 20 21 22 23 24 25 M MT X Y'"
    )
    with pytest.raises(exceptions.LineParseException, match=expected_message):
        parse_line(bad_row)


class TestStandardGwasParser:
def test_parses_locuszoom_standard_format(self, standard_gwas_parser):
Expand Down
2 changes: 1 addition & 1 deletion zorp/__init__.py
@@ -1,6 +1,6 @@
from distutils.version import LooseVersion

__version__ = '0.3.4'
__version__ = '0.3.5'
__version_info__ = tuple(LooseVersion(__version__).version)

__all__ = [
Expand Down
7 changes: 7 additions & 0 deletions zorp/parser_utils.py
Expand Up @@ -119,3 +119,10 @@ def human_to_zero(value):
return value
else:
return value - 1


def natural_sort(items: ty.Iterable):
    """
    Return the given strings as a new list in natural (human-friendly) order.

    Runs of ASCII digits are compared by numeric value and all other text is compared
    case-insensitively, so that '2' sorts before '10'. Used for human-friendly error messages,
    eg, from a `set` of allowed strings.

    :param items: Any iterable of strings (the input is not modified)
    :return: A new sorted list containing the same strings
    """
    def alphanum_key(key):
        # Splitting on a capturing group always yields text, digits, text, ... so the odd positions are
        # exactly the ASCII digit runs. Deciding by position (instead of `str.isdigit`) keeps non-ASCII
        # digit characters such as '²' as plain text: `int()` may reject them, and treating them as
        # numbers would put an int where other keys have a str, which breaks the comparison.
        chunks = re.split(r'([0-9]+)', key)
        return [int(chunk) if index % 2 else chunk.lower() for index, chunk in enumerate(chunks)]

    return sorted(items, key=alphanum_key)
14 changes: 14 additions & 0 deletions zorp/parsers.py
Expand Up @@ -13,6 +13,15 @@
from .const import MISSING_VALUES
from . import exceptions, parser_utils as utils

# Chromosome names that the generic parser will accept. More values may be added, as long as some kind of
# whitelist remains in place: it is a safeguard against non-categorical values slipping into the chrom field,
# which makes tabix use all the RAM on the system and then crash horribly.
ALLOWED_CHROMS = frozenset(
    [str(number) for number in range(1, 26)] + ['X', 'Y', 'M', 'MT']
)


class BasicVariant:
"""
Expand Down Expand Up @@ -160,6 +169,11 @@ def inner(line):

chrom = chrom.upper()

if chrom not in ALLOWED_CHROMS:
options = ' '.join(utils.natural_sort(ALLOWED_CHROMS))
raise exceptions.LineParseException(
f"Chromosome {chrom} is not a valid option. Must be one of: '{options}'")

# Explicit columns will override a value from the marker, by design
if _ref_col is not None:
ref = fields[_ref_col]
Expand Down

0 comments on commit 7c818be

Please sign in to comment.