Skip to content

Commit

Permalink
Merge branch 'release/0.3.4'
Browse files Browse the repository at this point in the history
  • Loading branch information
abought committed Dec 17, 2021
2 parents e926ce9 + 9e7d1bd commit d151504
Show file tree
Hide file tree
Showing 9 changed files with 86 additions and 34 deletions.
47 changes: 47 additions & 0 deletions .github/workflows/pythonpackage.yml
@@ -0,0 +1,47 @@
# Continuous integration for the package: installs it with its test extras, then runs the
# mypy type check and the pytest suite (with the flake8 plugin) on each Python version in the matrix.
name: Python unit tests

# Run on pushes and pull requests targeting the two long-lived branches, and when a release is created.
on:
  push:
    branches:
      - master
      - develop
  pull_request:
    branches:
      - master
      - develop
  release:
    types:
      - created

jobs:
  build:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # Quoted so that YAML keeps each version as a string; an unquoted value such as 3.10
        # would be parsed as the float 3.1.
        # NOTE(review): confirm that the current `ubuntu-latest` image still provides Python 3.6.
        python-version: ['3.6', '3.8']

    steps:
      - uses: actions/checkout@v2
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}
      # Reuse pip's download/wheel cache between runs; the key changes whenever any setup.py changes.
      - uses: actions/cache@v1
        with:
          path: ~/.cache/pip
          key: ${{ runner.os }}-pip-${{ hashFiles('**/setup.py') }}
          restore-keys: |
            ${{ runner.os }}-pip-
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          # Install `wheel` so that pip can cache wheels it builds to save a few minutes on each run.
          # Pip keeps a cache of HTTPS requests in `~/.cache/pip/http/`. For packages that have only
          # a tar file available (and no wheel), pip builds the package itself. It only caches to
          # `~/.cache/pip/wheels/` if `wheel` is installed.
          pip install wheel
          pip install -e '.[test,perf,lookups]'
      - name: Run tests
        run: |
          mypy .
          pytest --flake8 .
13 changes: 0 additions & 13 deletions .travis.yml

This file was deleted.

2 changes: 1 addition & 1 deletion README.md
@@ -1,6 +1,6 @@
# ZORP: A helpful GWAS parser

[![Build Status](https://api.travis-ci.org/abought/zorp.svg?branch=develop)](https://api.travis-ci.org/abought/zorp)
![Build Status](https://github.com/abought/zorp/workflows/Python%20unit%20tests/badge.svg?branch=develop)

## Why?
ZORP is intended to abstract away differences in file formats, and help you work with GWAS data from many
Expand Down
3 changes: 1 addition & 2 deletions bin/zorp_convert.py
Expand Up @@ -60,7 +60,7 @@ def main(source: ty.Union[str, ty.Iterable],
skip_rows=None,
skip_errors=True,
max_errors=100,
make_tabix: bool = False) -> str:
make_tabix: bool = False):
try:
parser = parsers.GenericGwasLineParser(**parser_options)
except exceptions.ConfigurationException:
Expand All @@ -85,7 +85,6 @@ def main(source: ty.Union[str, ty.Iterable],
logger.exception('Conversion failed due to unknown error')
else:
logger.info('Conversion succeeded! Results written to: {}'.format(dest_fn))
return dest_fn
finally:
for n, reason, _ in reader.errors:
logger.error('Excluded row {} from output due to parse error: {}'.format(n, reason))
Expand Down
1 change: 1 addition & 0 deletions setup.cfg
Expand Up @@ -3,6 +3,7 @@ exclude = .git,env,venv,.venv,node_modules,docs
max-line-length = 120

[mypy]
exclude = tests/*
python_version = 3.6
check_untyped_defs = True
ignore_errors = False
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Expand Up @@ -43,7 +43,7 @@
# For a discussion on single-sourcing the version across setup.py and the
# project code, see
# https://packaging.python.org/en/latest/single_source_version.html
version='0.3.3', # Required
version='0.3.4', # Required

# This is a one-line description or tagline of what your project does. This
# corresponds to the "Summary" metadata field:
Expand Down
18 changes: 18 additions & 0 deletions tests/test_sniffers.py
Expand Up @@ -200,6 +200,24 @@ def test_can_guess_standard_format(self):
assert actual._parser._allele_freq_col is None, 'Sniffer does not try to detect allele freq'
# ...yet. Allele freqs are hard to guess reliably, but maybe we will improve.

def test_can_guess_gwas_catalog_mostly(self):
    """Sniff a GWAS-catalog-style file: positional and statistic columns are found, ref/alt are not."""
    header = [
        'chromosome', 'base_pair_location', 'effect_allele', 'other_allele',
        'odds_ratio', 'ci_lower', 'ci_upper', 'standard_error', 'p_value',
    ]
    first_row = ['1', '1108138', 'A', 'G', '1.081', '0.8822', '1.325', '0.1038', '0.4517']

    reader = sniffers.guess_gwas_generic(_fixture_to_strings([header, first_row]))
    parser = reader._parser

    assert h(parser._chrom_col) == 1, 'Found index of chr col'
    assert h(parser._pos_col) == 2, 'Found index of pos col'

    # The allele columns in this fixture are labelled "effect_allele" / "other_allele". Which of
    # those corresponds to the reference allele differs between analyses, so the sniffer is
    # expected to leave ref and alt unassigned and let the user decide.
    assert parser._ref_col is None, 'Did NOT identify ref col, b/c GWAS catalog uses ambiguous "effect"'
    assert parser._alt_col is None, 'Did NOT identify alt col, b/c GWAS catalog uses ambiguous "effect"'

    assert h(parser._stderr_col) == 8, 'stderr_beta field detected'
    assert h(parser._pvalue_col) == 9, 'Found index of pval col'
    assert parser._is_neg_log_pvalue is False, 'Determined whether is log'

def test_can_guess_bolt_lmm(self):
data = _fixture_to_strings([
['SNP', 'CHR', 'BP', 'A1', 'A0', 'MAF', 'HWEP', 'INFO', 'BETA', 'SE', 'P'],
Expand Down
2 changes: 1 addition & 1 deletion zorp/__init__.py
@@ -1,6 +1,6 @@
from distutils.version import LooseVersion

__version__ = '0.3.3'
__version__ = '0.3.4'
__version_info__ = tuple(LooseVersion(__version__).version)

__all__ = [
Expand Down
32 changes: 16 additions & 16 deletions zorp/sniffers.py
Expand Up @@ -93,7 +93,7 @@ def get_pval_column(header_names: list, data_rows: ty.Iterable, overrides: dict
overrides = overrides or {}

LOGPVALUE_FIELDS = ('neg_log_pvalue', 'log_pvalue', 'log_pval', 'logpvalue')
PVALUE_FIELDS = ('pvalue', 'p.value', 'p-value', 'pval', 'p_score', 'p')
PVALUE_FIELDS = ('pvalue', 'p.value', 'p-value', 'pval', 'p_score', 'p', 'p_value')

data = itertools.islice(data_rows, 100)

Expand Down Expand Up @@ -135,12 +135,11 @@ def get_chrom_pos_ref_alt_columns(header_names: list, data_rows: ty.Iterable, ov

# Get from either a marker, or 4 separate columns
MARKER_FIELDS = ('snpid', 'marker', 'markerid', 'snpmarker', 'chr:position')
CHR_FIELDS = ('chrom', 'chr')
POS_FIELDS = ('position', 'pos', 'begin', 'beg', 'bp', 'end', 'ps')
CHR_FIELDS = ('chrom', 'chr', 'chromosome')
POS_FIELDS = ('position', 'pos', 'begin', 'beg', 'bp', 'end', 'ps', 'base_pair_location')

data = itertools.islice(data_rows, 100)

# TODO: How to handle orienting ref vs effect?
# Order matters: consider ambiguous field names for ref before alt
REF_FIELDS = ('A1', 'ref', 'reference', 'allele0', 'allele1')
ALT_FIELDS = ('A2', 'alt', 'alternate', 'allele1', 'allele2')
Expand All @@ -155,21 +154,21 @@ def get_chrom_pos_ref_alt_columns(header_names: list, data_rows: ty.Iterable, ov
# be found for this function to report a match.
headers_marked = header_names.copy()
to_find = [
['chrom_col', CHR_FIELDS],
['pos_col', POS_FIELDS],
['ref_col', REF_FIELDS],
['alt_col', ALT_FIELDS],
['chrom_col', CHR_FIELDS, True],
['pos_col', POS_FIELDS, True],
['ref_col', REF_FIELDS, False],
['alt_col', ALT_FIELDS, False],
]
config = {}
for col_name, col_choices in to_find:
for col_name, col_choices, is_required in to_find:
col = utils.human_to_zero(overrides.get(col_name)) or \
find_column(col_choices, headers_marked, threshold=1) # type: ignore
if col is None:
if col is None and is_required:
return {}

config[col_name] = col + 1
# Once a column has been assigned, remove it from consideration for future matches
headers_marked[col] = None
if col is not None:
config[col_name] = col + 1
# Once a column has been assigned, remove it from consideration for future matches
headers_marked[col] = None

return config

Expand All @@ -178,7 +177,7 @@ def get_effect_size_columns(header_names: list, data_rows: ty.Iterable, override
overrides = overrides or {}

BETA_FIELDS = ('beta', 'effect_size', 'alt_effsize', 'effect')
STDERR_BETA_FIELDS = ('stderr_beta', 'stderr', 'sebeta', 'effect_size_sd', 'se')
STDERR_BETA_FIELDS = ('stderr_beta', 'stderr', 'sebeta', 'effect_size_sd', 'se', 'standard_error')

data = itertools.islice(data_rows, 100)

Expand Down Expand Up @@ -333,10 +332,11 @@ def guess_gwas_standard(filename: ty.Union[ty.Iterable, str], *,
column_config = {}

required_cols = [
['chrom', 'chrom_col'], ['pos', 'pos_col'], ['ref', 'ref_col'], ['alt', 'alt_col'],
['chrom', 'chrom_col'], ['pos', 'pos_col'],
['neg_log_pvalue', 'pvalue_col']
]
optional_cols = [
['ref', 'ref_col'], ['alt', 'alt_col'],
['beta', 'beta_col'], ['stderr_beta', 'stderr_beta_col'],
['alt_allele_freq', 'allele_freq_col'],
['rsid', 'rsid_col']
Expand Down

0 comments on commit d151504

Please sign in to comment.