Skip to content

Commit

Permalink
remove biocommons hgvs backend
Browse files Browse the repository at this point in the history
  • Loading branch information
afrubin committed Jul 20, 2020
1 parent 2a9671e commit 188e693
Show file tree
Hide file tree
Showing 4 changed files with 1 addition and 126 deletions.
20 changes: 0 additions & 20 deletions mavedbconvert/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,6 @@

from hgvsp import dna

from hgvs.dataproviders import uta
from hgvs.parser import Parser


supported_programs = ("enrich", "enrich2", "empiric")
extra_na = (
"None",
Expand All @@ -25,22 +21,6 @@
surrounding_brackets_re = re.compile(r"\((.*)\)")
dna_re = re.compile(r"[ATCGatcg]+", flags=re.IGNORECASE)

# HGVS
dummy_ref = "NM_000000000.0"
hgvs_parser = Parser()
hdp = None


def get_hdp(*args, **kwargs):
    """
    Return the shared HDP connection, opening it on the first call.

    The connection is created with ``uta.connect(*args, **kwargs)`` and
    cached in the module-level ``hdp``. ``pooling=True`` is supplied unless
    the caller passes ``pooling`` explicitly. Once a connection has been
    cached, the arguments of later calls are not used.
    """
    global hdp
    if hdp is not None:
        return hdp
    # Only fill in the default; an explicit caller choice wins.
    kwargs.setdefault("pooling", True)
    hdp = uta.connect(*args, **kwargs)
    return hdp


# HGVSP constants
hgvsp_nt_pos = "position"
hgvsp_pro_pos = "position"
Expand Down
51 changes: 0 additions & 51 deletions mavedbconvert/tests/test_validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@

import pandas as pd

from hgvs.sequencevariant import SequenceVariant

from mavedbconvert import validators, constants, exceptions


Expand All @@ -25,56 +23,7 @@ def test_returns_str_variant(self):
self.assertIsInstance(self.backend.validate("c.1A>G"), str)


class TestHGVSBiocommonsBackend(unittest.TestCase):
    """Tests for `validators.HGVSBiocommonsBackend`."""

    def setUp(self):
        self.backend = validators.HGVSBiocommonsBackend("NM_000000001.1")

    def _parse(self, variant):
        """Parse `variant` prefixed with the backend's transcript."""
        return constants.hgvs_parser.parse_hgvs_variant(
            "{}:{}".format(self.backend.transcript, variant)
        )

    def test_validate_hgvs_returns_seqvar_on_success(self):
        result = self.backend.validate("c.100A>G")
        self.assertEqual(result, self._parse("c.100A>G"))

    def test_returns_list_for_multi(self):
        result = self.backend.validate("c.[1A>G;2A>G]")
        self.assertListEqual(
            result, [self._parse("c.1A>G"), self._parse("c.2A>G")]
        )

    def test_validate_hgvs_raise_HGVSValidationError(self):
        # subTest reports each invalid variant separately on failure.
        for variant in ("p.1102A>G", "x.102A>G"):
            with self.subTest(variant=variant):
                with self.assertRaises(exceptions.HGVSValidationError):
                    self.backend.validate(variant)

    def test_validate_passes_on_special(self):
        # Special variants are handed back unchanged by the backend, so
        # check the returned value rather than only the absence of an error.
        for variant in (
            constants.enrich2_wildtype,
            constants.enrich2_synonymous,
        ):
            with self.subTest(variant=variant):
                self.assertEqual(self.backend.validate(variant), variant)

    def test_validate_hgvs_uses_dummy_ref_if_transcript_not_passed(self):
        self.assertEqual(
            validators.HGVSBiocommonsBackend().transcript, constants.dummy_ref
        )


class TestValidateHGVS(unittest.TestCase):
def test_uses_biocommons_backend_if_transcript_provided(self):
result = validators.validate_variants(
["c.[1A>G;2A>G]"], n_jobs=2, verbose=0, transcript=constants.dummy_ref
)
self.assertIsInstance(result[0][0], SequenceVariant)

def test_uses_patterns_backend_as_default(self):
result = validators.validate_variants(["c.[1A>G;2A>G]"], n_jobs=2, verbose=0)
self.assertIsInstance(result[0], str)
Expand Down
55 changes: 1 addition & 54 deletions mavedbconvert/validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,6 @@

import hgvsp

from hgvs.sequencevariant import SequenceVariant
from hgvs.exceptions import HGVSParseError

import numpy as np
import pandas as pd
from numpy.testing import assert_array_equal
Expand Down Expand Up @@ -59,53 +56,8 @@ def validate(self, variant):
return variant


class HGVSBiocommonsBackend(ValidationBackend):
    """
    Backend using the grammar based validation in `hgvs`. Can be slow but is
    more robust.

    Parameters
    ----------
    transcript : str, optional
        Reference prefixed to every variant before parsing. Falls back to
        `constants.dummy_ref` when omitted or empty.
    """

    def __init__(self, transcript=None):
        self.transcript = transcript or constants.dummy_ref

    def validate(self, variant):
        """
        Splits a variant if it is multi-variant to validate each individual
        variant since `hgvs` does not support multi syntax. Validates each
        HGVS variant against a set grammar.

        Parameters
        ----------
        variant : str
            HGVS formatted variant string.

        Returns
        -------
        str or `SequenceVariant` or list[`SequenceVariant`]
            The input string unchanged when it is one of
            `constants.special_variants`; a single sequence variant when
            the input splits into one variant; otherwise a list with one
            sequence variant per component of the multi-variant.

        Raises
        ------
        HGVSValidationError
            If `hgvs` raises `HGVSParseError` for any component. The
            original parse error is chained as the cause.
        """
        if variant in constants.special_variants:
            return variant
        try:
            seqvars = []
            for v in utilities.split_variant(variant):
                seqvar = constants.hgvs_parser.parse_hgvs_variant(
                    "{}:{}".format(self.transcript, v)
                )
                seqvar.validate()
                seqvars.append(seqvar)
        except HGVSParseError as e:
            # Chain explicitly so the underlying parser error stays visible
            # in the traceback.
            raise exceptions.HGVSValidationError(
                "'{}' is not valid HGVS syntax "
                "for the following reason: {}".format(variant, e)
            ) from e
        return seqvars if len(seqvars) > 1 else seqvars[0]


def validate_variants(
variants,
transcript=None,
validation_backend=None,
n_jobs=1,
verbose=0,
Expand All @@ -118,8 +70,6 @@ def validate_variants(
----------
variants : list[str]
Variant HGVS_ representations.
transcript : str, optional.
Transcript the variants reference.
validation_backend : ValidationBackend
A parsing backend implementing `validate`.
n_jobs : int, optional
Expand All @@ -135,10 +85,7 @@ def validate_variants(
Formatted and validated variants.
"""
if validation_backend is None:
if transcript is None:
validation_backend = HGVSPatternsBackend()
else:
validation_backend = HGVSBiocommonsBackend(transcript)
validation_backend = HGVSPatternsBackend()
return Parallel(n_jobs=n_jobs, verbose=verbose, backend=backend)(
delayed(validation_backend.validate)(variant) for variant in variants
)
Expand Down
1 change: 0 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@
"tqdm",
"docopt",
"hgvsp @ git+https://github.com/VariantEffect/hgvs-patterns",
"hgvs",
"requests",
"numpy",
"scipy",
Expand Down

0 comments on commit 188e693

Please sign in to comment.