diff --git a/docs/conf.py b/docs/conf.py index 7615ef1..4f74f19 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -28,7 +28,12 @@ # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. -extensions = ["sphinx.ext.autodoc", "sphinx.ext.napoleon", "sphinx.ext.intersphinx", "sphinx.ext.autosectionlabel",] +extensions = [ + "sphinx.ext.autodoc", + "sphinx.ext.napoleon", + "sphinx.ext.intersphinx", + "sphinx.ext.autosectionlabel", +] # Add any paths that contain templates here, relative to this directory. templates_path = ["_templates"] diff --git a/mavehgvs/position.py b/mavehgvs/position.py index 6702382..c824fd2 100644 --- a/mavehgvs/position.py +++ b/mavehgvs/position.py @@ -37,7 +37,7 @@ class VariantPosition: """ - __fullmatch = re.compile(pos_with_groups, flags=re.ASCII).fullmatch + fullmatch = re.compile(pos_with_groups, flags=re.ASCII).fullmatch """Callable[[str, int, int], Optional[Match[str]]]: fullmatch callable for parsing positions Returns an :py:obj:`re.Match` object if the full string matches one of the position groups in :py:data:`pos_extended`. @@ -53,7 +53,7 @@ def __init__(self, pos_str: str) -> None: """ try: - gdict = VariantPosition.__fullmatch(pos_str).groupdict() + gdict = VariantPosition.fullmatch(pos_str).groupdict() except AttributeError: raise MaveHgvsParseError(f"invalid variant position string '{pos_str}'") diff --git a/mavehgvs/variant.py b/mavehgvs/variant.py index bb51f7b..96f30ee 100644 --- a/mavehgvs/variant.py +++ b/mavehgvs/variant.py @@ -1,6 +1,6 @@ import re import itertools -from typing import Optional, Union, List, Tuple, Mapping, Any +from typing import Optional, Union, List, Tuple, Mapping, Any, Sequence, Dict, Generator from mavehgvs.position import VariantPosition from mavehgvs.patterns.combined import any_variant @@ -10,14 +10,14 @@ class Variant: - __variant_fullmatch = re.compile(any_variant, flags=re.ASCII).fullmatch + fullmatch = re.compile(any_variant, flags=re.ASCII).fullmatch """Callable[[str, int, int], Optional[Match[str]]]: fullmatch callable for parsing a single MAVE-HGVS variant Returns an :py:obj:`re.Match` object if the full string defines a valid MAVE-HGVS variant. Match groups in the result can be used to extract components of the variant. """ - __vtypes = ( + VTYPES = ( "sub", # substitution "del", # deletion "dup", # duplication @@ -29,7 +29,7 @@ class Variant: def __init__( self, - s: Union[str, Mapping[str, Any]], + s: Union[str, Mapping[str, Any], Sequence[Mapping[str, Any]]], targetseq: Optional[str] = None, relaxed_ordering: bool = False, ): @@ -37,221 +37,260 @@ def __init__( Parameters ---------- - s : Union[str, Mapping[str, Any]] - MAVE-HGVS variant string to convert into an object or dictionary type object containing key-value pairs - corresponding to a MAVE-HGVS object. + s : Union[str, Mapping[str, Any], Sequence[Mapping[str, Any]]] + MAVE-HGVS variant string to convert into an object, dictionary type object containing key-value pairs + corresponding to a MAVE-HGVS object, or list/tuple of dictionary type objects for a variant with + multiple events. targetseq : Optional[str] If provided, the variant will be validated for agreement with this sequence. Target sequence validation is not supported for variants using the extended position syntax. - The type of the target sequence (DNA, RNA, or amino acid) will be inferred. + This must be an amino acid sequence for protein variants or a nucleotide sequence for + coding/noncoding/genomic variants. DNA and amino acid sequences should be in uppercase, RNA in lowercase. relaxed_ordering : bool - If True, variant strings that do not observe the 3-prime rule for variant position ordering are allowed. + If True, variants that do not observe the 3-prime rule for variant position ordering are allowed. The object representation will observe the 3-prime rule, so it may differ from the input string in this case. """ - self.variant_string = None - self.variant_mapping = None + if isinstance(s, str): # variant string to parse + variant_string = s + elif isinstance(s, Mapping): # dictionary-style single variant + variant_string = self._variant_dictionary_to_string(s, include_prefix=True) + elif isinstance(s, Sequence): # dictionary-style multi-variant + try: + all_prefixes = [v["prefix"] for v in s] + except KeyError: + raise MaveHgvsParseError("variant dictionary missing required keys") + if len(set(all_prefixes)) != 1: + raise MaveHgvsParseError( + "cannot combine variants with different prefixes" + ) + variant_string = f"{s[0]['prefix']}.[{';'.join(self._variant_dictionary_to_string(v, include_prefix=False) for v in s)}]" + else: + raise ValueError("can only create Variants from string or Mapping objects") - if isinstance(s, str): - self.variant_string = s - variant_match = self.__variant_fullmatch(self.variant_string) + variant_match = self.fullmatch(variant_string) + if variant_match is None: + raise MaveHgvsParseError("failed regular expression validation") + else: + match_dict = variant_match.groupdict() - if variant_match is None: - raise MaveHgvsParseError("failed regular expression validation") + # set target id if present + if match_dict["target_id"] is not None: + self._target_id = match_dict["target_id"] else: - self._groupdict = variant_match.groupdict() - - # set target id if present - if self._groupdict["target_id"] is not None: - self._target_id = self._groupdict["target_id"] - else: - self._target_id = None - - # set prefix and determine if this is a multi-variant - if self._groupdict["single_variant"] is not None: - self.variant_count = 1 - self._prefix = self._groupdict["single_variant"][0] - elif self._groupdict["multi_variant"] is not None: - self.variant_count = len(s.split(";")) - self._prefix = self._groupdict["multi_variant"][0] - else: # pragma: no cover - raise ValueError("invalid match type") + self._target_id = None + + # set prefix and determine if this is a multi-variant + if match_dict["single_variant"] is not None: + self.variant_count = 1 + self._prefix = match_dict["single_variant"][0] + elif match_dict["multi_variant"] is not None: + self.variant_count = len(variant_string.split(";")) + self._prefix = match_dict["multi_variant"][0] + else: # pragma: no cover + raise ValueError("invalid match type") - if self.variant_count == 1: - self._variant_types, self._positions, self._sequences = self.__process_string_variant( - self._groupdict, relaxed_ordering=relaxed_ordering + if self.variant_count == 1: + self._variant_types, self._positions, self._sequences = self._process_string_variant( + match_dict, relaxed_ordering=relaxed_ordering + ) + elif self.variant_count > 1: + self._variant_types = list() + self._positions = list() + self._sequences = list() + + # format each individual variant event as a single variant and parse it + for variant_substring in match_dict["multi_variant"][3:-1].split(";"): + groupdict = self.fullmatch( + f"{self._prefix}.{variant_substring}" + ).groupdict() + vt, p, s = self._process_string_variant( + groupdict, relaxed_ordering=relaxed_ordering ) - elif self.variant_count > 1: - # TODO: validate variant ordering - self._variant_types = list() - self._positions = list() - self._sequences = list() - - # format each individual variant event as a single variant and parse it - for variant_substring in self._groupdict["multi_variant"][ - 3:-1 - ].split(";"): - groupdict = self.__variant_fullmatch( - f"{self._prefix}.{variant_substring}" - ).groupdict() - vt, p, s = self.__process_string_variant( - groupdict, relaxed_ordering=relaxed_ordering + if p is None: # only the case for target-identical variants + raise MaveHgvsParseError( + "multi-variants cannot contain target-identical variants" ) - if p is None: # only the case for target-identical variants + + self._variant_types.append(vt) + self._positions.append(p) + self._sequences.append(s) + + # ensure that multiple variants aren't defined for the same positions + for vp1, vp2 in itertools.combinations(self._positions, 2): + if isinstance(vp1, VariantPosition) and isinstance( + vp2, VariantPosition + ): # both single position + if vp1 == vp2: raise MaveHgvsParseError( - "multi-variants cannot contain target-identical variants" + "multi-variant has multiple changes for the same position" ) - - self._variant_types.append(vt) - self._positions.append(p) - self._sequences.append(s) - - # ensure that multiple variants aren't defined for the same positions - for vp1, vp2 in itertools.combinations(self._positions, 2): - if isinstance(vp1, VariantPosition) and isinstance( - vp2, VariantPosition - ): # both single position - if vp1 == vp2: - raise MaveHgvsParseError( - "multi-variant has multiple changes for the same position" - ) - elif isinstance(vp1, VariantPosition) and isinstance( - vp2, Tuple - ): - if vp2[0] <= vp1 <= vp2[1]: - raise MaveHgvsParseError( - "multi-variant has overlapping changes" - ) - elif isinstance(vp1, Tuple) and isinstance( - vp2, VariantPosition + elif isinstance(vp1, VariantPosition) and isinstance(vp2, Tuple): + if vp2[0] <= vp1 <= vp2[1]: + raise MaveHgvsParseError( + "multi-variant has overlapping changes" + ) + elif isinstance(vp1, Tuple) and isinstance(vp2, VariantPosition): + if vp1[0] <= vp2 <= vp1[1]: + raise MaveHgvsParseError( + "multi-variant has overlapping changes" + ) + elif isinstance(vp1, Tuple) and isinstance(vp2, Tuple): + if ( + vp1[0] <= vp2[0] <= vp1[1] + or vp1[0] <= vp2[1] <= vp1[1] + or vp2[0] <= vp1[0] <= vp2[1] + or vp2[0] <= vp1[1] <= vp2[1] ): - if vp1[0] <= vp2 <= vp1[1]: - raise MaveHgvsParseError( - "multi-variant has overlapping changes" - ) - elif isinstance(vp1, Tuple) and isinstance(vp2, Tuple): - if ( - vp1[0] <= vp2[0] <= vp1[1] - or vp1[0] <= vp2[1] <= vp1[1] - or vp2[0] <= vp1[0] <= vp2[1] - or vp2[0] <= vp1[1] <= vp2[1] - ): - raise MaveHgvsParseError( - "multi-variant has overlapping changes" - ) - else: # pragma: no cover - raise ValueError("invalid position type") - - # re-order variants and validate - def sort_key(x): - if isinstance(x[1], VariantPosition): - return x[1] - elif isinstance(x[1], Tuple): - return x[1][0] - else: - raise ValueError("invalid position type") - - variant_tuples = list( - zip(self._variant_types, self._positions, self._sequences) - ) - ordered_tuples = sorted(variant_tuples, key=sort_key) - if variant_tuples != ordered_tuples: - if relaxed_ordering: - self._variant_types = [x[0] for x in ordered_tuples] - self._positions = [x[1] for x in ordered_tuples] - self._sequences = [x[2] for x in ordered_tuples] - else: raise MaveHgvsParseError( - "multi-variants not in sorted order" + "multi-variant has overlapping changes" ) + else: # pragma: no cover + raise ValueError("invalid position type") + + # re-order variants and validate + def sort_key(x): + if isinstance(x[1], VariantPosition): + return x[1] + elif isinstance(x[1], Tuple): + return x[1][0] + else: + raise ValueError("invalid position type") - else: # pragma: no cover - raise ValueError("invalid variant count") + variant_list = list(self.variant_tuples()) + ordered_list = sorted(variant_list, key=sort_key) + if variant_list != ordered_list: + if relaxed_ordering: + self._variant_types = [x[0] for x in ordered_list] + self._positions = [x[1] for x in ordered_list] + self._sequences = [x[2] for x in ordered_list] + else: + raise MaveHgvsParseError("multi-variants not in sorted order") - elif isinstance(s, Mapping): - self.variant_mapping = s - # TODO - else: - raise ValueError("can only create Variants from string or Mapping objects") + else: # pragma: no cover + raise ValueError("invalid variant count") if targetseq is not None: - if self.variant_count == 1: - if self._variant_types == "sub": - self._target_validate_substitution( - self._positions, self._sequences[0], targetseq - ) - elif self._variant_types in ("ins", "del", "dup", "delins"): - self._target_validate_indel(self._positions, targetseq) - elif self.variant_count > 1: - pass + for vtype, pos, seq in self.variant_tuples(): + if vtype == "sub": + self._target_validate_substitution(pos, seq[0], targetseq) + elif vtype in ("ins", "del", "dup", "delins"): + self._target_validate_indel(pos, targetseq) + + def variant_tuples( + self + ) -> Generator[ + Tuple[ + str, + Optional[Union[VariantPosition, Tuple[VariantPosition, VariantPosition]]], + Optional[Union[str, Tuple[str, str]]], + ], + None, + None, + ]: + """Generator that yields tuples containing the variant components. + + Yields + ------ + Tuple + Tuple of the variant type, position(s), and sequence(s) for each element in the variant. + + """ + if self.is_multi_variant(): + for vtype, pos, seq in zip( + self._variant_types, self._positions, self._sequences + ): + yield vtype, pos, seq + else: + yield self._variant_types, self._positions, self._sequences + + def _process_string_variant( + self, match_dict: Dict[str, str], relaxed_ordering: bool + ) -> Tuple[ + str, + Optional[Union[VariantPosition, Tuple[VariantPosition, VariantPosition]]], + Optional[Union[str, Tuple[str, str]]], + ]: + """Process the match dictionary from a single variant into its components. - # TODO: type hints and docstring - def __process_string_variant(self, groupdict, relaxed_ordering): + Parameters + ---------- + match_dict : Dict[str, str] + Match dictionary from the MAVE-HGVS regular expression. + relaxed_ordering : bool + If True, variants that do not observe the 3-prime rule for variant position ordering are allowed. + + Returns + ------- + Tuple[str, Optional[Union[VariantPosition, Tuple[VariantPosition, VariantPosition]]], Optional[Union[str, Tuple[str, str]]]] + Returns a 3-tuple containing the variant type, optional position (or start/end positions), + and optional before/after substitution sequences or inserted sequence. + + """ variant_type = None positions = None sequences = None # determine which named groups to check if self._prefix == "p": - gdict_prefixes = [(f"pro_{t}", t) for t in self.__vtypes] + pattern_group_tuples = [(f"pro_{t}", t) for t in self.VTYPES] elif self._prefix == "r": - gdict_prefixes = [(f"rna_{t}", t) for t in self.__vtypes] + pattern_group_tuples = [(f"rna_{t}", t) for t in self.VTYPES] elif self._prefix in "cn": - gdict_prefixes = [(f"dna_{t}_{self._prefix}", t) for t in self.__vtypes] + pattern_group_tuples = [(f"dna_{t}_{self._prefix}", t) for t in self.VTYPES] elif self._prefix in "gmo": - gdict_prefixes = [(f"dna_{t}_gmo", t) for t in self.__vtypes] + pattern_group_tuples = [(f"dna_{t}_gmo", t) for t in self.VTYPES] else: # pragma: no cover raise ValueError("unexpected prefix") # set the variant type vtype_set = False - groupdict_prefix = None - for groupname, vtype in gdict_prefixes: - if groupdict[groupname] is not None: + pattern_group = None + for pg, vtype in pattern_group_tuples: + if match_dict[pg] is not None: if vtype_set: # pragma: no cover - raise ValueError( - f"ambiguous match: '{groupname}' and '{groupdict_prefix}'" - ) + raise ValueError(f"ambiguous match: '{pg}' and '{pattern_group}'") variant_type = vtype - groupdict_prefix = groupname + pattern_group = pg + vtype_set = True # set the position and sequence if variant_type == "sub": if ( - groupdict[f"{groupdict_prefix}_equal"] is not None + match_dict[f"{pattern_group}_equal"] is not None ): # special case for target identity - sequences = groupdict[f"{groupdict_prefix}_equal"] - elif groupdict[f"pro_sub_equal_sy"] is not None: - sequences = groupdict[f"pro_sub_equal_sy"] + sequences = match_dict[f"{pattern_group}_equal"] + elif match_dict[f"pro_sub_equal_sy"] is not None: + sequences = match_dict[f"pro_sub_equal_sy"] else: - positions = VariantPosition(groupdict[f"{groupdict_prefix}_position"]) + positions = VariantPosition(match_dict[f"{pattern_group}_position"]) if self._prefix == "p": sequences = ( positions.amino_acid, - groupdict[f"{groupdict_prefix}_new"], + match_dict[f"{pattern_group}_new"], ) elif self._prefix in "gmo" "cn" "r": sequences = ( - groupdict[f"{groupdict_prefix}_ref"], - groupdict[f"{groupdict_prefix}_new"], + match_dict[f"{pattern_group}_ref"], + match_dict[f"{pattern_group}_new"], ) else: # pragma: no cover raise ValueError("unexpected prefix") elif variant_type in ("del", "dup", "ins", "delins"): # set position if ( - groupdict.get(f"{groupdict_prefix}_pos") is not None + match_dict.get(f"{pattern_group}_pos") is not None ): # use get() since ins pattern doesn't have pos - positions = VariantPosition(groupdict[f"{groupdict_prefix}_pos"]) + positions = VariantPosition(match_dict[f"{pattern_group}_pos"]) else: positions = ( - VariantPosition(groupdict[f"{groupdict_prefix}_start"]), - VariantPosition(groupdict[f"{groupdict_prefix}_end"]), + VariantPosition(match_dict[f"{pattern_group}_start"]), + VariantPosition(match_dict[f"{pattern_group}_end"]), ) # extra validation on positions if positions[0] >= positions[1]: @@ -267,10 +306,82 @@ def __process_string_variant(self, groupdict, relaxed_ordering): # set sequence if needed if variant_type in ("ins", "delins"): - sequences = groupdict[f"{groupdict_prefix}_seq"] + sequences = match_dict[f"{pattern_group}_seq"] return variant_type, positions, sequences + # TODO: API documentation for the dictionary objects + @staticmethod + def _variant_dictionary_to_string( + vdict: Mapping[str, Any], include_prefix: bool + ) -> str: + """Convert a match dictionary from a single variant into a string for further validation. + + This method performs minimal validation of the values provided in the input, and instead converts it into a + variant string that is validated using the regular expression based validators. + + Parameters + ---------- + vdict : Mapping[str, Any] + Key-value pairs describing a single variant. + include_prefix: bool + If True, the variant prefix and '.' will be included in the string; else it is omitted (for use with + multi-variants). + + Returns + ------- + str + A string representing this variant. + + Raises + ------ + MaveHgvsParseError + If the dictionary does not have a valid set of keys. + + """ + try: + variant_type = vdict["variant_type"] + except KeyError: + raise MaveHgvsParseError("variant dictionary missing required keys") + + if variant_type == "sub": + if sorted(vdict.keys()) != sorted( + ["variant_type", "prefix", "position", "target", "variant"] + ): + raise MaveHgvsParseError("variant dictionary contains invalid keys") + if vdict["prefix"] == "p": + variant_string = ( + f"{vdict['target']}{vdict['position']}{vdict['variant']}" + ) + else: + variant_string = ( + f"{vdict['position']}{vdict['target']}>{vdict['variant']}" + ) + elif variant_type in ("del", "dup"): + if sorted(vdict.keys()) != sorted( + ["variant_type", "prefix", "start_position", "end_position"] + ): + raise MaveHgvsParseError("variant dictionary contains invalid keys") + if vdict["start_position"] == vdict["end_position"]: + variant_string = f"{vdict['start_position']}{variant_type}" + else: + variant_string = ( + f"{vdict['start_position']}_{vdict['end_position']}{variant_type}" + ) + elif variant_type in ("ins", "delins"): + if sorted(vdict.keys()) != sorted( + ["variant_type", "prefix", "start_position", "end_position", "sequence"] + ): + raise MaveHgvsParseError("variant dictionary contains invalid keys") + variant_string = f"{vdict['start_position']}_{vdict['end_position']}{variant_type}{vdict['sequence']}" + else: + raise MaveHgvsParseError("invalid variant type") + + if include_prefix: + return f"{vdict['prefix']}.{variant_string}" + else: + return variant_string + def __repr__(self) -> str: """The object representation is equivalent to the input string. @@ -329,15 +440,12 @@ def format_variant( if self.is_target_identical(): return f"{prefix}.{self._sequences}" - elif self.variant_count > 1: - elements = list() - for vtype, pos, seq in zip( - self._variant_types, self._positions, self._sequences - ): - elements.append(format_variant(vtype, pos, seq)) - return f"{prefix}.[{';'.join(elements)}]" else: - return f"{prefix}.{format_variant(self._variant_types, self._positions, self._sequences)}" + elements = [format_variant(*t) for t in self.variant_tuples()] + if self.is_multi_variant(): + return f"{prefix}.[{';'.join(elements)}]" + else: + return f"{prefix}.{elements[0]}" @staticmethod def _target_validate_substitution( @@ -348,16 +456,16 @@ def _target_validate_substitution( Note that variants using extended syntax cannot be validated with this method. If an extended syntax variant is encountered, it will be interpreted as valid/matching. - # TODO: this needs to be aware of protein vs nucleotide targets - Parameters ---------- pos : VariantPosition Position of the substitution. ref : str - Reference base or amino acid. + Reference base or amino acid from the variant. target : str - Target sequence. + Target sequence. This must be an amino acid sequence for protein variants or a nucleotide sequence + for coding/noncoding/genomic variants. + RNA sequences should be in lowercase, DNA sequences should be in uppercase. Returns ------- @@ -390,14 +498,13 @@ def _target_validate_indel( Note that variants using extended syntax cannot be validated with this method. If an extended syntax variant is encountered, it will be interpreted as valid/matching. - # TODO: this needs to be aware of protein vs nucleotide targets - Parameters ---------- pos : Union[VariantPosition, Tuple[VariantPosition, VariantPosition]] Single variant position or start/end tuple for the indel. target : str - Target sequence. + Target sequence. This must be an amino acid sequence for protein variants or a nucleotide sequence + for coding/noncoding/genomic variants. Returns ------- diff --git a/setup.py b/setup.py index 0cf9ecc..fcd5ac6 100644 --- a/setup.py +++ b/setup.py @@ -4,18 +4,18 @@ with open("README.md", "r") as fh: long_description = fh.read() -requirements = ["fqfa>=1.2.0"] +requirements = ["fqfa>=1.2.1"] # fqfa requires backported dataclasses in Python 3.6 if sys.version_info.major == 3 and sys.version_info.minor == 6: requirements.append("dataclasses") setuptools.setup( name="mavehgvs", - version="0.1.0", + version="0.2.0", author="Daniel Esposito and Alan F Rubin", author_email="alan.rubin@wehi.edu.au", description=( - "Regular expression-based validation of HGVS variant strings for clinical genetics and genomics applications." + "Regular expression-based validation of HGVS-style variant strings for Multiplexed Assays of Variant Effect." ), long_description=long_description, long_description_content_type="text/markdown", diff --git a/tests/test_variant.py b/tests/test_variant.py index a01a453..e2a01a9 100644 --- a/tests/test_variant.py +++ b/tests/test_variant.py @@ -211,16 +211,202 @@ def test_overlaps(self): class TestCreateSingleVariantFromValues(unittest.TestCase): - pass + def test_sub(self): + valid_dict_tuples = [ + ( + { + "variant_type": "sub", + "prefix": "p", + "position": 27, + "target": "Glu", + "variant": "Trp", + }, + "p.Glu27Trp", + ), + ( + { + "variant_type": "sub", + "prefix": "c", + "position": "122-6", + "target": "T", + "variant": "A", + }, + "c.122-6T>A", + ), + ] + + for d, s in valid_dict_tuples: + with self.subTest(d=d, s=s): + v = Variant(d) + self.assertEqual(s, str(v)) + + def test_ins(self): + valid_dict_tuples = [ + ( + { + "variant_type": "ins", + "prefix": "p", + "start_position": "Ala12", + "end_position": "Pro13", + "sequence": "GlyProCys", + }, + "p.Ala12_Pro13insGlyProCys", + ), + ( + { + "variant_type": "ins", + "prefix": "r", + "start_position": 22, + "end_position": 23, + "sequence": "auc", + }, + "r.22_23insauc", + ), + ] + + for d, s in valid_dict_tuples: + with self.subTest(d=d, s=s): + v = Variant(d) + self.assertEqual(s, str(v)) + + def test_del(self): + valid_dict_tuples = [ + ( + { + "variant_type": "del", + "prefix": "g", + "start_position": 44, + "end_position": 44, + }, + "g.44del", + ), + ( + { + "variant_type": "del", + "prefix": "c", + "start_position": "78+5", + "end_position": "78+10", + }, + "c.78+5_78+10del", + ), + ] + + for d, s in valid_dict_tuples: + with self.subTest(d=d, s=s): + v = Variant(d) + self.assertEqual(s, str(v)) + + def test_dup(self): + valid_dict_tuples = [ + ( + { + "variant_type": "dup", + "prefix": "c", + "start_position": 77, + "end_position": 77, + }, + "c.77dup", + ), + ( + { + "variant_type": "dup", + "prefix": "p", + "start_position": "Pro12", + "end_position": "Gly18", + }, + "p.Pro12_Gly18dup", + ), + ] + + for d, s in valid_dict_tuples: + with self.subTest(d=d, s=s): + v = Variant(d) + self.assertEqual(s, str(v)) + + def test_delins(self): + valid_dict_tuples = [ + ( + { + "variant_type": "delins", + "prefix": "c", + "start_position": "43-6", + "end_position": "595+12", + "sequence": "CTT", + }, + "c.43-6_595+12delinsCTT", + ), + ( + { + "variant_type": "delins", + "prefix": "p", + "start_position": "Ile71", + "end_position": "Cys80", + "sequence": "Ser", + }, + "p.Ile71_Cys80delinsSer", + ), + ] + + for d, s in valid_dict_tuples: + with self.subTest(d=d, s=s): + v = Variant(d) + self.assertEqual(s, str(v)) class TestCreateMultiVariantFromValues(unittest.TestCase): - pass + def test_create_multivariant(self): + valid_dict_tuples = [ + ( + [ + { + "variant_type": "sub", + "prefix": "p", + "position": 27, + "target": "Glu", + "variant": "Trp", + }, + { + "variant_type": "delins", + "prefix": "p", + "start_position": "Ile71", + "end_position": "Cys80", + "sequence": "Ser", + } + ], + "p.[Glu27Trp;Ile71_Cys80delinsSer]", + ), + ( + [ + { + "variant_type": "dup", + "prefix": "c", + "start_position": 77, + "end_position": 77, + }, + { + "variant_type": "sub", + "prefix": "c", + "position": "122-6", + "target": "T", + "variant": "A", + } + ], + "c.[77dup;122-6T>A]", + ), + ] + for d, s in valid_dict_tuples: + with self.subTest(d=d, s=s): + v = Variant(d) + self.assertEqual(s, str(v)) class TestTargetSequenceValidation(unittest.TestCase): def test_matching_dna_substitution(self): - variant_tuples = [("ACGT", "c.1A>T"), ("ACGT", "c.3G>C")] + variant_tuples = [ + ("ACGT", "c.1A>T"), + ("ACGT", "c.3G>C"), + ("ACGT", "c.[1A>T;3G>C]"), + ] for target, s in variant_tuples: with self.subTest(target=target, s=s): @@ -228,7 +414,11 @@ def test_matching_dna_substitution(self): self.assertEqual(s, str(v)) def test_nonmatching_dna_substitution(self): - variant_tuples = [("ACGT", "c.1C>T"), ("ACGT", "c.3T>C")] + variant_tuples = [ + ("ACGT", "c.1C>T"), + ("ACGT", "c.3T>C"), + ("ACGT", "c.[1A>T;3T>C]"), + ] for target, s in variant_tuples: with self.subTest(target=target, s=s): @@ -284,10 +474,7 @@ def test_valid_dna_ins(self): self.assertEqual(s, str(v)) def test_invalid_dna_ins(self): - variant_tuples = [ - ("ACGT", "c.4_5insA"), - ("ACGT", "c.10_11insTCG"), - ] + variant_tuples = [("ACGT", "c.4_5insA"), ("ACGT", "c.10_11insTCG")] for target, s in variant_tuples: with self.subTest(target=target, s=s): @@ -303,10 +490,7 @@ def test_valid_dna_delins(self): self.assertEqual(s, str(v)) def test_invalid_dna_delins(self): - variant_tuples = [ - ("ACGT", "c.4_5delinsA"), - ("ACGT", "c.10_delinsTCG"), - ] + variant_tuples = [("ACGT", "c.4_5delinsA"), ("ACGT", "c.10_delinsTCG")] for target, s in variant_tuples: with self.subTest(target=target, s=s):