Skip to content

Commit

Permalink
Merge pull request #218 from UC-Davis-molecular-computing/216-use-pipe-instead-of-union-for-union-type-annotations
Browse files Browse the repository at this point in the history

216 use pipe instead of union for union type annotations
  • Loading branch information
dave-doty committed Aug 2, 2022
2 parents bc1395a + dc5f9b2 commit 3b88db2
Show file tree
Hide file tree
Showing 7 changed files with 239 additions and 232 deletions.
2 changes: 1 addition & 1 deletion nuad/__version__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
version = '0.3.0' # version line; WARNING: do not remove or change this line or comment
version = '0.3.1' # version line; WARNING: do not remove or change this line or comment
288 changes: 145 additions & 143 deletions nuad/constraints.py

Large diffs are not rendered by default.

18 changes: 10 additions & 8 deletions nuad/modifications.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
from __future__ import annotations

import enum
from dataclasses import dataclass
from abc import ABC, abstractmethod
from typing import Optional, Any, Dict, AbstractSet
from typing import Any, Dict, AbstractSet

import scadnano as sc
from nuad.json_noindent_serializer import JSONSerializable, NoIndent
Expand Down Expand Up @@ -118,11 +120,11 @@ def to_json_serializable(self, suppress_indent: bool = True, **kwargs: Any) -> D
# remove quotes when Py3.6 support dropped
@staticmethod
def from_json(json_map: Dict[str, Any]) -> 'Modification5Prime':
id = json_map[mod_id_key]
id_ = json_map[mod_id_key]
location = json_map[mod_location_key]
assert location == "5'"
idt_text = json_map.get(mod_idt_text_key)
return Modification5Prime(idt_text=idt_text, id=id)
return Modification5Prime(idt_text=idt_text, id=id_)

@staticmethod
def modification_type() -> ModificationType:
Expand All @@ -144,11 +146,11 @@ def to_json_serializable(self, suppress_indent: bool = True, **kwargs: Any) -> D
# remove quotes when Py3.6 support dropped
@staticmethod
def from_json(json_map: Dict[str, Any]) -> 'Modification3Prime':
id = json_map[mod_id_key]
id_ = json_map[mod_id_key]
location = json_map[mod_location_key]
assert location == "3'"
idt_text = json_map.get(mod_idt_text_key)
return Modification3Prime(idt_text=idt_text, id=id)
return Modification3Prime(idt_text=idt_text, id=id_)

@staticmethod
def modification_type() -> ModificationType:
Expand All @@ -162,7 +164,7 @@ def to_scadnano_modification(self) -> sc.Modification3Prime:
class ModificationInternal(Modification):
"""Internal modification of DNA sequence, e.g., biotin or Cy3."""

allowed_bases: Optional[AbstractSet[str]] = None
allowed_bases: AbstractSet[str] | None = None
"""If None, then this is an internal modification that goes between bases.
If instead it is a list of bases, then this is an internal modification that attaches to a base,
and this lists the allowed bases for this internal modification to be placed at.
Expand All @@ -185,13 +187,13 @@ def to_json_serializable(self, suppress_indent: bool = True, **kwargs: Any) -> D
# remove quotes when Py3.6 support dropped
@staticmethod
def from_json(json_map: Dict[str, Any]) -> 'ModificationInternal':
id = json_map[mod_id_key]
id_ = json_map[mod_id_key]
location = json_map[mod_location_key]
assert location == "internal"
idt_text = json_map.get(mod_idt_text_key)
allowed_bases_list = json_map.get(mod_allowed_bases_key)
allowed_bases = frozenset(allowed_bases_list) if allowed_bases_list is not None else None
return ModificationInternal(idt_text=idt_text, id=id, allowed_bases=allowed_bases)
return ModificationInternal(idt_text=idt_text, id=id_, allowed_bases=allowed_bases)

@staticmethod
def modification_type() -> ModificationType:
Expand Down
46 changes: 23 additions & 23 deletions nuad/np.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

from __future__ import annotations

from typing import Tuple, List, Collection, Optional, Union, Sequence, Dict, Iterable
from typing import Tuple, List, Collection, Sequence, Dict, Iterable
from dataclasses import dataclass
import math
import itertools as it
Expand All @@ -36,7 +36,7 @@ def idx2seq(idx: int, length: int) -> str:
return ''.join(seq)


def seq2arr(seq: str, base2bits_local: Optional[Dict[str, int]] = None) -> np.ndarray:
def seq2arr(seq: str, base2bits_local: Dict[str, int] | None = None) -> np.ndarray:
"""Convert seq (string with DNA alphabet) to numpy array with integers 0,1,2,3."""
if base2bits_local is None:
base2bits_local = base2bits
Expand Down Expand Up @@ -395,18 +395,16 @@ def make_array_with_random_subset_of_dna_seqs_hamming_distance(


# https://stackoverflow.com/a/59328647/5339430
def random_choice_noreplace(l: np.ndarray, n_sample: int, num_draw: int,
def random_choice_noreplace(lst: np.ndarray, n_sample: int, num_draw: int,
rng: np.random.Generator) -> np.ndarray:
'''
l: 1-D array or list
n_sample: sample size for each draw
num_draw: number of draws
# lst: 1-D array or list
# n_sample: sample size for each draw
# num_draw: number of draws

Intuition: Randomly generate numbers, get the index of the smallest n_sample number for each row.
'''
l = np.array(l)
random_array_floats = rng.random((num_draw, len(l)))
return l[np.argpartition(random_array_floats, n_sample - 1, axis=-1)[:, :n_sample]]
# Intuition: Randomly generate numbers, get the index of the smallest n_sample number for each row.
lst = np.array(lst)
random_array_floats = rng.random((num_draw, len(lst)))
return lst[np.argpartition(random_array_floats, n_sample - 1, axis=-1)[:, :n_sample]]


# @lru_cache(maxsize=10000000)
Expand Down Expand Up @@ -673,15 +671,15 @@ class DNASeqList:
"""Random number generator to use."""

def __init__(self,
length: Optional[int] = None,
num_random_seqs: Optional[int] = None,
length: int | None = None,
num_random_seqs: int | None = None,
shuffle: bool = False,
alphabet: Collection[str] = ('A', 'C', 'G', 'T'),
seqs: Optional[Sequence[str]] = None,
seqs: Sequence[str] | None = None,
seqarr: np.ndarray = None,
filename: Optional[str] = None,
filename: str | None = None,
rng: np.random.Generator = default_rng,
hamming_distance_from_sequence: Optional[Tuple[int, str]] = None):
hamming_distance_from_sequence: Tuple[int, str] | None = None):
"""
Creates a set of DNA sequences, all of the same length.
Expand Down Expand Up @@ -784,6 +782,8 @@ def random_choice(self, num: int, rng: np.random.Generator = default_rng,
:param num:
number of sequences to sample
:param rng:
random number generator to use
:param replace:
whether to sample with replacement
:return:
Expand Down Expand Up @@ -889,7 +889,7 @@ def keep_seqs_at_indices(self, indices: Iterable[int]) -> None:
self.seqarr = self.seqarr[indices]
self._update_size()

def __getitem__(self, slice_: Union[int, slice]) -> Union[str, List[str]]:
def __getitem__(self, slice_: int | slice) -> str | List[str]:
if isinstance(slice_, int):
return self.get_seq_str(slice_)
elif isinstance(slice_, slice):
Expand Down Expand Up @@ -953,7 +953,7 @@ def hamming_map(self, sequence: str) -> Dict[int, DNASeqList]:
# print(f'times in each iteration: {times}')
return distance_map

def sublist(self, start: int, end: Optional[int] = None) -> DNASeqList:
def sublist(self, start: int, end: int | None = None) -> DNASeqList:
"""Return sublist of DNASeqList from `start`, inclusive, to `end`, exclusive.
If `end` is not specified, goes until the end of the list."""
Expand Down Expand Up @@ -1091,7 +1091,7 @@ def filter_seqs_by_g_quad_c_quad(self) -> DNASeqList:
"""Removes any sticky ends with 4 G's or C's in a row (a quadruplex)."""
return self.filter_substring(['GGGG', 'CCCC'])

def index(self, sequence: Union[str, np.ndarray]) -> int:
def index(self, sequence: str | np.ndarray) -> int:
# finds index of sequence in (rows of) self.seqarr
# raises IndexError if not present
# taken from https://stackoverflow.com/questions/40382384/finding-a-matching-row-in-a-numpy-matrix
Expand All @@ -1104,7 +1104,7 @@ def index(self, sequence: Union[str, np.ndarray]) -> int:
return int(first_index)


def create_toeplitz(seqlen: int, sublen: int, indices: Optional[Sequence[int]] = None) -> np.ndarray:
def create_toeplitz(seqlen: int, sublen: int, indices: Sequence[int] | None = None) -> np.ndarray:
"""Creates a toeplitz matrix, useful for finding subsequences.
`seqlen` is length of larger sequence; `sublen` is length of substring we're checking for.
Expand Down Expand Up @@ -1223,7 +1223,7 @@ def hash_ndarray(arr: np.ndarray) -> int:


CACHE_WC = False
_calculate_wc_energies_cache: Optional[np.ndarray] = None
_calculate_wc_energies_cache: np.ndarray | None = None
_calculate_wc_energies_cache_hash: int = 0


Expand Down Expand Up @@ -1254,7 +1254,7 @@ def wc_arr(seqarr: np.ndarray) -> np.ndarray:

def prefilter_length_10_11(low_dg: float, high_dg: float, temperature: float, end_gc: bool,
convert_to_list: bool = True) \
-> Union[Tuple[List[str], List[str]], Tuple[DNASeqList, DNASeqList]]:
-> Tuple[List[str], List[str]] | Tuple[DNASeqList, DNASeqList]:
"""Return sequences of length 10 and 11 with wc energies between given values."""
s10: DNASeqList = DNASeqList(length=10)
s11: DNASeqList = DNASeqList(length=11)
Expand Down
Loading

0 comments on commit 3b88db2

Please sign in to comment.