Commit
reformatted with black and isort
reece committed Sep 14, 2023
1 parent 9b6db22 commit f3429f1
Showing 53 changed files with 1,394 additions and 400 deletions.
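The commit message says the tree was reformatted with black and isort, and the summary above covers 53 files. The exact command line used is not recorded in this commit; as a rough, illustrative sketch only, a repo-wide reformat of this kind is typically driven by something like the following, where the "src" and "tests" paths are assumptions and both tools read their settings from pyproject.toml when configured there:

import subprocess

# Run isort first (import ordering), then black (line wrapping at the configured width).
subprocess.run(["isort", "src", "tests"], check=True)
subprocess.run(["black", "src", "tests"], check=True)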
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -93,7 +93,7 @@ exclude_lines = [
 ]

 [tool.black]
-line-length = 120
+line-length = 100


 # [tool.flake8]
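The only substantive change in pyproject.toml is the black line length, reduced from 120 to 100 columns; the Python hunks that follow are the mechanical consequence of re-wrapping at that width. A purely illustrative check of the setting, not part of the commit and assuming only that the black package is importable (black.format_str and black.Mode are its documented API):

import black

# One of the long lines rewritten later in this commit: at line_length=100 black pulls
# the call arguments onto their own lines, as the alignmentmapper.py hunks below show.
src = (
    "frs, frs_offset, frs_cigar = self.cigarmapper.map_ref_to_tgt("
    'pos=grs, end="start", strict_bounds=strict_bounds)\n'
)
print(black.format_str(src, mode=black.Mode(line_length=100)))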
56 changes: 42 additions & 14 deletions src/hgvs/alignmentmapper.py
@@ -148,7 +148,9 @@ def __init__(self, hdp, tx_ac, alt_ac, alt_aln_method):
     def __str__(self):
         return (
             "{self.__class__.__name__}: {self.tx_ac} ~ {self.alt_ac} ~ {self.alt_aln_method}; "
-            "{strand_pm} strand; offset={self.gc_offset}".format(self=self, strand_pm=strand_int_to_pm(self.strand))
+            "{strand_pm} strand; offset={self.gc_offset}".format(
+                self=self, strand_pm=strand_int_to_pm(self.strand)
+            )
         )

     def g_to_n(self, g_interval, strict_bounds=None):
@@ -157,19 +159,30 @@ def g_to_n(self, g_interval, strict_bounds=None):
         if strict_bounds is None:
             strict_bounds = global_config.mapping.strict_bounds

-        grs, gre = g_interval.start.base - 1 - self.gc_offset, g_interval.end.base - 1 - self.gc_offset
+        grs, gre = (
+            g_interval.start.base - 1 - self.gc_offset,
+            g_interval.end.base - 1 - self.gc_offset,
+        )
         # frs, fre = (f)orward (r)na (s)tart & (e)nd; forward w.r.t. genome
-        frs, frs_offset, frs_cigar = self.cigarmapper.map_ref_to_tgt(pos=grs, end="start", strict_bounds=strict_bounds)
-        fre, fre_offset, fre_cigar = self.cigarmapper.map_ref_to_tgt(pos=gre, end="end", strict_bounds=strict_bounds)
+        frs, frs_offset, frs_cigar = self.cigarmapper.map_ref_to_tgt(
+            pos=grs, end="start", strict_bounds=strict_bounds
+        )
+        fre, fre_offset, fre_cigar = self.cigarmapper.map_ref_to_tgt(
+            pos=gre, end="end", strict_bounds=strict_bounds
+        )

         if self.strand == -1:
             frs, fre = self.tgt_len - 1 - fre, self.tgt_len - 1 - frs
             frs_offset, fre_offset = -fre_offset, -frs_offset

         # The returned interval would be uncertain when locating at alignment gaps
         return hgvs.location.BaseOffsetInterval(
-            start=hgvs.location.BaseOffsetPosition(base=_zbc_to_hgvs(frs), offset=frs_offset, datum=Datum.SEQ_START),
-            end=hgvs.location.BaseOffsetPosition(base=_zbc_to_hgvs(fre), offset=fre_offset, datum=Datum.SEQ_START),
+            start=hgvs.location.BaseOffsetPosition(
+                base=_zbc_to_hgvs(frs), offset=frs_offset, datum=Datum.SEQ_START
+            ),
+            end=hgvs.location.BaseOffsetPosition(
+                base=_zbc_to_hgvs(fre), offset=fre_offset, datum=Datum.SEQ_START
+            ),
             uncertain=frs_cigar in "DI" or fre_cigar in "DI",
         )

@@ -189,8 +202,12 @@ def n_to_g(self, n_interval, strict_bounds=None):
             start_offset, end_offset = -end_offset, -start_offset

         # returns the genomic range start (grs) and end (gre)
-        grs, _, grs_cigar = self.cigarmapper.map_tgt_to_ref(pos=frs, end="start", strict_bounds=strict_bounds)
-        gre, _, gre_cigar = self.cigarmapper.map_tgt_to_ref(pos=fre, end="end", strict_bounds=strict_bounds)
+        grs, _, grs_cigar = self.cigarmapper.map_tgt_to_ref(
+            pos=frs, end="start", strict_bounds=strict_bounds
+        )
+        gre, _, gre_cigar = self.cigarmapper.map_tgt_to_ref(
+            pos=fre, end="end", strict_bounds=strict_bounds
+        )
         grs, gre = grs + self.gc_offset + 1, gre + self.gc_offset + 1
         gs, ge = grs + start_offset, gre + end_offset

@@ -207,15 +224,19 @@ def n_to_c(self, n_interval, strict_bounds=None):
         if strict_bounds is None:
             strict_bounds = global_config.mapping.strict_bounds

-        if self.cds_start_i is None:  # cds_start_i defined iff cds_end_i defined; see assertion above
+        if (
+            self.cds_start_i is None
+        ):  # cds_start_i defined iff cds_end_i defined; see assertion above
             raise HGVSUsageError(
                 "CDS is undefined for {self.tx_ac}; cannot map to c. coordinate (non-coding transcript?)".format(
                     self=self
                 )
             )

         if strict_bounds and (n_interval.start.base <= 0 or n_interval.end.base > self.tgt_len):
-            raise HGVSInvalidIntervalError("The given coordinate is outside the bounds of the reference sequence.")
+            raise HGVSInvalidIntervalError(
+                "The given coordinate is outside the bounds of the reference sequence."
+            )

         def pos_n_to_c(pos):
             if pos.base <= self.cds_start_i:
@@ -230,7 +251,9 @@ def pos_n_to_c(pos):
             return hgvs.location.BaseOffsetPosition(base=c, offset=pos.offset, datum=c_datum)

         c_interval = hgvs.location.BaseOffsetInterval(
-            start=pos_n_to_c(n_interval.start), end=pos_n_to_c(n_interval.end), uncertain=n_interval.uncertain
+            start=pos_n_to_c(n_interval.start),
+            end=pos_n_to_c(n_interval.end),
+            uncertain=n_interval.uncertain,
         )
         return c_interval

@@ -259,10 +282,14 @@ def pos_c_to_n(pos):
             if n <= 0 or n > self.tgt_len:
                 if strict_bounds:
                     raise HGVSInvalidIntervalError(f"c.{pos} coordinate is out of bounds")
-            return hgvs.location.BaseOffsetPosition(base=n, offset=pos.offset, datum=Datum.SEQ_START)
+            return hgvs.location.BaseOffsetPosition(
+                base=n, offset=pos.offset, datum=Datum.SEQ_START
+            )

         n_interval = hgvs.location.BaseOffsetInterval(
-            start=pos_c_to_n(c_interval.start), end=pos_c_to_n(c_interval.end), uncertain=c_interval.uncertain
+            start=pos_c_to_n(c_interval.start),
+            end=pos_c_to_n(c_interval.end),
+            uncertain=c_interval.uncertain,
         )
         return n_interval

@@ -278,7 +305,8 @@ def c_to_g(self, c_interval, strict_bounds=None):
     def is_coding_transcript(self):
         if (self.cds_start_i is not None) ^ (self.cds_end_i is not None):
             raise HGVSError(
-                "{self.tx_ac}: CDS start_i and end_i" " must be both defined or both undefined".format(self=self)
+                "{self.tx_ac}: CDS start_i and end_i"
+                " must be both defined or both undefined".format(self=self)
             )
         return self.cds_start_i is not None

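One piece of logic worth calling out in the g_to_n hunk above (untouched by the reformat; only the calls around it are re-wrapped): on the minus strand, the forward-genome-oriented transcript coordinates are mirrored into transcript orientation. A tiny worked example of that flip, with made-up numbers:

# Hypothetical values, for illustration only.
tgt_len = 10                 # transcript length
frs, fre = 2, 4              # forward-oriented start/end (0-based)
frs, fre = tgt_len - 1 - fre, tgt_len - 1 - frs
assert (frs, fre) == (5, 7)  # mirrored, and start still precedes end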
43 changes: 32 additions & 11 deletions src/hgvs/assemblymapper.py
@@ -88,8 +88,12 @@ def __init__(
         self.in_par_assume = in_par_assume
         self._norm = None
         if self.normalize:
-            self._norm = hgvs.normalizer.Normalizer(hdp, alt_aln_method=alt_aln_method, validate=False)
-        self._assembly_map = {k: v for k, v in hdp.get_assembly_map(self.assembly_name).items() if k.startswith("NC_")}
+            self._norm = hgvs.normalizer.Normalizer(
+                hdp, alt_aln_method=alt_aln_method, validate=False
+            )
+        self._assembly_map = {
+            k: v for k, v in hdp.get_assembly_map(self.assembly_name).items() if k.startswith("NC_")
+        }
         self._assembly_accessions = set(self._assembly_map.keys())

     def __repr__(self):
@@ -101,30 +105,42 @@ def __repr__(self):
         )

     def g_to_c(self, var_g, tx_ac):
-        var_out = super(AssemblyMapper, self).g_to_c(var_g, tx_ac, alt_aln_method=self.alt_aln_method)
+        var_out = super(AssemblyMapper, self).g_to_c(
+            var_g, tx_ac, alt_aln_method=self.alt_aln_method
+        )
         return self._maybe_normalize(var_out)

     def g_to_n(self, var_g, tx_ac):
-        var_out = super(AssemblyMapper, self).g_to_n(var_g, tx_ac, alt_aln_method=self.alt_aln_method)
+        var_out = super(AssemblyMapper, self).g_to_n(
+            var_g, tx_ac, alt_aln_method=self.alt_aln_method
+        )
         return self._maybe_normalize(var_out)

     def g_to_t(self, var_g, tx_ac):
-        var_out = super(AssemblyMapper, self).g_to_t(var_g, tx_ac, alt_aln_method=self.alt_aln_method)
+        var_out = super(AssemblyMapper, self).g_to_t(
+            var_g, tx_ac, alt_aln_method=self.alt_aln_method
+        )
         return self._maybe_normalize(var_out)

     def c_to_g(self, var_c):
         alt_ac = self._alt_ac_for_tx_ac(var_c.ac)
-        var_out = super(AssemblyMapper, self).c_to_g(var_c, alt_ac, alt_aln_method=self.alt_aln_method)
+        var_out = super(AssemblyMapper, self).c_to_g(
+            var_c, alt_ac, alt_aln_method=self.alt_aln_method
+        )
         return self._maybe_normalize(var_out)

     def n_to_g(self, var_n):
         alt_ac = self._alt_ac_for_tx_ac(var_n.ac)
-        var_out = super(AssemblyMapper, self).n_to_g(var_n, alt_ac, alt_aln_method=self.alt_aln_method)
+        var_out = super(AssemblyMapper, self).n_to_g(
+            var_n, alt_ac, alt_aln_method=self.alt_aln_method
+        )
         return self._maybe_normalize(var_out)

     def t_to_g(self, var_t):
         alt_ac = self._alt_ac_for_tx_ac(var_t.ac)
-        var_out = super(AssemblyMapper, self).t_to_g(var_t, alt_ac, alt_aln_method=self.alt_aln_method)
+        var_out = super(AssemblyMapper, self).t_to_g(
+            var_t, alt_ac, alt_aln_method=self.alt_aln_method
+        )
         return self._maybe_normalize(var_out)

     def t_to_p(self, var_t):
@@ -142,7 +158,9 @@ def t_to_p(self, var_t):
             return "non-coding"
         if var_t.type == "c":
             return self.c_to_p(var_t)
-        raise HGVSInvalidVariantError("Expected a coding (c.) or non-coding (n.) variant; got " + str(var_t))
+        raise HGVSInvalidVariantError(
+            "Expected a coding (c.) or non-coding (n.) variant; got " + str(var_t)
+        )

     def c_to_n(self, var_c):
         var_out = super(AssemblyMapper, self).c_to_n(var_c)
@@ -173,7 +191,8 @@ def _alt_ac_for_tx_ac(self, tx_ac):
         alt_acs = [
             e["alt_ac"]
             for e in self.hdp.get_tx_mapping_options(tx_ac)
-            if e["alt_aln_method"] == self.alt_aln_method and e["alt_ac"] in self._assembly_accessions
+            if e["alt_aln_method"] == self.alt_aln_method
+            and e["alt_ac"] in self._assembly_accessions
         ]

         if not alt_acs:
@@ -187,7 +206,9 @@ def _alt_ac_for_tx_ac(self, tx_ac):
         if len(alt_acs) > 1:
             names = set(self._assembly_map[ac] for ac in alt_acs)
             if names != set("XY"):
-                alts = ", ".join(["{ac} ({n})".format(ac=ac, n=self._assembly_map[ac]) for ac in alt_acs])
+                alts = ", ".join(
+                    ["{ac} ({n})".format(ac=ac, n=self._assembly_map[ac]) for ac in alt_acs]
+                )
                 raise HGVSError(
                     "Multiple chromosomal alignments for {tx_ac} in {an}"
                     " using {am} (non-pseudoautosomal region) [{alts}]".format(
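A small aside on the context lines in the last hunk above: set("XY") is simply {"X", "Y"}, so the pseudoautosomal special case reads "the transcript aligns to exactly chromosome X and chromosome Y and nothing else", and only other multi-alignment cases raise. An illustrative check, not part of the commit:

# A transcript whose only chromosomal alignments are chrX and chrY (a PAR gene)
# yields names == {"X", "Y"}, which matches set("XY") and raises no HGVSError.
assert set("XY") == {"X", "Y"}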
28 changes: 21 additions & 7 deletions src/hgvs/dataproviders/interface.py
@@ -70,26 +70,40 @@ def __init__(self, mode=None, cache=None):
             maxsize = None
             print(f"{__file__}: Using unlimited cache size")

-        self.data_version = lru_cache(maxsize=maxsize, mode=self.mode, cache=self.cache)(self.data_version)
-        self.schema_version = lru_cache(maxsize=maxsize, mode=self.mode, cache=self.cache)(self.schema_version)
+        self.data_version = lru_cache(maxsize=maxsize, mode=self.mode, cache=self.cache)(
+            self.data_version
+        )
+        self.schema_version = lru_cache(maxsize=maxsize, mode=self.mode, cache=self.cache)(
+            self.schema_version
+        )
         self.get_acs_for_protein_seq = lru_cache(maxsize=maxsize, mode=self.mode, cache=self.cache)(
             self.get_acs_for_protein_seq
         )
-        self.get_gene_info = lru_cache(maxsize=maxsize, mode=self.mode, cache=self.cache)(self.get_gene_info)
+        self.get_gene_info = lru_cache(maxsize=maxsize, mode=self.mode, cache=self.cache)(
+            self.get_gene_info
+        )
         self.get_pro_ac_for_tx_ac = lru_cache(maxsize=maxsize, mode=self.mode, cache=self.cache)(
             self.get_pro_ac_for_tx_ac
         )
         self.get_seq = lru_cache(maxsize=maxsize, mode=self.mode, cache=self.cache)(self.get_seq)
         self.get_similar_transcripts = lru_cache(maxsize=maxsize, mode=self.mode, cache=self.cache)(
             self.get_similar_transcripts
         )
-        self.get_tx_exons = lru_cache(maxsize=maxsize, mode=self.mode, cache=self.cache)(self.get_tx_exons)
-        self.get_tx_for_gene = lru_cache(maxsize=maxsize, mode=self.mode, cache=self.cache)(self.get_tx_for_gene)
-        self.get_tx_for_region = lru_cache(maxsize=maxsize, mode=self.mode, cache=self.cache)(self.get_tx_for_region)
+        self.get_tx_exons = lru_cache(maxsize=maxsize, mode=self.mode, cache=self.cache)(
+            self.get_tx_exons
+        )
+        self.get_tx_for_gene = lru_cache(maxsize=maxsize, mode=self.mode, cache=self.cache)(
+            self.get_tx_for_gene
+        )
+        self.get_tx_for_region = lru_cache(maxsize=maxsize, mode=self.mode, cache=self.cache)(
+            self.get_tx_for_region
+        )
         self.get_tx_identity_info = lru_cache(maxsize=maxsize, mode=self.mode, cache=self.cache)(
             self.get_tx_identity_info
         )
-        self.get_tx_info = lru_cache(maxsize=maxsize, mode=self.mode, cache=self.cache)(self.get_tx_info)
+        self.get_tx_info = lru_cache(maxsize=maxsize, mode=self.mode, cache=self.cache)(
+            self.get_tx_info
+        )
         self.get_tx_mapping_options = lru_cache(maxsize=maxsize, mode=self.mode, cache=self.cache)(
            self.get_tx_mapping_options
        )
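The interface.py hunk above only re-wraps lines, but the underlying pattern is worth noting: each data-provider method is re-bound on the instance, wrapped in a cache, at construction time. A minimal sketch of that pattern using the stdlib decorator follows; note that the lru_cache used in hgvs is the project's own helper, which takes extra mode= and cache= arguments that functools.lru_cache does not, and the class and accession below are made up for illustration.

from functools import lru_cache


class DataProvider:
    def __init__(self, maxsize=128):
        # Re-bind the method on the instance so repeated lookups hit a per-instance cache.
        self.get_seq = lru_cache(maxsize=maxsize)(self.get_seq)

    def get_seq(self, ac):
        print(f"fetching {ac}")  # stands in for an expensive remote call
        return "ACGT"


dp = DataProvider()
dp.get_seq("NM_999999.9")  # fetches
dp.get_seq("NM_999999.9")  # served from the cache; no second fetch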
35 changes: 29 additions & 6 deletions src/hgvs/dataproviders/ncbi.py
@@ -53,7 +53,13 @@ def _get_ncbi_db_url():
     return hgvs.global_config["NCBI"][url_key]


-def connect(db_url=None, pooling=hgvs.global_config.uta.pooling, application_name=None, mode=None, cache=None):
+def connect(
+    db_url=None,
+    pooling=hgvs.global_config.uta.pooling,
+    application_name=None,
+    mode=None,
+    cache=None,
+):
     """Connect to a uta/ncbi database instance.

     :param db_url: URL for database connection
@@ -96,7 +102,9 @@ def connect(db_url=None, pooling=hgvs.global_config.uta.pooling, application_name=None, mode=None, cache=None):

     url = _parse_url(db_url)
     if url.scheme == "postgresql":
-        conn = NCBI_postgresql(url=url, pooling=pooling, application_name=application_name, mode=mode, cache=cache)
+        conn = NCBI_postgresql(
+            url=url, pooling=pooling, application_name=application_name, mode=mode, cache=cache
+        )
     else:
         # fell through connection scheme cases
         raise RuntimeError("{url.scheme} in {url} is not currently supported".format(url=url))
@@ -234,7 +242,14 @@ def store_assocacs(self, hgnc, tx_ac, gene_id, pro_ac, origin):


 class NCBI_postgresql(NCBIBase):
-    def __init__(self, url, pooling=hgvs.global_config.uta.pooling, application_name=None, mode=None, cache=None):
+    def __init__(
+        self,
+        url,
+        pooling=hgvs.global_config.uta.pooling,
+        application_name=None,
+        mode=None,
+        cache=None,
+    ):
         if url.schema is None:
             raise Exception("No schema name provided in {url}".format(url=url))
         self.application_name = application_name
@@ -282,7 +297,9 @@ def _connect(self):

     def _ensure_schema_exists(self):
         # N.B. On AWS RDS, information_schema.schemata always returns zero rows
-        r = self._fetchone("select exists(SELECT 1 FROM pg_namespace WHERE nspname = %s)", [self.url.schema])
+        r = self._fetchone(
+            "select exists(SELECT 1 FROM pg_namespace WHERE nspname = %s)", [self.url.schema]
+        )
         if r[0]:
             return
         raise HGVSDataNotAvailableError(
@@ -329,7 +346,9 @@ def _get_cursor(self, n_retries=1):
                 break

             except psycopg2.OperationalError:
-                _logger.warning("Lost connection to {url}; attempting reconnect".format(url=self.url))
+                _logger.warning(
+                    "Lost connection to {url}; attempting reconnect".format(url=self.url)
+                )
                 if self.pooling:
                     self._pool.closeall()
                 self._connect()
@@ -339,7 +358,11 @@ def _get_cursor(self, n_retries=1):

         else:
             # N.B. Probably never reached
-            raise HGVSError("Permanently lost connection to {url} ({n} retries)".format(url=self.url, n=n_retries))
+            raise HGVSError(
+                "Permanently lost connection to {url} ({n} retries)".format(
+                    url=self.url, n=n_retries
+                )
+            )


 class ParseResult(urlparse.ParseResult):
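The _get_cursor hunks above keep the original retry logic and only re-wrap two messages: on psycopg2.OperationalError the provider reconnects and retries, and the loop's else arm raises only once every attempt has failed. A rough sketch of that shape, with an assumed make_conn factory standing in for the provider's own connection handling (the loop header itself lies outside the hunk):

import psycopg2


def get_cursor(make_conn, n_retries=1):
    conn = make_conn()
    for _ in range(n_retries + 1):
        try:
            cur = conn.cursor()
            break
        except psycopg2.OperationalError:
            conn = make_conn()  # reconnect and try again
    else:
        # Only reached when every attempt raised (cf. "Probably never reached" above).
        raise RuntimeError(f"permanently lost connection ({n_retries} retries)")
    return cur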
4 changes: 3 additions & 1 deletion src/hgvs/dataproviders/seqfetcher.py
@@ -49,7 +49,9 @@ def _fetch_seq_seqrepo(ac, start_i=None, end_i=None):
             from biocommons.seqrepo.dataproxy import SeqRepoRESTDataProxy

             self.sr = SeqRepoRESTDataProxy(seqrepo_url)
-            self.fetcher = lambda ac, start_i=None, end_i=None: self.sr.get_sequence(ac, start_i, end_i)
+            self.fetcher = lambda ac, start_i=None, end_i=None: self.sr.get_sequence(
+                ac, start_i, end_i
+            )
             self.source = f"SeqRepo REST ({seqrepo_url})"
         else:
             self.sr = None
