Skip to content

Commit

Permalink
more static typing-related fixes
Browse files (browse the repository at this point in the history)
  • Loading branch information
chrislit committed Feb 1, 2020
1 parent 0e07f19 commit 4394373
Show file tree
Hide file tree
Showing 14 changed files with 47 additions and 44 deletions.
4 changes: 2 additions & 2 deletions abydos/distance/_azzoo.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
AZZOO similarity
"""

from typing import Any, Collection, Counter as TCounter, Optional, Union
from typing import Any, Collection, Counter as TCounter, Optional, Union, cast

from ._token_distance import _TokenDistance
from ..tokenizer import _Tokenizer
Expand Down Expand Up @@ -141,7 +141,7 @@ def sim_score(self, src: str, tar: str) -> float:
a = self._intersection_card()
d = self._total_complement_card()

return a + self.params['sigma'] * d
return cast(float, a + self.params['sigma'] * d)

def sim(self, src: str, tar: str) -> float:
"""Return the AZZOO similarity of two strings.
Expand Down
2 changes: 1 addition & 1 deletion abydos/distance/_hellinger.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ def dist(self, src: str, tar: str) -> float:
norm = (
2
* sum(
max(self._src_tokens[tok], self._tar_tokens[tok]) ** 2 # type: ignore # noqa: E501
max(self._src_tokens[tok], self._tar_tokens[tok]) ** 2
for tok in self._total().keys()
)
) ** 0.5
Expand Down
7 changes: 2 additions & 5 deletions abydos/distance/_meta_levenshtein.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ def __init__(
"""
super(MetaLevenshtein, self).__init__(**kwargs)
self._corpus = corpus
self._metric = cast(_Distance, metric)
self._metric = JaroWinkler() if metric is None else metric
self._normalizer = normalizer

qval = 2 if 'qval' not in self.params else self.params['qval']
Expand All @@ -103,9 +103,6 @@ def __init__(
else QGrams(qval=qval, start_stop='$#', skip=0, scaler=None)
)

if self._metric is None:
self._metric = JaroWinkler()

def dist_abs(self, src: str, tar: str) -> float:
"""Return the Meta-Levenshtein distance of two strings.
Expand Down Expand Up @@ -195,7 +192,7 @@ def _dist(s_tok: str, t_tok: str) -> float:
+ _dist(src_ordered[i], tar_ordered[j]), # sub/==
)

return d_mat[len(src_ordered), len(tar_ordered)]
return cast(float, d_mat[len(src_ordered), len(tar_ordered)])

def dist(self, src: str, tar: str) -> float:
"""Return the normalized Levenshtein distance between two strings.
Expand Down
4 changes: 2 additions & 2 deletions abydos/distance/_minhash.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
"""

from hashlib import sha512
from typing import Any, Optional
from typing import Any, Optional, cast

import numpy as np

Expand Down Expand Up @@ -150,7 +150,7 @@ def sim(self, src: str, tar: str) -> float:
),
)

return (hashes_src == hashes_tar).sum() / k
return cast(float, (hashes_src == hashes_tar).sum() / k)


if __name__ == '__main__':
Expand Down
7 changes: 4 additions & 3 deletions abydos/distance/_minkowski.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
Minkowski distance & similarity
"""

from typing import Any, Collection, Counter as TCounter, Optional, Union
from typing import Any, Collection, Counter as TCounter, Optional, Union, cast

from ._token_distance import _TokenDistance
from ..tokenizer import _Tokenizer
Expand Down Expand Up @@ -144,10 +144,11 @@ def dist_abs(self, src: str, tar: str, normalized: bool = False) -> float:
if self.params['pval'] == 0:
# This is the l_0 "norm" as developed by David Donoho
return sum(_ != 0 for _ in diffs) / normalizer
return (
return cast(
float,
sum(_ ** self.params['pval'] for _ in diffs)
** (1 / self.params['pval'])
/ normalizer
/ normalizer,
)

def dist(self, src: str, tar: str) -> float:
Expand Down
4 changes: 2 additions & 2 deletions abydos/distance/_mutual_information.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
"""

from math import log2
from typing import Any, Collection, Counter as TCounter, Optional, Union
from typing import Any, Collection, Counter as TCounter, Optional, Union, cast

from ._token_distance import _TokenDistance
from ..tokenizer import _Tokenizer
Expand Down Expand Up @@ -179,7 +179,7 @@ def sim(self, src: str, tar: str) -> float:
if not norm:
norm = [1]

return (1.0 + score / max(norm)) / 2.0
return cast(float, (1.0 + score / max(norm)) / 2.0)
return 0.0


Expand Down
2 changes: 1 addition & 1 deletion abydos/distance/_quantitative_dice.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ def sim(self, src: str, tar: str) -> float:
return (
2
* sum(
self._src_tokens[tok] * self._tar_tokens[tok] # type: ignore
self._src_tokens[tok] * self._tar_tokens[tok]
for tok in alphabet
)
/ (
Expand Down
2 changes: 1 addition & 1 deletion abydos/distance/_rouge_w.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def __init__(
super(RougeW, self).__init__(**kwargs)

if f_func is not None:
self._f_func = f_func # type: Callable[[float], float]
self._f_func = f_func # type: Callable[[float], float]
else:
self._f_func = RougeW._square # noqa: SF01

Expand Down
2 changes: 1 addition & 1 deletion abydos/distance/_saps.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
Syllable Alignment Pattern Searching tokenizer
"""

from typing import Any, Callable, List, Optional, Tuple
from typing import Any, Callable, List, Optional, Tuple, cast

from numpy import int as np_int
from numpy import zeros as np_zeros
Expand Down
2 changes: 1 addition & 1 deletion abydos/distance/_sift4_extended.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
Sift4 Extended approximate string distance
"""

from typing import Any, Callable, Dict, List, Optional, Union, cast
from typing import Any, Callable, Dict, List, Optional, Union

from ._distance import _Distance
from ._sift4 import Sift4
Expand Down
7 changes: 2 additions & 5 deletions abydos/distance/_softtf_idf.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@

from collections import defaultdict
from math import log1p
from typing import Any, DefaultDict, Optional, Tuple, cast
from typing import Any, DefaultDict, Optional, Tuple

from ._distance import _Distance
from ._jaro_winkler import JaroWinkler
Expand Down Expand Up @@ -103,11 +103,8 @@ def __init__(
"""
super(SoftTFIDF, self).__init__(tokenizer=tokenizer, **kwargs)
self._corpus = corpus
self._metric = cast(_Distance, metric)
self._threshold = threshold

if self._metric is None:
self._metric = JaroWinkler()
self._metric = JaroWinkler() if metric is None else metric

def sim(self, src: str, tar: str) -> float:
"""Return the SoftTF-IDF similarity of two strings.
Expand Down
10 changes: 7 additions & 3 deletions abydos/distance/_token_distance.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
Optional,
Tuple,
Union,
cast,
)

import numpy as np
Expand Down Expand Up @@ -286,7 +287,9 @@ def _norm_inverse(x: float, _squares: int, pop: float) -> float:
def _norm_complement(x: float, _squares: int, pop: float) -> float:
return pop - x

def _tokenize(self, src: Union[str, TCounter], tar: Union[str, TCounter]) -> '_TokenDistance':
def _tokenize(
self, src: Union[str, TCounter], tar: Union[str, TCounter]
) -> '_TokenDistance':
"""Return the Q-Grams in src & tar.
Parameters
Expand Down Expand Up @@ -595,12 +598,13 @@ def _soft_intersection(self) -> TCounter[str]:

def _membership(src: str, tar: str) -> float:
greater_length = max(len(src), len(tar))
return (
return cast(
float,
max(
greater_length - self.params['metric'].dist_abs(src, tar),
self._lcprefix.dist_abs(src, tar),
)
/ greater_length
/ greater_length,
)

def _token_src_tar_int(
Expand Down
30 changes: 19 additions & 11 deletions abydos/distance/_tversky.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
Tversky index
"""

from typing import Any, Optional
from typing import Any, Optional, cast

from ._token_distance import _TokenDistance
from ..tokenizer import _Tokenizer
Expand Down Expand Up @@ -175,21 +175,29 @@ def sim(self, src: str, tar: str) -> float:
return 0.0

if self.params['bias'] is None:
return q_intersection_mag / (
return cast(
float,
q_intersection_mag
+ self.params['alpha'] * q_src_mag
+ self.params['beta'] * q_tar_mag
/ (
q_intersection_mag
+ self.params['alpha'] * q_src_mag
+ self.params['beta'] * q_tar_mag
),
)

a_val, b_val = sorted((q_src_mag, q_tar_mag))
c_val = q_intersection_mag + self.params['bias']
return c_val / (
self.params['beta']
* (
self.params['alpha'] * a_val
+ (1 - self.params['alpha']) * b_val
)
+ c_val
return cast(
float,
c_val
/ (
self.params['beta']
* (
self.params['alpha'] * a_val
+ (1 - self.params['alpha']) * b_val
)
+ c_val
),
)


Expand Down
8 changes: 2 additions & 6 deletions abydos/distance/_typo.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,12 +216,8 @@ def dist_abs(self, src: str, tar: str) -> float:
else:
keyboard = self._keyboard[self._layout]

lowercase = {
item for sublist in keyboard[0] for item in sublist
}
uppercase = {
item for sublist in keyboard[1] for item in sublist
}
lowercase = {item for sublist in keyboard[0] for item in sublist}
uppercase = {item for sublist in keyboard[1] for item in sublist}
keys = set(chain(*chain(*keyboard)))

def _kb_array_for_char(char: str) -> Tuple[Tuple[str, ...], ...]:
Expand Down

0 comments on commit 4394373

Please sign in to comment.