Skip to content

Commit

Permalink
put params in __init__
Browse files Browse the repository at this point in the history
  • Loading branch information
chrislit committed Dec 4, 2018
1 parent 7a70821 commit a23b70c
Show file tree
Hide file tree
Showing 12 changed files with 156 additions and 83 deletions.
2 changes: 1 addition & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ Abydos
:target: https://app.fossa.io/projects/git%2Bgithub.com%2Fchrislit%2Fabydos?ref=badge_shield
:alt: FOSSA Status

.. |pylint| image:: https://img.shields.io/badge/Pylint-9.19/10-yellowgreen.svg
.. |pylint| image:: https://img.shields.io/badge/Pylint-9.18/10-yellowgreen.svg
:target: #
:alt: Pylint Score

Expand Down
26 changes: 20 additions & 6 deletions abydos/fingerprint/_count.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,18 +46,31 @@ class Count(_Fingerprint):
.. versionadded:: 0.3.6
"""

def fingerprint(self, word, n_bits=16, most_common=MOST_COMMON_LETTERS_CG):
"""Return the count fingerprint.
def __init__(self, n_bits=16, most_common=MOST_COMMON_LETTERS_CG):
    """Initialize Count instance.

    Parameters
    ----------
    n_bits : int
        Number of bits in the fingerprint returned (``fingerprint`` rounds
        an odd value up to the next even number)
    most_common : list
        The most common tokens in the target language, ordered by frequency


    .. versionadded:: 0.4.0

    """
    # super() must name *this* class: naming the base (_Fingerprint) starts
    # the MRO lookup after it and would skip any initializer it defines.
    super(Count, self).__init__()
    self._n_bits = n_bits
    self._most_common = most_common

def fingerprint(self, word):
"""Return the count fingerprint.
Parameters
----------
word : str
The word to fingerprint
Returns
-------
int
Expand All @@ -82,13 +95,14 @@ def fingerprint(self, word, n_bits=16, most_common=MOST_COMMON_LETTERS_CG):
Encapsulated in class
"""
n_bits = self._n_bits
if n_bits % 2:
n_bits += 1

word = Counter(word)
fingerprint = 0

for letter in most_common:
for letter in self._most_common:
if n_bits:
fingerprint <<= 2
fingerprint += word[letter] & 3
Expand Down Expand Up @@ -143,7 +157,7 @@ def count_fingerprint(word, n_bits=16, most_common=MOST_COMMON_LETTERS_CG):
.. versionadded:: 0.3.0
"""
return Count().fingerprint(word, n_bits, most_common)
return Count(n_bits, most_common).fingerprint(word)


if __name__ == '__main__':
Expand Down
26 changes: 20 additions & 6 deletions abydos/fingerprint/_occurrence.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,18 +44,31 @@ class Occurrence(_Fingerprint):
.. versionadded:: 0.3.6
"""

def fingerprint(self, word, n_bits=16, most_common=MOST_COMMON_LETTERS_CG):
"""Return the occurrence fingerprint.
def __init__(self, n_bits=16, most_common=MOST_COMMON_LETTERS_CG):
    """Initialize Occurrence instance.

    Parameters
    ----------
    n_bits : int
        Number of bits in the fingerprint returned
    most_common : list
        The most common tokens in the target language, ordered by frequency


    .. versionadded:: 0.4.0

    """
    # super() must name *this* class: naming the base (_Fingerprint) starts
    # the MRO lookup after it and would skip any initializer it defines.
    super(Occurrence, self).__init__()
    self._n_bits = n_bits
    self._most_common = most_common

def fingerprint(self, word):
"""Return the occurrence fingerprint.
Parameters
----------
word : str
The word to fingerprint
Returns
-------
int
Expand All @@ -80,10 +93,11 @@ def fingerprint(self, word, n_bits=16, most_common=MOST_COMMON_LETTERS_CG):
Encapsulated in class
"""
n_bits = self._n_bits
word = set(word)
fingerprint = 0

for letter in most_common:
for letter in self._most_common:
if letter in word:
fingerprint += 1
n_bits -= 1
Expand Down Expand Up @@ -142,7 +156,7 @@ def occurrence_fingerprint(
.. versionadded:: 0.3.0
"""
return Occurrence().fingerprint(word, n_bits, most_common)
return Occurrence(n_bits, most_common).fingerprint(word)


if __name__ == '__main__':
Expand Down
24 changes: 21 additions & 3 deletions abydos/fingerprint/_occurrence_halved.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,24 @@ class OccurrenceHalved(_Fingerprint):
.. versionadded:: 0.3.6
"""

def fingerprint(self, word, n_bits=16, most_common=MOST_COMMON_LETTERS_CG):
def __init__(self, n_bits=16, most_common=MOST_COMMON_LETTERS_CG):
    """Initialize OccurrenceHalved instance.

    Parameters
    ----------
    n_bits : int
        Number of bits in the fingerprint returned (``fingerprint`` rounds
        an odd value up to the next even number)
    most_common : list
        The most common tokens in the target language, ordered by frequency


    .. versionadded:: 0.4.0

    """
    # super() must name *this* class: naming the base (_Fingerprint) starts
    # the MRO lookup after it and would skip any initializer it defines.
    super(OccurrenceHalved, self).__init__()
    self._n_bits = n_bits
    self._most_common = most_common

def fingerprint(self, word):
"""Return the occurrence halved fingerprint.
Based on the occurrence halved fingerprint from :cite:`Cislak:2017`.
Expand Down Expand Up @@ -82,6 +99,7 @@ def fingerprint(self, word, n_bits=16, most_common=MOST_COMMON_LETTERS_CG):
Encapsulated in class
"""
n_bits = self._n_bits
if n_bits % 2:
n_bits += 1

Expand All @@ -90,7 +108,7 @@ def fingerprint(self, word, n_bits=16, most_common=MOST_COMMON_LETTERS_CG):
w_2 = set(word[w_len:])
fingerprint = 0

for letter in most_common:
for letter in self._most_common:
if n_bits:
fingerprint <<= 1
if letter in w_1:
Expand Down Expand Up @@ -151,7 +169,7 @@ def occurrence_halved_fingerprint(
.. versionadded:: 0.3.0
"""
return OccurrenceHalved().fingerprint(word, n_bits, most_common)
return OccurrenceHalved(n_bits, most_common).fingerprint(word)


if __name__ == '__main__':
Expand Down
62 changes: 35 additions & 27 deletions abydos/fingerprint/_phonetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@

from ._string import String
from .. import __version__
from ..phonetic import double_metaphone
from ..phonetic import DoubleMetaphone, double_metaphone
from ..phonetic._phonetic import _Phonetic


__all__ = ['Phonetic', 'phonetic_fingerprint']
Expand All @@ -52,30 +53,32 @@ class Phonetic(String):
.. versionadded:: 0.3.6
"""

def fingerprint(
self,
phrase,
phonetic_algorithm=double_metaphone,
joiner=' ',
*args,
**kwargs
):
"""Return the phonetic fingerprint of a phrase.
def __init__(self, phonetic_algorithm=None, joiner=' '):
    """Initialize Phonetic instance.

    Parameters
    ----------
    phonetic_algorithm : _Phonetic or function
        Either a phonetic encoder object (its ``encode`` method is applied
        to each word) or a plain callable that takes a string and returns
        its phonetic representation. If None, a
        :py:class:`.DoubleMetaphone` instance is used.
    joiner : str
        The string that will be placed between each word


    .. versionadded:: 0.4.0

    """
    # Resolve the default lazily so that no encoder instance is shared
    # through a default argument value.
    if phonetic_algorithm is None:
        phonetic_algorithm = DoubleMetaphone()

    self._phonetic_algorithm = phonetic_algorithm
    self._joiner = joiner

def fingerprint(self, phrase):
"""Return the phonetic fingerprint of a phrase.
Parameters
----------
phrase : str
The string from which to calculate the phonetic fingerprint
Returns
-------
Expand All @@ -87,9 +90,10 @@ def fingerprint(
>>> pf = Phonetic()
>>> pf.fingerprint('The quick brown fox jumped over the lazy dog.')
'0 afr fks jmpt kk ls prn tk'
>>> from abydos.phonetic import soundex
>>> pf.fingerprint('The quick brown fox jumped over the lazy dog.',
... phonetic_algorithm=soundex)
>>> from abydos.phonetic import Soundex
>>> pf = Phonetic(Soundex())
>>> pf.fingerprint('The quick brown fox jumped over the lazy dog.')
'b650 d200 f200 j513 l200 o160 q200 t000'
.. versionadded:: 0.1.0
Expand All @@ -99,11 +103,14 @@ def fingerprint(
"""
phonetic = ''
for word in phrase.split():
word = phonetic_algorithm(word, *args, **kwargs)
if isinstance(self._phonetic_algorithm, _Phonetic):
word = self._phonetic_algorithm.encode(word)
else:
word = self._phonetic_algorithm(word)
if not isinstance(word, text_type) and hasattr(word, '__iter__'):
word = word[0]
phonetic += word + joiner
phonetic = phonetic[: -len(joiner)]
phonetic += word + self._joiner
phonetic = phonetic[: -len(self._joiner)]
return super(Phonetic, self).fingerprint(phonetic)


Expand Down Expand Up @@ -144,6 +151,7 @@ def phonetic_fingerprint(
--------
>>> phonetic_fingerprint('The quick brown fox jumped over the lazy dog.')
'0 afr fks jmpt kk ls prn tk'
>>> from abydos.phonetic import soundex
>>> phonetic_fingerprint('The quick brown fox jumped over the lazy dog.',
... phonetic_algorithm=soundex)
Expand All @@ -152,9 +160,9 @@ def phonetic_fingerprint(
.. versionadded:: 0.1.0
"""
return Phonetic().fingerprint(
phrase, phonetic_algorithm, joiner, *args, **kwargs
)
return Phonetic(
lambda phrase: phonetic_algorithm(phrase, *args, **kwargs), joiner
).fingerprint(phrase)


if __name__ == '__main__':
Expand Down
45 changes: 27 additions & 18 deletions abydos/fingerprint/_position.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,25 +44,33 @@ class Position(_Fingerprint):
.. versionadded:: 0.3.6
"""

def fingerprint(
self,
word,
n_bits=16,
most_common=MOST_COMMON_LETTERS_CG,
bits_per_letter=3,
def __init__(
    self, n_bits=16, most_common=MOST_COMMON_LETTERS_CG, bits_per_letter=3
):
    """Initialize Position instance.

    Parameters
    ----------
    n_bits : int
        Number of bits in the fingerprint returned
    most_common : list
        The most common tokens in the target language, ordered by frequency
    bits_per_letter : int
        The bits to assign for letter position


    .. versionadded:: 0.4.0

    """
    # super() must name *this* class: naming the base (_Fingerprint) starts
    # the MRO lookup after it and would skip any initializer it defines.
    super(Position, self).__init__()
    self._n_bits = n_bits
    self._most_common = most_common
    self._bits_per_letter = bits_per_letter

def fingerprint(self, word):
"""Return the position fingerprint.
Parameters
----------
word : str
The word to fingerprint
Returns
-------
Expand All @@ -87,23 +95,24 @@ def fingerprint(
Encapsulated in class
"""
n_bits = self._n_bits
position = {}
for pos, letter in enumerate(word):
if letter not in position and letter in most_common:
position[letter] = min(pos, 2 ** bits_per_letter - 1)
if letter not in position and letter in self._most_common:
position[letter] = min(pos, 2 ** self._bits_per_letter - 1)

fingerprint = 0

for letter in most_common:
for letter in self._most_common:
if n_bits:
fingerprint <<= min(bits_per_letter, n_bits)
fingerprint <<= min(self._bits_per_letter, n_bits)
if letter in position:
fingerprint += min(position[letter], 2 ** n_bits - 1)
else:
fingerprint += min(
2 ** bits_per_letter - 1, 2 ** n_bits - 1
2 ** self._bits_per_letter - 1, 2 ** n_bits - 1
)
n_bits -= min(bits_per_letter, n_bits)
n_bits -= min(self._bits_per_letter, n_bits)
else:
break

Expand Down Expand Up @@ -159,7 +168,7 @@ def position_fingerprint(
.. versionadded:: 0.3.0
"""
return Position().fingerprint(word, n_bits, most_common, bits_per_letter)
return Position(n_bits, most_common, bits_per_letter).fingerprint(word)


if __name__ == '__main__':
Expand Down
Loading

0 comments on commit a23b70c

Please sign in to comment.