Skip to content

Commit

Permalink
Merge pull request #100 from chrislit/fuzz
Browse files Browse the repository at this point in the history
fuzz branch
  • Loading branch information
chrislit committed Sep 30, 2018
2 parents 6d3f3a6 + 44aea0b commit 9719008
Show file tree
Hide file tree
Showing 9 changed files with 2,816 additions and 22 deletions.
40 changes: 20 additions & 20 deletions abydos/fingerprint.py
Original file line number Diff line number Diff line change
Expand Up @@ -388,7 +388,7 @@ def position_fingerprint(word, n_bits=16,


_synoname_special_table = (
# Roman, string, extra, method
# Roman, match, extra, method
(False, 'NONE', '', 0),
(False, 'aine', '', 3),
(False, 'also erroneously', '', 4),
Expand Down Expand Up @@ -668,59 +668,59 @@ def roman_check(numeral, fname, lname):

# Fill fields 7 (specials) and 3 (roman numerals)
for num, special in enumerate(_synoname_special_table):
roman, string, extra, method = special
roman, match, extra, method = special
if method & method_dict['end']:
string_context = ' ' + string
loc = full_name.find(string_context)
if ((len(full_name) > len(string_context)) and
(loc == len(full_name) - len(string_context))):
match_context = ' ' + match
loc = full_name.find(match_context)
if ((len(full_name) > len(match_context)) and
(loc == len(full_name) - len(match_context))):
if roman:
if not any(abbr in fname for abbr in ('i.', 'v.', 'x.')):
full_name = full_name[:loc]
toolcode[7] += '{:03d}'.format(num) + 'a'
if not toolcode[3]:
toolcode[3] = '{:03d}'.format(num)
if normalize == 2:
fname, lname = roman_check(string, fname, lname)
fname, lname = roman_check(match, fname, lname)
else:
full_name = full_name[:loc]
toolcode[7] += '{:03d}'.format(num) + 'a'
if method & method_dict['middle']:
string_context = ' ' + string + ' '
loc = full_name.find(string_context)
match_context = ' ' + match + ' '
loc = full_name.find(match_context)
if loc > 0:
if roman:
if not any(abbr in fname for abbr in ('i.', 'v.', 'x.')):
full_name = (full_name[:loc] +
full_name[loc + len(string) + 1:])
full_name[loc + len(match) + 1:])
toolcode[7] += '{:03d}'.format(num) + 'b'
if not toolcode[3]:
toolcode[3] = '{:03d}'.format(num)
if normalize == 2:
fname, lname = roman_check(string, fname, lname)
fname, lname = roman_check(match, fname, lname)
else:
full_name = (full_name[:loc] +
full_name[loc + len(string) + 1:])
full_name[loc + len(match) + 1:])
toolcode[7] += '{:03d}'.format(num) + 'b'
if method & method_dict['beginning']:
string_context = string + ' '
loc = full_name.find(string_context)
match_context = match + ' '
loc = full_name.find(match_context)
if loc == 0:
full_name = full_name[len(string) + 1:]
full_name = full_name[len(match) + 1:]
toolcode[7] += '{:03d}'.format(num) + 'c'
if method & method_dict['beginning_no_space']:
loc = full_name.find(string)
loc = full_name.find(match)
if loc == 0:
toolcode[7] += '{:03d}'.format(num) + 'd'
if full_name[len(string)] not in toolcode[9]:
toolcode[9] += full_name[len(string)]
if full_name[:len(match)] not in toolcode[9]:
toolcode[9] += full_name[:len(match)]

if extra:
loc = full_name.find(extra)
if loc != -1:
toolcode[7] += '{:03d}'.format(num) + 'X'
if full_name[loc + len(extra)] not in toolcode[9]:
toolcode[9] += full_name[loc + len(string)]
if full_name[loc:loc+len(extra)] not in toolcode[9]:
toolcode[9] += full_name[loc:loc+len(match)]

return lname, fname, ''.join(toolcode)

Expand Down
3 changes: 2 additions & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,11 @@ cover-erase=1
cover-html=1
cover-branches=1
cover-package=abydos
;processes=1
;processes=-1
process-timeout=60
process-restartworker=1
doctest-options=+NORMALIZE_WHITESPACE
exclude=(reg|fuzz)_test_

[flake8]
verbose=0
Expand Down
62 changes: 62 additions & 0 deletions tests/fuzz/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# -*- coding: utf-8 -*-

# Copyright 2018 by Christopher C. Little.
# This file is part of Abydos.
#
# Abydos is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Abydos is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.


"""abydos.tests.fuzz.

This module contains fuzz tests for Abydos.
"""

import random
import unicodedata
from string import printable

from six import unichr


def random_char(below=0x10ffff, must_be=None):
    """Return a random Unicode character that has a name.

    A code point is drawn from 0 through ``below`` (both inclusive) and
    redrawn until it has an entry in the Unicode name database and, if
    ``must_be`` is given, that name contains ``must_be``.

    Note that the search never ends if no code point in the range
    satisfies these conditions.

    :param int below: the highest code point that may be drawn
    :param str must_be: substring required in the character's Unicode name
        (None accepts any named character)
    :returns: a single character
    """
    while True:
        candidate = unichr(random.randint(0, below))
        try:
            label = unicodedata.name(candidate)
        except ValueError:
            # The code point has no name; draw another one.
            continue
        if must_be is None or must_be in label:
            return candidate


def fuzz(word, fuzziness=0.2):
    """Return a copy of ``word`` with random noise mixed in.

    Each character of ``word`` is kept untouched with probability
    ``1 - fuzziness``. Otherwise a noise character is emitted: a printable
    ASCII character half of the time, else a random code point (drawn from
    the full Unicode range in 20% of those cases and from U+0000-U+FFFF in
    the rest). After the noise character, the original character is kept
    half of the time, so noise acts as either an insertion or a
    substitution.

    The whole pass is repeated until the result differs from ``word``.

    :param str word: the word to fuzz
    :param float fuzziness: per-character probability of injecting noise;
        must be greater than 0
    :returns: the fuzzed word; an empty ``word`` is returned unchanged
    :raises ValueError: if ``fuzziness`` is not greater than 0
    """
    if fuzziness <= 0:
        # Noise would never be injected, so the retry loop below could
        # not terminate.
        raise ValueError('fuzziness must be greater than 0')
    if not word:
        # There is nothing to mutate, so no pass could ever yield a
        # different string; return instead of looping forever.
        return word

    while True:
        pieces = []
        for ch in word:
            if random.random() > fuzziness:
                pieces.append(ch)
                continue

            # Pick the noise character.
            if random.random() > 0.5:
                pieces.append(random.choice(printable))
            elif random.random() > 0.8:
                pieces.append(unichr(random.randint(0, 0x10ffff)))
            else:
                pieces.append(unichr(random.randint(0, 0xffff)))

            # Keeping the original after the noise makes this an
            # insertion; dropping it makes it a substitution.
            if random.random() > 0.5:
                pieces.append(ch)

        new_word = ''.join(pieces)
        if new_word != word:
            return new_word

0 comments on commit 9719008

Please sign in to comment.