Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fuzz branch #100

Merged
merged 19 commits into from
Sep 30, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
40 changes: 20 additions & 20 deletions abydos/fingerprint.py
Original file line number Diff line number Diff line change
Expand Up @@ -388,7 +388,7 @@ def position_fingerprint(word, n_bits=16,


_synoname_special_table = (
# Roman, string, extra, method
# Roman, match, extra, method
(False, 'NONE', '', 0),
(False, 'aine', '', 3),
(False, 'also erroneously', '', 4),
Expand Down Expand Up @@ -668,59 +668,59 @@ def roman_check(numeral, fname, lname):

# Fill fields 7 (specials) and 3 (roman numerals)
for num, special in enumerate(_synoname_special_table):
roman, string, extra, method = special
roman, match, extra, method = special
if method & method_dict['end']:
string_context = ' ' + string
loc = full_name.find(string_context)
if ((len(full_name) > len(string_context)) and
(loc == len(full_name) - len(string_context))):
match_context = ' ' + match
loc = full_name.find(match_context)
if ((len(full_name) > len(match_context)) and
(loc == len(full_name) - len(match_context))):
if roman:
if not any(abbr in fname for abbr in ('i.', 'v.', 'x.')):
full_name = full_name[:loc]
toolcode[7] += '{:03d}'.format(num) + 'a'
if not toolcode[3]:
toolcode[3] = '{:03d}'.format(num)
if normalize == 2:
fname, lname = roman_check(string, fname, lname)
fname, lname = roman_check(match, fname, lname)
else:
full_name = full_name[:loc]
toolcode[7] += '{:03d}'.format(num) + 'a'
if method & method_dict['middle']:
string_context = ' ' + string + ' '
loc = full_name.find(string_context)
match_context = ' ' + match + ' '
loc = full_name.find(match_context)
if loc > 0:
if roman:
if not any(abbr in fname for abbr in ('i.', 'v.', 'x.')):
full_name = (full_name[:loc] +
full_name[loc + len(string) + 1:])
full_name[loc + len(match) + 1:])
toolcode[7] += '{:03d}'.format(num) + 'b'
if not toolcode[3]:
toolcode[3] = '{:03d}'.format(num)
if normalize == 2:
fname, lname = roman_check(string, fname, lname)
fname, lname = roman_check(match, fname, lname)
else:
full_name = (full_name[:loc] +
full_name[loc + len(string) + 1:])
full_name[loc + len(match) + 1:])
toolcode[7] += '{:03d}'.format(num) + 'b'
if method & method_dict['beginning']:
string_context = string + ' '
loc = full_name.find(string_context)
match_context = match + ' '
loc = full_name.find(match_context)
if loc == 0:
full_name = full_name[len(string) + 1:]
full_name = full_name[len(match) + 1:]
toolcode[7] += '{:03d}'.format(num) + 'c'
if method & method_dict['beginning_no_space']:
loc = full_name.find(string)
loc = full_name.find(match)
if loc == 0:
toolcode[7] += '{:03d}'.format(num) + 'd'
if full_name[len(string)] not in toolcode[9]:
toolcode[9] += full_name[len(string)]
if full_name[:len(match)] not in toolcode[9]:
toolcode[9] += full_name[:len(match)]

if extra:
loc = full_name.find(extra)
if loc != -1:
toolcode[7] += '{:03d}'.format(num) + 'X'
if full_name[loc + len(extra)] not in toolcode[9]:
toolcode[9] += full_name[loc + len(string)]
if full_name[loc:loc+len(extra)] not in toolcode[9]:
toolcode[9] += full_name[loc:loc+len(match)]

return lname, fname, ''.join(toolcode)

Expand Down
3 changes: 2 additions & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,11 @@ cover-erase=1
cover-html=1
cover-branches=1
cover-package=abydos
;processes=1
;processes=-1
process-timeout=60
process-restartworker=1
doctest-options=+NORMALIZE_WHITESPACE
exclude=(reg|fuzz)_test_

[flake8]
verbose=0
Expand Down
62 changes: 62 additions & 0 deletions tests/fuzz/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# -*- coding: utf-8 -*-

# Copyright 2018 by Christopher C. Little.
# This file is part of Abydos.
#
# Abydos is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Abydos is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.


"""abydos.tests.fuzz.

This package contains fuzz tests for Abydos, plus shared helpers for
generating random characters and noisy words.
"""

import random
import unicodedata
from string import printable

from six import unichr


def random_char(below=0x10ffff, must_be=None):
    """Return a random Unicode character that has a name.

    Code points are drawn uniformly from 0 through ``below`` (inclusive)
    until one is found that has a Unicode name and, when ``must_be`` is
    given, whose name contains ``must_be`` as a substring.

    :param int below: highest code point that may be drawn
    :param str must_be: substring the character's Unicode name must contain
        (no constraint when None)
    :returns: a single qualifying character
    :rtype: str

    Note: keeps drawing until a candidate qualifies, so it does not return
    if no code point in range satisfies ``must_be``.
    """
    chosen = None
    while chosen is None:
        candidate = unichr(random.randint(0, below))
        # Code points without a Unicode name yield None and are redrawn.
        label = unicodedata.name(candidate, None)
        if label is not None and (must_be is None or must_be in label):
            chosen = candidate
    return chosen


def fuzz(word, fuzziness=0.2):
    """Fuzz a word with noise.

    Each character of ``word`` is kept as-is when a random draw exceeds
    ``fuzziness``. Otherwise a noise character is emitted in its place: a
    random ``string.printable`` character about half the time, else a random
    code point (mostly from U+0000-U+FFFF, occasionally from the whole
    U+0000-U+10FFFF range). After the noise character, the original character
    is retained about half the time, so noise can act as either a
    substitution or an insertion. The whole pass is repeated until the
    result differs from ``word``.

    :param str word: the word to add noise to
    :param float fuzziness: per-character chance of injecting noise
    :returns: a noisy variant of ``word``; ``word`` itself is returned
        unchanged when it is empty or when ``fuzziness`` is not positive,
        since no noisy variant can be produced in those cases
    :rtype: str
    """
    # With nothing to perturb (empty word) or no chance of perturbing
    # (fuzziness <= 0) the result can never differ from the input, so the
    # retry loop below would spin forever. Bail out instead.
    if not word or fuzziness <= 0:
        return word

    while True:
        new_word = []
        for ch in word:
            if random.random() > fuzziness:
                new_word.append(ch)
            else:
                if random.random() > 0.5:
                    new_word.append(random.choice(printable))
                elif random.random() > 0.8:
                    new_word.append(unichr(random.randint(0, 0x10ffff)))
                else:
                    new_word.append(unichr(random.randint(0, 0xffff)))
                # Keep the original character after the noise half the time
                # (insertion) rather than always replacing it (substitution).
                if random.random() > 0.5:
                    new_word.append(ch)
        new_word = ''.join(new_word)
        # Retry when the random draws happened to reproduce the input.
        if new_word != word:
            return new_word