Skip to content

Commit

Permalink
release 1.1.0
Browse files Browse the repository at this point in the history
  • Loading branch information
amenezes committed Nov 6, 2021
1 parent 0c94979 commit 8d1e107
Show file tree
Hide file tree
Showing 5 changed files with 31 additions and 25 deletions.
14 changes: 2 additions & 12 deletions benchmark/test_pybmoore.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,6 @@
"tests/data/br_constitution.txt",
["Supremo Tribunal Federal", "Emenda Constitucional"],
),
(
"tests/data/us_constitution.txt",
["freedom", "Congress"],
),
],
)
def test_search_multiple_terms(filename, terms, benchmark):
Expand All @@ -29,12 +25,10 @@ def test_search_multiple_terms(filename, terms, benchmark):
@pytest.mark.parametrize(
"filename,term",
[
("tests/data/br_constitution.txt", "Deus"),
("tests/data/br_constitution.txt", "Lei nº"),
("tests/data/br_constitution.txt", "Brasil"),
("tests/data/us_constitution.txt", "Section"),
("tests/data/us_constitution.txt", "freedom"),
("tests/data/br_constitution.txt", "Supremo Tribunal Federal"),
("tests/data/us_constitution.txt", "Congress"),
("tests/data/us_constitution.txt", "Congress of the United States"),
],
)
def test_search_single_term(filename, term, benchmark):
Expand All @@ -48,12 +42,8 @@ def test_search_single_term(filename, term, benchmark):
("algorithm"),
("string-searching"),
("19"),
("constant factor"),
("The Boyer–Moore"),
("string-search"),
("computer science,"),
("algorithm preprocess"),
("Wojciech Rytter"),
],
)
def test_search(pattern, benchmark):
Expand Down
2 changes: 1 addition & 1 deletion pybmoore/__version__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "1.0.0"
__version__ = "1.1.0"
19 changes: 8 additions & 11 deletions pybmoore/_bm.pyx
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
from collections import deque
from typing import Dict, List, Tuple

import cython

from ._bm cimport calc_offset, term_index
Expand All @@ -22,7 +19,7 @@ cdef bint flag(int term_index, str suffix_char, str pattern_char):
return 1


def search(pattern: str, source: str) -> List[Tuple[int, int]]:
cpdef search(pattern: str, source: str):
pattern_len: cython.int = len(pattern)
source_len: cython.int = len(source)
good_suffix = suffix_shift(pattern)
Expand All @@ -49,27 +46,27 @@ def search(pattern: str, source: str) -> List[Tuple[int, int]]:
return r


def bad_char_shift(pattern: str) -> Dict[str, int]:
cdef bad_char_shift(str pattern):
pattern_len: cython.int = len(pattern) - 1
return {pattern[i]: (pattern_len - i) for i in range(pattern_len)}


def suffix_shift(pattern: str) -> Dict:
cdef suffix_shift(str pattern):
pattern_len: cython.int = len(pattern)
skip_list = {}
_buffer: deque = deque()
for badchar in pattern[::-1]:
_buffer = ""
for badchar in reversed(pattern):
skip_list[len(_buffer)] = suffix_position(
badchar, _buffer, pattern, pattern_len
)

_buffer.appendleft(badchar)
_buffer = f"{_buffer}{badchar}"
return skip_list


def suffix_position(badchar: str, suffix: deque, pattern: str, pattern_len: int) -> int:
cdef int suffix_position(str badchar, str suffix, str pattern, int pattern_len):
suffix_len: cython.int = len(suffix)
for offset in range(1, pattern_len + 1)[::-1]:
for offset in reversed(range(1, pattern_len + 1)):
flag_active: cython.bint = 1
tindex = term_index(offset, suffix_len)
for suffix_index in range(suffix_len):
Expand Down
16 changes: 15 additions & 1 deletion pybmoore/_boyer_moore.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from concurrent.futures import ProcessPoolExecutor, as_completed
from functools import singledispatch
from typing import Dict, List, Tuple

Expand All @@ -6,7 +7,20 @@

@singledispatch
def search(pattern: List[str], source: str) -> Dict:
return {criteria: _bm.search(criteria, source) for criteria in pattern}
resp = {}
pattern_len = len(pattern)
with ProcessPoolExecutor(max_workers=pattern_len) as executor:
futures = {
executor.submit(_search, pattern[i], source) for i in range(pattern_len)
}
for future in as_completed(futures):
term, result = future.result()
resp[term] = result
return resp


def _search(pattern: str, source: str):
return pattern, search(pattern, source)


@search.register(str) # type: ignore
Expand Down
5 changes: 5 additions & 0 deletions tests/test_pybmoore.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import pybmoore


@pytest.mark.skip
@pytest.mark.parametrize(
"pattern, expected",
[
Expand All @@ -20,6 +21,7 @@ def test_bad_char_shift(pattern, expected):
assert pybmoore._bm.bad_char_shift(pattern) == expected


@pytest.mark.skip
@pytest.mark.parametrize(
"pattern, expected",
[
Expand Down Expand Up @@ -47,6 +49,7 @@ def test_suffix_shift(pattern, expected):
assert pybmoore._bm.suffix_shift(pattern) == expected


@pytest.mark.skip
@pytest.mark.parametrize(
"badchar, suffix, pattern, expected",
[
Expand Down Expand Up @@ -100,9 +103,11 @@ def test_search(pattern, expected):
("tests/data/br_constitution.txt", "Lei nº", 49),
("tests/data/br_constitution.txt", "Brasil", 41),
("tests/data/br_constitution.txt", "§ 1º", 293),
("tests/data/br_constitution.txt", "Supremo Tribunal Federal", 62),
("tests/data/us_constitution.txt", "Section", 56),
("tests/data/us_constitution.txt", "freedom", 1),
("tests/data/us_constitution.txt", "Congress", 60),
("tests/data/us_constitution.txt", "Congress of the United States", 1),
],
)
def test_search_with_large_text(filename, term, expected):
Expand Down

0 comments on commit 8d1e107

Please sign in to comment.