Skip to content

Commit

Permalink
Added z-function implementation in strings/algorithms.py (#523)
Browse files Browse the repository at this point in the history
  • Loading branch information
CarolLuca committed Apr 3, 2023
1 parent 1aa53bf commit 2482adb
Show file tree
Hide file tree
Showing 2 changed files with 55 additions and 1 deletion.
49 changes: 49 additions & 0 deletions pydatastructs/strings/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ def find(text, query, algorithm, **kwargs):
'rabin_karp' -> Rabin–Karp algorithm as given in [2].
'boyer_moore' -> Boyer-Moore algorithm as given in [3].
'z_function' -> Z-function algorithm as given in [4].
backend: pydatastructs.Backend
The backend to be used.
Optional, by default, the best available
Expand Down Expand Up @@ -67,6 +70,7 @@ def find(text, query, algorithm, **kwargs):
.. [1] https://en.wikipedia.org/wiki/Knuth%E2%80%93Morris%E2%80%93Pratt_algorithm
.. [2] https://en.wikipedia.org/wiki/Rabin%E2%80%93Karp_algorithm
.. [3] https://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string-search_algorithm
.. [4] https://usaco.guide/CPH.pdf#page=257
"""
raise_if_backend_is_not_python(
find, kwargs.get('backend', Backend.PYTHON))
Expand Down Expand Up @@ -196,3 +200,48 @@ def _boyer_moore(text, query):
else:
shift += max(1, j + 1)
return positions

def _z_vector(text, query):
    """
    Computes, for every position of ``text``, the length of the longest
    prefix of ``query`` that starts at that position.

    Parameters
    ==========

    text: str
        The string to be scanned.
    query: str
        The pattern whose prefixes are matched. If it is the empty
        string, the Z-function of ``text`` itself is computed instead
        (with the value at position 0 left as 0).

    Returns
    =======

    z_values: DynamicOneDimensionalArray
        One value per position of ``text``. When ``query`` is non-empty
        no value exceeds ``len(query)``.
    """
    has_query = query != ""
    # The Z-function is computed over "query$text" so that values in the
    # text part measure how much of the query matches there.
    string = query + "$" + text if has_query else text
    total_length = len(string)

    z_fct = OneDimensionalArray(int, total_length)
    z_fct.fill(0)

    # [seg_left, seg_right] is the right-most segment found so far that
    # matches a prefix of `string`.
    seg_left = 0
    seg_right = 0

    for curr_pos in range(1, total_length):
        if curr_pos <= seg_right:
            # Reuse the value already known for the mirrored position,
            # limited to what is still inside the current segment.
            z_fct[curr_pos] = min(seg_right - curr_pos + 1,
                                  z_fct[curr_pos - seg_left])

        # Extend the match character by character.
        while curr_pos + z_fct[curr_pos] < total_length and \
                string[z_fct[curr_pos]] == string[curr_pos + z_fct[curr_pos]]:
            z_fct[curr_pos] += 1

        if curr_pos + z_fct[curr_pos] - 1 > seg_right:
            seg_left = curr_pos
            seg_right = curr_pos + z_fct[curr_pos] - 1

    # "$" may also occur in `text` or `query`, in which case a raw value
    # can run across the separator and exceed len(query). Capping it
    # yields exactly the longest common prefix of `query` and `text[i:]`.
    # Without a query no value can exceed `total_length`, so the cap is
    # a no-op in that case.
    cap = len(query) if has_query else total_length
    start_index = len(query) + 1 if has_query else 0

    final_z_fct = DynamicOneDimensionalArray(int, 0)
    for pos in range(start_index, total_length):
        final_z_fct.append(min(z_fct[pos], cap))

    return final_z_fct

def _z_function(text, query):
    """
    Finds the occurrences of ``query`` in ``text`` using the Z-function.

    Parameters
    ==========

    text: str
        The string in which the query is to be searched.
    query: str
        The string to be searched for.

    Returns
    =======

    positions: DynamicOneDimensionalArray
        The start indices in ``text`` at which ``query`` occurs.
        Empty when ``text`` or ``query`` is empty.
    """
    positions = DynamicOneDimensionalArray(int, 0)
    if len(text) == 0 or len(query) == 0:
        return positions

    query_length = len(query)
    fct = _z_vector(text, query)
    for pos in range(len(fct)):
        value = fct[pos]
        # A value of at least len(query) means the whole query matches at
        # `pos`. Testing for equality would miss matches where the value
        # overshoots, which can happen when the "$" separator used by
        # _z_vector also occurs in the inputs (e.g. text "a$a", query "a").
        # NOTE(review): len(fct) may report capacity rather than the number
        # of filled slots, so unset entries are presumably None - skip them.
        if value is not None and value >= query_length:
            positions.append(pos)

    return positions
7 changes: 6 additions & 1 deletion pydatastructs/strings/tests/test_algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,17 @@ def test_rka():
def test_bm():
    """Runs the shared string matching cases with 'boyer_moore'."""
    algorithm_name = 'boyer_moore'
    _test_common_string_matching(algorithm_name)

def test_zf():
    """Runs the shared string matching cases with 'z_function'."""
    algorithm_name = 'z_function'
    _test_common_string_matching(algorithm_name)

def _test_common_string_matching(algorithm):
true_text_pattern_dictionary = {
"Knuth-Morris-Pratt": "-Morris-",
"abcabcabcabdabcabdabcabca": "abcabdabcabca",
"aefcdfaecdaefaefcdaefeaefcdcdeae": "aefcdaefeaefcd",
"aaaaaaaa": "aaa",
"fullstringmatch": "fullstringmatch"
"fullstringmatch": "fullstringmatch",
"z-function": "z-fun"
}
for test_case_key in true_text_pattern_dictionary:
text = test_case_key
Expand All @@ -32,6 +36,7 @@ def _test_common_string_matching(algorithm):
"abcabcabcabdabcabdabcabca": "qwertyuiopzxcvbnm",
"aefcdfaecdaefaefcdaefeaefcdcdeae": "cdaefaefe",
"fullstringmatch": "fullstrinmatch",
"z-function": "function-",
"abc": "",
"": "abc"
}
Expand Down

0 comments on commit 2482adb

Please sign in to comment.