In [1]:
# BF algorithm: Brute Force
def bf_search(main_str, pattern_str):
    """Return the index of the first occurrence of ``pattern_str`` in ``main_str``.

    Every candidate start position is tried in order and compared element by
    element against the pattern.

    Args:
        main_str: The text to search in.
        pattern_str: The pattern to look for.

    Returns:
        The smallest start index at which the pattern matches, or -1 when
        there is no match (including when the pattern is longer than the
        text). An empty pattern matches at index 0.
    """
    text_len = len(main_str)
    pat_len = len(pattern_str)
    if pat_len > text_len:
        return -1

    for offset in range(text_len - pat_len + 1):
        for k in range(pat_len):
            if main_str[offset + k] != pattern_str[k]:
                break
        else:
            # The inner loop finished without a mismatch: full match here.
            return offset
    return -1

In [13]:
def simple_hash(s, start, end, hash_table):
    """Compute a weighted sum over the characters ``s[start]`` .. ``s[end]``.

    Each character contributes ``(ord(c) - ord('a')) * weight``. The first
    character of the span uses ``hash_table[end - start]`` and each following
    character uses the next lower index.

    Args:
        s: The source string.
        start: Index of the first character of the span (inclusive).
        end: Index of the last character of the span (inclusive).
        hash_table: Sequence of weights; it is indexed from ``end - start``
            down towards 0.

    Returns:
        The weighted sum, or 0 for an empty span. Characters whose code point
        is below ``'a'`` contribute negative terms.
    """
    top = end - start
    origin = ord('a')
    return sum(
        (ord(ch) - origin) * hash_table[top - pos]
        for pos, ch in enumerate(s[start:end + 1])
    )

def rk_search(main_str, pattern_str):
    """Find the first occurrence of ``pattern_str`` in ``main_str`` (Rabin-Karp).

    Each window of ``len(pattern_str)`` characters is hashed as a base-26
    polynomial whose digits are ``ord(c) - ord('a')``. The hash of the next
    window is derived from the previous one in O(1) arithmetic operations.

    Args:
        main_str: The text to search in.
        pattern_str: The pattern to look for.

    Returns:
        The smallest start index at which the pattern occurs, or -1 when it
        does not occur (including when the pattern is longer than the text).
        An empty pattern matches at index 0.
    """
    n, m = len(main_str), len(pattern_str)
    if m > n:
        return -1
    if m == 0:
        return 0

    base = 26
    origin = ord('a')
    high = base ** (m - 1)  # weight of the window's leading character

    # Hash the pattern and the first window with Horner's rule.
    target = 0
    window = 0
    for k in range(m):
        target = target * base + (ord(pattern_str[k]) - origin)
        window = window * base + (ord(main_str[k]) - origin)

    last = n - m
    for i in range(last + 1):
        # Characters outside a-z produce digits outside 0..25, so two different
        # windows can share a hash; confirm with a real comparison.
        if window == target and main_str[i:i + m] == pattern_str:
            return i
        if i < last:
            # Drop the leading character, shift, and append the next one.
            leading = ord(main_str[i]) - origin
            incoming = ord(main_str[i + m]) - origin
            window = (window - high * leading) * base + incoming
    return -1

In [3]:
def simple_hash2(s, start, end):
    """Return the sum of the code points of ``s[start]`` .. ``s[end]``.

    The sum does not depend on character order, so different strings made of
    the same characters collide; callers must confirm a hash match with a
    real string comparison.

    Args:
        s: The source string.
        start: Index of the first character of the span (inclusive).
        end: Index of the last character of the span (inclusive).

    Returns:
        The integer sum of ``ord(c)`` over the span, or 0 for an empty span.
    """
    return sum(ord(c) for c in s[start:end + 1])

def rk_search2(main_str, pattern_str):
    """Find the first occurrence of ``pattern_str`` in ``main_str``.

    Rabin-Karp variant whose hash is the plain sum of character code points.
    The checksum of each window is updated in O(1) from the previous one, and
    every checksum hit is confirmed with a string comparison because the sum
    ignores character order.

    Args:
        main_str: The text to search in.
        pattern_str: The pattern to look for.

    Returns:
        The smallest start index at which the pattern occurs, or -1 when it
        does not occur (including when the pattern is longer than the text).
        An empty pattern matches at index 0.
    """
    n, m = len(main_str), len(pattern_str)
    if m > n:
        return -1

    target = sum(map(ord, pattern_str))
    window = sum(map(ord, main_str[:m]))

    last = n - m
    for i in range(last + 1):
        # Resolve hash collisions with an exact comparison.
        if window == target and main_str[i:i + m] == pattern_str:
            return i
        if i < last:
            # Slide the window: add the incoming character, drop the outgoing.
            window += ord(main_str[i + m]) - ord(main_str[i])
    return -1

In [5]:
from time import perf_counter, time

# Worst case for brute force: a long run of 'a' and a pattern that only
# differs in its final character, so every window is compared almost fully.
m_str = 'a'*10000
p_str = 'a'*200+'b'

print('--- time consume ---')
# perf_counter is a monotonic high-resolution clock intended for measuring
# short intervals; only the search call is timed, not the printing.
t = perf_counter()
result = bf_search(m_str, p_str)
elapsed = perf_counter() - t
print('[bf] result:', result)
print('[bf] time cost: {0:.5}s'.format(elapsed))

t = perf_counter()
result = rk_search(m_str, p_str)
elapsed = perf_counter() - t
print('[rk] result:', result)
print('[rk] time cost: {0:.5}s'.format(elapsed))

t = perf_counter()
result = rk_search2(m_str, p_str)
elapsed = perf_counter() - t
print('[rk2] result:', result)
print('[rk2] time cost: {0:.5}s'.format(elapsed))

print('')
print('--- search ---')
# Sanity check: all three implementations should report the same index.
m_str = 'thequickbrownfoxjump1soverthelazydog'
p_str = 'jump1'
print('[bf] result:', bf_search(m_str, p_str))
print('[rk] result:', rk_search(m_str, p_str))
print('[rk2] result:', rk_search2(m_str, p_str))

--- time consume ---
[bf] result: -1
[bf] time cost: 0.5104s
[rk] result: -1
[rk] time cost: 0.038112s
[rk2] result: -1
[rk2] time cost: 0.024065s

--- search ---
[bf] result: 16
[rk] result: 16
[rk2] result: 16


In [2]:
def simple_hash(s, st, ed, hash_table):
    """Compute a weighted sum over the characters ``s[st]`` .. ``s[ed]``.

    Each character contributes ``(ord(c) - ord('a')) * weight``, where the
    weight for the first character is ``hash_table[ed - st]`` and the index
    decreases by one for every following character.

    Args:
        s: The source string.
        st: Index of the first character of the span (inclusive).
        ed: Index of the last character of the span (inclusive).
        hash_table: Sequence of weights, indexed from ``ed - st`` downwards.

    Returns:
        The weighted sum, or 0 for an empty span.
    """
    code_a = ord('a')
    weight_idx = ed - st
    total = 0
    for ch in s[st:ed + 1]:
        total += (ord(ch) - code_a) * hash_table[weight_idx]
        weight_idx -= 1
    return total

def rk_search(main_str, pattern_str):
    """Find the first occurrence of ``pattern_str`` in ``main_str`` (Rabin-Karp).

    Windows of ``len(pattern_str)`` characters are hashed as base-26
    polynomials with digits ``ord(c) - ord('a')``; the hash is rolled forward
    one character at a time instead of being recomputed.

    Args:
        main_str: The text to search in.
        pattern_str: The pattern to look for.

    Returns:
        The smallest start index at which the pattern occurs, or -1 when it
        does not occur (including when the pattern is longer than the text).
        An empty pattern matches at index 0.
    """
    n, m = len(main_str), len(pattern_str)
    if m > n:
        return -1
    if m == 0:
        return 0

    base = 26
    origin = ord('a')
    high = base ** (m - 1)  # weight of the window's leading character

    # Hash the pattern and the first window with Horner's rule.
    target = 0
    window = 0
    for k in range(m):
        target = target * base + (ord(pattern_str[k]) - origin)
        window = window * base + (ord(main_str[k]) - origin)

    last = n - m
    for i in range(last + 1):
        # Digits fall outside 0..25 for characters outside a-z, so equal
        # hashes do not guarantee equal strings; verify before returning.
        if window == target and main_str[i:i + m] == pattern_str:
            return i
        if i < last:
            # Drop the leading character, shift, and append the next one.
            leading = ord(main_str[i]) - origin
            incoming = ord(main_str[i + m]) - origin
            window = (window - high * leading) * base + incoming
    return -1

In [3]:
def simple_hash2(string, s, e):
    """Return the sum of the code points of ``string[s]`` .. ``string[e]``.

    The sum is order-independent, so it is only usable as a cheap pre-filter:
    equal sums do not imply equal substrings.

    Args:
        string: The source string.
        s: Index of the first character of the span (inclusive).
        e: Index of the last character of the span (inclusive).

    Returns:
        The integer sum of ``ord(c)`` over the span, or 0 for an empty span.
    """
    return sum(ord(c) for c in string[s:e + 1])

def rk_search2(main_str, pattern_str):
    """Find the first occurrence of ``pattern_str`` in ``main_str``.

    Rabin-Karp variant using the sum of character code points as the hash.
    The window checksum is rolled forward in O(1) per position, and each
    checksum hit is confirmed by comparing the actual substring because the
    sum ignores character order.

    Args:
        main_str: The text to search in.
        pattern_str: The pattern to look for.

    Returns:
        The smallest start index at which the pattern occurs, or -1 when it
        does not occur (including when the pattern is longer than the text).
        An empty pattern matches at index 0.
    """
    n, m = len(main_str), len(pattern_str)
    if m > n:
        return -1

    target = sum(map(ord, pattern_str))
    window = sum(map(ord, main_str[:m]))

    last = n - m
    for i in range(last + 1):
        # Resolve hash collisions with an exact comparison.
        if window == target and main_str[i:i + m] == pattern_str:
            return i
        if i < last:
            # Slide the window: add the incoming character, drop the outgoing.
            window += ord(main_str[i + m]) - ord(main_str[i])
    return -1