Skip to content
Find file
Fetching contributors…
Cannot retrieve contributors at this time
455 lines (423 sloc) 12.3 KB
import jtrie
import subtrie
import random
from test_subtrie import check_structure
from util import bs
dps = 2
insert_test_cases = [
(['a'], {
'a': (True, 1, {})
}),
(['abc'], {
'ab': (False, {
'c': (True, 1, {})
})
}),
(['ab','ac'], {
'a': (False, {
'b': (True, 1, {}),
'c': (True, 2, {})
})
}),
(['a'*64], {
'a'*63: (False, {
'a': (True, 1, {})
})
}),
(['a'*66], {
'a': (False, {
'a'*64: (False, {
'a': (True, 1, {})
})
})
}),
(['a'*130], {
'a': (False, {
'a'*64: (False, {
'a'*64: (False, {
'a': (True, 1, {})
})
})
})
})
]
bytestrings = {
'abc': [bs(0,0,1), bs(1,0,1), ord('a'), ord('b'), bs(0,1,0), ord('c'), 1],
'abcd': [bs(0,0,1), bs(1,0,2), ord('a'), ord('b'), ord('c'), bs(0,1,0), ord('d'), 1],
'abcdef': [bs(0,0,1), bs(1,0,4), ord('a'), ord('b'), ord('c'), ord('d'), ord('e'), bs(0,1,0), ord('f'), 1],
'ab/ac': [bs(0,0,1), bs(0,0,2), ord('a'),
bs(0,1,0), ord('b'), 0, 1,
bs(0,1,0), ord('c'), 0, 2],
}
def test_raw_inserts():
for strings, target in insert_test_cases:
yield check_raw_inserts, strings, target
def check_raw_inserts(strings, target):
jt = jtrie.JTrie(pointer_size=dps)
for s in strings:
jt.insert(s)
assert check_jtrie(jt, target)
return jt
def check_jtrie(jt, target, check_pointers=True):
return check_structure(jt.subtries[0], target, check_pointers)
def test_insert_order():
target = {
'a': (False, {
'l': (True, 1, {
'fre': (False, {
'd': (True, 2, {})
}),
'le': (False, {
'y': (True, 3, {})
})
})
})
}
jt = jtrie.JTrie(pointer_size=dps)
map(jt.insert, ['al', 'alley', 'alfred'])
assert check_jtrie(jt, target, check_pointers=False)
jt = jtrie.JTrie(pointer_size=dps)
map(jt.insert, ['alley', 'alfred', 'al'])
assert check_jtrie(jt, target, check_pointers=False)
jt = jtrie.JTrie(pointer_size=dps)
map(jt.insert, ['alley', 'al', 'alfred'])
assert check_jtrie(jt, target, check_pointers=False)
return jt
def test_insert_1a():
# Case 1a
# -a-*(^bc)-d-*
target = {
'a': (False, {
'bc': (False, {
'd': (True, 1, {})
}),
'x': (True, 2, {})
})
}
st = subtrie.SubTrie(dps, bytestrings['abcd'])
jt = jtrie.JTrie(root=st, next_string_id=2, pointer_size=dps)
jt.insert('ax')
assert check_jtrie(jt, target)
return jt
def test_insert_1b():
# Case 1b
# -a-*(^b)-d-*
target = {
'a': (False, {
'b': (False, {
'c': (True, 1, {})
}),
'x': (True, 2, {})
})
}
st = subtrie.SubTrie(dps, bytestrings['abc'])
jt = jtrie.JTrie(root=st, next_string_id=2, pointer_size=dps)
jt.insert('ax')
assert check_jtrie(jt, target)
return jt
def test_insert_2ac():
# Case 2ac
# -a-*(bc^de)-f-*
target = {
'ab': (False, {
'c': (False, {
'de': (False, {
'f': (True, 1, {})
}),
'x': (True, 2, {})
})
})
}
st = subtrie.SubTrie(dps, bytestrings['abcdef'])
jt = jtrie.JTrie(root=st, next_string_id=2, pointer_size=dps)
jt.insert('abcx')
assert check_jtrie(jt, target)
return jt
def test_insert_2bd():
# Case 2ac
# -a-*(b^c)-d-*
target = {
'a': (False, {
'b': (False, {
'c': (False, {
'd': (True, 1, {}),
}),
'x': (True, 2, {})
})
})
}
st = subtrie.SubTrie(dps, bytestrings['abcd'])
jt = jtrie.JTrie(root=st, next_string_id=2, pointer_size=dps)
jt.insert('abx')
assert check_jtrie(jt, target)
return jt
def test_insert_3a():
# Case 2ac
# -a-*(^bc)-f-*
target = {
'ab': (False, {
'c': (False, {
'd': (True, 1, {}),
'x': (True, 2, {})
})
})
}
st = subtrie.SubTrie(dps, bytestrings['abcd'])
jt = jtrie.JTrie(root=st, next_string_id=2, pointer_size=dps)
jt.insert('abcx')
assert check_jtrie(jt, target)
return jt
def test_insert_3b():
# Case 3b
# -a-*(^b)-d-*
target = {
'a': (False, {
'b': (False, {
'c': (True, 1, {}),
'x': (True, 2, {})
})
})
}
st = subtrie.SubTrie(dps, bytestrings['abc'])
jt = jtrie.JTrie(root=st, next_string_id=2, pointer_size=dps)
jt.insert('abx')
assert check_jtrie(jt, target)
return jt
def test_insert_exists_normal():
# Already exists and marked
# -a-*(bc)-d-*
target = {
'abc': (False, {
'd': (True, 1, {})
})
}
st = subtrie.SubTrie(dps, bytestrings['abcd'])
jt = jtrie.JTrie(root=st, next_string_id=2, pointer_size=dps)
jt.insert('abcd')
assert check_jtrie(jt, target)
return jt
def test_insert_exists_notmarked():
target = {
'a': (True, 3, {
'b': (True, 1, {}),
'c': (True, 2, {})
})
}
st = subtrie.SubTrie(dps, bytestrings['ab/ac'])
jt = jtrie.JTrie(root=st, next_string_id=3, pointer_size=dps)
jt.insert('a')
assert check_jtrie(jt, target)
return jt
def test_insert_exists_case1():
# Case 4-1
# -[a]-*(bc)-d-*
target = {
'a': (True, 2, {
'bc': (False, {
'd': (True, 1, {})
})
})
}
st = subtrie.SubTrie(dps, bytestrings['abcd'])
jt = jtrie.JTrie(root=st, next_string_id=2, pointer_size=dps)
jt.insert('a')
assert check_jtrie(jt, target)
return jt
def test_insert_exists_case2():
# Case 4-2
# -a-*([b]c)-d-*
target = {
'a': (False, {
'b': (True, 2, {
'c': (False, {
'd': (True, 1, {})
})
})
})
}
st = subtrie.SubTrie(dps, bytestrings['abcd'])
jt = jtrie.JTrie(root=st, next_string_id=2, pointer_size=dps)
jt.insert('ab')
assert check_jtrie(jt, target)
return jt
def test_insert_exists_case3():
# Case 4-3
# -a-*(b[c])-d-*
target = {
'ab': (False, {
'c': (True, 2, {
'd': (True, 1, {})
})
})
}
st = subtrie.SubTrie(dps, bytestrings['abcd'])
jt = jtrie.JTrie(root=st, next_string_id=2, pointer_size=dps)
jt.insert('abc')
assert check_jtrie(jt, target)
return jt
def test_find():
jt = jtrie.JTrie(pointer_size=dps)
test_strings = ['car', 'carpet', 'truck', 'trucker', 'track']
map(jt.insert, test_strings)
for n, s in zip(xrange(1, len(test_strings)+1), test_strings):
assert jt.find(s) == n
return jt
def test_large_fanout():
strings = [chr(x) for x in xrange(33, 33+94)]
first_batch, second_batch = strings[:62], strings[62:]
fanout_1 = {}
next_string_id = 1
for s in first_batch:
fanout_1[s] = (True, next_string_id, {})
next_string_id += 1
fanout_2 = {chr(0): {}}
for s in second_batch:
fanout_2[chr(0)][s] = (True, next_string_id, {})
next_string_id += 1
fanout_1[chr(0)] = fanout_2
jt = jtrie.JTrie(pointer_size=dps)
for s in strings:
jt.insert(s)
assert check_jtrie(jt, fanout_1)
for s in strings:
assert jt.find(s)
return jt
def test_random():
num_letters = 5
num_inserts = 50
length_range = (1, 30)
possible_letters = [chr(97+i) for i in xrange(num_letters)]
strings = [
''.join([
random.choice(possible_letters) for j in xrange(random.randint(*length_range))
])
for i in xrange(num_inserts)
]
# without join
with open('strings.txt', 'w') as f:
# with join
jt2 = jtrie.JTrie(pointer_size=4)
for s in strings:
f.write(s)
f.write('\n')
jt2.insert(s, join=False)
# jt2.write_graphviz('jtrie_nojoin.dot')
with open('strings.txt', 'w') as f:
# with join
jt = jtrie.JTrie(pointer_size=4)
for s in strings:
f.write(s)
f.write('\n')
jt.insert(s)
for s in strings:
assert jt.find(s)
print 'joined jtrie is', float(len(jt.subtries.keys()))/len(jt2.subtries.keys()), 'times as large as unjoined'
return jt
def test_long_strings():
jt = jtrie.JTrie(pointer_size=dps)
# strings = ['a' + 'b'*49,
# 'aa' + 'c'*48,
# 'aaa' + 'd'*47,
# 'aaaa' + 'e'*46,
# 'aaaaa' + 'f'*47,
# 'aaa' + 'd'*46,
# 'aaa' + 'd'*45,
# 'aaa' + 'd'*44,]
# strings = ['a'*126,
# 'xc'+'b'*119,
# 'xb'+'c'*119]
# strings = [
# ('a' * 63 + 'b' * 63 + 'c' * 63 + 'd' * 63 + 'e' * 63 + 'f' * 63 + 'g' * 63 + 'h' * 63 + 'i' * 63 + 'j' * 63 + 'k' * 63 + 'l' * 63) * 4
# ]
# print len(strings[0])
with open('strings.txt', 'r') as f:
strings = [s[:-1] for s in f.readlines()]
for s in strings:
jt.insert(s, join=True)
jt.write_graphviz()
for s in strings:
assert jt.find(s)
return jt
def test_multi_1():
# Let's try this:
#
# a -> b -> abc
# \- c -> abcd
#
# Which is to say:
# ababc
# acabcd
bytestrings = {
'abc': [bs(0,0,1), bs(1,0,1), ord('a'), ord('b'), bs(0,1,0), ord('c'), 0, 1],
'abcd': [bs(0,0,1), bs(1,0,2), ord('a'), ord('b'), ord('c'), bs(0,1,0), ord('d'), 0, 2],
'ab/ac': [bs(0,0,1), bs(0,0,2), ord('a'),
bs(0,1,0), ord('b'), 128, 1,
bs(0,1,0), ord('c'), 128, 2],
}
st0 = subtrie.SubTrie(dps, bytestrings['ab/ac'])
st1 = subtrie.SubTrie(dps, bytestrings['abc'])
st2 = subtrie.SubTrie(dps, bytestrings['abcd'])
jt = jtrie.JTrie(root=st0, pointer_size=dps, next_string_id=3)
jt.subtries[1] = st1
jt.subtries[2] = st2
assert jt.find('ababc') == 1
assert jt.find('acabcd') == 2
jt.insert('ad')
jt.insert('ababe')
jt.insert('acabf')
return jt
def test_follow_subtrie_roots():
# ab
# abc
# abd
bytestrings = {
'ab1': [bs(0,0,1), bs(0,0,1), ord('a'), bs(0,1,0), ord('b'), 128, 1],
'b2c': [bs(0,1,1), 128, 2, bs(0,1,0), ord('c'), 0, 1],
'b3d': [bs(0,1,1), 0, 2, bs(0,1,0), ord('d'), 0, 3],
}
st0 = subtrie.SubTrie(dps, bytestrings['ab1'])
st1 = subtrie.SubTrie(dps, bytestrings['b2c'])
st2 = subtrie.SubTrie(dps, bytestrings['b3d'])
jt = jtrie.JTrie(root=st0, pointer_size=dps)
jt.subtries[1] = st1
jt.subtries[2] = st2
assert jt.find('ab') == 2
assert jt.find('abc') == 1
assert jt.find('abd') == 3
def test_long_inserts():
jt = jtrie.JTrie(pointer_size=dps, block_size=10, _enforce_min_block_size=False)
jt.insert("aaa")
jt.insert("aaaa")
return jt
def generate_perms(alphabet, max_length):
perms = [c for c in alphabet]
for i in xrange(max_length-1):
next_perms = []
for c in alphabet:
next_perms.extend([c + p for p in perms])
perms.extend(next_perms)
return perms, next_perms
def test_permutations_leq():
alphabet = ['a', 'b']
max_length = 4
x = len(alphabet)
#first level is root, second level is branch heads, third level is pointers
block_size = 1 + x*(2+dps) + x*x*(2+dps)
jt = jtrie.JTrie(pointer_size=dps, block_size=block_size, _enforce_min_block_size=False)
for p in generate_perms(alphabet, max_length)[0]:
jt.insert(p)
return jt
def test_permutations_eq():
alphabet = ['a', 'b']
max_length = 4
x = len(alphabet)
#first level is root, second level is branch heads, third level is pointers
block_size = 1 + x*(2+dps) + x*x*(2+dps)
jt = jtrie.JTrie(pointer_size=dps, block_size=block_size, _enforce_min_block_size=False)
for p in generate_perms(alphabet, max_length)[1]:
jt.insert(p)
return jt
if __name__ == '__main__':
jt = test_random()
# jt = test_long_strings()
jt.write_graphviz()
Jump to Line
Something went wrong with that request. Please try again.