-
Notifications
You must be signed in to change notification settings - Fork 0
/
test.py
100 lines (75 loc) · 3.47 KB
/
test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
from bitarray import bitarray
import mezip
import unittest
#---------------------------------------------------------------------------
#
# compression tests
#
#---------------------------------------------------------------------------
# Plain-text fixtures: one sample string, with and without a trailing newline.
abba = 'AABABBBABAABABBBABBABB'
abba_newline = abba + '\n'

# Bit patterns that test_encode expects from mezip.encode() for each fixture.
abba_compressed = '01110100101001011100101100111'
abba_newline_compressed = '001011001000101000101011000101100011110'

# Bit patterns that test_compress expects from mezip.compress().  Each one is
# the header checked in test_makeHeader, followed by the encoded bits, followed
# by the zero padding checked in test_byte_align.
abba_full_payload = (
    '00000010010000010100001000000011'  # header
    + abba_compressed                   # encoded bits
    + '000'                             # padding to a byte boundary
)
abba_newline_full_payload = (
    '0000001101000001010000100000101000000001'  # header
    + abba_newline_compressed                   # encoded bits
    + '0'                                       # padding to a byte boundary
)
class TestCompression(unittest.TestCase):
    """Checks each stage of the mezip compression pipeline.

    Every test covers two cases: one derived from ``abba`` and one derived
    from ``abba_newline``.
    """

    def test_tokenize(self):
        """tokenize() returns the expected n-gram list and symbol list."""
        tokens, alphabet = mezip.tokenize(abba)
        self.assertEqual(
            tokens,
            ['A', 'AB', 'ABB', 'B', 'ABA', 'ABAB', 'BB', 'ABBA', 'BB'],
        )
        self.assertEqual(alphabet, ['A', 'B'])

        tokens, alphabet = mezip.tokenize(abba_newline)
        self.assertEqual(
            tokens,
            ['A', 'AB', 'ABB', 'B', 'ABA', 'ABAB', 'BB', 'ABBA', 'BB\n'],
        )
        self.assertEqual(alphabet, ['A', 'B', '\n'])

    def test_encode(self):
        """encode() gives the expected bits for tokenized and literal input."""
        # Plain sample: first from tokenize() output, then from literals.
        tokens, alphabet = mezip.tokenize(abba)
        encoded = mezip.encode(tokens, alphabet)
        self.assertEqual(encoded, bitarray(abba_compressed))

        encoded = mezip.encode(
            ['A', 'AB', 'ABB', 'B', 'ABA', 'ABAB', 'BB', 'ABBA', 'BB'],
            ['A', 'B'],
        )
        self.assertEqual(encoded, bitarray(abba_compressed))

        # Sample with trailing newline: same two ways of supplying the input.
        tokens, alphabet = mezip.tokenize(abba_newline)
        encoded = mezip.encode(tokens, alphabet)
        self.assertEqual(encoded, bitarray(abba_newline_compressed))

        encoded = mezip.encode(
            ['A', 'AB', 'ABB', 'B', 'ABA', 'ABAB', 'BB', 'ABBA', 'BB\n'],
            ['A', 'B', '\n'],
        )
        self.assertEqual(encoded, bitarray(abba_newline_compressed))

    def test_byte_align(self):
        """byte_align() returns the padded bits and the reported zero count."""
        # (input bits, expected padded bits, expected zero count)
        cases = (
            ('01110100101001011100101100111',
             '01110100101001011100101100111000',
             3),
            ('001011001000101000101011000101100011110',
             '0010110010001010001010110001011000111100',
             1),
        )
        for raw_bits, padded_bits, pad_count in cases:
            aligned, zeros_added = mezip.byte_align(bitarray(raw_bits))
            self.assertEqual(aligned, bitarray(padded_bits))
            self.assertEqual(zeros_added, pad_count)

    def test_makeHeader(self):
        """makeHeader() builds the expected header from symbols and pad count."""
        # (text to tokenize, pad count passed in, expected header bits)
        cases = (
            (abba, 3, '00000010010000010100001000000011'),
            (abba_newline, 1, '0000001101000001010000100000101000000001'),
        )
        for text, pad_count, header_bits in cases:
            _, alphabet = mezip.tokenize(text)
            built = mezip.makeHeader(alphabet, pad_count)
            self.assertEqual(built, bitarray(header_bits))

    def test_compress(self):
        """compress() returns the full expected payload for each fixture."""
        cases = (
            (abba, abba_full_payload),
            (abba_newline, abba_newline_full_payload),
        )
        for text, payload_bits in cases:
            result = mezip.compress(text)
            self.assertEqual(result, bitarray(payload_bits))
#---------------------------------------------------------------------------
#
# decompression tests
#
#---------------------------------------------------------------------------
'''
class TestDecompression(unittest.TestCase):
def test_ConsumeHeader(self):
pass
def test_getByteString(self):
pass
def test_tokenizeCompressed(self):
pass
'''
if __name__ == '__main__':
    # Executed directly as a script: hand control to unittest, which runs
    # the test cases defined in this module.
    unittest.main()