/
test_corpus.py
97 lines (79 loc) · 2.56 KB
/
test_corpus.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
"""Tests for the Corpus class."""
import os
import tempfile
from sys import getsizeof

import pytest

from f8a_tagger.corpus import Corpus
def test_initial_state():
    """Verify that a freshly constructed Corpus is truthy and reports no content."""
    corpus = Corpus()
    # the new instance itself must evaluate as true
    assert corpus
    # nothing has been added yet, so both counters are expected to be zero
    assert corpus.get_size() == 0
    assert corpus.get_memory_usage() == 0
def test_add_method():
    """Verify that each call to Corpus.add() increases the size by one."""
    corpus = Corpus()
    assert corpus.get_size() == 0

    # "file1" is listed twice on purpose: names do not have to be unique
    names = ("file1", "file2", "file1")
    for expected_size, name in enumerate(names, start=1):
        # a fresh token list is handed over on every call
        corpus.add(name, ["token1", "token2", "token3"])
        assert corpus.get_size() == expected_size
def test_get_memory_usage_method():
    """Verify the value reported by Corpus.get_memory_usage().

    The expected value is the sum of ``sys.getsizeof`` over the token lists
    added so far. ``getsizeof`` is shallow, so only the list objects
    themselves are measured, not the strings they contain.
    """
    corpus = Corpus()

    corpus.add("file1", ["test"])
    expected = getsizeof(["test"])
    assert corpus.get_memory_usage() == expected

    # the second entry is expected to add to the running total
    corpus.add("file2", ["x", "y"])
    expected += getsizeof(["x", "y"])
    assert corpus.get_memory_usage() == expected
def test_dump_pickle_method():
    """Check the method Corpus.dump_pickle().

    The dump is written into a throw-away directory: a file left over from
    an earlier run can then no longer satisfy the existence check, and the
    test leaves nothing behind in the working directory.
    """
    corpus = Corpus()
    corpus.add("file1", ["test"])

    with tempfile.TemporaryDirectory() as workdir:
        filename = os.path.join(workdir, "serialized_output.dump")
        corpus.dump_pickle(filename)
        # the directory was empty, so the file can only come from dump_pickle()
        assert os.path.isfile(filename)
def test_dump_json_method():
    """Check the method Corpus.dump_json().

    The dump is written into a throw-away directory: a file left over from
    an earlier run can then no longer satisfy the existence check, and the
    test leaves nothing behind in the working directory.
    """
    corpus = Corpus()
    corpus.add("file1", ["test"])

    with tempfile.TemporaryDirectory() as workdir:
        filename = os.path.join(workdir, "serialized_output.json")
        corpus.dump_json(filename)
        # the directory was empty, so the file can only come from dump_json()
        assert os.path.isfile(filename)
def test_load_pickle_method():
    """Check the static method Corpus.load_pickle().

    A corpus with two entries is dumped and loaded again; the loaded object
    must expose the same names and token lists. The intermediate file lives
    in a temporary directory so the test leaves no artifacts behind.
    """
    corpus = Corpus()
    corpus.add("file1", ["test"])
    corpus.add("file2", ["foo", "bar"])

    with tempfile.TemporaryDirectory() as workdir:
        filename = os.path.join(workdir, "serialized_output_2.dump")
        corpus.dump_pickle(filename)
        loaded = Corpus.load_pickle(filename)

    assert loaded.get_size() == 2
    # the round trip is checked through the private attributes of Corpus
    assert "file1" in loaded._names
    assert "file2" in loaded._names
    assert ["test"] in loaded._entries
    assert ["foo", "bar"] in loaded._entries
def test_load_json_method():
    """Check the static method Corpus.load_json().

    A corpus with two entries is dumped and loaded again; the loaded object
    must expose the same names and token lists. The intermediate file lives
    in a temporary directory so the test leaves no artifacts behind.
    Passing ``None`` instead of a filename is expected to raise TypeError.
    """
    corpus = Corpus()
    corpus.add("file1", ["test"])
    corpus.add("file2", ["foo", "bar"])

    with tempfile.TemporaryDirectory() as workdir:
        filename = os.path.join(workdir, "serialized_output_2.json")
        corpus.dump_json(filename)
        loaded = Corpus.load_json(filename)

    assert loaded.get_size() == 2
    # the round trip is checked through the private attributes of Corpus
    assert "file1" in loaded._names
    assert "file2" in loaded._names
    assert ["test"] in loaded._entries
    assert ["foo", "bar"] in loaded._entries

    with pytest.raises(TypeError):
        Corpus.load_json(None)
if __name__ == '__main__':
    # allow running the module directly, without the pytest runner;
    # the tests are executed in the order in which they are defined
    for run_test in (
        test_initial_state,
        test_add_method,
        test_get_memory_usage_method,
        test_dump_pickle_method,
        test_dump_json_method,
        test_load_pickle_method,
        test_load_json_method,
    ):
        run_test()