/
test_complexity.py
111 lines (86 loc) · 3.39 KB
/
test_complexity.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
import collections
import pandas as pd
import pytest
from cophi.text import complexity
# Third and fourth positional arguments handed to the private helpers
# ``complexity._get_z`` and ``complexity._derivative`` in their tests.
P_STAR = 1
Z = 2

# Size of the shared sample used throughout this module; it matches the
# token list built by the ``frequency_spectrum`` fixture (5 distinct
# types across 8 tokens).
NUM_TYPES = 5
NUM_TOKENS = 8

# Measure names that ``test_wrapper`` looks up through ``complexity.wrapper``.
MEASURES = {
    "ttr",
    "guiraud_r",
    "herdan_c",
    "dugast_k",
    "maas_a2",
    "dugast_u",
    "tuldava_ln",
    "brunet_w",
    "cttr",
    "summer_s",
    "sichel_s",
    "michea_m",
    "honore_h",
    "entropy",
    "yule_k",
    "simpson_d",
    "herdan_vm",
    "orlov_z",
}
@pytest.fixture
def frequency_spectrum():
    """Return the frequency spectrum of a small fixed sample as a Series.

    The sample holds 8 tokens over 5 types ("A" x3, "B" x2, "C", "D", "E").
    The resulting ``pandas.Series`` is indexed by frequency, and each value
    is the number of types that occur with that frequency:
    ``{3: 1, 2: 1, 1: 3}``.
    """
    sample = ["A", "A", "A", "B", "B", "C", "D", "E"]
    # First count occurrences per type, then count how often each
    # occurrence count itself appears.
    per_type = collections.Counter(sample)
    spectrum = collections.Counter(per_type.values())
    return pd.Series(spectrum)
def test_ttr():
    """``complexity.ttr`` yields 0.625 for 5 types and 8 tokens."""
    expected = 0.625
    observed = complexity.ttr(NUM_TYPES, NUM_TOKENS)
    assert observed == expected
def test_guiraud_r():
    """Check ``complexity.guiraud_r`` for 5 types and 8 tokens."""
    guiraud_r = complexity.guiraud_r(NUM_TYPES, NUM_TOKENS)
    # Compare with a tight tolerance instead of ``==``: the expected value
    # is a rounded float, so bit-exact equality can break on a different
    # platform or numeric-library version without any real regression.
    assert guiraud_r == pytest.approx(1.7677669529663687, rel=1e-12)
def test_herdan_c():
    """Check ``complexity.herdan_c`` for 5 types and 8 tokens."""
    herdan_c = complexity.herdan_c(NUM_TYPES, NUM_TOKENS)
    # Compare with a tight tolerance instead of ``==``: the expected value
    # is a rounded float, so bit-exact equality can break on a different
    # platform or numeric-library version without any real regression.
    assert herdan_c == pytest.approx(0.7739760316291208, rel=1e-12)
def test_dugast_k():
    """Check ``complexity.dugast_k`` for 5 types and 8 tokens."""
    dugast_k = complexity.dugast_k(NUM_TYPES, NUM_TOKENS)
    # Compare with a tight tolerance instead of ``==``: the expected value
    # is a rounded float, so bit-exact equality can break on a different
    # platform or numeric-library version without any real regression.
    assert dugast_k == pytest.approx(2.198387244399397, rel=1e-12)
def test_maas_a2():
    """Check ``complexity.maas_a2`` for 5 types and 8 tokens."""
    maas_a2 = complexity.maas_a2(NUM_TYPES, NUM_TOKENS)
    # Compare with a tight tolerance instead of ``==``: the expected value
    # is a rounded float, so bit-exact equality can break on a different
    # platform or numeric-library version without any real regression.
    assert maas_a2 == pytest.approx(0.10869455276357046, rel=1e-12)
def test_dugast_u():
    """Check ``complexity.dugast_u`` for 5 types and 8 tokens."""
    dugast_u = complexity.dugast_u(NUM_TYPES, NUM_TOKENS)
    # Compare with a tight tolerance instead of ``==``: the expected value
    # is a rounded float, so bit-exact equality can break on a different
    # platform or numeric-library version without any real regression.
    assert dugast_u == pytest.approx(9.200093055032609, rel=1e-12)
def test_tuldava_ln():
    """Check ``complexity.tuldava_ln`` for 5 types and 8 tokens."""
    tuldava_ln = complexity.tuldava_ln(NUM_TYPES, NUM_TOKENS)
    # Compare with a tight tolerance instead of ``==``: the expected value
    # is a rounded float, so bit-exact equality can break on a different
    # platform or numeric-library version without any real regression.
    assert tuldava_ln == pytest.approx(-0.4616624130844683, rel=1e-12)
def test_brunet_w():
    """Check ``complexity.brunet_w`` for 5 types and 8 tokens."""
    brunet_w = complexity.brunet_w(NUM_TYPES, NUM_TOKENS)
    # Compare with a tight tolerance instead of ``==``: the expected value
    # is a rounded float, so bit-exact equality can break on a different
    # platform or numeric-library version without any real regression.
    assert brunet_w == pytest.approx(15.527998381095463, rel=1e-12)
def test_cttr():
    """``complexity.cttr`` yields 1.25 for 5 types and 8 tokens."""
    expected = 1.25
    observed = complexity.cttr(NUM_TYPES, NUM_TOKENS)
    assert observed == expected
def test_summer_s():
    """Check ``complexity.summer_s`` for 5 types and 8 tokens."""
    summer_s = complexity.summer_s(NUM_TYPES, NUM_TOKENS)
    # Compare with a tight tolerance instead of ``==``: the expected value
    # is a rounded float, so bit-exact equality can break on a different
    # platform or numeric-library version without any real regression.
    assert summer_s == pytest.approx(0.650027873362293, rel=1e-12)
def test_sichel_s(frequency_spectrum):
    """Check ``complexity.sichel_s`` for 5 types and the fixture's spectrum."""
    sichel_s = complexity.sichel_s(NUM_TYPES, frequency_spectrum)
    # 0.2 has no exact binary representation, so compare with a tight
    # tolerance rather than relying on bit-exact float equality.
    assert sichel_s == pytest.approx(0.2, rel=1e-12)
def test_michea_m(frequency_spectrum):
    """``complexity.michea_m`` yields 5.0 for 5 types and the fixture's spectrum."""
    expected = 5.0
    observed = complexity.michea_m(NUM_TYPES, frequency_spectrum)
    assert observed == expected
def test_honore_h(frequency_spectrum):
    """Check ``complexity.honore_h`` for 5 types, 8 tokens and the fixture's spectrum."""
    honore_h = complexity.honore_h(NUM_TYPES, NUM_TOKENS, frequency_spectrum)
    # Compare with a tight tolerance instead of ``==``: the expected value
    # is a rounded float, so bit-exact equality can break on a different
    # platform or numeric-library version without any real regression.
    assert honore_h == pytest.approx(519.8603854199589, rel=1e-12)
def test_entropy(frequency_spectrum):
    """Check ``complexity.entropy`` for 8 tokens and the fixture's spectrum."""
    entropy = complexity.entropy(NUM_TOKENS, frequency_spectrum)
    # Compare with a tight tolerance instead of ``==``: the expected value
    # is a rounded float, so bit-exact equality can break on a different
    # platform or numeric-library version without any real regression.
    assert entropy == pytest.approx(1.4941751382893083, rel=1e-12)
def test_yule_k(frequency_spectrum):
    """Check ``complexity.yule_k`` for 8 tokens and the fixture's spectrum."""
    # NOTE(review): the textbook Yule's K, 10^4 * (sum(i^2 * V_i) - N) / N^2,
    # cannot be negative, and for this sample (spectrum {3: 1, 2: 1, 1: 3},
    # N = 8) it evaluates to +1250. The negative expectation below presumably
    # pins the current implementation's output -- confirm whether the
    # implementation (and then this expected value) needs correcting.
    yule_k = complexity.yule_k(NUM_TOKENS, frequency_spectrum)
    # Compare with a tight tolerance instead of ``==`` so the check does not
    # depend on bit-exact float summation.
    assert yule_k == pytest.approx(-1250.0, rel=1e-12)
def test_simpson_d(frequency_spectrum):
    """Check ``complexity.simpson_d`` for 8 tokens and the fixture's spectrum."""
    simpson_d = complexity.simpson_d(NUM_TOKENS, frequency_spectrum)
    # Compare with a tight tolerance instead of ``==``: the expected value
    # is a rounded float, so bit-exact equality can break on a different
    # platform or numeric-library version without any real regression.
    assert simpson_d == pytest.approx(0.05357142857142857, rel=1e-12)
def test_herdan_vm(frequency_spectrum):
    """Check ``complexity.herdan_vm`` for 5 types, 8 tokens and the fixture's spectrum."""
    herdan_vm = complexity.herdan_vm(NUM_TYPES, NUM_TOKENS, frequency_spectrum)
    # Compare with a tight tolerance instead of ``==``: the expected value
    # is a rounded float, so bit-exact equality can break on a different
    # platform or numeric-library version without any real regression.
    assert herdan_vm == pytest.approx(0.22360679774997894, rel=1e-12)
def test_orlov_z(frequency_spectrum):
    """Check ``complexity.orlov_z`` for 5 types, 8 tokens and the fixture's spectrum."""
    orlov_z = complexity.orlov_z(NUM_TYPES, NUM_TOKENS, frequency_spectrum)
    # Compare with a tight tolerance instead of ``==``: the expected value
    # is a rounded float, so bit-exact equality can break on a different
    # platform or numeric-library version without any real regression.
    assert orlov_z == pytest.approx(2.583892154363366, rel=1e-12)
def test_get_z():
    """Check the private helper ``complexity._get_z``.

    It is called with ``(NUM_TOKENS, NUM_TYPES, P_STAR, Z)``, i.e.
    ``(8, 5, 1, 2)``.
    """
    z = complexity._get_z(NUM_TOKENS, NUM_TYPES, P_STAR, Z)
    # The expected literal already carries accumulated rounding error (it is
    # not the double nearest to 1/3), so bit-exact equality is brittle;
    # compare with a tight tolerance instead.
    assert z == pytest.approx(0.33333333333333304, rel=1e-12)
def test_derivative():
    """Check the private helper ``complexity._derivative``.

    It is called with ``(NUM_TOKENS, NUM_TYPES, P_STAR, Z)``, i.e.
    ``(8, 5, 1, 2)``.
    """
    d = complexity._derivative(NUM_TOKENS, NUM_TYPES, P_STAR, Z)
    # Compare with a tight tolerance instead of ``==``: the expected value
    # is a rounded float, so bit-exact equality can break on a different
    # platform or numeric-library version without any real regression.
    assert d == pytest.approx(-2.2152246080002977, rel=1e-12)
def test_ci():
    """Check ``complexity.ci`` for the sample results ``[1, 2, 3, 4, 5]``."""
    results = [1, 2, 3, 4, 5]
    ci = complexity.ci(results)
    # Compare with a tight tolerance instead of ``==``: the expected value
    # is a rounded float, so bit-exact equality can break on a different
    # platform or numeric-library version without any real regression.
    assert ci == pytest.approx(1.2396128427860047, rel=1e-12)
def test_wrapper():
    """Every name in ``MEASURES`` resolves to a callable carrying that name.

    ``complexity.wrapper`` is called once per measure name; the returned
    object must be callable and its ``__name__`` must equal the name that
    was requested.
    """
    for name in MEASURES:
        resolved = complexity.wrapper(name)
        assert callable(resolved)
        assert resolved.__name__ == name