fix: imports
severinsimmler committed Apr 25, 2019
1 parent 11fdf4a commit 5c8c5a3
Showing 3 changed files with 47 additions and 47 deletions.
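
The fix is the same in all three test modules: drop the bare import cophi and import the submodule under test directly from cophi.text, then call its functions without the cophi. prefix. A minimal before/after sketch of the pattern (num_types and num_tokens are placeholder values here, not the constants the tests actually use):

    # Old style (removed by this commit): attribute access through the top-level
    # package, which presumably stopped resolving after the submodules moved
    # under cophi.text -- hence "fix: imports".
    import cophi

    num_types, num_tokens = 5, 8  # placeholders, not the test fixtures' values
    ttr = cophi.complexity.ttr(num_types, num_tokens)

    # New style (added): import the submodule explicitly and call it directly.
    from cophi.text import complexity

    ttr = complexity.ttr(num_types, num_tokens)
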
46 changes: 23 additions & 23 deletions tests/test_complexity.py
@@ -1,7 +1,7 @@
 import collections
 import pandas as pd
 import pytest
-import cophi
+from cophi.text import complexity


 P_STAR = 1
@@ -20,92 +20,92 @@ def frequency_spectrum():
     return pd.Series(freq_spectrum)

 def test_ttr():
-    ttr = cophi.complexity.ttr(NUM_TYPES, NUM_TOKENS)
+    ttr = complexity.ttr(NUM_TYPES, NUM_TOKENS)
     assert ttr == 0.625

 def test_guiraud_r():
-    guiraud_r = cophi.complexity.guiraud_r(NUM_TYPES, NUM_TOKENS)
+    guiraud_r = complexity.guiraud_r(NUM_TYPES, NUM_TOKENS)
     assert guiraud_r == 1.7677669529663687

 def test_herdan_c():
-    herdan_c = cophi.complexity.herdan_c(NUM_TYPES, NUM_TOKENS)
+    herdan_c = complexity.herdan_c(NUM_TYPES, NUM_TOKENS)
     assert herdan_c == 0.7739760316291208

 def test_dugast_k():
-    dugast_k = cophi.complexity.dugast_k(NUM_TYPES, NUM_TOKENS)
+    dugast_k = complexity.dugast_k(NUM_TYPES, NUM_TOKENS)
     assert dugast_k == 2.198387244399397

 def test_maas_a2():
-    maas_a2 = cophi.complexity.maas_a2(NUM_TYPES, NUM_TOKENS)
+    maas_a2 = complexity.maas_a2(NUM_TYPES, NUM_TOKENS)
     assert maas_a2 == 0.10869455276357046

 def test_dugast_u():
-    dugast_u = cophi.complexity.dugast_u(NUM_TYPES, NUM_TOKENS)
+    dugast_u = complexity.dugast_u(NUM_TYPES, NUM_TOKENS)
     assert dugast_u == 9.200093055032609

 def test_tuldava_ln():
-    tuldava_ln = cophi.complexity.tuldava_ln(NUM_TYPES, NUM_TOKENS)
+    tuldava_ln = complexity.tuldava_ln(NUM_TYPES, NUM_TOKENS)
     assert tuldava_ln == -0.4616624130844683

 def test_brunet_w():
-    brunet_w = cophi.complexity.brunet_w(NUM_TYPES, NUM_TOKENS)
+    brunet_w = complexity.brunet_w(NUM_TYPES, NUM_TOKENS)
     assert brunet_w == 15.527998381095463

 def test_cttr():
-    cttr = cophi.complexity.cttr(NUM_TYPES, NUM_TOKENS)
+    cttr = complexity.cttr(NUM_TYPES, NUM_TOKENS)
     assert cttr == 1.25

 def test_summer_s():
-    summer_s = cophi.complexity.summer_s(NUM_TYPES, NUM_TOKENS)
+    summer_s = complexity.summer_s(NUM_TYPES, NUM_TOKENS)
     assert summer_s == 0.650027873362293

 def test_sichel_s(frequency_spectrum):
-    sichel_s = cophi.complexity.sichel_s(NUM_TYPES, frequency_spectrum)
+    sichel_s = complexity.sichel_s(NUM_TYPES, frequency_spectrum)
     assert sichel_s == 0.2

 def test_michea_m(frequency_spectrum):
-    michea_m = cophi.complexity.michea_m(NUM_TYPES, frequency_spectrum)
+    michea_m = complexity.michea_m(NUM_TYPES, frequency_spectrum)
     assert michea_m == 5.0

 def test_honore_h(frequency_spectrum):
-    honore_h = cophi.complexity.honore_h(NUM_TYPES, NUM_TOKENS, frequency_spectrum)
+    honore_h = complexity.honore_h(NUM_TYPES, NUM_TOKENS, frequency_spectrum)
     assert honore_h == 519.8603854199589

 def test_entropy(frequency_spectrum):
-    entropy = cophi.complexity.entropy(NUM_TOKENS, frequency_spectrum)
+    entropy = complexity.entropy(NUM_TOKENS, frequency_spectrum)
     assert entropy == 1.4941751382893083

 def test_yule_k(frequency_spectrum):
-    yule_k = cophi.complexity.yule_k(NUM_TOKENS, frequency_spectrum)
+    yule_k = complexity.yule_k(NUM_TOKENS, frequency_spectrum)
     assert yule_k == -1250.0

 def test_simpson_d(frequency_spectrum):
-    simpson_d = cophi.complexity.simpson_d(NUM_TOKENS, frequency_spectrum)
+    simpson_d = complexity.simpson_d(NUM_TOKENS, frequency_spectrum)
     assert simpson_d == 0.05357142857142857

 def test_herdan_vm(frequency_spectrum):
-    herdan_vm = cophi.complexity.herdan_vm(NUM_TYPES, NUM_TOKENS, frequency_spectrum)
+    herdan_vm = complexity.herdan_vm(NUM_TYPES, NUM_TOKENS, frequency_spectrum)
     assert herdan_vm == 0.22360679774997894

 def test_orlov_z(frequency_spectrum):
-    orlov_z = cophi.complexity.orlov_z(NUM_TYPES, NUM_TOKENS, frequency_spectrum)
+    orlov_z = complexity.orlov_z(NUM_TYPES, NUM_TOKENS, frequency_spectrum)
     assert orlov_z == 2.583892154363366

 def test_get_z():
-    z = cophi.complexity._get_z(NUM_TOKENS, NUM_TYPES, P_STAR, Z)
+    z = complexity._get_z(NUM_TOKENS, NUM_TYPES, P_STAR, Z)
     assert z == 0.33333333333333304

 def test_derivative():
-    d = cophi.complexity._derivative(NUM_TOKENS, NUM_TYPES, P_STAR, Z)
+    d = complexity._derivative(NUM_TOKENS, NUM_TYPES, P_STAR, Z)
     assert d == -2.2152246080002977

 def test_ci():
     results = [1, 2, 3, 4, 5]
-    ci = cophi.complexity.ci(results)
+    ci = complexity.ci(results)
     assert ci == 1.2396128427860047

 def test_wrapper():
     for measure in MEASURES:
-        function = cophi.complexity.wrapper(measure)
+        function = complexity.wrapper(measure)
         assert callable(function)
         assert function.__name__ == measure
28 changes: 14 additions & 14 deletions tests/test_model.py
@@ -4,7 +4,7 @@
 import lxml
 import numpy as np
 import pandas as pd
-import cophi
+from cophi.text import model


 DOCUMENT = "AAABBCCCDEF"
@@ -19,25 +19,25 @@ def make_file(tmpdir, fname, content):
 @pytest.fixture
 def textfile_suffix(tmpdir):
     p = make_file(tmpdir, "document.txt", DOCUMENT)
-    return cophi.model.Textfile(str(p), treat_as=None)
+    return model.Textfile(str(p), treat_as=None)

 @pytest.fixture
 def textfile_txt(tmpdir):
     p = make_file(tmpdir, "document.txt", DOCUMENT)
-    return cophi.model.Textfile(str(p), treat_as=".txt")
+    return model.Textfile(str(p), treat_as=".txt")

 @pytest.fixture
 def textfile_xml(tmpdir):
     p = make_file(tmpdir, "document.xml", "<xml>{}</xml>".format(DOCUMENT))
-    return cophi.model.Textfile(str(p), treat_as=".xml")
+    return model.Textfile(str(p), treat_as=".xml")

 @pytest.fixture
 def document():
-    return cophi.model.Document(DOCUMENT, "document", r"\w")
+    return model.Document(DOCUMENT, "document", r"\w")

 @pytest.fixture
 def corpus(document):
-    return cophi.model.Corpus([document])
+    return model.Corpus([document])


 class TestTextfile:
@@ -80,7 +80,7 @@ def test_stringify(self, textfile_xml):

     def test_value_error(self, tmpdir):
         with pytest.raises(ValueError):
-            cophi.model.Textfile("raises", treat_as="error")
+            model.Textfile("raises", treat_as="error")


 class TestDocument:
@@ -95,15 +95,15 @@ def test_attributes(self, document):

     def test_ngram_value_error(self):
         with pytest.raises(ValueError):
-            cophi.model.Document(DOCUMENT, n=0)
+            model.Document(DOCUMENT, n=0)

     def test_ngrams(self):
-        document = cophi.model.Document(DOCUMENT, token_pattern=r"\w", n=2)
+        document = model.Document(DOCUMENT, token_pattern=r"\w", n=2)
         assert list(document.ngrams)[0] == "a a"
-        document = cophi.model.Document(DOCUMENT, token_pattern=r"\w", n=1)
+        document = model.Document(DOCUMENT, token_pattern=r"\w", n=1)
         assert document.ngrams == LOWERCASE_TOKENS
         with pytest.raises(ValueError):
-            document = cophi.model.Document(DOCUMENT, token_pattern=r"\w", n=None)
+            document = model.Document(DOCUMENT, token_pattern=r"\w", n=None)
             document.ngrams == LOWERCASE_TOKENS

     def test_types(self, document):
@@ -176,7 +176,7 @@ def test_complexity(self, document):
 class TestCorpus:
     def test_sparse_error(self, document):
         with pytest.raises(NotImplementedError):
-            cophi.model.Corpus([document], sparse=True)
+            model.Corpus([document], sparse=True)

     def test_dtm(self, corpus):
         assert corpus.dtm.sum().sum() == len(TOKENS)
@@ -289,14 +289,14 @@ def test_orlov_z(self, corpus):

     def test_svmlight(self, corpus):
         output = pathlib.Path("corpus.svmlight")
-        cophi.model.Corpus.svmlight(corpus.dtm, output)
+        model.Corpus.svmlight(corpus.dtm, output)
         assert output.exists()
         with output.open("r", encoding="utf-8") as file:
             assert file.read() == "document document a:3 b:2 c:3 d:1 e:1 f:1\n"

     def test_plaintext(self, corpus):
         output = pathlib.Path("corpus.txt")
-        cophi.model.Corpus.plaintext(corpus.dtm, output)
+        model.Corpus.plaintext(corpus.dtm, output)
         assert output.exists()
         with output.open("r", encoding="utf-8") as file:
             assert file.read() == "document document a a a b b c c c d e f\n"
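
For orientation, everything in the model tests above is built from the single string DOCUMENT. A minimal sketch of that flow with the new import style, using only calls that appear in the diff (it assumes nothing beyond an installed cophi package):

    from cophi.text import model

    DOCUMENT = "AAABBCCCDEF"

    # Same arguments as the document fixture: text, title, token pattern.
    document = model.Document(DOCUMENT, "document", r"\w")

    # Wrap the document in a corpus to get the document-term matrix.
    corpus = model.Corpus([document])
    print(corpus.dtm.sum().sum())  # the total that test_dtm() compares against len(TOKENS)
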
20 changes: 10 additions & 10 deletions tests/test_utils.py
@@ -1,37 +1,37 @@
 import collections
 import pytest
-import cophi
+from cophi.text import utils


 PARAGRAPHS = [["A B C D E F", "G H I J K L"]]
 DOCUMENT = PARAGRAPHS[0][0]
 TOKENS = DOCUMENT.split(" ")

 def test_construct_ngrams():
-    ngrams = cophi.utils.construct_ngrams(TOKENS)
+    ngrams = utils.construct_ngrams(TOKENS)
     assert list(ngrams) == ["A B", "B C", "C D", "D E", "E F"]

 def test_find_tokens():
-    tokens = cophi.utils.find_tokens(DOCUMENT, r"\w")
+    tokens = utils.find_tokens(DOCUMENT, r"\w")
     assert list(tokens) == ["A", "B", "C", "D", "E", "F"]
     # Stop tokenizing after the first token:
-    tokens = cophi.utils.find_tokens(DOCUMENT, r"\w", 1)
+    tokens = utils.find_tokens(DOCUMENT, r"\w", 1)
     assert list(tokens) == ["A"]

 def test_lowercase_tokens():
-    tokens = cophi.utils.lowercase_tokens(TOKENS)
+    tokens = utils.lowercase_tokens(TOKENS)
     assert tokens == ["a", "b", "c", "d", "e", "f"]

 def test_segment_fuzzy():
-    segments = cophi.utils.segment_fuzzy(PARAGRAPHS, 1)
+    segments = utils.segment_fuzzy(PARAGRAPHS, 1)
     assert list(segments) == [[["A B C D E F"]], [["G H I J K L"]]]

 def test_parameter():
-    parameter = cophi.utils._parameter(TOKENS, "sichel_s")
+    parameter = utils._parameter(TOKENS, "sichel_s")
     assert len(parameter) == 2
-    parameter = cophi.utils._parameter(TOKENS, "honore_h")
+    parameter = utils._parameter(TOKENS, "honore_h")
     assert len(parameter) == 3
-    parameter = cophi.utils._parameter(TOKENS, "entropy")
+    parameter = utils._parameter(TOKENS, "entropy")
     assert len(parameter) == 2
-    parameter = cophi.utils._parameter(TOKENS, "ttr")
+    parameter = utils._parameter(TOKENS, "ttr")
     assert len(parameter) == 2
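
Likewise for the helpers above; a short sketch with the new import, with arguments and expected results copied from the tests:

    from cophi.text import utils

    DOCUMENT = "A B C D E F"

    tokens = list(utils.find_tokens(DOCUMENT, r"\w"))  # ["A", "B", "C", "D", "E", "F"]
    lowered = utils.lowercase_tokens(tokens)           # ["a", "b", "c", "d", "e", "f"]
    bigrams = list(utils.construct_ngrams(tokens))     # ["A B", "B C", "C D", "D E", "E F"]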
