Skip to content

Commit

Permalink
release: v1.1.0
Browse files Browse the repository at this point in the history
Testing
  • Loading branch information
severinsimmler committed Dec 23, 2018
2 parents aa1c62e + 788cad5 commit dfa82be
Show file tree
Hide file tree
Showing 5 changed files with 44 additions and 3 deletions.
2 changes: 1 addition & 1 deletion src/cophi/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,4 +46,4 @@
:module:`complexity` module.
"""

from cophi.api import document, corpus
from cophi.api import document, corpus, export
2 changes: 1 addition & 1 deletion src/cophi/__version__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
VERSION = (1, 0, 10)
VERSION = (1, 1, 0)

__version__ = ".".join(map(str, VERSION))
17 changes: 17 additions & 0 deletions src/cophi/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,3 +81,20 @@ def lazy_reading(filepaths):
ignore_index=True)
logger.info("Constructing Corpus object ...")
return cophi.model.Corpus(documents), metadata


def export(dtm, filepath, format="text"):
    """Export a document-term matrix.

    Parameters:
        dtm: A document-term matrix.
        filepath: Path to output file.
        format: File format. Possible values are `plaintext`/`text` or
            `svmlight`; the comparison is case-insensitive.

    Raises:
        ValueError: If `format` is none of the supported values.
    """
    # Normalize once so every branch compares against the same value.
    normalized = format.lower()
    if normalized in {"plaintext", "text"}:
        cophi.model.Corpus.plaintext(dtm, filepath)
    elif normalized == "svmlight":
        cophi.model.Corpus.svmlight(dtm, filepath)
    else:
        # Report the value exactly as the caller passed it.
        raise ValueError("'{}' is no supported file format.".format(format))
24 changes: 24 additions & 0 deletions tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,14 @@ def make_file(tmpdir, fname, content):
p.write(content)
return p

@pytest.fixture
def document():
    """Provide a ``cophi.model.Document`` built from the ``DOCUMENT`` text.

    NOTE(review): the second and third positional arguments are presumably
    the document title and the token pattern -- confirm against
    ``cophi.model.Document``.
    """
    doc = cophi.model.Document(DOCUMENT, "document", r"\w")
    return doc

@pytest.fixture
def corpus(document):
    """Provide a ``cophi.model.Corpus`` holding only the ``document`` fixture."""
    documents = [document]
    return cophi.model.Corpus(documents)

def test_document(tmpdir):
filepath = make_file(tmpdir, "document.txt", DOCUMENT)
document = cophi.document(str(filepath), token_pattern=r"\w")
Expand All @@ -21,3 +29,19 @@ def test_corpus(tmpdir):
corpus, metadata = cophi.corpus(directory)
assert metadata["parent"].iloc[0] == str(directory)
assert corpus.documents[0].text == DOCUMENT

def test_export(corpus, tmpdir):
    """Check ``cophi.export`` for each supported format and for a bad one.

    Output files are written into pytest's per-test ``tmpdir`` so that the
    test does not leave ``corpus.svmlight`` / ``corpus.txt`` behind in the
    current working directory.
    """
    directory = pathlib.Path(str(tmpdir))

    # svmlight export
    output = directory / "corpus.svmlight"
    cophi.export(corpus.dtm, output, "svmlight")
    assert output.exists()
    with output.open("r", encoding="utf-8") as file:
        assert file.read() == "document document a:1 b:1 c:1 d:1 e:1 f:1\n"

    # plaintext export
    output = directory / "corpus.txt"
    cophi.export(corpus.dtm, output, "text")
    assert output.exists()
    with output.open("r", encoding="utf-8") as file:
        assert file.read() == "document document a b c d e f\n"

    # an unsupported format must be rejected
    with pytest.raises(ValueError):
        cophi.export(corpus.dtm, output, "unknown")
2 changes: 1 addition & 1 deletion tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,4 +34,4 @@ def test_parameter():
parameter = cophi.utils._parameter(TOKENS, "entropy")
assert len(parameter) == 2
parameter = cophi.utils._parameter(TOKENS, "ttr")
assert len(parameter) == 2
assert len(parameter) == 2

0 comments on commit dfa82be

Please sign in to comment.