Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
40cd9fd
commit 7d9289b
Showing
17 changed files
with
380 additions
and
144 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -21,3 +21,6 @@ _build | |
htmlcov | ||
tests.xml | ||
.pytest_cache | ||
notebooks/ddw-0.4.6.jar | ||
notebooks/ddw.log | ||
notebooks/configs/* |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,3 @@ | ||
VERSION = (1, 2, 3) | ||
VERSION = (1, 3, 0) | ||
|
||
__version__ = ".".join(map(str, VERSION)) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
""" | ||
cophi.dkpro | ||
~~~~~~~~~~~ | ||
This module allows you to communicate with the Java CLI tool | ||
DARIAH DKPro-Wrapper in Python. | ||
""" | ||
|
||
from cophi.dkpro.api import process, pipe | ||
from cophi.dkpro import utils, model, core |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
""" | ||
cophi.dkpro.api | ||
~~~~~~~~~~~~~~~ | ||
This module implements the high-level API for the DARIAH-DKPro-Wrapper. | ||
""" | ||
|
||
from pathlib import Path | ||
import tempfile | ||
|
||
from cophi import dkpro, text | ||
|
||
|
||
def process(path, jar, language, reader, xms="4g"):
    """Process a text file with the DARIAH-DKPro-Wrapper.

    This is a generator, so nothing is executed until the first item is
    requested.

    Parameters:
        path (str): Path to text file.
        jar (str): Path to JAR file.
        language (str): Language of the text.
        reader (str): File reader, either `text` or `xml`.
        xms (str): Size to allocate by JVM.

    Yields:
        dkpro.model.Document: One object per CSV file found in the output
            directory, in sorted path order.
    """
    output = Path(tempfile.gettempdir(), "dariah-dkpro-output")
    # `exist_ok` replaces the former check-then-create sequence, which could
    # fail when another process created the directory in between.
    output.mkdir(exist_ok=True)

    # NOTE(review): this directory is shared between calls and is not
    # emptied here, so CSV files left behind by earlier runs are presumably
    # yielded again below -- confirm whether that is intended.
    wrapper = dkpro.model.DKPro(jar=jar, xms=xms)
    wrapper.process(input=path,
                    output=output,
                    language=language,
                    reader=reader)

    # Sorting makes the order of the yielded documents deterministic;
    # `Path.glob` itself gives no ordering guarantee.
    for csv_file in sorted(output.glob("*.csv")):
        yield dkpro.model.Document(csv_file)
|
||
|
||
def pipe(filepath, jar, language, lemma, pos, **kwargs):
    """Pipe a file through the DARIAH-DKPro-Wrapper.

    Parameters:
        filepath (str): Path to the text file.
        jar (str): Path to JAR file.
        language (str): Language of the text.
        lemma: If truthy, the `Lemma` column is used, otherwise `Token`.
        pos: If truthy, passed to the document's `filter` method
            (presumably part-of-speech tags -- confirm against
            `dkpro.model.Document`); otherwise the unfiltered `raw`
            table is used.
        **kwargs: Forwarded to `text.model.Document`.

    Returns:
        A `text.model.Document` built from the first document yielded by
        `process`, or None if nothing was yielded.
    """
    for doc in process(filepath, jar, language, "text"):
        # Read the title from the document itself *before* switching to its
        # table. The previous code rebound `doc` first and then asked the
        # table for `.name`, which presumably is not available there.
        title = doc.name
        table = doc.filter(pos) if pos else doc.raw
        column = "Lemma" if lemma else "Token"
        content = " ".join(table[column])
        # Only the first document is used; the remaining ones are ignored.
        return text.model.Document(content, title=title, **kwargs)
    return None
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
""" | ||
cophi.dkpro.core | ||
~~~~~~~~~~~~~~~~ | ||
This module implements the core functions of the DKPro module. | ||
""" | ||
|
||
import csv | ||
from pathlib import Path | ||
|
||
import pandas as pd | ||
|
||
from cophi import dkpro | ||
|
||
|
||
def call(jar, xms="4g", **parameters):
    """Call DARIAH DKPro-Wrapper.

    Builds a `java -Xms<xms> -jar <jar> ...` argument list and hands it to
    `dkpro.utils.call`.

    Parameters:
        jar (str): Path to jarfile.
        xms (str): Initial memory allocation pool for Java Virtual Machine.
        **parameters: Additional parameters for DARIAH DKPro-Wrapper. Each
            keyword becomes a `-<keyword>` option; the keywords `filepath`,
            `directory`, `path` and `corpus` are synonyms for `-input`.
            A falsy value adds the option without a value.

    Returns:
        Whatever `dkpro.utils.call` returns for the assembled arguments.
    """
    # Keywords accepted as synonyms for the `-input` option.
    input_aliases = {"filepath", "directory", "path", "corpus"}

    # Basic subprocess command.
    command = ["java", "-Xms{}".format(xms), "-jar", jar]

    for name, value in parameters.items():
        option = "-input" if name in input_aliases else "-{}".format(name)
        command.append(option)
        # Falsy values yield a bare option without an argument.
        if value:
            command.append(str(value))

    return dkpro.utils.call(command)
Oops, something went wrong.