Skip to content

Commit

Permalink
Merge tag '0.9.0' into develop
Browse files Browse the repository at this point in the history
no message
  • Loading branch information
emfomy committed Jul 21, 2020
2 parents 758269f + fc0dceb commit d1cc7e8
Show file tree
Hide file tree
Showing 63 changed files with 1,847 additions and 1,400 deletions.
2 changes: 2 additions & 0 deletions .pylintrc
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
[MASTER]

# Use pylint 2.2.3

disable =
arguments-differ,
bad-continuation,
Expand Down
11 changes: 11 additions & 0 deletions DEVELOP.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Release TODO
- change version number
- make clean
- make lint
- make doc
- make tox
- make tox-report
- make upload

# Requirements
Make sure test/requirements.txt matches setup.py.
21 changes: 13 additions & 8 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,17 +1,18 @@
PY = python3
RM = rm -rf
LINT = pylint --rcfile=./.pylintrc
TWINE = twine
TOX = tox
LINT = pylint --rcfile=./.pylintrc

.PHONY: all check dist sdist testall test lint doc upload clean
.PHONY: all check dist sdist test tox tox-v tox-report lint doc upload clean

all: dist check testall
all: dist check test

dist: sdist bdist_wheel

testall: test lint
test: tox lint

sdist bdist_wheel test:
sdist bdist_wheel:
$(PY) setup.py $@

lint:
Expand All @@ -20,6 +21,9 @@ lint:
check:
$(TWINE) check dist/*

tox tox-v tox-report:
( cd test ; make $@ )

doc:
( cd docs ; make clean ; make html )

Expand All @@ -28,6 +32,7 @@ upload: dist check
$(TWINE) upload --repository-url https://test.pypi.org/legacy/ dist/* --verbose

clean:
( cd docs ; make clean )
$(PY) setup.py clean -a
$(RM) build dist *.egg-info *.so __pycache__
- ( cd docs ; make clean )
- ( cd test ; make clean )
- $(PY) setup.py clean -a
- $(RM) build dist *.egg-info __pycache__
13 changes: 6 additions & 7 deletions README.rst
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
CKIP CoreNLP Toolkits
---------------------
CKIP CoreNLP Toolkit
--------------------

Features
^^^^^^^^
Expand All @@ -8,7 +8,7 @@ Features
- Word Segmentation
- Part-of-Speech Tagging
- Named-Entity Recognition
- Sentence Parsing
- Constituency Parsing
- Coreference Resolution

Git
Expand Down Expand Up @@ -105,13 +105,13 @@ Driver Built-in CkipTagger CkipClassic
Sentence Segmentation ✔
Word Segmentation† ✔ ✔
Part-of-Speech Tagging† ✔ ✔
Sentence Parsing ✔
Constituency Parsing ✔
Named-Entity Recognition ✔
Coreference Resolution‡ ✔ ✔ ✔
================================ ======== ========== ===========

- † These drivers require only one of the two backends.
- ‡ Coreference implementation does not require any backend, but requires results from word segmentation, part-of-speech tagging, sentence parsing, and named-entity recognition.
- ‡ Coreference implementation does not require any backend, but requires results from word segmentation, part-of-speech tagging, constituency parsing, and named-entity recognition.

Installation via Pip
^^^^^^^^^^^^^^^^^^^^
Expand All @@ -123,8 +123,7 @@ Installation via Pip
Usage
-----

- See https://ckipnlp.readthedocs.io/en/latest/main/usage.html for Usage.
- See https://ckipnlp.readthedocs.io/en/latest/_api/ckipnlp.html for API details.
See https://ckipnlp.readthedocs.io/ for API details.

License
-------
Expand Down
4 changes: 2 additions & 2 deletions ckipnlp/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,15 @@
# -*- coding:utf-8 -*-

"""
The Official CKIP CoreNLP Toolkits.
The Official CKIP CoreNLP Toolkit.
"""

__author_name__ = 'Mu Yang'
__author_email__ = 'emfomy@gmail.com'
__copyright__ = '2018-2020 CKIP Lab'

__title__ = 'CKIPNLP'
__version__ = '0.9.0.dev'
__version__ = '0.9.0'
__description__ = 'CKIP CoreNLP'
__license__ = 'CC BY-NC-SA 4.0'

Expand Down
4 changes: 2 additions & 2 deletions ckipnlp/container/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@
from .text import *
from .seg import *
from .ner import *
from .parsed import *
from .parse import *
from .coref import *

from .util.wspos import *
from .util.parsed_tree import *
from .util.parse_tree import *
18 changes: 9 additions & 9 deletions ckipnlp/container/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def from_text(cls, data):
----------
data : str
"""
return NotImplemented
return NotImplemented # pragma: no cover

@_abstractmethod
def to_text(self):
Expand All @@ -46,33 +46,33 @@ def to_text(self):
-------
str
"""
return NotImplemented
return NotImplemented # pragma: no cover

########################################################################################################################

@classmethod
@_abstractmethod
def from_list(cls, data):
"""Construct an instance from python built-in containers."""
return NotImplemented
return NotImplemented # pragma: no cover

@_abstractmethod
def to_list(self):
"""Transform to python built-in containers."""
return NotImplemented
return NotImplemented # pragma: no cover

########################################################################################################################

@classmethod
@_abstractmethod
def from_dict(cls, data):
"""Construct an instance from python built-in containers."""
return NotImplemented
return NotImplemented # pragma: no cover

@_abstractmethod
def to_dict(self):
"""Transform to python built-in containers."""
return NotImplemented
return NotImplemented # pragma: no cover

########################################################################################################################

Expand Down Expand Up @@ -106,11 +106,11 @@ class BaseTuple(Base, metaclass=_ABCMeta):
@classmethod
@_abstractmethod
def from_text(cls, data):
return NotImplemented
return NotImplemented # pragma: no cover

@_abstractmethod
def to_text(self):
return NotImplemented
return NotImplemented # pragma: no cover

########################################################################################################################

Expand Down Expand Up @@ -152,7 +152,7 @@ def to_dict(self):
-------
dict
"""
return self._asdict() # pylint: disable=no-member
return dict(self._asdict()) # pylint: disable=no-member

################################################################################################################################

Expand Down
90 changes: 45 additions & 45 deletions ckipnlp/container/coref.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ class CorefToken(_BaseTuple, _CorefToken):
* `'zero'`: null element coreference target.
idx : Tuple[int, int]
the node indexes (clause index, token index) in parsed tree.
the node indexes (clause index, token index) in parse tree.
**idx[1]** = `None` if this node is a null element or the punctuations.
Note
Expand All @@ -67,7 +67,7 @@ class CorefToken(_BaseTuple, _CorefToken):
[
'畢卡索', # token word
(0, 'source'), # coref ID and type
(2, 0), # node index
(2, 2), # node index
]
Dict format
Expand All @@ -78,10 +78,13 @@ class CorefToken(_BaseTuple, _CorefToken):
{
'word': '畢卡索', # token word
'coref': (0, 'source'), # coref ID and type
'idx': (2, 0), # node index
'idx': (2, 2), # node index
}
"""

def __new__(cls, word, coref, idx, **kwargs): # pylint: disable=signature-differs
return super().__new__(cls, word, tuple(coref) if coref else None, tuple(idx), **kwargs)

from_text = NotImplemented

def to_text(self):
Expand All @@ -108,13 +111,13 @@ class CorefSentence(_BaseSentence):
.. code-block:: python
[
[ '「', None, (0, 0,) ],
[ '完蛋', None, (1, 0,) ],
[ '了', None, (1, 1,) ],
[ '!」', None, (1, 2,) ],
[ '畢卡索', (0, 'source'), (2, 0,), ],
[ '他', (0, 'target'), (2, 1,), ],
[ '想', None, (2, 2,), ],
[ '「', None, (0, None,), ],
[ '完蛋', None, (1, 0,), ],
[ '了', None, (1, 1,), ],
[ '!」', None, (1, None,), ],
[ '畢卡索', (0, 'source'), (2, 2,), ],
[ '他', (0, 'target'), (2, 3,), ],
[ '想', None, (2, 4,), ],
]
Dict format
Expand All @@ -123,23 +126,20 @@ class CorefSentence(_BaseSentence):
.. code-block:: python
[
{ 'word': '「', 'coref': None, 'idx': (0, 0,) ],
{ 'word': '完蛋', 'coref': None, 'idx': (1, 0,) ],
{ 'word': '了', 'coref': None, 'idx': (1, 1,) ],
{ 'word': '!」', 'coref': None, 'idx': (1, 2,) ],
{ 'word': '畢卡索', 'coref': (0, 'source'), 'idx': (2, 0,), ],
{ 'word': '他', 'coref': (0, 'target'), 'idx': (2, 1,), ],
{ 'word': '想', 'coref': None, 'idx': (2, 2,), ],
{ 'word': '「', 'coref': None, 'idx': (0, None,), },
{ 'word': '完蛋', 'coref': None, 'idx': (1, 0,), },
{ 'word': '了', 'coref': None, 'idx': (1, 1,), },
{ 'word': '!」', 'coref': None, 'idx': (1, None,), },
{ 'word': '畢卡索', 'coref': (0, 'source'), 'idx': (2, 2,), },
{ 'word': '他', 'coref': (0, 'target'), 'idx': (2, 3,), },
{ 'word': '想', 'coref': None, 'idx': (2, 4,), },
]
"""

item_class = CorefToken

from_text = NotImplemented

def to_text(self):
return '\u3000'.join(map(self._item_to_text, self))

################################################################################################################################

class CorefParagraph(_BaseList):
Expand All @@ -154,7 +154,7 @@ class CorefParagraph(_BaseList):
[
'「\u3000完蛋\u3000\u3000!」\u3000\u3000畢卡索_0\u3000他_0\u3000想', # Sentence 1
'然後\u3000None_0\u3000\u3000\u3000', # Sentence 1
'但是\u3000None_0\u3000\u3000沒有\u3000辦法', # Sentence 2
]
List format
Expand All @@ -164,20 +164,20 @@ class CorefParagraph(_BaseList):
[
[ # Sentence 1
[ '「', None, (0, 0,) ],
[ '完蛋', None, (1, 0,) ],
[ '了', None, (1, 1,) ],
[ '!」', None, (1, 2,) ],
[ '畢卡索', (0, 'source'), (2, 0,), ],
[ '他', (0, 'target'), (2, 1,), ],
[ '想', None, (2, 2,), ],
[ '「', None, (0, None,), ],
[ '完蛋', None, (1, 0,), ],
[ '了', None, (1, 1,), ],
[ '!」', None, (1, None,), ],
[ '畢卡索', (0, 'source'), (2, 2,), ],
[ '他', (0, 'target'), (2, 3,), ],
[ '想', None, (2, 4,), ],
],
[ # Sentence 2
[ '然後', None, (0, 0,) ],
[ None, (0, 'zero'), (0, 1,) ],
[ '', None, (0, 2,) ],
[ '', None, (0, 3,) ],
[ '', None, (0, 4,) ],
[ '但是', None, (0, 1,), ],
[ None, (0, 'zero'), (0, None,), ],
[ '', None, (0, 2,), ],
[ '沒有', None, (0, 3,), ],
[ '辦法', None, (0, 5,), ],
],
]
Expand All @@ -188,20 +188,20 @@ class CorefParagraph(_BaseList):
[
[ # Sentence 1
{ 'word': '「', 'coref': None, 'idx': (0, 0,) ],
{ 'word': '完蛋', 'coref': None, 'idx': (1, 0,) ],
{ 'word': '了', 'coref': None, 'idx': (1, 1,) ],
{ 'word': '!」', 'coref': None, 'idx': (1, 2,) ],
{ 'word': '畢卡索', 'coref': (0, 'source'), 'idx': (2, 0,), ],
{ 'word': '他', 'coref': (0, 'target'), 'idx': (2, 1,), ],
{ 'word': '想', 'coref': None, 'idx': (2, 2,), ],
{ 'word': '「', 'coref': None, 'idx': (0, None,), },
{ 'word': '完蛋', 'coref': None, 'idx': (1, 0,), },
{ 'word': '了', 'coref': None, 'idx': (1, 1,), },
{ 'word': '!」', 'coref': None, 'idx': (1, None,), },
{ 'word': '畢卡索', 'coref': (0, 'source'), 'idx': (2, 2,), },
{ 'word': '他', 'coref': (0, 'target'), 'idx': (2, 3,), },
{ 'word': '想', 'coref': None, 'idx': (2, 4,), },
],
[ # Sentence 2
{ 'word': '然後', 'coref': None, 'idx': (0, 0,) ],
{ 'word': None, 'coref': (0, 'zero'), 'idx': (1, 0,) ],
{ 'word': '', 'coref': None, 'idx': (2, 0,) ],
{ 'word': '', 'coref': None, 'idx': (3, 0,) ],
{ 'word': '', 'coref': None, 'idx': (4, 0,) ],
{ 'word': '但是', 'coref': None, 'idx': (0, 1,), },
{ 'word': None, 'coref': (0, 'zero'), 'idx': (0, None,), },
{ 'word': '', 'coref': None, 'idx': (0, 2,), },
{ 'word': '沒有', 'coref': None, 'idx': (0, 3,), },
{ 'word': '辦法', 'coref': None, 'idx': (0, 5,), },
],
]
"""
Expand Down
3 changes: 3 additions & 0 deletions ckipnlp/container/ner.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,9 @@ class NerToken(_BaseTuple, _NerToken):
)
"""

def __new__(cls, word, ner, idx, **kwargs): # pylint: disable=signature-differs
return super().__new__(cls, word, ner, tuple(idx), **kwargs)

to_text = NotImplemented
from_text = NotImplemented

Expand Down

0 comments on commit d1cc7e8

Please sign in to comment.