Update documentation.

ckiplab · Apr 23, 2020 · 05c354d · 05c354d
1 parent 752301b
commit 05c354d
Show file tree

Hide file tree

Showing 34 changed files with 2,325 additions and 78 deletions.
diff --git a/.pylintrc b/.pylintrc
@@ -5,7 +5,6 @@ disable =
 	bad-continuation,
 	duplicate-code,
 	logging-fstring-interpolation,
-	missing-docstring,
 	too-few-public-methods,
 	too-many-ancestors,
 	too-many-branches,

diff --git a/README.rst b/README.rst
@@ -1,6 +1,18 @@
 Introduction
 ============
 
+Official CKIP CoreNLP Toolkits
+
+Features
+--------
+
+- Sentence Segmentation
+- Word Segmentation
+- Part-of-Speech Tagging
+- Sentence Parsing
+- Named-Entity Recognition
+- Co-Reference Detection
+
 Git
 ---
 
@@ -74,6 +86,9 @@ External Links
 
 - `Online Demo <https://ckip.iis.sinica.edu.tw/service/corenlp>`_
 
+Installation
+============
+
 Requirements
 ------------
 
@@ -83,14 +98,109 @@ Requirements
 * `CkipTagger <https://pypi.org/project/ckiptagger>`_ 0.1.1+ [Optional, Recommended]
 * `CkipClassic <https://ckip-classic.readthedocs.io>`_ 1.0+ [Optional]
 
+Tool Requirements
+-----------------
+
+================================  ==========  ============  =============
+ Tool                              Built-in    CkipTagger    CkipClassic
+================================  ==========  ============  =============
+Sentence Segmentation              ✔
+Word Segmentation†                             ✔             ✔
+Part-of-Speech Tagging†                        ✔             ✔
+Sentence Parsing                                             ✔
+Named-Entity Recognition                       ✔
+Co-Reference Detection‡            ✔           ✔             ✔
+================================  ==========  ============  =============
+
+- † These tools require only one of the two backends.
+- ‡ Co-Reference implementation does not require any backend, but requires results from word segmentation, part-of-speech tagging, sentence parsing, and named-entity recognition.
+
+Installation via Pip
+--------------------
+
+- No backend (not recommended): ``pip install ckipnlp``.
+- With CkipTagger backend (recommended): ``pip install ckipnlp[tagger]``.
+- With CkipClassic backend: ``pip install ckipnlp[classic]``.
+- With both backends: ``pip install ckipnlp[tagger,classic]``.
+
+Please refer to https://ckip-classic.readthedocs.io for the CkipClassic installation guide.
+
 Usage
 =====
 
 See http://ckipnlp.readthedocs.io/en/latest/_api/ckipnlp.html for API details.
 
+Pipeline
+--------
+
+.. image:: ../_static/image/pipeline.svg
+
+.. code-block:: python
+
+   import ckipnlp
+   print(ckipnlp.__name__, ckipnlp.__version__)
+
+   ################################################################
+
+   from ckipnlp.pipeline import CkipPipeline, CkipDocument
+
+   pipeline = CkipPipeline()
+   doc = CkipDocument(
+      raw='中文字喔，啊哈哈哈',
+   )
+
+   # Word Segmentation
+   pipeline.get_ws(doc)
+   print(doc.ws)
+   for line in doc.ws:
+       print(line.to_text())
+
+   # Part-of-Speech Tagging
+   pipeline.get_pos(doc)
+   print(doc.pos)
+   for line in doc.pos:
+       print(line.to_text())
+
+   # Sentence Parsing
+   pipeline.get_parsed(doc)
+   print(doc.parsed)
+
+   # Named-Entity Recognition
+   pipeline.get_ner(doc)
+   print(doc.ner)
+
+   ################################################################
+
+   from ckipnlp.container.wspos import WsPosParagraph
+
+   # Word Segmentation & Part-of-Speech Tagging
+   for line in WsPosParagraph.to_text(doc.ws, doc.pos):
+       print(line)
+
+Co-Reference Pipeline
+---------------------
+
+.. image:: ../_static/image/coref_pipeline.svg
+
+.. code-block:: python
+
+   import ckipnlp
+   print(ckipnlp.__name__, ckipnlp.__version__)
+
+   ################################################################
+
+   from ckipnlp.pipeline import CkipCorefPipeline, CkipDocument
+
+   pipeline = CkipCorefPipeline()
+   doc = CkipDocument(
+      raw='畢卡索他想，完蛋了',
+   )
 
-FAQ
-===
+   # Co-Reference
+   corefdoc = pipeline(doc)
+   print(corefdoc.coref)
+   for line in corefdoc.coref:
+       print(line.to_text())
 
 License
 =======

diff --git a/ckipnlp/__init__.py b/ckipnlp/__init__.py
@@ -1,6 +1,10 @@
 #!/usr/bin/env python3
 # -*- coding:utf-8 -*-
 
+"""
+The Official CKIP CoreNLP Toolkits.
+"""
+
 __author_name__ = 'Mu Yang'
 __author_email__ = 'emfomy@gmail.com'
 __copyright__ = '2018-2020 CKIP Lab'

diff --git a/ckipnlp/container/coref.py b/ckipnlp/container/coref.py
@@ -64,7 +64,7 @@ class CorefToken(_BaseTuple, _CorefToken):
             .. code-block:: python
 
                 {
-                    'word': '畢卡索',       # token word
+                    'word': '畢卡索',        # token word
                     'coref': (0, 'source'), # coref ID and type
                     'idx': 2,               # node index
                 }
@@ -106,9 +106,9 @@ class CorefSentence(_BaseSentence):
             .. code-block:: python
 
                 [
-                    { word: '畢卡索', coref: (0, 'source'), idx: 2, }, # coref-token 1
-                    { word: '他', coref: (0, 'target'), idx: 3, },    # coref-token 2
-                    { word: '想', coref: None, idx: 4, },             # coref-token 3
+                    { 'word': '畢卡索', 'coref': (0, 'source'), 'idx': 2, }, # coref-token 1
+                    { 'word': '他', 'coref': (0, 'target'), 'idx': 3, },    # coref-token 2
+                    { 'word': '想', 'coref': None, 'idx': 4, },             # coref-token 3
                 ]
 
         List format
@@ -154,14 +154,14 @@ class CorefParagraph(_BaseList):
 
                 [
                     [ # Sentence 1
-                        { word: '畢卡索', coref: (0, 'source'), idx: 2, },
-                        { word: '他', coref: (0, 'target'), idx: 3, },
-                        { word: '想', coref: None, idx: 4, },
+                        { 'word': '畢卡索', 'coref': (0, 'source'), 'idx': 2, },
+                        { 'word': '他', 'coref': (0, 'target'), 'idx': 3, },
+                        { 'word': '想', 'coref': None, 'idx': 4, },
                     ],
                     [ # Sentence 2
-                        { word: None, coref: (0, 'zero'), None, },
-                        { word: '完蛋', coref: None, idx: 1, },
-                        { word: '了', coref: None, idx: 2, },
+                        { 'word': None, 'coref': (0, 'zero'), 'idx': None, },
+                        { 'word': '完蛋', 'coref': None, 'idx': 1, },
+                        { 'word': '了', 'coref': None, 'idx': 2, },
                     ],
                 ]
 

diff --git a/ckipnlp/container/ner.py b/ckipnlp/container/ner.py
@@ -28,7 +28,7 @@ class _NerToken(_NamedTuple):
     idx: _Tuple[int, int]
 
 class NerToken(_BaseTuple, _NerToken):
-    """A NER token.
+    """A named-entity recognition token.
 
     Attributes
     ----------
@@ -101,7 +101,7 @@ def to_tagger(self):
 ################################################################################################################################
 
 class NerSentence(_BaseSentence):
-    """A list of NER sentence.
+    """A named-entity recognition sentence.
 
     .. admonition:: Data Structure Examples
 
@@ -158,7 +158,7 @@ def to_tagger(self):
 ################################################################################################################################
 
 class NerParagraph(_BaseList):
-    """A list of NER sentence.
+    """A list of named-entity recognition sentences.
 
     .. admonition:: Data Structure Examples
 

diff --git a/ckipnlp/container/tree/__init__.py b/ckipnlp/container/tree/__init__.py
@@ -0,0 +1,10 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+
+"""
+This module implements specialized tree containers for CKIPNLP.
+"""
+
+__author__ = 'Mu Yang <http://muyang.pro>'
+__copyright__ = '2018-2020 CKIP Lab'
+__license__ = 'CC BY-NC-SA 4.0'
diff --git a/ckipnlp/container/tree/parsed.py b/ckipnlp/container/tree/parsed.py
@@ -2,7 +2,7 @@
 # -*- coding:utf-8 -*-
 
 """
-This module provides containers for parsed trees.
+This module provides tree containers for sentence parsing.
 """
 
 __author__ = 'Mu Yang <http://muyang.pro>'
@@ -220,7 +220,7 @@ def __repr__(self):
         )
 
     @property
-    def head_first(self):
+    def head_first(self):  # pylint: disable=missing-docstring
         return self.head.identifier <= self.tail.identifier
 
     def to_dict(self):

diff --git a/ckipnlp/container/wspos.py b/ckipnlp/container/wspos.py
@@ -144,7 +144,7 @@ def from_text(cls, data):
         Parameters
         ----------
             data : str
-                text such as ``'中文字(Na)\u3000喔(T)'``.
+                text such as ``'中文字(Na)\\u3000喔(T)'``.
 
         Returns
         -------
@@ -169,7 +169,7 @@ def to_text(word, pos):
         Returns
         -------
             str
-                text such as ``'中文字(Na)\u3000喔(T)'``.
+                text such as ``'中文字(Na)\\u3000喔(T)'``.
         """
         return _sentence_to_text((word, pos,))
 
@@ -191,7 +191,7 @@ def from_text(cls, data):
         Parameters
         ----------
             data : Sequence[str]
-                list of sentences such as ``'中文字(Na)\u3000喔(T)'``.
+                list of sentences such as ``'中文字(Na)\\u3000喔(T)'``.
 
         Returns
         -------
@@ -216,6 +216,6 @@ def to_text(word, pos):
         Returns
         -------
             List[str]
-                list of sentences such as ``'中文字(Na)\u3000喔(T)'``.
+                list of sentences such as ``'中文字(Na)\\u3000喔(T)'``.
         """
         return list(_paragraph_to_text((word, pos,)))
diff --git a/ckipnlp/data/__init__.py b/ckipnlp/data/__init__.py
diff --git a/ckipnlp/data/coref/__init__.py b/ckipnlp/data/coref/__init__.py
@@ -1,6 +1,8 @@
 #!/usr/bin/env python3
 # -*- coding:utf-8 -*-
 
+# pylint: disable=missing-docstring
+
 __author__ = 'Mu Yang <http://muyang.pro>'
 __copyright__ = '2018-2020 CKIP Lab'
 __license__ = 'CC BY-NC-SA 4.0'

diff --git a/ckipnlp/data/coref/_human_words.py b/ckipnlp/data/coref/_human_words.py
@@ -1,4 +1,4 @@
-# pylint: disable=too-many-lines
+# pylint: disable=missing-docstring, too-many-lines
 
 HUMAN_WORDS = {
     '一代紅顏',

diff --git a/ckipnlp/data/coref/_pronoun_words.py b/ckipnlp/data/coref/_pronoun_words.py
@@ -1,3 +1,5 @@
+# pylint: disable=missing-docstring
+
 PRONOUN_1ST_SINGLE_WORDS = { # speaker|說話者 \ 我們|we
     '余',
     '吾',

diff --git a/ckipnlp/data/coref/_self_words.py b/ckipnlp/data/coref/_self_words.py
@@ -1,3 +1,5 @@
+# pylint: disable=missing-docstring
+
 SELF_WORDS = {
     '一己',
     '小我',

diff --git a/ckipnlp/data/parsed.py b/ckipnlp/data/parsed.py
@@ -1,6 +1,8 @@
 #!/usr/bin/env python3
 # -*- coding:utf-8 -*-
 
+# pylint: disable=missing-docstring
+
 __author__ = 'Mu Yang <http://muyang.pro>'
 __copyright__ = '2018-2020 CKIP Lab'
 __license__ = 'CC BY-NC-SA 4.0'

diff --git a/ckipnlp/driver/__init__.py b/ckipnlp/driver/__init__.py
@@ -2,7 +2,7 @@
 # -*- coding:utf-8 -*-
 
 """
-This module implements specialized drivers for CKIPNLP.
+This module implements drivers for CKIPNLP.
 """
 
 __author__ = 'Mu Yang <http://muyang.pro>'