Test examples using doctest (#10)

daac-tools · Apr 13, 2023 · 86c1251
1 parent 044ce52
commit 86c1251
Show file tree

Hide file tree

Showing 4 changed files with 39 additions and 52 deletions.
diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
@@ -31,10 +31,13 @@ jobs:
     - name: Test package
       run: |
         python -m pip install --upgrade pip
-        pip install -r requirements-dev.txt
+        pip install -r requirements-dev.txt zstandard
+        python -c "import zstandard;zstandard.ZstdDecompressor().copy_stream(open('tests/data/system.dic.zst','rb'),open('tests/data/system.dic','wb'))"
         pip install vibrato --no-index --find-links target/wheels --force-reinstall
         mypy --strict tests
         pytest
+        python -m doctest README.md
+        python -m doctest docs/source/examples.rst
 
   pack-sdist:
     needs: [ test ]

diff --git a/README.md b/README.md
@@ -40,43 +40,40 @@ To perform tokenization, follow [the document of Vibrato](https://github.com/daa
 Check the version number as shown below to use compatible models:
 
 ```python
-import vibrato
-vibrato.VIBRATO_VERSION
-#=> "0.5.0"
+>>> import vibrato
+>>> vibrato.VIBRATO_VERSION
+'0.5.0'
+
 ```
 
 Examples:
 
 ```python
-import vibrato
+>>> import vibrato
+
+>>> with open('tests/data/system.dic', 'rb') as fp:
+...     tokenizer = vibrato.Vibrato(fp.read())
 
-with open('path/to/system.dic', 'rb') as fp:
-    dict_data = fp.read()
-tokenizer = vibrato.Vibrato(dict_data)
+>>> tokens = tokenizer.tokenize('社長は火星猫だ')
 
-tokens = tokenizer.tokenize('社長は火星猫だ')
+>>> len(tokens)
+5
 
-len(tokens)
-#=> 5
+>>> tokens[0]
+Token { surface: "社長", feature: "名詞,普通名詞,一般,*" }
 
-list(tokens)
-#=> [Token { surface: "社長", feature: "名詞,一般,*,*,*,*,社長,シャチョウ,シャチョー,," },
-#    Token { surface: "は", feature: "助詞,係助詞,*,*,*,*,は,ハ,ワ,," },
-#    Token { surface: "火星", feature: "名詞,一般,*,*,*,*,火星,カセイ,カセイ,," },
-#    Token { surface: "猫", feature: "名詞,一般,*,*,*,*,猫,ネコ,ネコ,," },
-#    Token { surface: "だ", feature: "助動詞,*,*,*,特殊・ダ,基本形,だ,ダ,ダ,," }]
+>>> tokens[0].surface()
+'社長'
 
-tokens[0].surface()
-#=> '社長'
+>>> tokens[0].feature()
+'名詞,普通名詞,一般,*'
 
-tokens[0].feature()
-#=> '名詞,一般,*,*,*,*,社長,シャチョウ,シャチョー,,'
+>>> tokens[0].start()
+0
 
-tokens[0].start()
-#=> 0
+>>> tokens[0].end()
+2
 
-tokens[0].end()
-#=> 2
 ```
 
 ## Note for distributed models
@@ -85,22 +82,14 @@ The distributed models are compressed in zstd format. If you want to load these
 you must decompress them outside the API.
 
 ```python
-import vibrato
-import zstandard  # zstandard package in PyPI
-
-dctx = zstandard.ZstdDecompressor()
-with open('path/to/system.dic.zst', 'rb') as fp:
-    dict_reader = dctx.stream_reader(fp)
-    tokenizer = vibrato.Vibrato(dict_reader.read())
-```
-
-## Documentation
+>>> import vibrato
+>>> import zstandard  # zstandard package in PyPI
 
-Use the help function to show the API reference.
+>>> dctx = zstandard.ZstdDecompressor()
+>>> with open('tests/data/system.dic.zst', 'rb') as fp:
+...     with dctx.stream_reader(fp) as dict_reader:
+...         tokenizer = vibrato.Vibrato(dict_reader.read())
 
-```python
-import vibrato
-help(vibrato)
 ```
 
 ## License

diff --git a/docs/source/examples.rst b/docs/source/examples.rst
@@ -19,27 +19,22 @@ Examples:
 
    >>> import vibrato
 
-   >>> with open('path/to/system.dic', 'rb') as fp:
-   ...     dict_data = fp.read()
-   >>> tokenizer = vibrato.Vibrato(dict_data)
+   >>> with open('tests/data/system.dic', 'rb') as fp:
+   ...     tokenizer = vibrato.Vibrato(fp.read())
 
    >>> tokens = tokenizer.tokenize('社長は火星猫だ')
 
    >>> len(tokens)
    5
 
-   >>> list(tokens)
-   [Token { surface: "社長", feature: "名詞,一般,*,*,*,*,社長,シャチョウ,シャチョー,," },
-    Token { surface: "は", feature: "助詞,係助詞,*,*,*,*,は,ハ,ワ,," },
-    Token { surface: "火星", feature: "名詞,一般,*,*,*,*,火星,カセイ,カセイ,," },
-    Token { surface: "猫", feature: "名詞,一般,*,*,*,*,猫,ネコ,ネコ,," },
-    Token { surface: "だ", feature: "助動詞,*,*,*,特殊・ダ,基本形,だ,ダ,ダ,," }]
+   >>> tokens[0]
+   Token { surface: "社長", feature: "名詞,普通名詞,一般,*" }
 
    >>> tokens[0].surface()
    '社長'
 
    >>> tokens[0].feature()
-   '名詞,一般,*,*,*,*,社長,シャチョウ,シャチョー,,'
+   '名詞,普通名詞,一般,*'
 
    >>> tokens[0].start()
    0
@@ -56,6 +51,6 @@ you must decompress them outside the API:
    >>> import zstandard  # zstandard package in PyPI
 
    >>> dctx = zstandard.ZstdDecompressor()
-   >>> with open('path/to/system.dic.zst', 'rb') as fp:
-   ...     dict_reader = dctx.stream_reader(fp)
-   >>> tokenizer = vibrato.Vibrato(dict_reader.read())
+   >>> with open('tests/data/system.dic.zst', 'rb') as fp:
+   ...     with dctx.stream_reader(fp) as dict_reader:
+   ...         tokenizer = vibrato.Vibrato(dict_reader.read())
diff --git a/tests/data/system.dic.zst b/tests/data/system.dic.zst