Skip to content

Commit

Permalink
Update Vaporetto and add VAPORETTO_VERSION (#7)
Browse files Browse the repository at this point in the history
  • Loading branch information
vbkaisetsu committed Mar 30, 2023
1 parent 9f30271 commit 9599b61
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 11 deletions.
4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,5 +18,5 @@ hashbrown = "0.13.2" # MIT or Apache-2.0
ouroboros = "0.15.5" # MIT or Apache-2.0
pyo3 = { version = "0.18.0", features = ["extension-module"] } # Apache-2.0
ruzstd = "0.3.0" # MIT
vaporetto_rust = { package = "vaporetto", version = "0.6.1", features = ["kytea"] } # MIT or Apache-2.0
vaporetto_rules = "0.6.1" # MIT or Apache-2.0
vaporetto_rust = { package = "vaporetto", version = "0.6.2", features = ["kytea"] } # MIT or Apache-2.0
vaporetto_rules = "0.6.2" # MIT or Apache-2.0
7 changes: 7 additions & 0 deletions docs/source/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,10 @@ API reference

.. autoclass:: vaporetto.Token
:members:

.. data:: VAPORETTO_VERSION
:type: str
:canonical: vaporetto.VAPORETTO_VERSION

Indicates the version number of *vaporetto* used by this wrapper. It can be used to check the
compatibility of the model file.
18 changes: 9 additions & 9 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,23 +25,20 @@ impl Token {
/// Return the surface of this token.
///
/// :rtype: str
#[pyo3(text_signature = "($self, /)")]
fn surface(&self, py: Python) -> Py<PyUnicode> {
// Field 0 of this token's entry in the list's `surfaces` is the surface string.
// `clone_ref` hands back another reference to that stored value.
self.list.borrow(py).surfaces[self.index].0.clone_ref(py)
}

/// Return the start position (inclusive) in characters.
///
/// :rtype: int
#[pyo3(text_signature = "($self, /)")]
fn start(&self, py: Python) -> usize {
// Field 1 of this token's entry in the list's `surfaces` is the start position.
self.list.borrow(py).surfaces[self.index].1
}

/// Return the end position (exclusive) in characters.
///
/// :rtype: int
#[pyo3(text_signature = "($self, /)")]
fn end(&self, py: Python) -> usize {
// Field 2 of this token's entry in the list's `surfaces` is the end position.
self.list.borrow(py).surfaces[self.index].2
}
Expand All @@ -52,7 +49,7 @@ impl Token {
/// :type index: int
/// :rtype: Optional[str]
/// :raises ValueError: if the index is out of range.
#[pyo3(text_signature = "($self, index, /)")]
#[pyo3(signature = (index, /))]
fn tag(&self, py: Python, index: usize) -> PyResult<Option<Py<PyUnicode>>> {
let list = self.list.borrow(py);
if index < list.n_tags {
Expand All @@ -68,7 +65,6 @@ impl Token {
/// Return the number of tags assigned to this token.
///
/// :rtype: int
#[pyo3(text_signature = "($self, /)")]
fn n_tags(&self, py: Python) -> usize {
// The count is read from the list referenced by `self.list`, not from a per-token field.
self.list.borrow(py).n_tags
}
Expand Down Expand Up @@ -235,6 +231,9 @@ impl PredictorWrapper {
/// :param model: A byte sequence of the model.
/// :param predict_tags: If True, the tokenizer predicts tags.
/// :param wsconst: Does not split the specified character types.
/// ``D``: Digit, ``R``: Roman, ``H``: Hiragana, ``T``: Katakana, ``K``: Kanji,
/// ``O``: Other, ``G``: Grapheme cluster. You can specify multiple types such as
/// ``DGR``.
/// :param norm: If True, input texts will be normalized beforehand.
/// :type model: bytes
/// :type predict_tags: bool
Expand Down Expand Up @@ -311,7 +310,7 @@ impl Vaporetto {
#[pymethods]
impl Vaporetto {
#[new]
#[args(predict_tags = "false", wsconst = "\"\"", norm = "true")]
#[pyo3(signature = (model, /, predict_tags=false, wsconst="", norm=true))]
fn new(
py: Python,
model: &[u8],
Expand Down Expand Up @@ -345,7 +344,7 @@ impl Vaporetto {
/// :raises ValueError: if the model is invalid.
/// :raises ValueError: if the wsconst value is invalid.
#[staticmethod]
#[args(wsconst = "\"\"", norm = "true")]
#[pyo3(signature = (model, /, wsconst="", norm=true))]
#[pyo3(text_signature = "(model, /, wsconst = \"\", norm = True)")]
fn create_from_kytea_model(
py: Python,
Expand All @@ -366,7 +365,7 @@ impl Vaporetto {
/// :param text: A text to tokenize.
/// :type text: str
/// :rtype: vaporetto.TokenList
#[pyo3(text_signature = "($self, text, /)")]
#[pyo3(signature = (text, /))]
fn tokenize(&mut self, py: Python, text: String) -> TokenList {
if self
.wrapper
Expand Down Expand Up @@ -419,7 +418,7 @@ impl Vaporetto {
/// :param text: A text to tokenize.
/// :type text: str
/// :rtype: str
#[pyo3(text_signature = "($self, text, /)")]
#[pyo3(signature = (text, /))]
fn tokenize_to_string(&mut self, py: Python, text: String) -> Py<PyUnicode> {
if self
.wrapper
Expand All @@ -442,5 +441,6 @@ fn vaporetto(_py: Python, m: &PyModule) -> PyResult<()> {
m.add_class::<TokenList>()?;
m.add_class::<TokenIterator>()?;
m.add_class::<Token>()?;
m.add("VAPORETTO_VERSION", vaporetto_rust::VERSION)?;
Ok(())
}

0 comments on commit 9599b61

Please sign in to comment.