Skip to content

Commit

Permalink
Merge pull request #31 from himkt/add-attributes-to-token
Browse files Browse the repository at this point in the history
Add attributes to token
  • Loading branch information
himkt committed Aug 30, 2019
2 parents dc62cf5 + 116fc03 commit dcb7f8c
Show file tree
Hide file tree
Showing 2 changed files with 84 additions and 9 deletions.
33 changes: 33 additions & 0 deletions tests/test_token.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
"""Test for word tokenizers"""
import unittest

from tiny_tokenizer.tiny_tokenizer_token import Token


class TokenTest(unittest.TestCase):
    """Unit tests for Token construction and its feature string."""

    def test_token_without_feature(self):
        """A token built from a surface alone has an empty feature."""
        surface = "大崎"
        token = Token(surface=surface)

        self.assertEqual(surface, token.surface)
        self.assertEqual("", token.feature)

    def test_token_with_postag(self):
        """With only postag given, feature is exactly that tag."""
        surface, postag = "大崎", "名詞"
        token = Token(surface=surface, postag=postag)

        self.assertEqual(surface, token.surface)
        self.assertEqual(postag, token.feature)

    def test_token_with_postag2(self):
        """With every attribute given, feature joins them with commas."""
        attributes = {
            "postag": "名詞",
            "postag2": "固有名詞,人名,姓",
            "conj_type": "*",
            "conj_form": "*",
            "origin_form": "大崎",
            "yomi": "オオサキ",
            "pron": "オーサキ",
        }
        token = Token(surface="大崎", **attributes)

        expected = "名詞,固有名詞,人名,姓,*,*,大崎,オオサキ,オーサキ"
        self.assertEqual(expected, token.feature)
60 changes: 51 additions & 9 deletions tiny_tokenizer/tiny_tokenizer_token.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,20 @@
"""Token class."""
from typing import Optional


class Token:
"""Token class for tiny_tokenizer."""

def __init__(
self,
surface: str,
postag: Optional[str] = None,
postag2: Optional[str] = None
self,
surface: str,
postag: Optional[str] = None,
postag2: Optional[str] = None,
conj_type: Optional[str] = None,
conj_form: Optional[str] = None,
origin_form: Optional[str] = None,
yomi: Optional[str] = None,
pron: Optional[str] = None,
):
"""
Initializer for Token.
Expand All @@ -17,14 +23,29 @@ def __init__(
---
surface (str)
surface (original form) of a word
postag (Optional[str], default None)
postag (str, default: None)
part-of-speech tag of a word (optional)
postag2 (Optional[str], default None)
postag2 (str, default: None)
detailed part-of-speech tag of a word (optional)
conjugate type (str, default: None)
conjugate type of word (optional)
conjugate form (str, default: None)
conjugate type of word (optional)
origin_form (str, default: None)
original form of a word
yomi (str, default: None)
yomi of a word (optional)
pron (str, default: None)
pronounciation of a word (optional)
"""
self.surface = surface
self.postag = postag # NOQA
self.postag = postag
self.postag2 = postag2
self.conj_type = conj_type
self.conj_form = conj_form
self.origin_form = origin_form
self.pron = pron
self.yomi = yomi

def __repr__(self):
representation = self.surface
Expand All @@ -33,6 +54,27 @@ def __repr__(self):
return representation

def __eq__(self, right):
return self.surface == right.surface \
and self.postag == right.postag \
return (
self.surface == right.surface
and self.postag == right.postag
and self.postag2 == right.postag2
and self.yomi == right.yomi)

@property
def feature(self):
feature = []
if self.postag is not None:
feature.append(self.postag)
if self.postag2 is not None:
feature.append(self.postag2)
if self.conj_type is not None:
feature.append(self.conj_type)
if self.conj_form is not None:
feature.append(self.conj_form)
if self.origin_form is not None:
feature.append(self.origin_form)
if self.yomi is not None:
feature.append(self.yomi)
if self.pron is not None:
feature.append(self.pron)
return ','.join(feature)

0 comments on commit dcb7f8c

Please sign in to comment.