Skip to content

Commit

Permalink
Malay language support (#12602)
Browse files Browse the repository at this point in the history
* add malay lang

* fix token len

* black format

* reformat conftest malay

* remove exceptions that do not exist in DBP

* format code
  • Loading branch information
khursani8 committed May 17, 2023
1 parent 58779c2 commit 873c16a
Show file tree
Hide file tree
Showing 13 changed files with 3,936 additions and 0 deletions.
24 changes: 24 additions & 0 deletions spacy/lang/ms/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
from .stop_words import STOP_WORDS
from .punctuation import TOKENIZER_SUFFIXES, TOKENIZER_PREFIXES, TOKENIZER_INFIXES
from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
from .lex_attrs import LEX_ATTRS
from .syntax_iterators import SYNTAX_ITERATORS
from ...language import Language, BaseDefaults


class MalayDefaults(BaseDefaults):
    """Language defaults for Malay.

    Overrides the ``BaseDefaults`` attributes below with the Malay-specific
    data imported from this package's sibling modules (``stop_words``,
    ``punctuation``, ``tokenizer_exceptions``, ``lex_attrs`` and
    ``syntax_iterators``).
    """

    # Tokenizer configuration: special-case exceptions plus the
    # prefix / suffix / infix rules from ``.punctuation``.
    tokenizer_exceptions = TOKENIZER_EXCEPTIONS
    prefixes = TOKENIZER_PREFIXES
    suffixes = TOKENIZER_SUFFIXES
    infixes = TOKENIZER_INFIXES
    # Language-specific syntax iterators from ``.syntax_iterators``.
    syntax_iterators = SYNTAX_ITERATORS
    # Lexical attribute getters from ``.lex_attrs``.
    lex_attr_getters = LEX_ATTRS
    # Stop-word collection from ``.stop_words``.
    stop_words = STOP_WORDS


class Malay(Language):
    """``Language`` subclass for Malay.

    Registered under the language code ``"ms"`` and configured with
    ``MalayDefaults``.
    """

    lang = "ms"
    Defaults = MalayDefaults


# Public names exported by ``from <this module> import *``: only ``Malay``.
__all__ = ["Malay"]

0 comments on commit 873c16a

Please sign in to comment.