Permalink
Switch branches/tags
Nothing to show
Find file Copy path
eff57ac Jan 20, 2018
1 contributor

Users who have contributed to this file

27 lines (18 sloc) 798 Bytes
# coding: utf8
from __future__ import unicode_literals
# import symbols – if you need to use more, add them here
from ...symbols import ORTH, LEMMA, TAG, NORM, ADP, DET
# Add tokenizer exceptions
# Documentation: https://spacy.io/docs/usage/adding-languages#tokenizer-exceptions
# Feel free to use custom logic to generate repetitive exceptions more efficiently.
# If an exception is split into more than one token, the ORTH values combined always
# need to match the original string.
# Exceptions should be added in the following format:
_exc = {
"don't": [
{ORTH: "do", LEMMA: "do"},
{ORTH: "n't", LEMMA: "not", TAG: "RB"}]
}
# To keep things clean and readable, it's recommended to only declare the
# TOKENIZER_EXCEPTIONS at the bottom:
TOKENIZER_EXCEPTIONS = _exc