In [1]:
import nltk

In [3]:
def learn_default_tagger(sentence):
    tokens = nltk.word_tokenize(sentence)
    tagger = nltk.DefaultTagger('NN') # Set unknowns to nouns, NN
    pos_enabled_tags = tagger.tag(tokens)
    return pos_enabled_tags

In [9]:
def learn_regex_tagger(sentence):
    custom_patterns = [
        (r'.*ing$', 'ADJECTIVE'), # running
        (r'.*ly$', 'ADVERB'), # willingly
        (r'.*ion$', 'NOUN'), # intimation
        (r'(.*ate|.*en|is)$', 'VERB'), # terminate, darken, lighten
        (r'^an$', 'INDEFINITE-ARTICLE'), # terminate
        (r'^(with|on|at)$', 'PREPOSITION'), # on
        (r'^\-?[0-9]+(\.[0-9]+)$', 'NUMBER'), # -1.0, 13245.123
        (r'.*$', None)
    ]
    tagger = nltk.RegexpTagger(custom_patterns)
    tokens = nltk.word_tokenize(sentence)
    pos_enabled_tags = tagger.tag(tokens)
    return pos_enabled_tags

In [5]:
def learn_lookup_tagger(sentence):
    mapping = {
        '.': '.',
        'place': 'NN',
        'on': 'IN',
        'earth': 'NN',
        'Mysore': 'NNP',
        'is': 'VBZ',
        'an': 'DT',
        'amazing': 'JJ'
    }
    tagger = nltk.UnigramTagger(model=mapping)
    tokens = nltk.word_tokenize(sentence)
    pos_enabled_tags = tagger.tag(tokens)
    return pos_enabled_tags

In [6]:
test_sentence = 'Mysore is an amazing place on earth. I have visited Mysore 10 times.'

In [7]:
learn_default_tagger(test_sentence)

[('Mysore', 'NN'),
 ('is', 'NN'),
 ('an', 'NN'),
 ('amazing', 'NN'),
 ('place', 'NN'),
 ('on', 'NN'),
 ('earth', 'NN'),
 ('.', 'NN'),
 ('I', 'NN'),
 ('have', 'NN'),
 ('visited', 'NN'),
 ('Mysore', 'NN'),
 ('10', 'NN'),
 ('times', 'NN'),
 ('.', 'NN')]

In [10]:
learn_regex_tagger(test_sentence)

[('Mysore', None),
 ('is', 'VERB'),
 ('an', 'INDEFINITE-ARTICLE'),
 ('amazing', 'ADJECTIVE'),
 ('place', None),
 ('on', 'PREPOSITION'),
 ('earth', None),
 ('.', None),
 ('I', None),
 ('have', None),
 ('visited', None),
 ('Mysore', None),
 ('10', None),
 ('times', None),
 ('.', None)]

In [12]:
learn_lookup_tagger(test_sentence)

[('Mysore', 'NNP'),
 ('is', 'VBZ'),
 ('an', 'DT'),
 ('amazing', 'JJ'),
 ('place', 'NN'),
 ('on', 'IN'),
 ('earth', 'NN'),
 ('.', '.'),
 ('I', None),
 ('have', None),
 ('visited', None),
 ('Mysore', 'NNP'),
 ('10', None),
 ('times', None),
 ('.', '.')]