Skip to content


Subversion checkout URL

You can clone with
Download ZIP
Tree: 90ddd4669e
Fetching contributors…

Cannot retrieve contributors at this time

55 lines (41 sloc) 1.83 KB
from nltk.tag.sequential import SequentialBackoffTagger
from nltk.probability import FreqDist
from nltk.tag import ClassifierBasedPOSTagger, TaggerI, str2tuple
from nltk_trainer.featx import phonetics
from nltk_trainer.featx.metaphone import dm
class PhoneticClassifierBasedPOSTagger(ClassifierBasedPOSTagger):
def __init__(self, double_metaphone=False, metaphone=False, soundex=False, nysiis=False, caverphone=False, *args, **kwargs):
self.funs = {}
if double_metaphone:
self.funs['double-metaphone'] = lambda s: dm(unicode(s))
if metaphone:
self.funs['metaphone'] = phonetics.metaphone
if soundex:
self.funs['soundex'] = phonetics.soundex
if nysiis:
self.funs['nysiis'] = phonetics.nysiis
if caverphone:
self.funs['caverphone'] = phonetics.caverphone
# for some reason don't get self.funs if this is done first, but works if done last
ClassifierBasedPOSTagger.__init__(self, *args, **kwargs)
def feature_detector(self, tokens, index, history):
feats = ClassifierBasedPOSTagger.feature_detector(self, tokens, index, history)
s = tokens[index]
for key, fun in self.funs.iteritems():
feats[key] = fun(s)
return feats
class MaxVoteBackoffTagger(SequentialBackoffTagger):
def __init__(self, *taggers):
self._taggers = taggers
def choose_tag(self, tokens, index, history):
tags = FreqDist()
for tagger in self._taggers:, index, history))
return tags.max()
class PatternTagger(TaggerI):
def tag(self, tokens):
# don't import at top since don't want to fail if not installed
from pattern.en import parse
# we don't want chunk tags, and not tokenizing ensures that the number
# of tagged tokens returned is the same as the number of input tokens
return [str2tuple(s) for s in parse(u' '.join(tokens), chunks=False, tokenize=False).split(u' ')]
Jump to Line
Something went wrong with that request. Please try again.