Permalink
Find file
b5ba2f1 Jul 4, 2011
executable file 79 lines (67 sloc) 1.96 KB
#!/usr/bin/env python
"""
An example for part-of-speech tagging.
Copyright 2010,2011 Naoaki Okazaki.
"""
# Character placed between field values.
separator = ' '

# Names of the fields of the input data, joined into one space-delimited
# string (the same string the script passes to crfutils.main as `fields`).
fields = ' '.join((
    'w', 'num', 'cap', 'sym',
    'p1', 'p2', 'p3', 'p4',
    's1', 's2', 's3', 's4',
    'y',
))
# Feature templates. According to the original author this set is identical
# to the one bundled in the CRF++ distribution, expressed as a Python object.
#
# Each template is a tuple of (field, offset) pairs. The tuple below is
# assembled group by group; the resulting entries and their order are exactly
# those of the hand-written list it replaces (44 templates in total).
#
# NOTE(review): the pair (w[0], w[1]) is produced twice -- once by the
# contiguous bigram group and once by the "current word + following word"
# group. The repetition is preserved on purpose so the generated attributes
# stay unchanged; confirm whether it is intended.
templates = (
    # Every non-label field at the current position.
    tuple(
        ((name, 0),)
        for name in ('num', 'cap', 'sym',
                     'p1', 'p2', 'p3', 'p4',
                     's1', 's2', 's3', 's4',
                     'w')
    )
    # The word at the neighbouring positions.
    + tuple((('w', shift),) for shift in (-1, 1, -2, 2))
    # Contiguous word n-grams (n = 2..5) that fit in the window [-2, 2].
    + tuple(
        tuple(('w', pos) for pos in range(first, first + width))
        for width in (2, 3, 4, 5)
        for first in range(-2, 4 - width)
    )
    # Current word paired with each of the nine preceding words.
    + tuple((('w', 0), ('w', -gap)) for gap in range(1, 10))
    # Current word paired with each of the nine following words.
    + tuple((('w', 0), ('w', gap)) for gap in range(1, 10))
)
import crfutils
def feature_extractor(X):
    """Populate the attributes of every item in the sequence X, in place.

    The module-level ``templates`` are handed to ``crfutils.apply_templates``
    to obtain features (in fact, attributes). After that, when X is not
    empty, the marker '__BOS__' is appended to the 'F' list of the first item
    and '__EOS__' to the 'F' list of the last item. For a one-item sequence
    both markers land on that single item, in that order.

    X is indexed as a sequence of mappings that each carry a list under the
    key 'F'. Nothing is returned.
    """
    crfutils.apply_templates(X, templates)

    # An empty sequence has no first or last item to mark.
    if not X:
        return

    head, tail = X[0], X[-1]
    head['F'].append('__BOS__')  # beginning-of-sequence marker
    tail['F'].append('__EOS__')  # end-of-sequence marker
# When executed as a script, hand control to the shared crfutils driver,
# passing this module's extractor, field names and separator.
if __name__ == '__main__':
    crfutils.main(
        feature_extractor,
        fields=fields,
        sep=separator,
    )