# Evaluation of NLP and IE/KE systems


### ML Evaluation -- Accuracy

This example is taken from chapter 6 of the NLTK book: http://www.nltk.org/book/ch06.html

We are building a classifier that assigns each name to one of two classes: male or female.

In [4]:
# most simple version
def gender_features(word):
    """Build the feature dict used by the name-gender classifier.

    Args:
        word: The name to featurize, as a string.

    Returns:
        A dict with the single key 'last_letter' whose value is the final
        character of ``word``, or the empty string when ``word`` is empty.
    """
    # Slice instead of indexing with [-1]: identical result for non-empty
    # names, but an empty name yields '' rather than raising IndexError.
    return {'last_letter': word[-1:]}

In [15]:
import nltk
from nltk.corpus import names

# build one list of (name, gender) pairs: all male names first, then all female names
labeled_names = [
    (name, gender)
    for gender, filename in (('male', 'male.txt'), ('female', 'female.txt'))
    for name in names.words(filename)
]


Now we split that dataset into training and test sets, and use our classifier.

In [16]:
import random
# Seed the generator before shuffling so the train/test split -- and with it
# the accuracy reported further down -- is the same on every fresh
# "Restart & Run All".
# NOTE: the shuffle is in place, so re-running only this cell permutes the
# already-shuffled list again; re-run the loading cell first to reset it.
SHUFFLE_SEED = 42
random.seed(SHUFFLE_SEED)
random.shuffle(labeled_names)

In [17]:
# pair each name's feature dict with its gold label
featuresets = [(gender_features(name), label) for name, label in labeled_names]

# hold out the first 500 examples for testing; train on everything after them
test_set = featuresets[:500]
train_set = featuresets[500:]

# fit a Naive Bayes model on the training portion
classifier = nltk.NaiveBayesClassifier.train(train_set)

# try the trained classifier on a single name
classifier.classify(gender_features('Neo'))


'male'

In [18]:
# report the classifier's accuracy on the held-out test_set
print(nltk.classify.accuracy(classifier, test_set))


0.768


In [19]:
# list the 5 features the trained classifier reports as most informative
classifier.show_most_informative_features(5)


Most Informative Features
             last_letter = 'k'              male : female =     43.2 : 1.0
             last_letter = 'a'            female : male   =     35.6 : 1.0
             last_letter = 'f'              male : female =     16.7 : 1.0
             last_letter = 'p'              male : female =     12.6 : 1.0
             last_letter = 'v'              male : female =     11.2 : 1.0


In [25]:
from nltk.corpus import brown

# baseline tagger for the confusion-matrix demo below, built with the single tag 'NN'
t2 = nltk.DefaultTagger('NN')

def tag_list(tagged_sents):
    """Flatten tagged sentences into a single list of tags.

    Args:
        tagged_sents: An iterable of sentences, each an iterable of
            (word, tag) pairs.

    Returns:
        A list containing every tag, in sentence order and then token order.
    """
    tags = []
    for sentence in tagged_sents:
        # keep only the tag of each (word, tag) pair
        tags.extend(tag for (_word, tag) in sentence)
    return tags
def apply_tagger(tagger, corpus):
    """Re-tag every sentence of a tagged corpus with ``tagger``.

    Args:
        tagger: An object exposing a ``tag(words)`` method.
        corpus: An iterable of tagged sentences; each is passed through
            ``nltk.tag.untag`` before tagging.

    Returns:
        A list with one entry per sentence: whatever ``tagger.tag`` returns
        for that sentence's untagged form.
    """
    retagged = []
    for sentence in corpus:
        # drop the existing tags so the tagger only sees the untagged sentence
        untagged = nltk.tag.untag(sentence)
        retagged.append(tagger.tag(untagged))
    return retagged
# gold: the reference tags of the Brown 'editorial' sentences, flattened into one list
gold = tag_list(brown.tagged_sents(categories='editorial'))
# test: the tags t2 assigns to the same sentences (apply_tagger untags each sentence first)
test = tag_list(apply_tagger(t2, brown.tagged_sents(categories='editorial')))
# confusion matrix built from the reference tags and t2's tags
cm = nltk.ConfusionMatrix(gold, test)
# print the matrix as text; presumably truncate=9 keeps only nine tags, as in the recorded output
print(cm.pretty_format(sort_by_count=True, show_percents=True, truncate=9))

    |                                         N                      |
    |      N      I      A      J             N             V      N |
    |      N      N      T      J      .      S      ,      B      P |
----+----------------------------------------------------------------+
 NN | <12.5%>     .      .      .      .      .      .      .      . |
 IN |  10.1%     <.>     .      .      .      .      .      .      . |
 AT |   8.6%      .     <.>     .      .      .      .      .      . |
 JJ |   5.8%      .      .     <.>     .      .      .      .      . |
  . |   4.9%      .      .      .     <.>     .      .      .      . |
NNS |   4.8%      .      .      .      .     <.>     .      .      . |
  , |   4.4%      .      .      .      .      .     <.>     .      . |
 VB |   3.5%      .      .      .      .      .      .     <.>     . |
 NP |   3.1%      .      .      .      .      .      .      .     <.>|
----+----------------------------------------------------------------+
(row =