In [4]:
import inspect
from util import make_nerda_train_dev_test, nerda_format_just_iob, conlleval
import nltk
train, dev, test = make_nerda_train_dev_test(format_fn=nerda_format_just_iob)
from util 

In [5]:
train

defaultdict(list,
            {'sentences': [['थः',
               'नं',
               'छम्ह',
               'शक्तिशाली',
               'खः',
               'धयागु',
               'आत्मविश्वास',
               'ज्वनाः',
               'न्ह्याःवःगु',
               'खनेदु'],
              ['नसा', 'कमजोर', 'जुलकि', 'नं', 'ला', 'थहां', 'वइ'],
              ['सरकारया',
               'निंतिं',
               'अज्याःगु',
               'अवसर',
               'धइगु',
               'बुद्धया',
               'नामय्',
               'थुलि',
               'ध्यबा',
               'खर्च',
               'याना',
               'उलि',
               'ध्यबा',
               'खर्च',
               'याना',
               'धकाः',
               'कनेगु',
               'अले',
               'थः',
               'पार्टीया',
               'कार्यकर्तातय्गु',
               'खल्तिइ',
               'ध्यबा',
               'प्वंकेगु',
               'बाहेक',
               'मेगु',
               'छुं',

In [6]:
def to_padded_tok_tag_tuples_from_nerda(partition):
    tokens = []
    tags = []
    for sent in partition['sentences']:
        tokens.append("<S>")
        for tok in sent:
            tokens.append(tok)
        tokens.append("</S>")
    
    for sent in partition['tags']:
        tags.append("O")
        for tag in sent:
            tags.append(tag)
        tags.append("O")
    
    return list(zip(tokens, tags))

In [7]:
train_tup = to_padded_tok_tag_tuples_from_nerda(train)
dev_tup = to_padded_tok_tag_tuples_from_nerda(dev)
test_tup = to_padded_tok_tag_tuples_from_nerda(test)

In [8]:
test_tup[-5:]

[('योसेफ', 'O'),
 ('अन', 'O'),
 ('वने', 'O'),
 ('ग्\u200dयात', 'O'),
 ('</S>', 'O')]

In [13]:
def experiment_011_naive_bayes_unigram():
    train_nerda, dev_nerda, test_nerda = make_nerda_train_dev_test(format_fn=nerda_format_just_iob)
    train = to_padded_tok_tag_tuples_from_nerda(train_nerda)
    #dev_tup = to_padded_tok_tag_tuples_from_nerda(dev_nerda)
    test = to_padded_tok_tag_tuples_from_nerda(test_nerda)
    
    trainfeat = [({"word": tok}, lab) for (tok, lab) in train]
    testfeat  = [({"word": tok}, lab) for (tok, lab) in test]
    classifier = nltk.NaiveBayesClassifier.train(trainfeat)
    train_pred = list(map(classifier.classify, [tok for tok, lab in trainfeat]))
    test_pred = list(map(classifier.classify, [tok for tok, lab in testfeat]))
    train_toks, train_true = zip(*train)
    test_toks, test_true = zip(*test)
    # considering inspect.currentframe().f_code.co_name as a way to
    # keep track of experiments, when running a battery of experiments
    results = conlleval(test_toks, test_true, test_pred)
    experiment_name = inspect.currentframe().f_code.co_name
    results["experiment_name"] = experiment_name
    return results

In [14]:
experiment_011_naive_bayes_unigram()

processed 268 tokens with 13 phrases; found: 46 phrases; correct: 1.
accuracy:  63.81%; precision:   2.17%; recall:   7.69%; FB1:   3.39
                 : precision:   2.17%; recall:   7.69%; FB1:   3.39  46
                 : precision:   2.17%; recall:   7.69%; FB1:   3.39  46



{'num_tokens': 268,
 'num_phrases': 13,
 'num_found': 46,
 'num_correct': 1,
 'accuracy': 63.81,
 'precision': 2.17,
 'recall': 7.69,
 'fb1': 3.39,
 'experiment_name': 'experiment_011_naive_bayes_unigram'}

In [17]:
def experiment_011_maxent_unigram():
    
    train_nerda, dev_nerda, test_nerda = make_nerda_train_dev_test(format_fn=nerda_format_just_iob)
    train = to_padded_tok_tag_tuples_from_nerda(train_nerda)
    #dev_tup = to_padded_tok_tag_tuples_from_nerda(dev_nerda)
    test = to_padded_tok_tag_tuples_from_nerda(test_nerda)
    
    trainfeat = [({"word": tok}, lab) for (tok, lab) in train]
    testfeat  = [({"word": tok}, lab) for (tok, lab) in test]
    classifier = nltk.MaxentClassifier.train(trainfeat)
    train_pred = list(map(classifier.classify, [tok for tok, lab in trainfeat]))
    test_pred = list(map(classifier.classify, [tok for tok, lab in testfeat]))
    train_toks, train_true = zip(*train)
    test_toks, test_true = zip(*test)
    # considering inspect.currentframe().f_code.co_name as a way to
    # keep track of experiments, when running a battery of experiments
    results = conlleval(test_toks, test_true, test_pred)
    experiment_name = inspect.currentframe().f_code.co_name
    results["experiment_name"] = experiment_name
    return results

In [18]:
experiment_011_maxent_unigram()

  ==> Training (100 iterations)

      Iteration    Log Likelihood    Accuracy
      ---------------------------------------
             1          -1.09861        0.460
             2          -0.62252        0.850
             3          -0.49077        0.850
             4          -0.42280        0.850
             5          -0.38108        0.850
             6          -0.35281        0.850
             7          -0.33236        0.850
             8          -0.31687        0.850
             9          -0.30472        0.850
            10          -0.29494        0.850
            11          -0.28689        0.850
            12          -0.28015        0.850
            13          -0.27442        0.850
            14          -0.26949        0.850
            15          -0.26521        0.850
            16          -0.26145        0.850
            17          -0.25813        0.850
            18          -0.25516        0.850
            19          -0.25251        0.850
 

{'num_tokens': 268,
 'num_phrases': 13,
 'num_found': 52,
 'num_correct': 0,
 'accuracy': 52.24,
 'precision': 0.0,
 'recall': 0.0,
 'fb1': 0.0,
 'experiment_name': 'experiment_011_maxent_unigram'}

In [41]:
from util import featurize_wordandtag_pseudo_bigram
def experiment_012_naive_bayes_pseudo_bigram_dishonest():
    """this is dishonest because it uses the true previous tags: it
    should use the predicted previous tags

    """
    train_nerda, dev_nerda, test_nerda = make_nerda_train_dev_test(format_fn=nerda_format_just_iob)
    train = to_padded_tok_tag_tuples_from_nerda(train_nerda)
    #dev_tup = to_padded_tok_tag_tuples_from_nerda(dev_nerda)
    test = to_padded_tok_tag_tuples_from_nerda(test_nerda)
    

    trainfeat = list(featurize_wordandtag_pseudo_bigram(train))
    testfeat = list(featurize_wordandtag_pseudo_bigram(test))
    classifier = nltk.NaiveBayesClassifier.train(trainfeat)

    #train_pred = list(map(classifier.classify, [tok for tok, lab in trainfeat]))
    #train_toks, train_true = zip(*train)
    #conlleval(train_toks, train_true, train_pred)

    test_pred = list(map(classifier.classify, [tok for tok, lab in testfeat]))
    test_toks, test_true = zip(*test)
    results = conlleval(test_toks, test_true, test_pred)

    experiment_name = inspect.currentframe().f_code.co_name
    results["experiment_name"] = experiment_name
    return results


In [42]:
experiment_012_naive_bayes_pseudo_bigram_dishonest()

processed 268 tokens with 13 phrases; found: 17 phrases; correct: 8.
accuracy:  97.39%; precision:  47.06%; recall:  61.54%; FB1:  53.33
                 : precision:  47.06%; recall:  61.54%; FB1:  53.33  17
                 : precision:  47.06%; recall:  61.54%; FB1:  53.33  17



{'num_tokens': 268,
 'num_phrases': 13,
 'num_found': 17,
 'num_correct': 8,
 'accuracy': 97.39,
 'precision': 47.06,
 'recall': 61.54,
 'fb1': 53.33,
 'experiment_name': 'experiment_012_naive_bayes_pseudo_bigram_dishonest'}

In [50]:
def experiment_012_naive_bayes_pseudo_bigram_honest():
    """uses the classify argument for the feature extractor so that tag
    related features are based on predictions

    """
    train_nerda, dev_nerda, test_nerda = make_nerda_train_dev_test(format_fn=nerda_format_just_iob)
    train = to_padded_tok_tag_tuples_from_nerda(train_nerda)
    #dev_tup = to_padded_tok_tag_tuples_from_nerda(dev_nerda)
    test = to_padded_tok_tag_tuples_from_nerda(test_nerda)
    

    trainfeat = list(featurize_wordandtag_pseudo_bigram(train))
    classifier = nltk.NaiveBayesClassifier.train(trainfeat)

    # this way predicts over-optimistically because the preceding tags/labels are known
    #train_pred = list(map(classifier.classify, [tok for tok, lab in trainfeat]))
    #train_toks, train_true = zip(*train)
    #util.conlleval(train_toks, train_true, train_pred)

    # this way should be more fair/honest
    #train_pred = list(featurize_wordandtag_bigram(train, classify=classifier.classify))
    #util.conlleval(train_toks, train_true, [pred for _, pred in train_pred])

    # fair/honest for test
    testfeat_pred = list(featurize_wordandtag_pseudo_bigram(test, classify=classifier.classify))
    test_pred = [pred for _, pred in testfeat_pred]
    test_toks, test_true = zip(*test)

    results = conlleval(test_toks, test_true, test_pred)

    experiment_name = inspect.currentframe().f_code.co_name
    results["experiment_name"] = experiment_name
    return results


In [51]:
experiment_012_naive_bayes_pseudo_bigram_honest()

processed 268 tokens with 13 phrases; found: 13 phrases; correct: 8.
accuracy:  85.45%; precision:  61.54%; recall:  61.54%; FB1:  61.54
                 : precision:  61.54%; recall:  61.54%; FB1:  61.54  13
                 : precision:  61.54%; recall:  61.54%; FB1:  61.54  13



{'num_tokens': 268,
 'num_phrases': 13,
 'num_found': 13,
 'num_correct': 8,
 'accuracy': 85.45,
 'precision': 61.54,
 'recall': 61.54,
 'fb1': 61.54,
 'experiment_name': 'experiment_012_naive_bayes_pseudo_bigram_honest'}

In [56]:
def experiment_013_maxent_pseudo_bigram():
    """this is a maximum entropy model that uses the current and
    preceding words as separate features (not joint)"""
    train_nerda, dev_nerda, test_nerda = make_nerda_train_dev_test(format_fn=nerda_format_just_iob)
    train = to_padded_tok_tag_tuples_from_nerda(train_nerda)
    #dev_tup = to_padded_tok_tag_tuples_from_nerda(dev_nerda)
    test = to_padded_tok_tag_tuples_from_nerda(test_nerda)
    

    trainfeat = list(featurize_wordandtag_pseudo_bigram(train))
    classifier = nltk.MaxentClassifier.train(trainfeat)

    # this way predicts over-optimistically because the preceding tags/labels are known
    #train_pred = list(map(classifier.classify, [tok for tok, lab in trainfeat]))
    #train_toks, train_true = zip(*train)
    #conlleval(train_toks, train_true, train_pred)

    # this way should be more fair/honest
    #train_pred = list(featurize_wordandtag_pseudo_bigram(train, classify=classifier.classify))
    #conlleval(train_toks, train_true, [pred for _, pred in train_pred])

    # fair/honest for test
    testfeat_pred = list(featurize_wordandtag_pseudo_bigram(test, classify=classifier.classify))
    test_pred = [pred for _, pred in testfeat_pred]

    test_toks, test_true = zip(*test)
    results = conlleval(test_toks, test_true, test_pred)

    experiment_name = inspect.currentframe().f_code.co_name
    results["experiment_name"] = experiment_name
    return results

In [57]:
experiment_013_maxent_pseudo_bigram()

  ==> Training (100 iterations)

      Iteration    Log Likelihood    Accuracy
      ---------------------------------------
             1          -1.09861        0.460
             2          -0.48510        0.922
             3          -0.35873        0.957
             4          -0.28760        0.976
             5          -0.24174        0.982
             6          -0.20946        0.986
             7          -0.18538        0.989
             8          -0.16666        0.991
             9          -0.15166        0.992
            10          -0.13934        0.992
            11          -0.12903        0.993
            12          -0.12027        0.993
            13          -0.11273        0.993
            14          -0.10616        0.994
            15          -0.10039        0.994
            16          -0.09527        0.995
            17          -0.09070        0.995
            18          -0.08659        0.995
            19          -0.08288        0.995
 

{'num_tokens': 268,
 'num_phrases': 13,
 'num_found': 19,
 'num_correct': 10,
 'accuracy': 80.97,
 'precision': 52.63,
 'recall': 76.92,
 'fb1': 62.5,
 'experiment_name': 'experiment_013_maxent_pseudo_bigram'}

In [19]:
from util import featurize_wordandtag_bigram
def experiment_014_naivebayes_bigram():
    """ this is a maxent model using proper bigrams"""
    train_nerda, dev_nerda, test_nerda = make_nerda_train_dev_test(format_fn=nerda_format_just_iob)
    train = to_padded_tok_tag_tuples_from_nerda(train_nerda)
    #dev_tup = to_padded_tok_tag_tuples_from_nerda(dev_nerda)
    test = to_padded_tok_tag_tuples_from_nerda(test_nerda)
    

    trainfeat = list(featurize_wordandtag_bigram(train))
    classifier = nltk.NaiveBayesClassifier.train(trainfeat)

    # this way predicts over-optimistically because the preceding tags/labels are known
    #train_pred = list(map(classifier.classify, [tok for tok, lab in trainfeat]))
    #train_toks, train_true = zip(*train)
    #util.conlleval(train_toks, train_true, train_pred)

    # this way should be more fair/honest
    #train_pred = list(featurize_wordandtag_bigram(train, classify=classifier.classify))
    #conlleval(train_toks, train_true, [pred for _, pred in train_pred])

    # fair/honest for test
    testfeat_pred = list(featurize_wordandtag_bigram(test, classify=classifier.classify))
    test_pred = [pred for _, pred in testfeat_pred]
    test_toks, test_true = zip(*test)
    results = conlleval(test_toks, test_true, test_pred)

    experiment_name = inspect.currentframe().f_code.co_name
    results["experiment_name"] = experiment_name
    return results

def experiment_014_maxent_bigram():
    """ this is a maxent model using proper bigrams"""
    train_nerda, dev_nerda, test_nerda = make_nerda_train_dev_test(format_fn=nerda_format_just_iob)
    train = to_padded_tok_tag_tuples_from_nerda(train_nerda)
    #dev_tup = to_padded_tok_tag_tuples_from_nerda(dev_nerda)
    test = to_padded_tok_tag_tuples_from_nerda(test_nerda)
    

    trainfeat = list(featurize_wordandtag_bigram(train))
    classifier = nltk.MaxentClassifier.train(trainfeat)

    # this way predicts over-optimistically because the preceding tags/labels are known
    #train_pred = list(map(classifier.classify, [tok for tok, lab in trainfeat]))
    #train_toks, train_true = zip(*train)
    #util.conlleval(train_toks, train_true, train_pred)

    # this way should be more fair/honest
    #train_pred = list(featurize_wordandtag_bigram(train, classify=classifier.classify))
    #conlleval(train_toks, train_true, [pred for _, pred in train_pred])

    # fair/honest for test
    testfeat_pred = list(featurize_wordandtag_bigram(test, classify=classifier.classify))
    test_pred = [pred for _, pred in testfeat_pred]
    test_toks, test_true = zip(*test)
    results = conlleval(test_toks, test_true, test_pred)

    experiment_name = inspect.currentframe().f_code.co_name
    results["experiment_name"] = experiment_name
    return results

In [20]:
experiment_014_naivebayes_bigram()

processed 268 tokens with 13 phrases; found: 24 phrases; correct: 10.
accuracy:  72.01%; precision:  41.67%; recall:  76.92%; FB1:  54.05
                 : precision:  41.67%; recall:  76.92%; FB1:  54.05  24
                 : precision:  41.67%; recall:  76.92%; FB1:  54.05  24



{'num_tokens': 268,
 'num_phrases': 13,
 'num_found': 24,
 'num_correct': 10,
 'accuracy': 72.01,
 'precision': 41.67,
 'recall': 76.92,
 'fb1': 54.05,
 'experiment_name': 'experiment_014_naivebayes_bigram'}

In [61]:
experiment_014_maxent_bigram()

  ==> Training (100 iterations)

      Iteration    Log Likelihood    Accuracy
      ---------------------------------------
             1          -1.09861        0.460
             2          -0.43515        0.949
             3          -0.30674        0.990
             4          -0.23896        0.993
             5          -0.19642        0.996
             6          -0.16708        0.997
             7          -0.14558        0.997
             8          -0.12913        0.997
             9          -0.11612        0.997
            10          -0.10558        0.997
            11          -0.09686        0.997
            12          -0.08953        0.997
            13          -0.08328        0.997
            14          -0.07788        0.997
            15          -0.07317        0.997
            16          -0.06903        0.997
            17          -0.06536        0.997
            18          -0.06208        0.997
            19          -0.05913        0.997
 

{'num_tokens': 268,
 'num_phrases': 13,
 'num_found': 17,
 'num_correct': 10,
 'accuracy': 83.96,
 'precision': 58.82,
 'recall': 76.92,
 'fb1': 66.67,
 'experiment_name': 'experiment_014_maxent_bigram'}

In [23]:
def featurize_wordandtag_trigram(tokens, classify=False):
    """actually the previous was not really bigram, only two word context
    here's a better bigram using joint features

    input is a list/iterable of tokens, output/generator is list of dictionary features, like
    [{word_nm1: the, word_n: dog}]
    if tok == <s>, word_nm1 = "</s>" (padding)

    """
    prev_tok = ["</S>","</S>"] # end of sentence marker
    prev_lab = ["O", "O"]
    for tok, lab in tokens:
        feature_dict = {}
        # previous words
        feature_dict["word_n"] = tok
        feature_dict["word_n-1"] = prev_tok[-1]
        feature_dict["word_n-2"] = prev_tok[-2]        
        feature_dict["word_n-1,word_n"] = prev_tok[-1] + ","  + tok
        feature_dict["word_n-2,word_n-1,word_n"] = prev_tok[-2] + ","  + prev_tok[-1] + "," + tok
        
        # prev labels
        feature_dict["lab_n-1"] = prev_lab[-1]
        feature_dict["lab_n-2"] = prev_lab[-2]
        feature_dict["lab_n-2,lab_n-1"] = prev_lab[-2] + "," + prev_lab[-1]
        
        #combined words and labels
        # word_n plus one tag
        feature_dict["lab_n-1,word_n"] = prev_lab[-1] + ","  + tok
        feature_dict["lab_n-2,word_n"] = prev_lab[-2] + ","  + tok
        # word_n-1 plus one tag
        feature_dict["lab_n-1,word_n-1"] = prev_lab[-1] + ","  + prev_tok[-1]
        feature_dict["lab_n-2,word_n-1"] = prev_lab[-2] + ","  + prev_tok[-1]
        # word_n-2 plus one tag
        feature_dict["lab_n-1,word_n-2"] = prev_lab[-1] + ","  + prev_tok[-2]
        feature_dict["lab_n-1,word_n-2"] = prev_lab[-1] + ","  + prev_tok[-2]
        # word_n plus two tags
        feature_dict["lab_n-2,lab_n-1,word_n"] = prev_lab[-2] + "," + prev_lab[-1] + ","  + tok
        # word_n-1 plus two tags
        feature_dict["lab_n-2,lab_n-1,word_n-1"] = prev_lab[-2] + "," + prev_lab[-1] + ","  + prev_tok[-1]
        # word_n-2 plus two tags
        feature_dict["lab_n-2,lab_n-1,word_n-1"] = prev_lab[-2] + "," + prev_lab[-1] + ","  + prev_tok[-2]
        # word_n and n-1 plus one tag
        feature_dict["lab_n-1,word_n-1,word_n"] = prev_lab[-1] + "," + prev_tok[-1] + ","  + tok
        feature_dict["lab_n-2,word_n-1,word_n"] = prev_lab[-2] + "," + prev_tok[-1] + ","  + tok
        # word_n and n-2 plus one tag
        feature_dict["lab_n-1,word_n-1,word_n"] = prev_lab[-1] + "," + prev_tok[-2] + ","  + tok
        feature_dict["lab_n-2,word_n-1,word_n"] = prev_lab[-2] + "," + prev_tok[-2] + ","  + tok
        # word_n-1 and n-2 plus one tag
        feature_dict["lab_n-1,word_n-2,word_n-1"] = prev_lab[-1] + "," + prev_tok[-2] + "," + prev_tok[-1]
        feature_dict["lab_n-2,word_n-2,word_n-1"] = prev_lab[-2] + "," + prev_tok[-2] + "," + prev_tok[-1]
        # word_n and n-1 plus two tags
        feature_dict["lab_n-2,lab_n-1,word_n-1,word_n"] = prev_lab[-2] + "," + prev_lab[-1] + "," + prev_tok[-1] + ","  + tok
        # word_n and n-2 plus two tags
        feature_dict["lab_n-2,lab_n-1,word_n-2,word_n"] = prev_lab[-2] + "," + prev_lab[-1] + "," + prev_tok[-2] + ","  + tok
        # word_n-1 and n-2 plus two tags
        feature_dict["lab_n-2,lab_n-1,word_n-2,word_n-1"] = prev_lab[-2] + "," + prev_lab[-1] + "," + prev_tok[-2] + ","  + prev_tok[-1]
        # all
        feature_dict["lab_n-2,lab_n-1,word_n-2,word_n-1,word_n"] = prev_lab[-2] + ",", prev_lab[-1] + "," + prev_tok[-2] + "," + prev_tok[-1] + "," + tok
        prev_tok[-2] = prev_tok[-1]
        prev_tok[-1] = tok
        if classify: # this is the part that makes it honest fair, see below
            lab = classify(feature_dict)
        prev_lab[-2] = prev_lab[-1]
        prev_lab[-1] = lab
        yield feature_dict, lab

In [26]:
def experiment_016_naivebayes_trigram():
    """ this is a naive bayes model using trigrams"""
    train_nerda, dev_nerda, test_nerda = make_nerda_train_dev_test(format_fn=nerda_format_just_iob)
    train = to_padded_tok_tag_tuples_from_nerda(train_nerda)
    #dev_tup = to_padded_tok_tag_tuples_from_nerda(dev_nerda)
    test = to_padded_tok_tag_tuples_from_nerda(test_nerda)
    

    trainfeat = list(featurize_wordandtag_trigram(train))
    classifier = nltk.NaiveBayesClassifier.train(trainfeat)

    # this way predicts over-optimistically because the preceding tags/labels are known
    #train_pred = list(map(classifier.classify, [tok for tok, lab in trainfeat]))
    #train_toks, train_true = zip(*train)
    #util.conlleval(train_toks, train_true, train_pred)

    # this way should be more fair/honest
    #train_pred = list(featurize_wordandtag_bigram(train, classify=classifier.classify))
    #conlleval(train_toks, train_true, [pred for _, pred in train_pred])

    # fair/honest for test
    testfeat_pred = list(featurize_wordandtag_trigram(test, classify=classifier.classify))
    test_pred = [pred for _, pred in testfeat_pred]
    test_toks, test_true = zip(*test)
    results = conlleval(test_toks, test_true, test_pred)

    experiment_name = inspect.currentframe().f_code.co_name
    results["experiment_name"] = experiment_name
    return results

def experiment_016_maxent_trigram():
    """ this is a maxent model using trigrams"""
    train_nerda, dev_nerda, test_nerda = make_nerda_train_dev_test(format_fn=nerda_format_just_iob)
    train = to_padded_tok_tag_tuples_from_nerda(train_nerda)
    #dev_tup = to_padded_tok_tag_tuples_from_nerda(dev_nerda)
    test = to_padded_tok_tag_tuples_from_nerda(test_nerda)
    

    trainfeat = list(featurize_wordandtag_trigram(train))
    classifier = nltk.MaxentClassifier.train(trainfeat)

    # this way predicts over-optimistically because the preceding tags/labels are known
    #train_pred = list(map(classifier.classify, [tok for tok, lab in trainfeat]))
    #train_toks, train_true = zip(*train)
    #util.conlleval(train_toks, train_true, train_pred)

    # this way should be more fair/honest
    #train_pred = list(featurize_wordandtag_bigram(train, classify=classifier.classify))
    #conlleval(train_toks, train_true, [pred for _, pred in train_pred])

    # fair/honest for test
    testfeat_pred = list(featurize_wordandtag_trigram(test, classify=classifier.classify))
    test_pred = [pred for _, pred in testfeat_pred]
    test_toks, test_true = zip(*test)
    results = conlleval(test_toks, test_true, test_pred)

    experiment_name = inspect.currentframe().f_code.co_name
    results["experiment_name"] = experiment_name
    return results

In [27]:
experiment_016_naivebayes_trigram()

processed 268 tokens with 13 phrases; found: 54 phrases; correct: 7.
accuracy:  62.31%; precision:  12.96%; recall:  53.85%; FB1:  20.90
                 : precision:  12.96%; recall:  53.85%; FB1:  20.90  54
                 : precision:  12.96%; recall:  53.85%; FB1:  20.90  54



{'num_tokens': 268,
 'num_phrases': 13,
 'num_found': 54,
 'num_correct': 7,
 'accuracy': 62.31,
 'precision': 12.96,
 'recall': 53.85,
 'fb1': 20.9,
 'experiment_name': 'experiment_016_naivebayes_trigram'}

In [28]:
experiment_016_maxent_trigram()

  ==> Training (100 iterations)

      Iteration    Log Likelihood    Accuracy
      ---------------------------------------
             1          -1.09861        0.460
             2          -0.38046        0.977
             3          -0.26011        0.994
             4          -0.20167        0.996
             5          -0.16586        0.997
             6          -0.14134        0.998
             7          -0.12340        0.998
             8          -0.10967        0.998
             9          -0.09880        0.998
            10          -0.08997        0.998
            11          -0.08265        0.998
            12          -0.07648        0.998
            13          -0.07121        0.998
            14          -0.06665        0.998
            15          -0.06267        0.998
            16          -0.05916        0.998
            17          -0.05604        0.998
            18          -0.05326        0.998
            19          -0.05075        0.998
 

{'num_tokens': 268,
 'num_phrases': 13,
 'num_found': 12,
 'num_correct': 9,
 'accuracy': 91.42,
 'precision': 75.0,
 'recall': 69.23,
 'fb1': 72.0,
 'experiment_name': 'experiment_016_maxent_trigram'}

In [70]:
train_nerda, dev_nerda, test_nerda = make_nerda_train_dev_test(format_fn=nerda_format_just_iob)
train = to_padded_tok_tag_tuples_from_nerda(train_nerda)
#dev_tup = to_padded_tok_tag_tuples_from_nerda(dev_nerda)
test = to_padded_tok_tag_tuples_from_nerda(test_nerda)
    

trainfeat = list(featurize_wordandtag_trigram(train))
classifier = nltk.MaxentClassifier.train(trainfeat)

  ==> Training (100 iterations)

      Iteration    Log Likelihood    Accuracy
      ---------------------------------------
             1          -1.09861        0.460
             2          -0.38046        0.977
             3          -0.26011        0.994
             4          -0.20167        0.996
             5          -0.16586        0.997
             6          -0.14134        0.998
             7          -0.12340        0.998
             8          -0.10967        0.998
             9          -0.09880        0.998
            10          -0.08997        0.998
            11          -0.08265        0.998
            12          -0.07648        0.998
            13          -0.07121        0.998
            14          -0.06665        0.998
            15          -0.06267        0.998
            16          -0.05916        0.998
            17          -0.05604        0.998
            18          -0.05326        0.998
            19          -0.05075        0.998
 

In [35]:
def featurize_wordandtag_trigram_backward(tokens, classify=False):
    """ backwards trigram features: various permutations of 3 word/2 tag following 
    context

    input is a list/iterable of tokens, output/generator is list of dictionary features, like
    [{word_nm1: the, word_n: dog}]
    if tok == <s>, word_nm1 = "</s>" (padding)

    """
    next_tok = ["<S>","<S>"] # beginning of sentence marker
    next_lab = ["O", "O"]
    output = []
    for tok, lab in reversed(tokens):
        feature_dict = {}
        # following words
        feature_dict["word_n"] = tok
        feature_dict["word_n+1"] = next_tok[-1]
        feature_dict["word_n+2"] = next_tok[-2]        
        feature_dict["word_n+1,word_n"] = next_tok[-1] + ","  + tok
        feature_dict["word_n+2,word_n+1,word_n"] = next_tok[-2] + ","  + next_tok[-1] + "," + tok
    
        # following labels
        feature_dict["lab_n+1"] = next_lab[-1]
        feature_dict["lab_n+2"] = next_lab[-2]
        feature_dict["lab_n+2,lab_n+1"] = next_lab[-2] + "," + next_lab[-1]
        
        #combined words and labels
        # word_n plus one tag
        feature_dict["lab_n+1,word_n"] = next_lab[-1] + ","  + tok
        feature_dict["lab_n+2,word_n"] = next_lab[-2] + ","  + tok
        # word_n+1 plus one tag
        feature_dict["lab_n+1,word_n+1"] = next_lab[-1] + ","  + next_tok[-1]
        feature_dict["lab_n+2,word_n+1"] = next_lab[-2] + ","  + next_tok[-1]
        # word_n+2 plus one tag
        feature_dict["lab_n+1,word_n+2"] = next_lab[-1] + ","  + next_tok[-2]
        feature_dict["lab_n+1,word_n+2"] = next_lab[-1] + ","  + next_tok[-2]
        # word_n plus two tags
        feature_dict["lab_n+2,lab_n+1,word_n"] = next_lab[-2] + "," + next_lab[-1] + ","  + tok
        # word_n+1 plus two tags
        feature_dict["lab_n+2,lab_n+1,word_n+1"] = next_lab[-2] + "," + next_lab[-1] + ","  + next_tok[-1]
        # word_n+2 plus two tags
        feature_dict["lab_n+2,lab_n+1,word_n+1"] = next_lab[-2] + "," + next_lab[-1] + ","  + next_tok[-2]
        # word_n and n+1 plus one tag
        feature_dict["lab_n+1,word_n+1,word_n"] = next_lab[-1] + "," + next_tok[-1] + ","  + tok
        feature_dict["lab_n+2,word_n+1,word_n"] = next_lab[-2] + "," + next_tok[-1] + ","  + tok
        # word_n and n+2 plus one tag
        feature_dict["lab_n+1,word_n+1,word_n"] = next_lab[-1] + "," + next_tok[-2] + ","  + tok
        feature_dict["lab_n+2,word_n+1,word_n"] = next_lab[-2] + "," + next_tok[-2] + ","  + tok
        # word_n+1 and n+2 plus one tag
        feature_dict["lab_n+1,word_n+2,word_n+1"] = next_lab[-1] + "," + next_tok[-2] + "," + next_tok[-1]
        feature_dict["lab_n+2,word_n+2,word_n+1"] = next_lab[-2] + "," + next_tok[-2] + "," + next_tok[-1]
        # word_n and n+1 plus two tags
        feature_dict["lab_n+2,lab_n+1,word_n+1,word_n"] = next_lab[-2] + "," + next_lab[-1] + "," + next_tok[-1] + ","  + tok
        # word_n and n-2 plus two tags
        feature_dict["lab_n+2,lab_n+1,word_n+2,word_n"] = next_lab[-2] + "," + next_lab[-1] + "," + next_tok[-2] + ","  + tok
        # word_n-1 and n-2 plus two tags
        feature_dict["lab_n+2,lab_n+1,word_n+2,word_n+1"] = next_lab[-2] + "," + next_lab[-1] + "," + next_tok[-2] + ","  + next_tok[-1]
        # all
        feature_dict["lab_n+2,lab_n+1,word_n+2,word_n+1,word_n"] = next_lab[-2] + ",", next_lab[-1] + "," + next_tok[-2] + "," + next_tok[-1] + "," + tok
        
        next_tok[-2] = next_tok[-1]
        next_tok[-1] = tok
        if classify: # this is the part that makes it honest fair
            lab = classify(feature_dict)
        next_lab[-2] = next_lab[-1]
        next_lab[-1] = lab
        output.append((feature_dict, lab))
    return(reversed(output))

In [36]:
def experiment_025_naivebayes_trigram_backward():
    """ this is a naive bayes model using trigrams of backward context"""
    train_nerda, dev_nerda, test_nerda = make_nerda_train_dev_test(format_fn=nerda_format_just_iob)
    train = to_padded_tok_tag_tuples_from_nerda(train_nerda)
    #dev_tup = to_padded_tok_tag_tuples_from_nerda(dev_nerda)
    test = to_padded_tok_tag_tuples_from_nerda(test_nerda)
    

    trainfeat = list(featurize_wordandtag_trigram_backward(train))
    classifier = nltk.NaiveBayesClassifier.train(trainfeat)

    # this way predicts over-optimistically because the preceding tags/labels are known
    #train_pred = list(map(classifier.classify, [tok for tok, lab in trainfeat]))
    #train_toks, train_true = zip(*train)
    #util.conlleval(train_toks, train_true, train_pred)

    # this way should be more fair/honest
    #train_pred = list(featurize_wordandtag_bigram(train, classify=classifier.classify))
    #conlleval(train_toks, train_true, [pred for _, pred in train_pred])

    # fair/honest for test
    testfeat_pred = list(featurize_wordandtag_trigram_backward(test, classify=classifier.classify))
    test_pred = [pred for _, pred in testfeat_pred]
    test_toks, test_true = zip(*test)
    results = conlleval(test_toks, test_true, test_pred)

    experiment_name = inspect.currentframe().f_code.co_name
    results["experiment_name"] = experiment_name
    return results
    
def experiment_025_maxent_trigram_backward():
    """ this is a naive bayes model using trigrams of backward context"""
    train_nerda, dev_nerda, test_nerda = make_nerda_train_dev_test(format_fn=nerda_format_just_iob)
    train = to_padded_tok_tag_tuples_from_nerda(train_nerda)
    #dev_tup = to_padded_tok_tag_tuples_from_nerda(dev_nerda)
    test = to_padded_tok_tag_tuples_from_nerda(test_nerda)
    

    trainfeat = list(featurize_wordandtag_trigram_backward(train))
    classifier = nltk.MaxentClassifier.train(trainfeat)

    # this way predicts over-optimistically because the preceding tags/labels are known
    #train_pred = list(map(classifier.classify, [tok for tok, lab in trainfeat]))
    #train_toks, train_true = zip(*train)
    #util.conlleval(train_toks, train_true, train_pred)

    # this way should be more fair/honest
    #train_pred = list(featurize_wordandtag_bigram(train, classify=classifier.classify))
    #conlleval(train_toks, train_true, [pred for _, pred in train_pred])

    # fair/honest for test
    testfeat_pred = list(featurize_wordandtag_trigram_backward(test, classify=classifier.classify))
    test_pred = [pred for _, pred in testfeat_pred]
    test_toks, test_true = zip(*test)
    results = conlleval(test_toks, test_true, test_pred)

    experiment_name = inspect.currentframe().f_code.co_name
    results["experiment_name"] = experiment_name
    return results

In [37]:
experiment_024_naivebayes_trigram_backward()

processed 268 tokens with 13 phrases; found: 77 phrases; correct: 0.
accuracy:  60.07%; precision:   0.00%; recall:   0.00%; FB1:   0.00
                 : precision:   0.00%; recall:   0.00%; FB1:   0.00  77
                 : precision:   0.00%; recall:   0.00%; FB1:   0.00  77



{'num_tokens': 268,
 'num_phrases': 13,
 'num_found': 77,
 'num_correct': 0,
 'accuracy': 60.07,
 'precision': 0.0,
 'recall': 0.0,
 'fb1': 0.0,
 'experiment_name': 'experiment_024_naivebayes_trigram_backward'}

In [38]:
experiment_024_maxent_trigram_backward()

  ==> Training (100 iterations)

      Iteration    Log Likelihood    Accuracy
      ---------------------------------------
             1          -1.09861        0.460
             2          -0.38413        0.945
             3          -0.26879        0.988
             4          -0.20948        0.996
             5          -0.17243        0.998
             6          -0.14688        0.998
             7          -0.12813        0.998
             8          -0.11375        0.998
             9          -0.10237        0.998
            10          -0.09312        0.998
            11          -0.08545        0.998
            12          -0.07899        0.998
            13          -0.07346        0.998
            14          -0.06869        0.998
            15          -0.06452        0.998
            16          -0.06085        0.998
            17          -0.05758        0.998
            18          -0.05467        0.998
            19          -0.05204        0.998
 

{'num_tokens': 268,
 'num_phrases': 13,
 'num_found': 3,
 'num_correct': 0,
 'accuracy': 56.34,
 'precision': 0.0,
 'recall': 0.0,
 'fb1': 0.0,
 'experiment_name': 'experiment_024_maxent_trigram_backward'}