In [4]:
import sys
from glob import glob
from collections import Counter, defaultdict, OrderedDict
from operator import itemgetter
import itertools
import numpy as np
from scipy.misc import logsumexp
from scipy import sparse
import matplotlib.pyplot as plot
import json
import traceback

from tuna import *
from featurefunctions import cross_product_features, null_features
import training_instances as inst
from learning import score, cost
import config

In [5]:
def evaluate_pretrained(
        filenames=glob("../TUNA/corpus/singular/furniture/*.xml"),
        phi=cross_product_features,
        weights={},
        verbose=True,
        eval_num='',
        options=None):
    if options is None:
        options = config.options()

    get_instances = (inst.get_generation_instances
                     if options.generation else
                     inst.get_plural_instances
                     if 'plural' in options.data_dir else
                     inst.get_singular_instances)
    D = get_instances(filenames=filenames)
    # messages is the set of utterances observed in training, as a proxy for
    # the set of all possible utterances. TODO: Can we do this in a more principled way?
    messages = [d[0] for d in D]

    # Train-test split:
    train, test = None, None
    if options.train_percentage > 0.0: # where 0, no split:
        random.shuffle(D)
        train_size = int(round(len(D)*options.train_percentage, 0))
        train = D[ : train_size]
        test = D[train_size: ]
    else:
        train = D
        test = D

    
    dump_params(weights, config.get_file_path('params.json'))

    # Optionally view weights:
    if verbose:
        for key, val in sorted(weights.items(), key=itemgetter(1), reverse=True):
            if val != 0.0:
                print key, val

    # Accuracy evaluation:
    results = defaultdict(int)
    with open(config.get_file_path('predictions.%s.jsons' % eval_num), 'w') as outfile:
        for (id, x, y, domain) in test:
            prediction = predict(x=x, w=weights, phi=phi,
                                 messages=messages,
                                 classes=domain)
            json.dump({'id': id, 'input': x,
                       'gold': y, 'prediction': prediction}, outfile)
            outfile.write('\n')
            results[y==prediction] += 1
    acc = float(results[True])/len(test)

    if verbose:
        print "Accuracy: %s of %s (%0.02f%%)" % (results[True], len(test), acc)
    return acc


In [14]:
def main():
    sys.argv=['program', '--run_dir', './']
    options = config.options()
    with open('runs/66/params.json', 'r') as infile:
        weights = json.load(infile)
    filenames = glob("../TUNA/corpus/singular/furniture/*.xml")
    for featname, phi in (#('Random', null_features, log_loss_grad),
        #('Literal listener', cross_product_features, log_loss_grad),
        ('RSA', cross_product_features),):
        accs = np.array([evaluate_pretrained(filenames=filenames, phi=phi, weights=weights,
                                  eval_num=i, options=options)
                         for i in range(1)])
        print 'Finished %d random 80/20 splits using %s on %s' % \
            (options.evaluate_reps, featname, options.data_dir)
        print 'accuracy mean: %0.2f' % accs.mean()
        print 'accuracy std: %0.2f' % accs.std()
        
main()

AttributeError: 'Namespace' object has no attribute 'generation'