In [1]:
import numpy as np
import tensorflow as tf
import time
import os
import sys

import nsm
from nsm import data_utils
from nsm import env_factory
from nsm import graph_factory
from nsm import model_factory
from nsm import agent_factory
from nsm import executor_factory
from nsm import computer_factory
from nsm import word_embeddings

import experiment as exp

# Keep TensorFlow quiet by default; switch the level to tf.logging.INFO
# if you want to see more information.
tf.logging.set_verbosity(tf.logging.ERROR)

# Register a string flag named 'f' (help text 'kernel'), then take a handle
# on the shared flag container used by the cells below.
# NOTE(review): presumably this flag absorbs the `-f` argument the notebook
# kernel is launched with -- confirm.
tf.app.flags.DEFINE_string('f', '', 'kernel')
FLAGS = tf.app.flags.FLAGS

In [2]:
# Location of your data/wikitable folder. Edit this line if your copy lives
# somewhere other than the default, ~/projects/data/wikitable.
data_dir = os.path.expanduser('~/projects/data/wikitable')

In [3]:
# Alternative input files, resolved against data_dir. They are not assigned
# to any flag in this cell; they are kept so they can be loaded instead of
# the dev split.
train_file = os.path.join(data_dir, 'processed_input/preprocess_14/train_examples.jsonl')
unittest_file = os.path.join(data_dir, 'processed_input/preprocess_14/data_split_1/train_split_shard_90-0.jsonl')

# Evaluation settings: evaluation-only mode with GPU use switched off,
# reading the dev split.
FLAGS.eval_only = True
FLAGS.eval_use_gpu = False
FLAGS.eval_gpu_id = 0
FLAGS.max_n_mem = 60
FLAGS.eval_file = os.path.join(data_dir, 'processed_input/preprocess_14/data_split_1/dev_split.jsonl')

# Output folder and name of the experiment to load. Change these two values
# to evaluate your own run; the defaults point at the pretrained model that
# ships with the repo.
FLAGS.output_dir = os.path.expanduser('~/projects/neural-symbolic-machines/table/wtq/')
FLAGS.experiment_to_eval = 'pretrained_model'

# Build the experiment config only after every flag above has been set.
experiment_config = exp.create_experiment_config()

In [4]:
# Load the agent and the environments in the dev set.
# This usually takes 15-30 sec.
# To look at the training environments instead, put train_file in `fns`
# (takes about 75-150 sec).
fns = [FLAGS.eval_file]
agent, envs = exp.init_experiment(fns, FLAGS.eval_use_gpu, gpu_id=str(FLAGS.eval_gpu_id))

# Switch off the punish_extra_work setting on every loaded environment.
for env in envs:
    env.punish_extra_work = False

# Index the environments by name so that a single one can be looked up by
# its id (for example 'nt-34'). A dict comprehension avoids building the
# intermediate list of (name, env) tuples.
env_dict = {env.name: env for env in envs}

# Evaluate on 5 environments / questions and show generated programs. 
Use the environment id (for example, nt-34) to find the question and its accompanying table on the website below (from the Stanford NLP group). 
https://nlp.stanford.edu/software/sempre/wikitable/viewer/#203-591

In [5]:
# Evaluate on a small slice of the loaded environments.
# Set 'eval_envs = envs' to evaluate on the whole validation set instead;
# that usually takes 8-10 minutes on a laptop.
eval_envs = envs[5:10]
dev_avg_return, dev_samples, dev_samples_in_beam = exp.beam_search_eval(
    agent, eval_envs)

# Report the average return, then the program generated for each environment.
print('Accuracy on the selected {} environments are {}'.format(
    len(eval_envs), dev_avg_return))
print('Show the generated programs:')
print(exp.show_samples(
    dev_samples, envs[0].de_vocab, env_dict=env_dict))

Accuracy on the selected 5 environments are 0.8
Show the generated programs:

env nt-24
question: who ranked right after turkey?
answer: [u'Sweden', u'Sweden']
program: ( filter_str_contain_any all_rows [u'turkey'] r.nation-string ) ( next v12 ) ( hop v13 r.nation-string ) <END>
prediction: [u'sweden']
return: 1.0
prob is 1.0

env nt-34
question: who was the top ranked competitor in this race?
answer: [u'Iryna Shpylova', u'Iryna Shpylova']
program: ( first all_rows ) ( hop v7 r.cyclist-string ) <END>
prediction: [u'iryna shpylova']
return: 1.0
prob is 1.0

env nt-15
question: what was the venue when he placed first?
answer: [u'New Delhi, India', u'New Delhi, India']
program: ( argmin all_rows r.position-number ) ( first all_rows ) ( diff v10 v9 r.year-number ) <END>
prediction: [0.0]
return: 0.0
prob is 1.0

env nt-40
question: what was the number of silver medals won by ukraine?
answer: [u'2', u'2.0']
program: ( filter_str_contain_any all_rows [u'ukraine'] r.nation-string ) ( hop v12 

# Debug the beam search by showing programs in the beam. 

In [6]:
# Pick one environment by id and run beam search on it alone.
# Note that this overwrites dev_avg_return, dev_samples and
# dev_samples_in_beam from the evaluation cell above.
env_id = 'nt-13901'
dev_avg_return, dev_samples, dev_samples_in_beam = exp.beam_search_eval(
    agent, [env_dict[env_id]])

# Print every program in the beam, not only the one shown in dev_samples.
print('Show the {} programs in beam for environment {}:'.format(
    len(dev_samples_in_beam), env_id))
print(exp.show_samples(
    dev_samples_in_beam, envs[0].de_vocab, env_dict=env_dict))

Show the 5 programs in beam for environment nt-13901:

env nt-13901
question: the most points were scored by which player?
answer: [u'Karel Hrom\xe1dka', u'Karel Hrom\xe1dka']
program: ( argmax all_rows r._points-number ) ( hop v10 r.player-string ) <END>
prediction: [u'karel hromadka']
return: 1.0
prob is 0.879124546146

env nt-13901
question: the most points were scored by which player?
answer: [u'Karel Hrom\xe1dka', u'Karel Hrom\xe1dka']
program: ( argmin all_rows r._points-number ) ( hop v10 r.player-string ) <END>
prediction: [u'dawid daniuszewski', u'endre steiner', u'otto zimmermann', u'damian reca', u'giovanni cenni', u'karoly sterk']
return: 0.0
prob is 0.108504561195

env nt-13901
question: the most points were scored by which player?
answer: [u'Karel Hrom\xe1dka', u'Karel Hrom\xe1dka']
program: ( maximum all_rows r._points-number ) ( filter_eq all_rows v10 r._points-number ) ( hop v11 r.player-string ) <END>
prediction: [u'karel hromadka']
return: 1.0
prob is 0.0075883843301