In [1]:
import numpy as np
import tensorflow as tf
import time
import os
import sys

import nsm
from nsm import data_utils
from nsm import env_factory
from nsm import graph_factory
from nsm import model_factory
from nsm import agent_factory
from nsm import executor_factory
from nsm import computer_factory
from nsm import word_embeddings

import experiment as exp

# Emit INFO-level TensorFlow log messages in the cell outputs.
tf.logging.set_verbosity(tf.logging.INFO)

# Register a string flag called 'f' with an empty default.
# NOTE(review): presumably this lets flag parsing accept the
# `-f <connection file>` argument that Jupyter passes to the kernel -- confirm.
tf.app.flags.DEFINE_string('f', '', 'kernel')

# Shared flag container; the cells below configure the evaluation through it.
FLAGS = tf.app.flags.FLAGS

In [2]:
# Root of the data/wikitable folder. Edit the template below if your copy
# lives somewhere other than the default ~/projects/data/wikitable.
default_wikitable_dir = '~/projects/data/wikitable'

# Expand the leading '~' to the current user's home directory.
data_dir = os.path.expanduser(default_wikitable_dir)

In [3]:
def _under_data_dir(relative_path):
    """Return `relative_path` joined onto the module-level `data_dir`."""
    return os.path.join(data_dir, relative_path)


# Additional example files. `train_file` can be loaded instead of the dev
# split to inspect the training environments.
unittest_file = _under_data_dir('processed_input/preprocess_14/data_split_1/train_split_shard_90-0.jsonl')
train_file = _under_data_dir('processed_input/preprocess_14/train_examples.jsonl')

# Evaluation-only settings: read the dev split, with GPU use switched off.
FLAGS.eval_only = True
FLAGS.eval_use_gpu = False
FLAGS.eval_gpu_id = 0
FLAGS.eval_file = _under_data_dir('processed_input/preprocess_14/data_split_1/dev_split.jsonl')

# Output folder and name of the experiment to load.
# The defaults point at the pretrained model shipped in the repo.
FLAGS.output_dir = os.path.expanduser('~/projects/neural-symbolic-machines/table/wtq/')
FLAGS.experiment_to_eval = 'pretrained_model'

# Called last, after every flag above has been set.
# NOTE(review): presumably the config is built from those FLAGS -- confirm.
experiment_config = exp.create_experiment_config()

In [6]:
# Build the agent together with the environments of the dev set.
# Expect roughly 15-30 sec; switching `fns` to [train_file] shows the
# training environments instead, at a cost of about 75-150 sec.
fns = [FLAGS.eval_file]
agent, envs = exp.init_experiment(
    fns,
    FLAGS.eval_use_gpu,
    gpu_id=str(FLAGS.eval_gpu_id))

# Switch off the `punish_extra_work` option on every environment.
for env in envs:
    env.punish_extra_work = False

# Name -> environment lookup, used to pick out individual examples later.
env_dict = {env.name: env for env in envs}

INFO:tensorflow:2831 examples in dataset.
INFO:tensorflow:2108 tables.
INFO:tensorflow:2045 unique tokens in encoder vocab
INFO:tensorflow:2831 examples in the dataset
INFO:tensorflow:creating environment #0
INFO:tensorflow:creating environment #100
INFO:tensorflow:creating environment #200
INFO:tensorflow:creating environment #300
INFO:tensorflow:creating environment #400
INFO:tensorflow:creating environment #500
INFO:tensorflow:creating environment #600
INFO:tensorflow:creating environment #700
INFO:tensorflow:creating environment #800
INFO:tensorflow:creating environment #900
INFO:tensorflow:creating environment #1000
INFO:tensorflow:creating environment #1100
INFO:tensorflow:creating environment #1200
INFO:tensorflow:creating environment #1300
INFO:tensorflow:creating environment #1400
INFO:tensorflow:creating environment #1500
INFO:tensorflow:creating environment #1600
INFO:tensorflow:creating environment #1700
INFO:tensorflow:creating environment #1800
INFO:tensorflow:creating en

# Evaluate on the first 10 environments and show generated programs. 

In [9]:
# Run beam search evaluation on the first 10 environments only.
eval_envs = envs[:10]
beam_eval_result = exp.beam_search_eval(agent, eval_envs)
dev_avg_return, dev_samples, dev_samples_in_beam = beam_eval_result

# Report the average return as the accuracy on this subset.
accuracy_report = 'Accuracy on the selected {} environments are {}'.format(
    len(eval_envs), dev_avg_return)
print(accuracy_report)

# Print the generated programs, rendered with the decoder vocab of the
# first environment.
print('Show the generated programs:')
print(exp.show_samples(dev_samples, envs[0].de_vocab, env_dict=env_dict))

INFO:tensorflow:eval, batch 0: 10 envs
INFO:tensorflow:50 samples in beam, batch 0.
INFO:tensorflow:1.7073469162 sec used in evaluator batch 0.
INFO:tensorflow:avg return adjusted from 0.6 to 0.6 based on true n
INFO:tensorflow:10 samples in non-empty beam.
INFO:tensorflow:true n is 10
INFO:tensorflow:10 questions in dev set.
INFO:tensorflow:0.6 dev avg return.
INFO:tensorflow:dev: avg return: 0.6, avg length: 14.1.
Accuracy on the selected 10 environments are 0.6
Show the generated programs:

env nt-34
question: who was the top ranked competitor in this race?
answer: [u'Iryna Shpylova', u'Iryna Shpylova']
program: ( first all_rows ) ( hop v7 r.cyclist-string ) <END>
prediction: [u'iryna shpylova']
return: 1.0
prob is 1.0

env nt-40
question: what was the number of silver medals won by ukraine?
answer: [u'2', u'2.0']
program: ( filter_str_contain_any all_rows [u'ukraine'] r.nation-string ) ( hop v12 r.silver-number ) <END>
prediction: [2.0]
return: 1.0
prob is 1.0

env nt-15
question: 

# Debug the beam search by showing programs in the beam. 

In [11]:
# Pick a single environment by name and run beam search on it alone.
env_id = 'nt-13901'
debug_env = env_dict[env_id]
dev_avg_return, dev_samples, dev_samples_in_beam = exp.beam_search_eval(
    agent, [debug_env])

# List every program that ended up in the beam for this environment,
# not just the samples returned in `dev_samples`.
beam_header = 'Show the {} programs in beam for environment {}:'.format(
    len(dev_samples_in_beam), env_id)
print(beam_header)
print(exp.show_samples(dev_samples_in_beam, envs[0].de_vocab, env_dict=env_dict))

INFO:tensorflow:eval, batch 0: 1 envs
INFO:tensorflow:5 samples in beam, batch 0.
INFO:tensorflow:0.2055580616 sec used in evaluator batch 0.
INFO:tensorflow:avg return adjusted from 1.0 to 1.0 based on true n
INFO:tensorflow:1 samples in non-empty beam.
INFO:tensorflow:true n is 1
INFO:tensorflow:1 questions in dev set.
INFO:tensorflow:1.0 dev avg return.
INFO:tensorflow:dev: avg return: 1.0, avg length: 11.0.
Show the 5 programs in beam for environment nt-13901:

env nt-13901
question: the most points were scored by which player?
answer: [u'Karel Hrom\xe1dka', u'Karel Hrom\xe1dka']
program: ( argmax all_rows r._points-number ) ( hop v10 r.player-string ) <END>
prediction: [u'karel hromadka']
return: 1.0
prob is 0.879124546146

env nt-13901
question: the most points were scored by which player?
answer: [u'Karel Hrom\xe1dka', u'Karel Hrom\xe1dka']
program: ( argmin all_rows r._points-number ) ( hop v10 r.player-string ) <END>
prediction: [u'dawid daniuszewski', u'endre steiner', u'otto