In [12]:
from tqdm import tqdm
import json
import os
from datetime import datetime
import time
import logging
from utils import *
from config import parameters as conf
from torch import nn
import torch
import torch.optim as optim
from Model_new import Bert_model
from tabulate import tabulate # to formate the table printing, this is optional

In [4]:
from transformers import BertTokenizer
from transformers import BertConfig
tokenizer = BertTokenizer.from_pretrained(conf.model_size)
model_config = BertConfig.from_pretrained(conf.model_size)

In [5]:
op_list = read_txt(conf.op_list_file)
op_list = [op + '(' for op in op_list]
op_list = ['EOF', 'UNK', 'GO', ')'] + op_list
const_list = read_txt(conf.const_list_file)
const_list = [const.lower().replace('.', '_') for const in const_list]
reserved_token_size = len(op_list) + len(const_list)

print(op_list)
print(const_list)

['EOF', 'UNK', 'GO', ')', 'add(', 'subtract(', 'multiply(', 'divide(', 'exp(', 'greater(', 'table_sum(', 'table_average(', 'table_max(', 'table_min(']
['const_2', 'const_1', 'const_3', 'const_4', 'const_5', 'const_6', 'const_7', 'const_8', 'const_9', 'const_10', 'const_100', 'const_1000', 'const_10000', 'const_100000', 'const_1000000', 'const_10000000', 'const_1000000000', 'const_m1', '#0', '#1', '#2', '#3', '#4', '#5', '#6', '#7', '#8', '#9', '#10', 'none']


In [6]:
with open(conf.valid_file) as f_in:
    original_data = json.load(f_in)

valid_data, valid_examples, op_list, const_list = \
    read_examples(input_path=conf.valid_file, tokenizer=tokenizer,
                  op_list=op_list, const_list=const_list)

100%|██████████| 883/883 [00:21<00:00, 40.92it/s]


In [7]:
kwargs = {"examples": valid_examples,
          "tokenizer": tokenizer,
          "max_seq_length": conf.max_seq_length,
          "max_program_length": conf.max_program_length,
          "is_training": False,
          "op_list": op_list,
          "op_list_size": len(op_list),
          "const_list": const_list,
          "const_list_size": len(const_list),
          "verbose": True}

valid_features = convert_examples_to_features(**kwargs)
print("Number of validation samples:",len(valid_examples))

Number of validation samples: 883


In [9]:
PATH = 'models/model.pt'
model = Bert_model(num_decoder_layers=conf.num_decoder_layers,
                       hidden_size=model_config.hidden_size,
                       dropout_rate=conf.dropout_rate,
                       program_length=conf.max_program_length,
                       input_length=conf.max_seq_length,
                       op_list=op_list,
                       const_list=const_list)

model = nn.DataParallel(model)
model.to(conf.device)
model.load_state_dict(torch.load(PATH))
model.eval()  # Set the model to evaluation mode

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


DataParallel(
  (module): Bert_model(
    (bert): BertModel(
      (embeddings): BertEmbeddings(
        (word_embeddings): Embedding(30522, 768, padding_idx=0)
        (position_embeddings): Embedding(512, 768)
        (token_type_embeddings): Embedding(2, 768)
        (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (encoder): BertEncoder(
        (layer): ModuleList(
          (0): BertLayer(
            (attention): BertAttention(
              (self): BertSelfAttention(
                (query): Linear(in_features=768, out_features=768, bias=True)
                (key): Linear(in_features=768, out_features=768, bias=True)
                (value): Linear(in_features=768, out_features=768, bias=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
              (output): BertSelfOutput(
                (dense): Linear(in_features=768, out_features=768, bias=True)
                

In [10]:
mode='valid'
pred_list = []
pred_unk = []

data_iterator = DataLoader(
    is_training=False, data=valid_features, batch_size=conf.batch_size_test, reserved_token_size=reserved_token_size, shuffle=False)

k = 0
all_results = []
with torch.no_grad():
    for x in tqdm(data_iterator):
        input_ids = x['input_ids']
        input_mask = x['input_mask']
        segment_ids = x['segment_ids']
        program_ids = x['program_ids']
        program_mask = x['program_mask']
        option_mask = x['option_mask']

        ori_len = len(input_ids)
        for each_item in [input_ids, input_mask, segment_ids, program_ids, program_mask, option_mask]:
            if ori_len < conf.batch_size_test:
                each_len = len(each_item[0])
                pad_x = [0] * each_len
                each_item += [pad_x] * (conf.batch_size_test - ori_len)

        input_ids = torch.tensor(input_ids).to(conf.device)
        input_mask = torch.tensor(input_mask).to(conf.device)
        segment_ids = torch.tensor(segment_ids).to(conf.device)
        program_ids = torch.tensor(program_ids).to(conf.device)
        program_mask = torch.tensor(program_mask).to(conf.device)
        option_mask = torch.tensor(option_mask).to(conf.device)

        logits = model(False, input_ids, input_mask, segment_ids, option_mask, program_ids, program_mask, device=conf.device)

        for this_logit, this_id in zip(logits.tolist(), x["unique_id"]):
          # print(this_logit, int(this_id))
          results = RawResult(unique_id=int(this_id),logits=this_logit,loss=None)
          all_results.append(results)
          
        # # run only for one sample
        # break
  # Outside of For loop
# outside of width
all_predictions, all_nbest = compute_predictions(
        valid_examples,
        valid_features,
        all_results,
        n_best_size=conf.n_best_size,
        max_program_length=conf.max_program_length,
        tokenizer=tokenizer,
        op_list=op_list,
        op_list_size=len(op_list),
        const_list=const_list,
        const_list_size=len(const_list))

# print(all_predictions)
# print(all_nbest)




100%|██████████| 56/56 [01:11<00:00,  1.28s/it]


In [26]:
# Filter out Right Predictions
prediction_keys = list(all_predictions['pred_programs'].keys())
ground_truth_keys = list(all_predictions['ref_programs'].keys())

fair_prediction_indeces = []
for i in range(len(prediction_keys)):
    pred = all_predictions['pred_programs'][prediction_keys[i]]
    gt = all_predictions['ref_programs'][ground_truth_keys[i]]
    gt_score = valid_examples[i][4]
    table = original_data[i]['table']
    invalid_flag, pred_score = eval_program(pred, table)
    if len(pred) == len(gt) and gt_score == pred_score:
        fair_prediction_indeces.append(i)


In [27]:
print("+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++")
file = open('programs.txt','w')
for i in fair_prediction_indeces:
    print("File Identifire:",valid_examples[i][0])
    # print("Question:",valid_examples[i][1])

    question = original_data[i]['qa']['question']
    file.write("Tell me the answer of below question\n")
    print("Tell me the answer of below question, keeping reference of the table")
    print("Question".upper())
    print(question.upper())
    file.write("Questions\n")
    file.write(question.upper())
    file.write("\n")
    table = original_data[i]['table']
    file.write("Table\n")
    tabStr= ""
    for tab in table:
        tabStr = tabStr + " | ".join(tab) + "\n"
    file.write(tabStr)
    print("Table")
    print(table)
    
    
    
    print("Context".upper())
    print(tabulate(table, headers="firstrow", tablefmt="grid"))
    pred = all_predictions['pred_programs'][prediction_keys[i]]
    print("Prediction Program:", pred)
    gt = all_predictions['ref_programs'][ground_truth_keys[i]]
    # print("Actual program:",gt)
    invalid_flag, exe_res = eval_program(pred, table)
    print("Prediction Value:",exe_res)
    # print("Answer",valid_examples[i][4])
    file.write("Answer:")
    file.write(str(exe_res))
    file.write("\n\n")
    print("+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++")

file.close()

+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
File Identifire: C/2017/page_328.pdf-1
Tell me the answer of below question, keeping reference of the table
QUESTION
WHAT WAS THE PERCENTAGE CUMULATIVE TOTAL RETURN FOR THE FIVE YEAR PERIOD ENDED 31-DEC-2017 OF CITI COMMON STOCK?
Table
[['date', 'citi', 's&p 500', 's&p financials'], ['31-dec-2012', '100.0', '100.0', '100.0'], ['31-dec-2013', '131.8', '132.4', '135.6'], ['31-dec-2014', '137.0', '150.5', '156.2'], ['31-dec-2015', '131.4', '152.6', '153.9'], ['31-dec-2016', '152.3', '170.8', '188.9'], ['31-dec-2017', '193.5', '208.1', '230.9']]
CONTEXT
+-------------+--------+-----------+------------------+
| date        |   citi |   s&p 500 |   s&p financials |
| 31-dec-2012 |  100   |     100   |            100   |
+-------------+--------+-----------+------------------+
| 31-dec-2013 |  131.8 |     132.4 |            135.6 |
+-------------+--------+-----------+------------------+
| 31-dec-2