In [1]:
# coding: utf-8
from src.train_and_evaluate import *
from src.models import *
import time
import torch.optim
from src.expressions_transfer import *
import json

def read_json(path):
    """Parse the UTF-8 encoded JSON file at ``path`` and return the decoded object."""
    with open(path, mode='r', encoding="utf-8") as handle:
        return json.load(handle)


# Hyperparameters and data paths shared by the cells below.
batch_size = 64  # not referenced by any cell shown in this notebook
embedding_size = 128  # passed to EncoderSeq, GenerateNode and Merge
hidden_size = 512  # passed to EncoderSeq, Prediction, GenerateNode and Merge
n_epochs = 80  # not referenced by any cell shown in this notebook
learning_rate = 1e-3  # lr of each of the four Adam optimizers
weight_decay = 1e-5  # weight_decay of each of the four Adam optimizers
beam_size = 5  # forwarded to evaluate_tree as beam_size
n_layers = 2  # passed to EncoderSeq
# Split files are opened at ori_path + <'train'|'valid'|'test'> + prefix,
# e.g. './data/new_train23k_processed.json'. Despite its name, `prefix` is
# the trailing part of the file name.
ori_path = './data/new_'
prefix = '23k_processed.json'


In [2]:

def get_train_test_fold(ori_path,prefix,data,pairs,group):
    """Distribute ``pairs`` over train/test/valid folds using the ids in the split files.

    The split files are read from ``ori_path + mode + prefix`` for ``mode`` in
    ``'train'``, ``'valid'`` and ``'test'``; each must hold a list of records
    with an ``'id'`` key. ``data``, ``pairs`` and ``group`` are walked in
    lockstep with ``zip``, so iteration stops at the shortest of the three.

    Args:
        ori_path: Leading part of every split file path.
        prefix: Trailing part of every split file path.
        data: Records providing ``item['id']`` for each example.
        pairs: Per-example sequences; each is extended with two extra fields.
        group: Records providing ``g['group_num']`` for each example.

    Returns:
        ``(train_fold, test_fold, valid_fold)`` -- note that the test fold
        comes before the validation fold. Every entry is the original pair
        as a tuple with ``g['group_num']`` and ``item['id']`` appended.
    """
    # Sets give constant-time membership tests; scanning lists here made the
    # split quadratic in the dataset size.
    train_ids = {item['id'] for item in read_json(ori_path + 'train' + prefix)}
    # The validation file is still read so that a missing or malformed split
    # file fails here, but its ids are not consulted: every example that is in
    # neither the train nor the test split lands in the validation fold.
    read_json(ori_path + 'valid' + prefix)
    test_ids = {item['id'] for item in read_json(ori_path + 'test' + prefix)}

    train_fold = []
    valid_fold = []
    test_fold = []
    for item, pair, g in zip(data, pairs, group):
        example_id = item['id']
        extended = tuple(pair) + (g['group_num'], example_id)
        if example_id in train_ids:
            train_fold.append(extended)
        elif example_id in test_ids:
            test_fold.append(extended)
        else:
            valid_fold.append(extended)
    return train_fold, test_fold, valid_fold

def change_num(num):
    """Convert number strings to floats.

    Three textual forms are recognised, checked in this order:

    * contains ``'/'`` -- a fraction such as ``'(1/2)'`` or ``'1/2'``; the
      result is numerator divided by denominator;
    * contains ``'%'`` -- the last character is dropped and the rest is
      divided by 100;
    * anything else is passed straight to ``float``.

    Args:
        num: Iterable of number strings.

    Returns:
        List of floats, one per input string, in input order.

    Raises:
        ValueError: If a numeric part cannot be parsed by ``float``.
        ZeroDivisionError: If a fraction has a zero denominator.
    """
    new_num = []
    for item in num:
        if '/' in item:
            # Keep what precedes the first ')' and, when a '(' is present,
            # what follows it. Fractions written without parentheses used to
            # raise IndexError here.
            # NOTE(review): a whole part in front of the parenthesis, as in
            # '1(1/2)', is discarded -- confirm that is intended.
            fraction = item.split(')')[0]
            if '(' in fraction:
                fraction = fraction.split('(')[1]
            parts = fraction.split('/')
            new_num.append(float(parts[0]) / float(parts[1]))
        elif '%' in item:
            # Drops the final character, i.e. assumes '%' is the last one.
            new_num.append(float(item[0:-1]) / 100)
        else:
            new_num.append(float(item))
    return new_num


# Previously used inputs, kept for reference:
#data = load_raw_data("data/Math_23K.json")
#group_data = read_json("data/Math_23K_processed.json")

# Raw problems plus the records that supply each example's 'group_num'.
data = load_raw_data("data/new_Math_23K.json")
group_data = read_json("data/Math_23K_processed.json")

pairs, generate_nums, copy_nums = transfer_num(data)

# Rewrite field 1 of every pair from infix to prefix order; the other three
# fields are carried over unchanged.
temp_pairs = [(p[0], from_infix_to_prefix(p[1]), p[2], p[3]) for p in pairs]
pairs = temp_pairs

train_fold, test_fold, valid_fold = get_train_test_fold(
    ori_path, prefix, data, pairs, group_data)


Reading lines...
Transfer numbers...


In [3]:

best_acc_fold = []

# The train fold and the test fold are handed to prepare_data as the training
# and testing sets (valid_fold can be substituted for train_fold here).
pairs_trained = train_fold
pairs_tested = test_fold

(input_lang, output_lang,
 train_pairs, test_pairs) = prepare_data(pairs_trained, pairs_tested, 5,
                                         generate_nums, copy_nums, tree=True)

# Initialize models
# Prediction and GenerateNode are given the same op_nums value.
op_nums = output_lang.n_words - copy_nums - 1 - len(generate_nums)
encoder = EncoderSeq(input_size=input_lang.n_words, embedding_size=embedding_size,
                     hidden_size=hidden_size, n_layers=n_layers)
predict = Prediction(hidden_size=hidden_size, op_nums=op_nums,
                     input_size=len(generate_nums))
generate = GenerateNode(hidden_size=hidden_size, op_nums=op_nums,
                        embedding_size=embedding_size)
merge = Merge(hidden_size=hidden_size, embedding_size=embedding_size)
# the embedding layer is  only for generated number embeddings, operators, and paddings


def _make_adam(module):
    """Return an Adam optimizer over ``module``'s parameters with the notebook settings."""
    return torch.optim.Adam(module.parameters(), lr=learning_rate,
                            weight_decay=weight_decay)


def _make_step_lr(optimizer):
    """Return a StepLR scheduler for ``optimizer`` (step_size=20, gamma=0.5)."""
    return torch.optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.5)


encoder_optimizer = _make_adam(encoder)
predict_optimizer = _make_adam(predict)
generate_optimizer = _make_adam(generate)
merge_optimizer = _make_adam(merge)

encoder_scheduler = _make_step_lr(encoder_optimizer)
predict_scheduler = _make_step_lr(predict_optimizer)
generate_scheduler = _make_step_lr(generate_optimizer)
merge_scheduler = _make_step_lr(merge_optimizer)

# Move models to GPU
if USE_CUDA:
    for module in (encoder, predict, generate, merge):
        module.cuda()



Indexing words...
keep_words 1346 / 3867 = 0.3481
Indexed 1349 words in input language, 15 words in output
Number of training data 5332
Number of testind data 1334


In [4]:
# Restore the trained weights of the four sub-models.
# map_location="cpu" lets checkpoints that were saved from a GPU load on hosts
# without CUDA; load_state_dict then copies each tensor into the module's
# existing parameters, so models already moved to the GPU stay there.
encoder.load_state_dict(torch.load("model_traintest/encoder", map_location="cpu"))
predict.load_state_dict(torch.load("model_traintest/predict", map_location="cpu"))
generate.load_state_dict(torch.load("model_traintest/generate", map_location="cpu"))
merge.load_state_dict(torch.load("model_traintest/merge", map_location="cpu"))

<All keys matched successfully>

In [8]:
# Load the generalization test problems through the notebook's read_json
# helper, which opens the file as UTF-8 and parses it with json.load,
# instead of repeating that logic inline.
data_test = read_json('../../../generalization_data_test.json')

In [9]:
from collections import defaultdict
# Bucket the generalization problems by their 'type' field, preserving the
# order in which they appear in data_test.
data_by_type = defaultdict(list)
for example in data_test:
    problem_type = example['type']
    data_by_type[problem_type].append(example)

In [12]:
from tqdm import tqdm

# Output-vocabulary indices of the entries in generate_nums.
generate_num_ids = [output_lang.word2index[num] for num in generate_nums]

# Index the prepared test examples by field 8, which is matched against each
# problem's 'id' below. test_pairs does not change during evaluation, so the
# index is built once instead of once per problem type.
batches = {test_batch[8]: test_batch for test_batch in test_pairs}

# Evaluate each problem type separately and report its accuracy and timing.
for typ, ls in data_by_type.items():
    print("TYPE:")
    print(typ)
    value_ac = 0
    equation_ac = 0
    eval_total = 0
    start = time.time()

    for correct_data in tqdm(ls):
        # Raises KeyError when a problem id has no prepared test example.
        test_batch = batches[correct_data['id']]
        batch_graph = get_single_example_graph(test_batch[0], test_batch[1], test_batch[7], test_batch[4], test_batch[5])
        test_res = evaluate_tree(test_batch[0], test_batch[1], generate_num_ids, encoder, predict, generate,
                                 merge, output_lang, test_batch[5], batch_graph, beam_size=beam_size)
        val_ac, equ_ac, _, _ = compute_prefix_tree_result(test_res, test_batch[2], output_lang, test_batch[4], test_batch[6])
        if val_ac:
            value_ac += 1
        if equ_ac:
            equation_ac += 1
        eval_total += 1
    print(equation_ac, value_ac, eval_total)
    # Printed order: equation accuracy first, then value accuracy.
    print("test_answer_acc", float(equation_ac) / eval_total, float(value_ac) / eval_total)
    print("testing time", time_since(time.time() - start))
    print("------------------------------------------------------")


  0%|          | 0/337 [00:00<?, ?it/s]

TYPE:
journey


100%|██████████| 337/337 [04:06<00:00,  1.37it/s]
  0%|          | 0/383 [00:00<?, ?it/s]

92 115 337
test_answer_acc 0.27299703264094954 0.34124629080118696
testing time 0h 4m 6s
------------------------------------------------------
TYPE:
relation


100%|██████████| 383/383 [03:42<00:00,  1.72it/s]
  0%|          | 0/382 [00:00<?, ?it/s]

190 211 383
test_answer_acc 0.4960835509138381 0.5509138381201044
testing time 0h 3m 42s
------------------------------------------------------
TYPE:
price


100%|██████████| 382/382 [04:00<00:00,  1.59it/s]
  0%|          | 0/232 [00:00<?, ?it/s]

123 160 382
test_answer_acc 0.3219895287958115 0.418848167539267
testing time 0h 4m 0s
------------------------------------------------------
TYPE:
task_completion


100%|██████████| 232/232 [03:12<00:00,  1.20it/s]

77 110 232
test_answer_acc 0.33189655172413796 0.47413793103448276
testing time 0h 3m 12s
------------------------------------------------------





In [28]:
# Display the first prepared test example.
# NOTE(review): the tuple recorded below has 8 fields (indices 0-7), while the
# evaluation cell reads test_batch[8], and the execution counts jump from 12
# to 28. This output looks stale relative to that cell -- confirm by
# restarting the kernel and running all cells in order.
test_pairs[0]

([53,
  140,
  136,
  12,
  1190,
  1006,
  112,
  13,
  125,
  206,
  1,
  117,
  12,
  128,
  2,
  130,
  2,
  68,
  1,
  125,
  12,
  1190,
  153,
  112,
  13,
  125,
  206,
  1,
  117,
  12,
  1190,
  153,
  128,
  2,
  253,
  2,
  691,
  75,
  142,
  21],
 40,
 [2, 0, 6, 7, 8],
 5,
 ['71', '12', '95'],
 [10, 18, 27],
 [],
 [15, 16, 17, 32, 33, 34, 39, 40, 41])