In [1]:
import os
import json
import glob
import pprint
# Pretty-printer configured with a 2-space indent, for displaying nested structures.
pp = pprint.PrettyPrinter(indent=2)

In [2]:
# Directory holding the HDSA result files (path is relative to the working directory).
data_root = 'multiwoz/HDSA'

In [3]:
# Show every entry directly under the HDSA results directory.
glob.glob('{}/*'.format(data_root))

['multiwoz/HDSA/results.txt.pred.non_delex',
 'multiwoz/HDSA/results.txt.pred.BERT_dim128_w_domain.pred',
 'multiwoz/HDSA/results.txt.non_delex']

### Read ResGEN outputs

In [4]:
def vlgan_json(path):
    """Read a JSON-lines predictions file into a dict keyed by record name.

    Every non-blank line of the file must be a standalone JSON object that
    contains a ``['0']['info']['name']`` entry; that entry is used as the key
    for the whole parsed object.

    Args:
        path: Location of the JSON-lines file to read.

    Returns:
        dict mapping each record's name to its parsed JSON object. If the same
        name occurs on several lines, the last occurrence wins.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if a non-blank line is not valid JSON.
        KeyError: if a record lacks the ``['0']['info']['name']`` entry.
    """
    vlgan_predictions = {}
    # JSON text is UTF-8 by specification; do not depend on the platform default.
    with open(path, encoding='utf-8') as vlganf:
        for line in vlganf:
            # Blank lines (e.g. extra newlines at the end of the file) carry no
            # record and would otherwise make json.loads raise.
            if not line.strip():
                continue
            po = json.loads(line)
            vlgan_predictions[po['0']['info']['name']] = po
    return vlgan_predictions

In [5]:
# Load the line-delimited predictions into a dict keyed by each record's ['0']['info']['name'].
vlgan_predictions = vlgan_json('multiwoz/VLGAN/multiwoz_predictions_Nov-21.json')

In [6]:
# Sanity check: number of distinct record names that were loaded.
len(vlgan_predictions)

1000

### Augment ResGEN JSON with HDSA predictions

In [7]:
# Load the HDSA predictions. The `with` block closes the file handle as soon as
# parsing finishes instead of leaving it open until garbage collection.
with open('multiwoz/HDSA/results.txt.pred.non_delex') as hdsa_file:
    hdsa_predictions = json.load(hdsa_file)

In [8]:
def augment(vlgan_preds, tag, hdsa_preds=None):
    """Attach a second system's responses to the odd-numbered steps of each record.

    The records are modified in place: for every step whose key is an odd
    integer ``t``, ``step[tag]`` is set to the ``t // 2``-th response of the
    matching entry in ``hdsa_preds``.

    Args:
        vlgan_preds: dict mapping a record name to a dict of steps. Step keys
            must be convertible with ``int()``; each odd-numbered step must be
            a dict.
        tag: Key under which the response is stored on each odd-numbered step.
        hdsa_preds: Mapping from record name with ``'.json'`` removed to an
            indexable sequence of responses. When omitted, the notebook-level
            ``hdsa_predictions`` is used.

    Returns:
        None. ``vlgan_preds`` is updated in place.

    Raises:
        KeyError: if a record name has no entry in ``hdsa_preds``.
        IndexError: if a record has more odd-numbered steps than responses.
        ValueError: if a step key is not an integer string.
    """
    if hdsa_preds is None:
        # Backward-compatible default: fall back to the notebook global.
        hdsa_preds = hdsa_predictions
    for name, steps in vlgan_preds.items():
        responses = hdsa_preds[name.replace('.json', '')]
        for step_key, step in steps.items():
            step_idx = int(step_key)
            # HDSA only predicts at agent steps (the odd-numbered ones), so
            # step 1 pairs with response 0, step 3 with response 1, and so on.
            if step_idx % 2 == 1:
                step[tag] = responses[step_idx // 2]

In [9]:
# Store the HDSA response under 'hdsa' on the odd-numbered steps of every record (in place).
augment(vlgan_predictions, 'hdsa')

In [10]:
# From here on only the records are needed, not the name -> record mapping.
# The isinstance guard keeps this cell safe to re-run: once the name has been
# rebound to a list, calling .values() on it again would raise AttributeError.
if isinstance(vlgan_predictions, dict):
    vlgan_predictions = list(vlgan_predictions.values())

### Replace `<unk>` tokens with [HIDDEN] tags

In [11]:
from utils import Lang
import pickle

In [12]:
# NOTE(security): pickle.load can execute arbitrary code while deserialising,
# so only load this file if it comes from a trusted source.
# The `with` block closes the file handle as soon as loading finishes.
with open('../d_vocab_lang.pickle', 'rb') as lang_file:
    lang = pickle.load(lang_file)

In [13]:
def trickMe(s):
    """Round-trip ``s`` through ``lang`` and show every ``<unk>`` as ``[HIDDEN]``.

    Args:
        s: Text to process.

    Returns:
        The space-joined output of ``lang.decodeSentence`` after
        ``str.capitalize()`` (first character upper-cased, the rest
        lower-cased), with each ``<unk>`` replaced by ``[HIDDEN]``.
    """
    # A literal '<unk>' in the input is first swapped for a made-up word,
    # presumably so the encoder treats it like any other unknown word.
    without_marker = s.replace('<unk>', 'jldifuwlaf')
    encoded = lang.encodeSentence(without_marker)
    decoded = ' '.join(lang.decodeSentence(encoded))
    return decoded.capitalize().replace('<unk>', '[HIDDEN]')

In [14]:
# Run trickMe over every response field of every step, updating the records in
# place. 'resgen' is processed element by element into a new list; 'gold' and
# 'hdsa' are passed to trickMe as single values.
for record in vlgan_predictions:
    for step in record.values():
        for field in ('gold', 'resgen', 'hdsa'):
            if field not in step:
                continue
            if field == 'resgen':
                step[field] = list(map(trickMe, step[field]))
            else:
                step[field] = trickMe(step[field])

### Write combined predictions to file

In [15]:
# Serialise the records as JSON lines: one JSON object per line.
with open('../outputs/multiwoz_predictions_combined-Nov-21.json', 'w') as outf:
    outf.writelines(json.dumps(entry) + '\n' for entry in vlgan_predictions)