In [1]:
import json
import os
import glob
import pickle

### Read LAED utterance predictions from file

In [2]:
def laed_utterances_into_dialogs(laed_file):
    """Parse a LAED prediction dump and group its utterances into dialogs.

    The file is read as records of ``key: value`` lines separated by blank
    lines. Reading stops at the first line that starts with
    ``==== REPORT====``. A record whose ``Source`` field equals ``<d>`` opens a
    new dialog; every following record belongs to that dialog until the next
    ``<d>`` record.

    Args:
        laed_file: Path to the LAED output text file.

    Returns:
        A list of dialogs. Each dialog is a non-empty list of record dicts
        whose keys and values are whitespace-stripped strings. An input with
        no records yields an empty list.

    Raises:
        ValueError: If a non-blank line does not contain a ``': '`` separator.
        KeyError: If a record has no ``Source`` field.
    """
    utterances = []
    # Process the individual predictions from LAED
    with open(laed_file) as f:
        dat = {}
        for line in f:
            if line.startswith("==== REPORT===="):
                break
            if not line.strip():
                # A blank line terminates the current record (if any).
                if dat:
                    utterances.append(dat)
                dat = {}
                continue

            k, v = line.split(': ', 1)
            dat[k.strip()] = v.strip()

        # Flush the record that was still open when the file (or the
        # prediction section) ended without a terminating blank line;
        # otherwise the final utterance would be silently dropped.
        if dat:
            utterances.append(dat)

    laed_predictions = []
    dial = []
    for entry in utterances:
        if entry['Source'] == '<d>':
            # '<d>' as the source marks the first utterance of a new dialog.
            if dial:
                laed_predictions.append(dial)
            dial = []

        dial.append(entry)

    # The last dialog has no following '<d>' record to close it. Only emit it
    # when it actually holds utterances, so empty input does not produce a
    # phantom empty dialog.
    if dial:
        laed_predictions.append(dial)
    return laed_predictions

### Read ResGEN outputs

In [3]:
def vlgan_json(path):
    """Load a JSON-lines file.

    Args:
        path: Path to a text file holding one JSON document per line.

    Returns:
        A list with the decoded value of every line, in file order.
    """
    with open(path) as handle:
        return [json.loads(raw_line) for raw_line in handle]

### Augment ResGEN JSON with LAED predictions

In [4]:
def augment(vlgan_preds, laed_outputFile, tag, max_turns=12):
    """Attach LAED predictions to the per-turn records of each dialog.

    For dialog ``i`` and turn ``t``, the LAED ``Predict`` and ``Target``
    fields are stored in ``vlgan_preds[i][str(t)]`` under the keys ``tag`` and
    ``f'{tag}.Target'``. The records are modified in place.

    Args:
        vlgan_preds: List of dialogs; each dialog is indexed by the turn
            number as a string (``'0'``, ``'1'``, ...) and maps to a dict for
            that turn.
        laed_outputFile: Path to the LAED output file to read predictions
            from. Dialogs are matched to ``vlgan_preds`` by position.
        tag: Key prefix under which the LAED fields are stored.
        max_turns: Upper bound on the number of leading turns that are
            augmented per dialog (defaults to 12).

    Returns:
        The same ``vlgan_preds`` list, after in-place augmentation.

    Raises:
        IndexError: If the LAED file has fewer dialogs than ``vlgan_preds``,
            or a LAED dialog has fewer utterances than the turns augmented.
        KeyError: If a turn key or a LAED ``Predict``/``Target`` field is
            missing.
    """
    # Read from the file that was passed in, not from a notebook-level global;
    # otherwise the result depends on whatever that global currently holds.
    laed_predictions = laed_utterances_into_dialogs(laed_outputFile)

    predict_key = f'{tag}'
    target_key = f'{tag}.Target'

    for i in range(len(vlgan_preds)):
        laed_sample, vlgan_sample = laed_predictions[i], vlgan_preds[i]
        # Ignore any LAED utterances beyond the number of turns in this dialog.
        laed_sample = laed_sample[:len(vlgan_sample)]

        for t in range(min(max_turns, len(vlgan_sample))):
            turn = vlgan_sample[str(t)]
            turn[predict_key] = laed_sample[t]['Predict']
            turn[target_key] = laed_sample[t]['Target']
    return vlgan_preds

In [5]:
# Load the prediction records (one JSON document per line) that the cells
# below augment with LAED outputs.
vlgan_predictions = vlgan_json('dd/VLGAN/dd_predictions_tmp.json')

In [6]:
# Locate the `*greedy.txt.txt` output inside the `dailydial-skip` run
# directory. Only the first glob match is used; `[0]` raises IndexError when
# nothing matches.
data_root = 'dd/LAED/2019-12-04T23-43-57-dailydial-skip.py/'
laed_dd_skip_file = glob.glob(data_root+'/*greedy.txt.txt')[0]
print(laed_dd_skip_file)

dd/LAED/2019-12-04T23-43-57-dailydial-skip.py/2019-12-04T23-44-05-test-greedy.txt.txt


In [7]:
# Store the LAED fields from the skip run under the 'laed-skip' and
# 'laed-skip.Target' keys of every augmented turn.
vlgan_predictions = augment(vlgan_predictions, laed_dd_skip_file, 'laed-skip')

In [8]:
# Locate the `*greedy.txt.txt` output inside the `dailydialog-ae` run
# directory. Only the first glob match is used; `[0]` raises IndexError when
# nothing matches.
# NOTE(review): this rebinds `data_root` and `laed_dd_skip_file`, so from here
# on `laed_dd_skip_file` points at the "ae" run despite its name.
data_root = 'dd/LAED/2019-12-05T01-18-14-dailydialog-ae.py/'
laed_dd_skip_file = glob.glob(data_root+'/*greedy.txt.txt')[0]
print(laed_dd_skip_file)

dd/LAED/2019-12-05T01-18-14-dailydialog-ae.py/2019-12-05T01-18-22-test-greedy.txt.txt


In [9]:
# Store the LAED fields from the ae run under the 'laed-ae' and
# 'laed-ae.Target' keys of every augmented turn.
vlgan_predictions = augment(vlgan_predictions, laed_dd_skip_file, 'laed-ae')

In [15]:
# Display one augmented dialog (index 799) to eyeball the merged fields.
vlgan_predictions[799]

{'0': {'user-x': 0,
  'gold': 'So , Paula , where are you from ?',
  'laed-skip': 'plans, why are you so depressed?',
  'laed-skip.Target': 'so, paula, where are you from?',
  'laed-ae': 'excuse me, sir. i have a friend of room for you.',
  'laed-ae.Target': 'so, paula, where are you from?'},
 '1': {'user-x': 1,
  'gold': "I'm from Canada , originally .",
  'resgen': ['i m going to buy a new one .'],
  'laed-skip': "television? i 'm a bit of a hurry. i' m a little short - shape. i 'm looking for cologne.",
  'laed-skip.Target': "i 'm from canada, originally.",
  'laed-ae': 'well, i came to say i started playing chess.',
  'laed-ae.Target': "i 'm from canada, originally."},
 '2': {'user-x': 0,
  'gold': 'From Canada ? Where were you born ?',
  'resgen': ['oh , what s the ?'],
  'laed-skip': 'what kind of music do you like?',
  'laed-skip.Target': 'from canada? where were you born?',
  'laed-ae': '</d>',
  'laed-ae.Target': 'from canada? where were you born?'},
 '3': {'user-x': 1,
  'gol

In [17]:
# Persist the augmented dialogs as JSON lines: one serialized dialog per line.
with open('../outputs/dd_predictions_combined-Nov-21.json', 'w') as outf:
    for entry in vlgan_predictions:
        # print() appends the trailing newline after each JSON document.
        print(json.dumps(entry), file=outf)