In [1]:
import json
import os
import glob
import pickle

### Read LAED utterance predictions from file

In [2]:
def laed_utterances_into_dialogs(laed_file):
    """Parse a LAED prediction dump and group its utterance records into dialogs.

    The file is read as blank-line-separated records made of ``key: value``
    lines. Reading stops at the first line that starts with
    ``==== REPORT====``. A record whose ``Source`` value is ``<d>`` opens a
    new dialog; every following record belongs to that dialog until the next
    ``<d>`` record.

    Args:
        laed_file: Path to the LAED output text file.

    Returns:
        A list of dialogs. Each dialog is a list of record dicts mapping the
        stripped field name to the stripped field value, in file order.
        The list is empty when the file contains no records.

    Raises:
        ValueError: If a non-blank line is not of the form ``key: value``
            (or ``key:`` with an empty value).
        KeyError: If a record has no ``Source`` field.
    """
    utterances = []
    # Process the individual predictions from LAED
    with open(laed_file) as f:
        dat = {}
        for lineno, line in enumerate(f, 1):
            if line.startswith("==== REPORT===="):
                break
            stripped = line.strip()
            if not stripped:
                # A blank line terminates the current record (if any).
                if dat:
                    utterances.append(dat)
                dat = {}
                continue

            if ': ' in line:
                k, v = line.split(': ', 1)
            elif stripped.endswith(':'):
                # Field with an empty value whose trailing space is missing,
                # e.g. "Predict:" -- keep it as an empty string.
                k, v = stripped[:-1], ''
            else:
                raise ValueError(
                    f"{laed_file}:{lineno}: expected 'key: value', got {line!r}"
                )
            dat[k.strip()] = v.strip()

        # Flush a record that was still open at EOF or at the report marker;
        # without this the last utterance is lost when no blank line follows it.
        if dat:
            utterances.append(dat)

    laed_predictions = []
    dial = []
    for entry in utterances:
        if entry['Source'] == '<d>':
            # '<d>' marks the first utterance of a dialog: close the previous one.
            if dial:
                laed_predictions.append(dial)
            dial = []

        dial.append(entry)

    # The last dialog has no following '<d>' record to close it; skip it when
    # the file had no records at all so no phantom empty dialog is returned.
    if dial:
        laed_predictions.append(dial)
    return laed_predictions

### Read ResGEN outputs

In [3]:
def vlgan_json(path):
    """Load a prediction file that stores one JSON document per line.

    Args:
        path: Path to the JSON-lines file.

    Returns:
        A list with one decoded object per non-blank line, in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    vlgan_predictions = []
    with open(path) as vlganf:
        for line in vlganf:
            # json.loads rejects empty input, so a blank or trailing empty
            # line would otherwise abort the whole load.
            if not line.strip():
                continue
            vlgan_predictions.append(json.loads(line))
    return vlgan_predictions

### Augment ResGEN JSON with LAED predictions

In [4]:
def augment(vlgan_preds, laed_outputFile, tag, max_turns=12):
    """Attach LAED predictions to the per-turn entries of ``vlgan_preds``.

    Dialog ``i`` of the LAED file is paired with ``vlgan_preds[i]``. For each
    turn index ``t`` the LAED ``Predict`` and ``Target`` fields are stored on
    ``vlgan_preds[i][str(t)]`` under the keys ``tag`` and ``f'{tag}.Target'``.

    Args:
        vlgan_preds: List of dialogs; each dialog is a dict keyed by the
            stringified turn index (``'0'``, ``'1'``, ...) whose values are
            per-turn dicts.
        laed_outputFile: Path to the LAED output file to read predictions from.
        tag: Key under which the LAED prediction is stored on each turn.
        max_turns: Upper bound on the number of turns annotated per dialog.
            Defaults to 12, the value that was previously hard-coded.

    Returns:
        ``vlgan_preds`` itself; the per-turn dicts are modified in place.

    Raises:
        IndexError: If the LAED file has fewer dialogs than ``vlgan_preds``,
            or a LAED dialog has fewer turns than are being annotated.
        KeyError: If a turn key or a ``Predict``/``Target`` field is missing.
    """
    # Read from the file that was passed in. The previous version ignored this
    # parameter and silently used the notebook global `laed_dd_skip_file`.
    laed_predictions = laed_utterances_into_dialogs(laed_outputFile)

    for i in range(len(vlgan_preds)):
        laed_sample, vlgan_sample = laed_predictions[i], vlgan_preds[i]
        # Truncate the LAED dialog to the number of turns in the paired sample.
        # (A strict length-equality assertion used to live here but had been
        # commented out.)
        laed_sample = laed_sample[:len(vlgan_sample)]

        for t in range(min(max_turns, len(vlgan_sample))):
            _t = str(t)
            vlgan_preds[i][_t][f'{tag}'] = laed_sample[t]['Predict']
            vlgan_preds[i][_t][f'{tag}.Target'] = laed_sample[t]['Target']
    return vlgan_preds

In [5]:
# Load the base predictions (one JSON object per line); the cells below add
# LAED fields to these per-turn dicts.
vlgan_predictions = vlgan_json('dd/VLGAN/dd_predictions_tmp.json')

In [6]:
# Output directory of the LAED "skip" run.
data_root = 'dd/LAED/2019-12-04T23-43-57-dailydial-skip.py/'
# glob returns matches in arbitrary order, so sort before taking the first one
# to make the choice reproducible; os.path.join avoids the doubled '/'.
laed_dd_skip_file = sorted(glob.glob(os.path.join(data_root, '*greedy.txt.txt')))[0]
print(laed_dd_skip_file)

dd/LAED/2019-12-04T23-43-57-dailydial-skip.py/2019-12-04T23-44-05-test-greedy.txt.txt


In [7]:
# Add the LAED "skip" predictions under the keys 'laed-skip' / 'laed-skip.Target'.
# augment() modifies the per-turn dicts in place and returns the same list.
vlgan_predictions = augment(vlgan_predictions, laed_dd_skip_file, 'laed-skip')

In [8]:
# Output directory of the LAED "ae" run.
data_root = 'dd/LAED/2019-12-05T01-18-14-dailydialog-ae.py/'
# NOTE: the variable keeps the name `laed_dd_skip_file` even though it now
# points at the "ae" run, because the following cell reads it under that name.
# glob returns matches in arbitrary order, so sort before taking the first one
# to make the choice reproducible; os.path.join avoids the doubled '/'.
laed_dd_skip_file = sorted(glob.glob(os.path.join(data_root, '*greedy.txt.txt')))[0]
print(laed_dd_skip_file)

dd/LAED/2019-12-05T01-18-14-dailydialog-ae.py/2019-12-05T01-18-22-test-greedy.txt.txt


In [9]:
# Add the LAED "ae" predictions under the keys 'laed-ae' / 'laed-ae.Target'.
# `laed_dd_skip_file` was rebound to the "ae" output file in the previous cell.
vlgan_predictions = augment(vlgan_predictions, laed_dd_skip_file, 'laed-ae')

In [14]:
# Spot-check one augmented dialog: each turn should now carry the
# 'laed-skip' and 'laed-ae' fields next to the original ones.
vlgan_predictions[560]

{'0': {'user-x': 0,
  'gold': 'Hi , I think I was supposed to call for my test results today .',
  'laed-skip': 'hello, sir. can i help you?',
  'laed-skip.Target': 'hi, i think i was supposed to call for my test results today.',
  'laed-ae': 'it s my pleasure. i m so sorry to meet you.',
  'laed-ae.Target': 'hi, i think i was supposed to call for my test results today.'},
 '1': {'user-x': 1,
  'gold': 'If you go onto our website and put in your password , you can access your test results .',
  'resgen': ['you are right , sir , i m sure you can .'],
  'laed-skip': 'welcome to you.',
  'laed-skip.Target': 'if you go onto our website and put in your password, you can access your test results.',
  'laed-ae': 'do you have the same problem?',
  'laed-ae.Target': 'if you go onto our website and put in your password, you can access your test results.'},
 '2': {'user-x': 0,
  'gold': 'Are you saying that there weren ’ t any problems ?',
  'resgen': ['so what are you talking about ?'],
  'laed-