In [5]:
import json, jsonlines, sacrebleu
from torchseq.agents.para_agent import ParaphraseAgent
from torchseq.datasets.json_loader import JsonDataLoader
from torchseq.utils.config import Config

from torchseq.agents.seq2seq_agent import Seq2SeqAgent

import torch

# Run directory of the trained model under evaluation. Its config.json and
# model/checkpoint.pt are read from here later in this cell.
path_to_model = '/ist-project/scads/can/disentaglement_projects/torchseq/runs/vae/20220910_175450_paraphrasing_vae_mscoco'

# Root folder of the evaluation data.
DATA_PATH = '../data/'

# Pull every record of the MSCOCO test split into memory.
with jsonlines.open(DATA_PATH + 'mscoco-eval/test.jsonl') as reader:
    rows = list(reader)

# Each example carries only the record's 'sem_input' text, under the key 'input'.
examples = [{'input': record['sem_input']} for record in rows]


# Load the saved run configuration and redirect it to the "json" dataset type.
# The file is decoded explicitly as UTF-8 so that parsing does not depend on
# the platform's default locale encoding.
with open(path_to_model + "/config.json", encoding="utf-8") as f:
    cfg_dict = json.load(f)

cfg_dict["dataset"] = "json"
cfg_dict["json_dataset"] = {
    # No file path is configured here; the examples are handed to the data
    # loader directly further down (presumably why this is None — confirm).
    "path": None,
    # Both active entries map the example's 'input' field, once to 'target'
    # and once to 'source'.
    "field_map": [
        {"type": "copy", "from": "input", "to": "target"},
        {"type": "copy", "from": "input", "to": "source"},
        # Optional third mapping, currently disabled:
        # {"type": "copy", "from": "input", "to": "template"},
    ],
}

# Enable the code predictor
# cfg_dict["bottleneck"]["code_predictor"]["infer_codes"] = True

# Wrap the patched dict in a Config and build a loader over the in-memory examples.
config = Config(cfg_dict)
data_loader = JsonDataLoader(config, test_samples=examples, data_path=DATA_PATH)
checkpoint_path = path_to_model + "/model/checkpoint.pt"

# Keyword arguments for the agent; training_mode=False because this cell only
# evaluates. An earlier version of this cell passed the same arguments to
# ParaphraseAgent instead of Seq2SeqAgent.
agent_kwargs = dict(
    config=config,
    run_id=None,
    output_path=None,
    data_path=DATA_PATH,
    silent=False,
    verbose=False,
    training_mode=False,
)
instance = Seq2SeqAgent(**agent_kwargs)

# Restore the trained weights and put the model into eval mode.
instance.load_checkpoint(checkpoint_path)
instance.model.eval()

# Run inference over the test loader. The call is unpacked as a 4-tuple whose
# third element is a 3-tuple; only that triple's first item is kept.
_, _, (pred_output, _, _), _ = instance.inference(data_loader.test_loader)


# Collect, per test row, the reference paraphrases and the original input
# (the input is wrapped in a one-element list so it has the same shape as refs).
refs = [record["paras"] for record in rows]
inputs = [[record["sem_input"]] for record in rows]

# Rows may have different numbers of references; pad the shorter ones by
# repeating their first reference until every row has max_num_refs entries.
max_num_refs = max(map(len, refs))
refs_padded = [
    paras + [paras[0]] * (max_num_refs - len(paras))
    for paras in refs
]

# Transpose from "one list per row" to "one stream per reference position"
# before handing the references to sacrebleu.
ref_streams = list(zip(*refs_padded))
input_streams = list(zip(*inputs))

# BLEU against the references, and BLEU against the inputs themselves.
tgt_bleu = sacrebleu.corpus_bleu(pred_output, ref_streams, lowercase=True).score
self_bleu = sacrebleu.corpus_bleu(pred_output, input_streams, lowercase=True).score

# iBLEU: weighted reference BLEU minus weighted self-BLEU.
alpha = 0.8
ibleu = alpha * tgt_bleu - (1 - alpha) * self_bleu

score_template = 'MSCOCO Score: {:0.2f}/{:0.2f}/{:0.2f}'
print(score_template.format(tgt_bleu, self_bleu, ibleu))

num_similar_heads is deprecated! Use "splice_head_offset" instead


Validating after 31 epochs: 100%|██████████| 313/313 [01:14<00:00,  4.20it/s]


MSCOCO Score: 24.94/17.14/16.53
