In [2]:
import json, jsonlines, sacrebleu
from torchseq.agents.para_agent import ParaphraseAgent
from torchseq.datasets.json_loader import JsonDataLoader
from torchseq.utils.config import Config

from torchseq.agents.seq2seq_agent import Seq2SeqAgent

import torch

# Run directory of the model to evaluate (holds config.json and model/checkpoint.pt)
path_to_model = '/ist-project/scads/can/disentaglement_projects/torchseq/runs/vae/20221004_204410_paraphrasing_vae_mscoco_sep_loss_full_test789_smallsim'

# Root folder of the evaluation data
DATA_PATH = '../data/'

# Read every record of the MSCOCO evaluation split into memory
with jsonlines.open(DATA_PATH + 'mscoco-eval/test.jsonl') as reader:
    rows = list(reader)

# Keep only the "sem_input" sentence of each record, exposed under the key "input"
examples = [dict(input=record['sem_input']) for record in rows]


# Start from the configuration stored alongside the run ...
with open(path_to_model + "/config.json") as config_file:
    cfg_dict = json.load(config_file)

# ... and switch it to the custom JSON dataset. The "input" field of every
# example is copied to both "target" and "source".
# Optional extra mapping, currently disabled:
#         {"type": "copy", "from": "input", "to": "template"},
cfg_dict.update(
    {
        "dataset": "json",
        "json_dataset": {
            "path": None,
            "field_map": [
                {"type": "copy", "from": "input", "to": destination}
                for destination in ("target", "source")
            ],
        },
    }
)

# Beam width used for beam search
cfg_dict["beam_search"]["beam_width"] = 3

# Enable the code predictor (currently disabled)
# cfg_dict["bottleneck"]["code_predictor"]["infer_codes"] = True

# Wrap the edited config and build a loader over the in-memory examples
config = Config(cfg_dict)
data_loader = JsonDataLoader(
    config,
    test_samples=examples,
    data_path=DATA_PATH,
)

# Build the agent with training_mode=False.
# Alternative agent class, kept for reference:
# instance = ParaphraseAgent(config=config, run_id=None,  output_path=None, data_path=DATA_PATH, silent=False, verbose=False, training_mode=False)
instance = Seq2SeqAgent(
    config=config,
    run_id=None,
    output_path=None,
    data_path=DATA_PATH,
    silent=False,
    verbose=False,
    training_mode=False,
)

# Load the checkpoint saved in the run directory and put the model in eval mode
checkpoint_path = path_to_model + "/model/checkpoint.pt"
instance.load_checkpoint(checkpoint_path)
instance.model.eval()

# Run inference over the test loader; the third element of the result is
# unpacked into predictions, gold outputs and gold inputs
(
    test_loss,
    all_metrics,
    (pred_output, gold_output, gold_input),
    memory_values_to_return,
) = instance.inference(data_loader.test_loader)


# Per-example reference paraphrases, and each input sentence wrapped in a
# one-item list so it can serve as the single reference for self-BLEU
refs = [record["paras"] for record in rows]
inputs = [[record["sem_input"]] for record in rows]

# Pad every example to the same number of references by repeating its first
# reference, so that zip(*...) below yields complete per-position streams
max_num_refs = max(map(len, refs))
refs_padded = [
    paras + [paras[0]] * (max_num_refs - len(paras))
    for paras in refs
]

# BLEU of the predictions against the references (case-insensitive)
tgt_bleu = sacrebleu.corpus_bleu(
    pred_output,
    list(zip(*refs_padded)),
    lowercase=True,
).score

# BLEU of the predictions against their own inputs (case-insensitive)
self_bleu = sacrebleu.corpus_bleu(
    pred_output,
    list(zip(*inputs)),
    lowercase=True,
).score

# iBLEU: reward similarity to the references, penalise similarity to the input
alpha = 0.8
ibleu = alpha * tgt_bleu - (1 - alpha) * self_bleu

# Printed in the order tgt_bleu / self_bleu / ibleu
print(
    'MSCOCO Score: {:0.2f}/{:0.2f}/{:0.2f}'.format(tgt_bleu, self_bleu, ibleu)
)

num_similar_heads is deprecated! Use "splice_head_offset" instead


Validating after 31 epochs: 100%|██████████| 313/313 [01:08<00:00,  4.59it/s]


MSCOCO Score: 25.29/17.72/16.69


In [2]:
# Scores noted from earlier runs, in the order printed by the evaluation cell:
# tgt_bleu/self_bleu/ibleu
# 123: 24.94/17.14/16.53
# 456: 24.93/16.98/16.55
# 789: 24.74/17.08/16.38

# sep123: 24.71/17.03/16.36
# sep456: 25.16/16.93/16.74

# fullsep123: 25.63/17.62/16.98
# fullsep456: 25.37/18.14/16.67
# fullsep789: 25.29/17.72/16.69


# NOTE(review): the three lines below carry the same values as the fullsep
# group above, and only the first is labelled "smallsim" - confirm whether the
# smallsim results still need to be filled in here.
# fullsep123smallsim: 25.63/17.62/16.98
# fullsep456: 25.37/18.14/16.67
# fullsep789: 25.29/17.72/16.69


# expectation: 27.44/24.40/16.99

In [19]:
# Case-sensitive scoring: BLEU of the predictions against the padded references
tgt_bleu = sacrebleu.corpus_bleu(
    pred_output,
    list(zip(*refs_padded)),
    lowercase=False,
).score

# Case-sensitive BLEU of the predictions against their own inputs
self_bleu = sacrebleu.corpus_bleu(
    pred_output,
    list(zip(*inputs)),
    lowercase=False,
).score

# iBLEU: reward similarity to the references, penalise similarity to the input
alpha = 0.8
ibleu = alpha * tgt_bleu - (1 - alpha) * self_bleu

# Printed in the order tgt_bleu / self_bleu / ibleu
print(
    'MSCOCO Score: {:0.2f}/{:0.2f}/{:0.2f}'.format(tgt_bleu, self_bleu, ibleu)
)

MSCOCO Score: 22.08/14.87/14.69


In [20]:
# Show the metrics value returned as the second element of instance.inference(...)
all_metrics

{}

In [5]:
# Reference paraphrases (the "paras" field) of the first test row
refs[0]

['A Honda motorcycle parked in a grass driveway',
 'A black Honda motorcycle with a dark burgundy seat.',
 'Ma motorcycle parked on the gravel in front of a garage',
 'A motorcycle with its brake extended standing outside']

In [6]:
# The same row after padding: its first reference is repeated until the row
# has max_num_refs entries
refs_padded[0]

['A Honda motorcycle parked in a grass driveway',
 'A black Honda motorcycle with a dark burgundy seat.',
 'Ma motorcycle parked on the gravel in front of a garage',
 'A motorcycle with its brake extended standing outside',
 'A Honda motorcycle parked in a grass driveway',
 'A Honda motorcycle parked in a grass driveway']

In [22]:
# Print a few hand-picked test inputs next to the model's prediction for each
for example_idx in (16, 5, 27, 33, 236):
    source_sentence = inputs[example_idx][0]
    print(source_sentence, pred_output[example_idx])

A woman wearing a hat & pink top takes a selfie in front of a bathroom mirror. a woman taking a selfie in front of a bathroom mirror.
A cat eating a bird it has caught. a cat eating a bird on the ground.
A young attractive woman sitting on a toilette on the side of a street. a woman sitting on a toilet in a bathroom.
A toddler celebrates his birthday with a cupcake. a young boy sitting at a table with a cupcake.
A plane floating on top of a lake surrounded by mountains. a small plane is sitting in the water


In [11]:
# Input sentence of the third test row, wrapped in a one-item list
inputs[2]

['A small closed toilet in a cramped space.']

# 789
['A woman wearing a hat & pink top takes a selfie in front of a bathroom mirror.'] a woman taking a selfie in front of a bathroom mirror.
['A cat eating a bird it has caught.'] a cat eating a bird on the ground.
['A young attractive woman sitting on a toilette on the side of a street.'] a woman sitting on a toilet outside on the sidewalk.
['A toddler celebrates his birthday with a cupcake.'] a little boy sitting at a table with a birthday cake.
['A plane floating on top of a lake surrounded by mountains.'] a plane is flying over a body of water.
In [11]:

inputs[2]

In [17]:
# List the contents of the VAE runs directory (shell escape)
!ls /ist-project/scads/can/disentaglement_projects/torchseq/runs/vae

20220910_175450_paraphrasing_vae_mscoco
20220913_153159_paraphrasing_vae_mscoco_456
20220913_180809_paraphrasing_vae_paralex
20220913_220750_paraphrasing_vae_mscoco_sep_loss
20220914_114052_paraphrasing_vae_mscoco_sep_loss
20220914_152427_paraphrasing_vae_mscoco_sep_loss
20220914_160820_paraphrasing_vae_mscoco_sep_loss
20220914_161142_paraphrasing_vae_mscoco_sep_loss
20220914_163719_paraphrasing_vae_mscoco_sep_loss
20220916_122639_paraphrasing_vae_mscoco_sep_loss456
20220919_175013_paraphrasing_vae_mscoco_sep_loss_full_test456
20220920_232720_paraphrasing_vae_mscoco_789
20220921_131137_paraphrasing_vae_mscoco_sep_loss_full_test789
20220922_004816_paraphrasing_vae_mscoco_sep_loss_full_test123
