# Experimenting with different distractors

## Initial setup

In [38]:
# setup
import time
import sys, os
import torch
import logging

# Make the directory named by ONMT_DIR importable (presumably a local OpenNMT
# checkout that the project modules imported below depend on -- confirm).
# The path is resolved against the current working directory.
ONMT_DIR = '../myOpenNMT'
_onmt_path = os.path.abspath(ONMT_DIR)
# Notebook cells get re-executed; only append once so sys.path does not
# accumulate duplicate entries on every re-run.
if _onmt_path not in sys.path:
    sys.path.append(_onmt_path)

from bbrsa import ONMTSummaryRSA
from models import ONMTSummarizer
from pragmatics import NextExampleDistractor, BasicPragmatics, IdenticalDistractor
from utils import init_logger, display

# Alternative configuration, kept for reference: same call, but additionally
# passing a log file ('logs/log5.txt', log_mode='w') with its own WARNING level.
# logger = init_logger(no_format=True, print_level=logging.WARNING, log_file='logs/log5.txt',
#                      log_file_level=logging.WARNING, log_mode='w')
# Active configuration: a single logger that is handed to every model component
# created below. print_level is WARNING (presumably this hides INFO/DEBUG
# console output -- confirm against utils.init_logger).
logger = init_logger(no_format=True, print_level=logging.WARNING)

## Load files

In [2]:
# Load the first NUM_EXAMPLES source sentences and their reference summaries.
# Lines keep their trailing newline, exactly as readlines() would return them.
from itertools import islice

NUM_EXAMPLES = 5  # how many leading lines of each file to keep

src_file = 'data/giga_small_input.txt'
tgt_file = 'data/giga_small_target.txt'
# Explicit encoding: without it open() falls back to the platform default,
# so the same file could decode differently on another machine.
# islice stops after NUM_EXAMPLES lines instead of reading the whole file
# and discarding the rest.
with open(src_file, 'r', encoding='utf-8') as f:
    src = list(islice(f, NUM_EXAMPLES))
with open(tgt_file, 'r', encoding='utf-8') as f:
    tgt = list(islice(f, NUM_EXAMPLES))

## Preparing models

In [3]:
# Build the components of the RSA summarizer and wire them together.
# All of them share the notebook-level logger.
giga_config_path = 'giga_inference.yml'
# Base summarizer, configured from the YAML file above; its predictions are
# the ones labelled "S0" in the outputs further down.
s0 = ONMTSummarizer(config_path=giga_config_path, logger=logger)
# Pragmatics module, starting with alpha=10 (alpha is changed in later cells).
pragmatics = BasicPragmatics(alpha=10, logger=logger)
# Distractor provider; its batch size is read from the summarizer's own options.
# NOTE(review): by its name it presumably uses the next example in the batch
# as the distractor -- confirm in pragmatics.NextExampleDistractor.
distractor = NextExampleDistractor(batch_size=s0.opt.batch_size, logger=logger)
# Combined model; later cells call its summarize_with_distractor,
# summarize_with_s0 and set_alpha methods.
model = ONMTSummaryRSA(s0, pragmatics, distractor, logger=logger)


In [4]:
# Summarize the same inputs two ways with beam_size=1, for side-by-side comparison.
# pred1: summaries produced with the distractor (displayed as "S1" below).
pred1 = model.summarize_with_distractor(src, beam_size=1)
# pred2: summaries from the base summarizer alone (displayed as "S0" below).
pred2 = model.summarize_with_s0(src, beam_size=1)


  var = torch.tensor(arr, dtype=self.dtype, device=device)


## Summarize

In [5]:

# Show both sets of predictions, labelled S1 (with distractor) and S0 (base summarizer).
display(['S1', 'S0'], [pred1, pred2])

S1: nec UNK computer to join forces
S0: nec UNK computer to join forces in supercomputer sales

S1: sri lanka closes schools with immediate effect
S0: sri lanka closes schools with immediate effect on tamil rebels

S1: police arrest five anti-nuclear protesters
S0: police arrest five anti-nuclear protesters

S1: factory orders up #.# percent in september
S0: factory orders up #.# percent in september

S1: boj urges calm after daiwa bank closure order lifted
S0: boj urges calm after daiwa bank closure



## Experimenting on the Gigaword dataset

$\alpha = 5$

__(distractor)__
police arrested __six anti-government__ protesters friday after they sought to disrupt loading of a french antarctic research and supply vessel , a spokesman for the protesters said .

S1: six anti-government protesters arrested in france

S0: police arrest six protesters in attempt to disrupt antarctic research

__(original text)__
police arrested __five anti-nuclear__ protesters friday after they sought to disrupt loading of a french antarctic research and supply vessel , a spokesman for the protesters said .

S1: five anti-nuclear protesters arrested

S0: police arrest five anti-nuclear protesters

__(reference)__ protesters target french research ship




In [6]:
# Minimal pair for the alpha = 5 experiment: the two inputs differ only in
# "six anti-government" vs. "five anti-nuclear" (labelled distractor / original
# text in the notes above).
distractor_text = 'police arrested six anti-government protesters friday after they sought to disrupt loading of a french antarctic research and supply vessel , a spokesman for the protesters said .'
original_text = 'police arrested five anti-nuclear protesters friday after they sought to disrupt loading of a french antarctic research and supply vessel , a spokesman for the protesters said .'
src = [distractor_text, original_text]

# Rebuild the RSA model around the existing summarizer and distractor,
# this time with an alpha=5 pragmatics module.
pragmatics = BasicPragmatics(alpha=5, logger=logger)
model = ONMTSummaryRSA(s0, pragmatics, distractor, logger=logger)

# Summaries with the distractor first, then from the base summarizer.
s1_pred = model.summarize_with_distractor(src, beam_size=1)
s0_pred = model.summarize_with_s0(src, beam_size=1)

labels = ['S1', 'S0']
predictions = [s1_pred, s0_pred]
display(labels, predictions)

S1: six anti-government protesters arrested in france
S0: police arrest six protesters in attempt to disrupt antarctic research

S1: five anti-nuclear protesters arrested
S0: police arrest five anti-nuclear protesters





__(distractor)__ police arrested five anti-nuclear protesters friday after they sought to disrupt __operations at a nuclear plant__ , a spokesman for the protesters said .

S1($\alpha = 1$): police arrest five anti-nuclear protesters

S1($\alpha = 2$): police arrest five anti-nuclear protesters

S1($\alpha = 5$): nuclear plant protesters arrested
    
S1($\alpha = 10$): nuclear plant operations shut 

S0: police arrest five anti-nuclear protesters

__(original)__ police arrested five anti-nuclear protesters friday after they sought to disrupt __loading of a french antarctic research and supply vessel__ , a spokesman for the protesters said .

S1($\alpha = 1$): police arrest five anti-nuclear protesters

S1($\alpha = 2$): anti-nuclear protesters arrested in france

S1($\alpha = 5$): antarctic research vessel arrested after french ship blocked

S1($\alpha = 10$): antarctic research vessel arrested over french ship

S0: police arrest five anti-nuclear protesters



In [29]:
# Minimal pair differing only in what the protesters tried to disrupt:
# "operations at a nuclear plant" (labelled distractor in the notes above)
# vs. "loading of a french antarctic research and supply vessel" (original).
distractor_text = 'police arrested five anti-nuclear protesters friday after they sought to disrupt operations at a nuclear plant , a spokesman for the protesters said .'
original_text = 'police arrested five anti-nuclear protesters friday after they sought to disrupt loading of a french antarctic research and supply vessel , a spokesman for the protesters said .'
src = [distractor_text, original_text]

# Reuse the existing model, changing only alpha.
model.set_alpha(3)

# Summaries with the distractor first, then from the base summarizer.
s1_pred = model.summarize_with_distractor(src, beam_size=1)
s0_pred = model.summarize_with_s0(src, beam_size=1)

labels = ['S1', 'S0']
predictions = [s1_pred, s0_pred]
display(labels, predictions)
# Trailing blank line after the displayed results.
print()

S1: nuclear plant protesters arrested
S0: police arrest five anti-nuclear protesters

S1: anti-nuclear protesters arrested in france
S0: police arrest five anti-nuclear protesters




__(distractor)__ police arrested five anti-nuclear protesters friday after they sought to disrupt loading of __an american__ antarctic research and supply vessel , a spokesman for the protesters said .

S0, S1($\alpha = 1$): police arrest five anti-nuclear protesters

S1($\alpha = 2$): police arrest five protesters in attempt to disrupt u.s. antarctic research

S1($\alpha = 3$): police arrest five protesters in anti-whaling protest

S1($\alpha = 4$): police arrest five protesters

S1($\alpha = 5$): american antarctic protesters arrested

S1($\alpha = 10$): american antarctic protesters arrested

__(original)__ police arrested five anti-nuclear protesters friday after they sought to disrupt loading of __a french__ antarctic research and supply vessel , a spokesman for the protesters said .

S0, S1($\alpha = 1$): police arrest five anti-nuclear protesters

S1($\alpha = 2$): police arrest five anti-nuclear protesters

S1($\alpha = 3$): police arrest five anti-nuclear protesters

S1($\alpha = 4$): french police arrest five anti-nuclear protesters

S1($\alpha = 5$): french police arrest five anti-nuclear protesters

S1($\alpha = 10$): french antarctic protesters arrested


In [39]:
# Minimal pair differing only in the vessel's nationality:
# "an american" (labelled distractor in the notes above) vs. "a french" (original).
distractor_text = 'police arrested five anti-nuclear protesters friday after they sought to disrupt loading of an american antarctic research and supply vessel , a spokesman for the protesters said .'
original_text = 'police arrested five anti-nuclear protesters friday after they sought to disrupt loading of a french antarctic research and supply vessel , a spokesman for the protesters said .'
src = [distractor_text, original_text]

# Reuse the existing model, changing only alpha.
model.set_alpha(4)

# Summaries with the distractor first, then from the base summarizer.
s1_pred = model.summarize_with_distractor(src, beam_size=1)
s0_pred = model.summarize_with_s0(src, beam_size=1)

labels = ['S1', 'S0']
predictions = [s1_pred, s0_pred]
display(labels, predictions)

S1: police arrest five protesters
S0: police arrest five anti-nuclear protesters

S1: french police arrest five anti-nuclear protesters
S0: police arrest five anti-nuclear protesters

