# Experimenting with different distractors

## Initial setup

In [1]:
# setup
import time
import sys, os
import torch
import logging

ONMT_DIR = '../myOpenNMT'
sys.path.append(os.path.abspath(ONMT_DIR))

from bbrsa import ONMTSummaryRSA
from models import ONMTSummarizer
from pragmatics import NextExampleDistractor, IdenticalDistractor
from pragmatics import BasicPragmatics, MemoizedListener
from utils import init_logger, display

# logger = init_logger(no_format=True, print_level=logging.WARNING, log_file='logs/log5.txt',
#                      log_file_level=logging.WARNING, log_mode='w')
logger = init_logger(no_format=True, print_level=logging.DEBUG)

## Load files

In [2]:
src_file = 'data/giga_small_input.txt'
tgt_file = 'data/giga_small_target.txt'
with open(src_file, 'r') as f:
    big_src = f.readlines()
with open(tgt_file, 'r') as f:
    big_tgt = f.readlines()
src = big_src
tgt = big_tgt

## Preparing models

In [3]:
giga_config_path = 'giga_inference.yml'
s0 = ONMTSummarizer(config_path=giga_config_path, logger=logger)
pragmatics = MemoizedListener(alpha=1, logger=logger)
distractor = NextExampleDistractor(batch_size=s0.opt.batch_size, logger=logger)
model = ONMTSummaryRSA(s0, pragmatics, distractor, logger=logger)

pred1 = model.summarize_with_distractor(src, beam_size=1)
pred2 = model.summarize_with_s0(src, beam_size=1)

display(['S1', 'S0'], [pred1, pred2])


Configuring summary model...
Import successful
Finished configuration.

==== Beginning Summary with distractor ====
  var = torch.tensor(arr, dtype=self.dtype, device=device)
==== Beginning Summary with S0 ====


S1: nec UNK computer to join forces in supercomputer sales
S0: nec UNK computer to join forces in supercomputer sales

S1: sri lanka announces closure of schools
S0: sri lanka closes schools with immediate effect on tamil rebels

S1: police arrest five anti-nuclear protesters after trying to loading french research ship
S0: police arrest five anti-nuclear protesters

S1: factory orders up #.# percent in september
S0: factory orders up #.# percent in september

S1: boj urges calm after daiwa bank us deal
S0: boj urges calm after daiwa bank closure

S1: croatian croatian negotiators agree to meet
S0: croatian croatian negotiators to meet saturday on last serb-held area

S1: toyota europe banned from world rally championship
S0: toyota europe banned from world rally championship

S1: israel prepares for rabin funeral
S0: israel prepares for rabin funeral

S1: indian pm 's promise of autonomy in promise
S0: indian pm 's promise of autonomy for kashmir sparks violent reaction



## Experimenting on the Gigaword dataset

$\alpha = 5$
__(distractor)__
police arrested __six anti-government__ protesters friday after they sought to disrupt loading of a french antarctic research and supply vessel , a spokesman for the protesters said .

S1: six anti-government protesters arrested in france

S0: police arrest six protesters in attempt to disrupt antarctic research

__(original text)__
police arrested __five anti-nuclear__ protesters friday after they sought to disrupt loading of a french antarctic research and supply vessel , a spokesman for the protesters said .

S1: five anti-nuclear protesters arrested

S0: police arrest five anti-nuclear protesters

__(reference)__ protesters target french research ship




In [4]:
src = ['police arrested six anti-government protesters friday after they sought to disrupt loading of a french antarctic research and supply vessel , a spokesman for the protesters said .',
       'police arrested five anti-nuclear protesters friday after they sought to disrupt loading of a french antarctic research and supply vessel , a spokesman for the protesters said .']

pragmatics = BasicPragmatics(alpha=5, logger=logger)
model = ONMTSummaryRSA(s0, pragmatics, distractor, logger=logger)
s1_pred = model.summarize_with_distractor(src, beam_size=1)
s0_pred = model.summarize_with_s0(src, beam_size=1)

display(['S1', 'S0'], [s1_pred, s0_pred])

==== Beginning Summary with distractor ====
==== Beginning Summary with S0 ====


S1: six anti-government protesters arrested in france
S0: police arrest six protesters in attempt to disrupt antarctic research

S1: five anti-nuclear protesters arrested
S0: police arrest five anti-nuclear protesters





__(distractor)__ police arrested five anti-nuclear protesters friday after they sought to disrupt __operations at a nuclear plant__ , a spokesman for the protesters said .

S1($\alpha = 1$): police arrest five anti-nuclear protesters

S1($\alpha = 2$): police arrest five anti-nuclear protesters

S1($\alpha = 5$): nuclear plant protesters arrested
    
S1($\alpha = 10$): nuclear plant operations shut 

S0: police arrest five anti-nuclear protesters

__(original)__ police arrested five anti-nuclear protesters friday after they sought to disrupt __loading of a french antarctic research and supply vessel__ , a spokesman for the protesters said .

S1($\alpha = 1$): police arrest five anti-nuclear protesters

S1($\alpha = 2$): anti-nuclear protesters arrested in france

S1($\alpha = 5$): antarctic research vessel arrested after french ship blocked

S1($\alpha = 10$): antarctic research vessel arrested over french ship

S0: police arrest five anti-nuclear protesters



In [5]:
src = ['police arrested five anti-nuclear protesters friday after they sought to disrupt operations at a nuclear plant , a spokesman for the protesters said .',
       'police arrested five anti-nuclear protesters friday after they sought to disrupt loading of a french antarctic research and supply vessel , a spokesman for the protesters said .']

model.set_alpha(3)
s1_pred = model.summarize_with_distractor(src, beam_size=1)
s0_pred = model.summarize_with_s0(src, beam_size=1)

display(['S1', 'S0'], [s1_pred, s0_pred])
print()

==== Beginning Summary with distractor ====
==== Beginning Summary with S0 ====


S1: nuclear plant protesters arrested
S0: police arrest five anti-nuclear protesters

S1: anti-nuclear protesters arrested in france
S0: police arrest five anti-nuclear protesters




__(distractor)__ police arrested five anti-nuclear protesters friday after they sought to disrupt loading of __an american__ antarctic research and supply vessel , a spokesman for the protesters said .

S0, S1($\alpha = 1$): police arrest five anti-nuclear protesters

S1($\alpha = 2$): police arrest five protesters in attempt to disrupt u.s. antarctic research

S1($\alpha = 3$): police arrest five protesters in anti-whaling protest

S1($\alpha = 4$): police arrest five protesters

S1($\alpha = 5$): american antarctic protesters arrested

S1($\alpha = 10$): american antarctic protesters arrested

__(original)__ police arrested five anti-nuclear protesters friday after they sought to disrupt loading of __a french__ antarctic research and supply vessel , a spokesman for the protesters said .

S0, S1($\alpha = 1$): police arrest five anti-nuclear protesters

S1($\alpha = 2$): police arrest five anti-nuclear protesters

S1($\alpha = 3$): police arrest five anti-nuclear protesters

S1($\alpha = 4$): french police arrest five anti-nuclear protesters

S1($\alpha = 5$): french police arrest five anti-nuclear protesters

S1($\alpha = 10$): french antarctic protesters arrested


In [6]:
src = ['police arrested five climate change protesters friday after they sought to disrupt loading of a french antarctic research and supply vessel , a spokesman for the protesters said .',
       'police arrested five anti-nuclear protesters friday after they sought to disrupt loading of a french antarctic research and supply vessel , a spokesman for the protesters said .']

model.set_alpha(4)
s1_pred = model.summarize_with_distractor(src, beam_size=1)
s0_pred = model.summarize_with_s0(src, beam_size=1)

display(['S1', 'S0'], [s1_pred, s0_pred])

==== Beginning Summary with distractor ====
==== Beginning Summary with S0 ====


S1: climate change protesters arrested at french antarctic research
S0: police arrest five climate change protesters

S1: anti-nuclear protesters arrested
S0: police arrest five anti-nuclear protesters



## Trying greater number of distractors

In [7]:
from pragmatics import NextNDistractor

testsrc = ['a', 'b', 'c', 'd', 'e', 'f', 'g']

testdistractor = NextNDistractor(batch_size=20, N=5)
print(testdistractor.generate(testsrc))

s0 = ONMTSummarizer(config_path=giga_config_path)
pragmatics = BasicPragmatics(alpha=3)
distractor = NextNDistractor(batch_size=20, N=5)

model4 = ONMTSummaryRSA(s0, pragmatics, distractor)

start_time = time.time()
s1_pred = model.summarize_with_distractor(big_src, beam_size=10)
duration = time.time() - start_time

print('s1 finished, duration:', duration)
s0_pred = model.summarize_with_s0(big_src, beam_size=3)


(['a', 'b', 'c', 'd', 'e', 'f', 'b', 'c', 'd', 'e', 'f', 'g', 'c', 'd', 'e', 'f', 'g', 'a', 'd', 'e', 'f', 'g', 'a', 'b', 'e', 'f', 'g', 'a', 'b', 'c', 'f', 'g', 'a', 'b', 'c', 'd', 'g', 'a', 'b', 'c', 'd', 'e'], 120)
Configuring summary model...
Import successful


==== Beginning Summary with distractor ====


Finished configuration.



==== Beginning Summary with S0 ====


s1 finished, duration: 2.741589307785034


## Using memoized L1

In [55]:

pragmatics = MemoizedListener(alpha=1)
distractor = NextNDistractor(batch_size=s0.opt.batch_size, N=2)
model = ONMTSummaryRSA(s0, pragmatics, distractor)

src = ['police arrested five anti-whaling protesters friday after they sought to disrupt loading of a japanese whaling research and supply vessel , a spokesman for the protesters said .',
       'greenpeace supported five anti-nuclear protesters friday after they sought to disrupt loading of a russian antarctic research and supply vessel , a spokesman for the protesters said .',
       'police arrested five anti-nuclear protesters friday after they sought to disrupt loading of an american antarctic research and supply vessel , a spokesman for the protesters said .']


s1_pred = model.summarize_with_distractor(src, beam_size=1)
s0_pred = model.summarize_with_s0(src, beam_size=1)

display(['S1', 'S0'], [s1_pred, s0_pred])

==== Beginning Summary with distractor ====
==== Beginning Summary with S0 ====
S1: anti-whaling protesters arrested after whaling protest
S0: anti-whaling protesters arrested in tokyo

S1: greenpeace backs greenpeace protests against russian blockade
S0: greenpeace backs anti-nuclear protesters

S1: police arrest five protesters in attempt to disrupt us research ship
S0: police arrest five anti-nuclear protesters



## Trying to understand what is the model in the checkpoint

In [9]:
t = "aan_useffn=False, accum_count=1, adagrad_accumulator_init=0, adam_beta1=0.9, adam_beta2=0.999, apex_opt_level='O2', audio_enc_pooling='1', batch_size=64, batch_type='sents', bridge=False, brnn=False, brnn_merge='concat', cnn_kernel_width=3, context_gate=None, copy_attn=True, copy_attn_force=False, copy_attn_type=None, copy_loss_by_seqlength=False, coverage_attn=False, data='../data/raw_shuffled/data', dec_layers=2, dec_rnn_size=500, decay_method='', decoder_type='rnn', dropout=0.3, enc_layers=2, enc_rnn_size=500, encoder_type='rnn', epochs=20, exp='', exp_host='', feat_merge='concat', feat_vec_exponent=0.7, feat_vec_size=-1, fix_word_vecs_dec=False, fix_word_vecs_enc=False, generator_function='softmax', global_attention='general', global_attention_function='softmax', gpuid=[0], heads=8, input_feed=1, label_smoothing=0.0, lambda_coverage=1, layers=-1, learning_rate=1.0, learning_rate_decay=0.5, loss_scale=0, max_generator_batches=32, max_grad_norm=5, max_relative_positions=0, model_dtype='fp32', model_type='text', normalization='sents', optim='sgd', param_init=0.1, position_encoding=False, pre_word_vecs_dec=None, pre_word_vecs_enc=None, report_every=50, reuse_copy_attn=True, rnn_size=500, rnn_type='LSTM', sample_rate=16000, save_model='../data/raw_shuffled/model-copy', seed=-1, self_attn_type='scaled-dot', share_decoder_embeddings=False, share_embeddings=False, src_word_vec_size=500, start_checkpoint_at=0, start_decay_at=8, start_epoch=1, tgt_word_vec_size=500, train_from='', transformer_ff=2048, truncated_decoder=0, valid_batch_size=32, warmup_steps=4000, window_size=0.02, word_vec_size=-1"

split_text = t.split(', ')
for line in split_text:
    print(line)

aan_useffn=False
accum_count=1
adagrad_accumulator_init=0
adam_beta1=0.9
adam_beta2=0.999
apex_opt_level='O2'
audio_enc_pooling='1'
batch_size=64
batch_type='sents'
bridge=False
brnn=False
brnn_merge='concat'
cnn_kernel_width=3
context_gate=None
copy_attn=True
copy_attn_force=False
copy_attn_type=None
copy_loss_by_seqlength=False
coverage_attn=False
data='../data/raw_shuffled/data'
dec_layers=2
dec_rnn_size=500
decay_method=''
decoder_type='rnn'
dropout=0.3
enc_layers=2
enc_rnn_size=500
encoder_type='rnn'
epochs=20
exp=''
exp_host=''
feat_merge='concat'
feat_vec_exponent=0.7
feat_vec_size=-1
fix_word_vecs_dec=False
fix_word_vecs_enc=False
generator_function='softmax'
global_attention='general'
global_attention_function='softmax'
gpuid=[0]
heads=8
input_feed=1
label_smoothing=0.0
lambda_coverage=1
layers=-1
learning_rate=1.0
learning_rate_decay=0.5
loss_scale=0
max_generator_batches=32
max_grad_norm=5
max_relative_positions=0
model_dtype='fp32'
model_type='text'
normalization='sents'
op

## Experimenting with abstract classes

In [10]:
from abc import ABC, abstractmethod, abstractproperty

class A(ABC):
    def __init__(self, value):
        self.value = value
        
    def blablabla(self):
        print('I shall say blablabla ' + str(self.value) + ' times')
    
class AA(A):
    def __init__(self, value):
        super().__init__(value)
    
    @abstractproperty
    def funky(self):
        return self._funky
    
    @property
    def super_funky(self):
        return self.funky * self.value

class B(AA):
    def __init__(self, value):
        super().__init__(value)
        self._funky = 100
    
    @property
    def funky(self):
        return self._funky

b = B(1000)
print(b.funky)
print(b.super_funky)
    

100
100000
