# Pointer Generator Model

In [1]:
import torch
from PICOHelper import get_pico_datasets
from NewsroomHelper import get_newsroom_datasets
from models import Summarizer
from model_helpers import loss_function, error_function
from utils import get_index_words, produce_attention_visualization_file
from pytorch_helper import VariableBatchDataLoader, ModelManipulator, plot_learning_curves

## Parameters

In [2]:
# --- Optimisation hyper-parameters -----------------------------------------
BATCH_SIZE = 64        # examples per mini-batch handed to the data loader
NUM_EPOCHS = 2         # epoch count passed to the training call
LEARNING_RATE = 1e-2   # learning rate given to the optimizer
# INITIAL_ACCUMULATOR_VALUE = 0.1  # only needed if an Adagrad optimizer is used

# --- Model / decoding settings ---------------------------------------------
GAMMA = 1              # forwarded to Summarizer as `gamma` (presumably the coverage-loss weight -- confirm in models.py)
BEAM_SIZE = 4          # beam width passed as `beam_size` when calling the model

# --- Hardware ----------------------------------------------------------------
# Use the GPU whenever PyTorch can see one; the value is echoed so the notebook
# output records which device the run used.
USE_CUDA = torch.cuda.is_available()
print(USE_CUDA)

True


## Get Data

In [3]:
# Load the three Newsroom splits with out-of-vocabulary tracking turned on
# (with_oov=True).
# Alternative corpus, currently unused:
#   pico_dataset_train, pico_dataset_dev, pico_dataset_test = get_pico_datasets()
(
    newsroom_dataset_train,
    newsroom_dataset_dev,
    newsroom_dataset_test,
) = get_newsroom_datasets(with_oov=True)

# Everything the model constructor needs is read off the training split:
# the word-vector table and the vocabulary indices of the two sentinel tokens.
word_vectors = newsroom_dataset_train.word_vectors
train_word_indices = newsroom_dataset_train.word_indices
start_index = train_word_indices['<start>']
end_index = train_word_indices['<end>']

11029 3676 3678
retrieving word2vec model from file


## Create Model

In [None]:
# Build the summarizer with both the pointer mechanism and coverage enabled.
# num_hidden1 / num_hidden2 are passed as None -- presumably Summarizer then
# picks its own hidden sizes; confirm in models.py.
pointer_generator_model = Summarizer(
    word_vectors,
    start_index,
    end_index,
    num_hidden1=None,
    num_hidden2=None,
    with_coverage=True,
    gamma=GAMMA,
    with_pointer=True,
)

## Train and Save Model

In [None]:
# Batches are drawn from the training split with shuffle=True.
dataloader = VariableBatchDataLoader(
    newsroom_dataset_train,
    batch_size=BATCH_SIZE,
    shuffle=True,
)

# Adam over all model parameters.
# Alternative tried earlier and left disabled: torch.optim.Adagrad with
# initial_accumulator_value=INITIAL_ACCUMULATOR_VALUE, built on
# pointer_generator_model.cuda() when USE_CUDA is set.
optimizer = torch.optim.Adam(pointer_generator_model.parameters(), lr=LEARNING_RATE)

# ModelManipulator bundles the model, optimizer, loss and error functions;
# use_cuda is forwarded so it can decide where to run (see pytorch_helper).
model_manip = ModelManipulator(
    pointer_generator_model,
    optimizer,
    loss_function,
    error_function,
    use_cuda=USE_CUDA,
)

# Train for NUM_EPOCHS with the dev split as validation data; the returned
# statistics feed the learning-curve plot later in the notebook.
train_stats, val_stats = model_manip.train(
    dataloader,
    NUM_EPOCHS,
    dataset_val=newsroom_dataset_dev,
    stats_every=10,
    verbose_every=10,
)

In [None]:
# Persist the whole model object (not just a state_dict) so the training
# cells can be skipped in a later session. Loading it back requires the
# Summarizer class to be importable, because the object is pickled.
torch.save(
    obj=pointer_generator_model,
    f='models/pointer_generator_temp.model',
)

In [4]:
# Restore the model object written by torch.save so the evaluation cells can
# run without re-training.
#
# map_location: a checkpoint saved from a GPU session stores CUDA tensors and
# torch.load would fail on a machine without CUDA. When USE_CUDA is False the
# tensors are remapped to the CPU; when it is True the default behaviour
# (restore to the device they were saved from) is kept unchanged.
#
# NOTE(security): torch.load unpickles arbitrary Python objects -- only load
# checkpoint files you created yourself or otherwise trust.
# NOTE(review): recent PyTorch releases default to weights_only=True, which
# rejects full pickled models; pass weights_only=False there if this file is
# trusted -- confirm against the installed PyTorch version.
pointer_generator_model = torch.load(
    'models/pointer_generator_temp.model',
    map_location=None if USE_CUDA else 'cpu',
)

## Plot

In [None]:
# Plot training vs. validation statistics and write the figure under graphs/.
# train_stats and val_stats are only produced by the training cell; loading a
# saved model does not recreate them, so this cell needs a training run first.
plot_learning_curves(
    training_values=train_stats,
    validation_values=val_stats,
    figure_name='graphs/pointer_generator_training_temp',
)

In [5]:
# Decode one dev-set example (slice 3:4 keeps the batch dimension) using the
# beam width BEAM_SIZE.
batch = newsroom_dataset_dev[3:4]

# Move the model inputs to the GPU only when one is in use. The original cell
# called .cuda() unconditionally, which crashes on CPU-only machines even
# though the rest of the notebook is driven by USE_CUDA. The tensors stored in
# `batch` itself are left untouched so later cells can still call .numpy().
batch_text, batch_text_length = batch['text'], batch['text_length']
if USE_CUDA:
    batch_text, batch_text_length = batch_text.cuda(), batch_text_length.cuda()

# `results` is indexed as results[0] -> (loss, summary_info) by the cells
# below; `oov_indices` is returned by the model alongside it.
results, oov_indices = pointer_generator_model(
    batch_text,
    batch_text_length,
    batch['text_oov_indices'],
    beam_size=BEAM_SIZE,
)

  output, (h, c) = self.lstm(x)


In [6]:
# Print source text, reference summary and decoded summary (plus its loss)
# for every example in the first entry of `results`.
loss, summary_info = results[0]
vocabulary = newsroom_dataset_train.words

for i in range(len(summary_info[0])):
    # Invert the per-example OOV dict (value -> key) before handing it to
    # get_index_words -- presumably turning word->index into index->word.
    oov_words = {value: key for key, value in batch['text_oov_indices'][i].items()}

    # Lengths used to cut each index sequence down to its real extent.
    text_length = batch['text_length'][i].numpy()
    reference_length = batch['summary_length'][i].numpy()
    decoded_length = summary_info[1][i]

    text = get_index_words(
        batch['text'][i].numpy()[:text_length], vocabulary, oov_words=oov_words)
    reference_summary = get_index_words(
        batch['summary'][i].numpy()[:reference_length], vocabulary, oov_words=oov_words)
    decoded_summary = get_index_words(
        summary_info[0][i][:decoded_length], vocabulary, oov_words=oov_words)

    for label, words in (
        ("text", text),
        ("reference summary", reference_summary),
        ("decoded summary", decoded_summary),
    ):
        print(label, words)
    print(loss[i])

text ['<start>', 'new', 'york', 'city', 'detectives', 'on', 'tuesday', 'released', 'the', 'name', 'of', 'a', 'man', 'seen', 'leaving', 'the', 'bronx', 'subway', 'station', 'where', 'a', 'rider', 'was', 'pushed', 'to', 'his', 'death', 'over', 'the', 'weekend', '.', 'the', 'man', 'being', 'sought', 'by', 'police', ',', 'kevin', 'darden', ',', 'qqq', ',', 'was', 'described', 'as', 'a', 'person', 'of', 'interest', 'in', 'the', 'killing', '–', 'not', 'as', 'a', 'suspect', '–', 'and', 'detectives', 'were', '“', 'endeavoring', 'to', 'locate', '”', 'him', 'for', 'questioning', ',', 'according', 'to', 'a', 'poster', 'seeking', 'information', '.', 'the', 'police', 'said', 'investigators', 'believed', 'mr.', 'darden', 'was', 'the', 'man', 'seen', 'on', 'surveillance', 'video', 'walking', 'calmly', 'from', 'the', 'qqq', 'street', 'subway', 'station', 'where', 'the', 'rider', ',', 'wai', 'kuen', 'kwok', ',', 'qqq', ',', 'was', 'shoved', 'into', 'the', 'path', 'of', 'an', 'oncoming', 'subway', 'car'

In [7]:
# Export attention weights and generation probabilities of one decoded
# example to a JSON file for the attention visualiser.
_, summary_info = results[0]
i = 0  # which example of the batch to visualise
vocabulary = newsroom_dataset_train.words

summary_indices, summary_length = summary_info[0][i], summary_info[1][i]

# summary_info[3] holds one attention vector per entry and summary_info[4] one
# p_gen value per entry; the last entry of each is dropped, and the first and
# last position of every attention vector are trimmed to line up with the
# [1:length-1] token slices below. Values are converted to plain floats.
attentions = [
    list(map(float, vector[1:-1]))
    for vector in summary_info[3][i][:-1]
]
p_gens = list(map(float, summary_info[4][i][:-1]))

reference_indices = batch['summary'][i].numpy()
reference_length = batch['summary_length'][i].numpy()
source_indices = batch['text'][i].numpy()
source_length = batch['text_length'][i].numpy()
# Inverted OOV dict (value -> key) for this example, as get_index_words expects.
oov_words = {value: key for key, value in batch['text_oov_indices'][i].items()}

# Drop the first and last token of every sequence (the first is '<start>' in
# the printed output; the last is presumably '<end>').
text = get_index_words(
    source_indices[1:source_length - 1], vocabulary, oov_words=oov_words)
reference_summary = get_index_words(
    reference_indices[1:reference_length - 1], vocabulary, oov_words=oov_words)
decoded_summary = get_index_words(
    summary_indices[1:summary_length - 1], vocabulary, oov_words=oov_words)

produce_attention_visualization_file(
    'graphs/attn_vis_data.json',
    text,
    decoded_summary,
    " ".join(reference_summary),
    attentions,
    p_gens,
)