In [1]:
import torch
from torch import nn
from torch.nn import functional as F
from PICOHelper import get_pico_datasets
from NewsroomHelper import get_newsroom_datasets
from SummarizationModelStructures import GeneratorModel
from utils import DataLoader, get_index_words
from pytorch_helper import ModelManipulator, plot_learning_curves

# Parameters

In [2]:
# Training hyperparameters (tune here; every later cell reads these names).
BATCH_SIZE = 64       # batch_size handed to the training DataLoader
NUM_EPOCHS = 10       # epoch count handed to ModelManipulator.train
LEARNING_RATE = 1e-2  # lr handed to the Adam optimizer
# INITIAL_ACCUMULATOR_VALUE = 0.1
GAMMA = 1             # forwarded to GeneratorModel as `gamma`

# Use the GPU whenever PyTorch reports one as available.
USE_CUDA = torch.cuda.is_available()
print(USE_CUDA)

True


# Get Data

In [3]:
# pico_dataset_train, pico_dataset_dev, pico_dataset_test = get_pico_datasets()
# Load the three Newsroom splits returned by the helper (train, dev, test).
(
    newsroom_dataset_train,
    newsroom_dataset_dev,
    newsroom_dataset_test,
) = get_newsroom_datasets()

# The embeddings and the special-token indices are all read from the training split.
word_vectors = newsroom_dataset_train.word_vectors
start_index, end_index = (
    newsroom_dataset_train.word_indices[token] for token in ('<start>', '<end>')
)

11029 3676 3678
retrieving word2vec model from file


# Model Structure

In [4]:
# text_encoder.num_hidden + summary_decoder.num_hidden
def loss(loss):
    """Return the given value unchanged.

    This is the loss callback handed to ``ModelManipulator``; it performs no
    computation of its own.
    NOTE(review): presumably the model's output already is the loss value --
    confirm against ``ModelManipulator``.
    """
    return loss

def error(loss):
    """Placeholder error callback: ignore the argument and return ``None``.

    No error metric is computed in this notebook, which is why the training
    log prints ``train_error: None``.
    """
    return None

In [5]:
# Build the summary generator from the pretrained word vectors and the
# <start>/<end> token indices.
# NOTE(review): num_hidden1/num_hidden2 are passed as None -- presumably
# GeneratorModel then chooses its own hidden sizes; confirm in
# SummarizationModelStructures.
generator_model = GeneratorModel(
    word_vectors,
    start_index,
    end_index,
    num_hidden1=None,
    num_hidden2=None,
    with_coverage=False,
    gamma=GAMMA,
)

# Train and Save Model

In [None]:
# Batches of BATCH_SIZE drawn from the training split, with shuffling enabled.
dataloader = DataLoader(
    newsroom_dataset_train,
    batch_size=BATCH_SIZE,
    shuffle=True,
)

optimizer = torch.optim.Adam(generator_model.parameters(), lr=LEARNING_RATE)
# optimizer = torch.optim.Adagrad((generator_model.cuda() if USE_CUDA else generator_model).parameters(),
#                                 lr=LEARNING_RATE, initial_accumulator_value=INITIAL_ACCUMULATOR_VALUE)

# `loss` and `error` are the pass-through callbacks defined in the
# model-structure cell.
model_manip = ModelManipulator(
    generator_model,
    optimizer,
    loss,
    error,
    use_cuda=USE_CUDA,
)

# Train, with stats_every and verbose_every both set to 10 (batches, per the
# log output below).
train_stats, val_stats = model_manip.train(
    dataloader,
    NUM_EPOCHS,
    dataset_val=newsroom_dataset_dev,
    stats_every=10,
    verbose_every=10,
)

epoch: 0, batch: 0, train_loss: 216.238525, train_error: None
epoch: 0, batch: 10, train_loss: 158.997604, train_error: None
epoch: 0, batch: 20, train_loss: 182.418015, train_error: None
epoch: 0, batch: 30, train_loss: 208.012192, train_error: None
epoch: 0, batch: 40, train_loss: 143.501373, train_error: None
epoch: 0, batch: 50, train_loss: 156.999603, train_error: None
epoch: 0, batch: 60, train_loss: 86.735733, train_error: None
epoch: 0, batch: 70, train_loss: 185.347977, train_error: None
epoch: 0, batch: 80, train_loss: 113.356445, train_error: None
epoch: 0, batch: 90, train_loss: 92.096771, train_error: None
epoch: 0, batch: 100, train_loss: 102.141060, train_error: None
epoch: 0, batch: 110, train_loss: 96.801842, train_error: None
epoch: 0, batch: 120, train_loss: 82.156792, train_error: None
epoch: 0, batch: 130, train_loss: 103.128029, train_error: None
epoch: 0, batch: 140, train_loss: 100.096527, train_error: None
epoch: 0, batch: 150, train_loss: 103.898499, train_err

In [None]:
# Persist the whole module object (not just its state_dict). Loading this file
# later needs the GeneratorModel class to be importable, because torch.save
# pickles the object.
torch.save(obj=generator_model, f='data/generator_test.model')

# Plot

In [None]:
# Plot the training and validation statistics collected by model_manip.train.
plot_learning_curves(
    training_values=train_stats,
    validation_values=val_stats,
    figure_name='summarization_training_test',
)

In [None]:
# Run the generator on the first five dev examples.
batch = newsroom_dataset_dev[0:5]
batch_text, batch_text_length = batch['text'], batch['text_length']

# Move the inputs to the GPU only when one is available. An unconditional
# .cuda() raises on CPU-only machines, and USE_CUDA is the switch the rest of
# the notebook already uses.
# NOTE(review): assumes ModelManipulator moved generator_model to the GPU when
# use_cuda=True -- confirm.
if USE_CUDA:
    batch_text = batch_text.cuda()
    batch_text_length = batch_text_length.cuda()

generated_output = generator_model(batch_text, batch_text_length)

In [None]:
# Show each source text, its reference summary and the generated summary as words.
# generated_output[1] is iterated as per-example index sequences and
# generated_output[2] supplies the slice length for each one.
vocabulary = newsroom_dataset_train.words
for sample_idx, generated_indices in enumerate(generated_output[1]):
    source_length = batch['text_length'][sample_idx]
    source_indices = batch['text'][sample_idx][:source_length]
    print("text", get_index_words(source_indices, vocabulary))

    reference_length = batch['summary_length'][sample_idx]
    reference_indices = batch['summary'][sample_idx][:reference_length]
    print("summary", get_index_words(reference_indices, vocabulary))

    generated_length = generated_output[2][sample_idx]
    print("generated summary", get_index_words(generated_indices[:generated_length], vocabulary))

# First element of the model output, printed as-is.
print(generated_output[0])