In [6]:
import pickle

import numpy as np
import torch
import torch.nn as nn
from torch import optim

from model import ArticleSummarizer
from model_utils import train, evaluate, get_data_chunk

In [None]:
# Synthetic smoke-test input: 3 articles x 10 sentences x 768-dim embeddings.
# The random values are drawn as a single ndarray: handing torch.tensor a
# Python list of ndarrays is very slow and makes PyTorch emit a UserWarning.
ten_sent_batch = torch.tensor(
    np.random.random((3, 10, 768)),
    dtype=torch.float
)
ten_sent_batch.shape # batchSize x seqLen x embSize

# summary_batch = torch.tensor(     # current summary sents + next sent (at end)
#     rnn.pad_sequence([
#         torch.tensor(np.random.random((2,768))),
#         torch.tensor(np.random.random((1,768))),
#         torch.tensor(np.random.random((4,768)))
#     ], batch_first=True), 
#     dtype=torch.float
# )
# input_lengths = [2,1,4]
# summary_batch.shape

# Hand-written gold action labels for the smoke-test batch: one row per
# article, one entry per sentence (3 x 10).
# NOTE(review): the meaning of the values 1 and 2 is not visible here - confirm
# against the training code.
gold_actions = np.asarray(
    (
        (2, 1, 2, 1, 1, 2, 2, 2, 1, 2),
        (1, 1, 2, 2, 2, 1, 1, 2, 2, 2),
        (1, 2, 1, 2, 2, 2, 2, 2, 1, 1),
    )
)
gold_actions.shape

In [7]:
# Dataset location, assembled from a base directory and the dataset name.
dataset_name = 'cnn_dailymail'
data_base = 'data'

data_path = f'{data_base}/{dataset_name}'

# Folder handed to get_data_chunk, and the file the model is saved to.
model_save_path = 'summarizer_model_1.pt'
model_data_folder = f'{data_path}/top_sentence_embs'

In [8]:
# Run-level hyperparameters.
num_epochs = 2  # number of iterations of the outer training loop
batch_size = 3  # passed to both the model constructor and get_data_chunk

In [9]:
# Device the model is created on and moved to. The cell used to assign 'cuda'
# and then immediately overwrite it with 'cpu'; only the effective value is
# kept. Set this to 'cuda' to run on a GPU.
mydevice = 'cpu'

In [10]:
# Build the summarizer, then move it to the configured device.
# input_size=768 and enc_seq_len=10 match the embedding size and sequence
# length of the smoke-test batch (batchSize x seqLen x embSize).
summarizer1 = ArticleSummarizer(
    input_size=768,
    hidden_size=64,
    output_size=2,
    enc_seq_len=10,
    summary_max_len=4,
    batch_size=batch_size,
    enc_num_layers=1,
    enc_bidirectional=False,
    dec_num_layers=1,
    dec_bidirectional=False,
    attention_method="key:encoder",
    use_teacher_forcing=True,
    device=mydevice,
)
summarizer1 = summarizer1.to(mydevice)

In [11]:
# Optimizer and loss that are handed to train() / evaluate().
# NOTE(review): NLLLoss expects log-probabilities as input; presumably the
# model's output layer applies log_softmax - confirm in model.py.
model_optimizer = optim.Adam(summarizer1.parameters(), lr=1e-3)
criterion = nn.NLLLoss()

In [12]:
# Load validation chunk 1 once; it is reused for evaluation after every epoch.
eval_chunk, num_e_batches = get_data_chunk(
    model_data_folder,
    'validation',
    1,
    batch_size,
)
# Drop the final batch unconditionally, since it may not have the same size
# as the others.
eval_chunk = eval_chunk[:-1]

In [None]:
# Train for num_epochs passes over the training chunks, evaluating on
# eval_chunk and checkpointing the model after every epoch.
num_train_chunks = 5  # training chunks are numbered 1..num_train_chunks

losses = []   # mean training loss of each epoch
e_acc = None  # stays None when num_epochs == 0 so the final print cannot raise NameError
for epoch in range(num_epochs):
    c_losses = []  # per-chunk training losses for this epoch
    for chunk_num in range(1, num_train_chunks + 1):
        train_chunk, num_batches = get_data_chunk(model_data_folder, 'train', chunk_num, batch_size)
        train_chunk = train_chunk[:-1]  # ignore the last batch as it may not be the same size
        summarizer1, model_optimizer, chunk_loss, info = train(summarizer1, criterion, model_optimizer, train_chunk)
        # Release the chunk before the next one is loaded (presumably to limit memory use).
        del train_chunk
        c_losses.append(chunk_loss)
    loss = np.mean(c_losses)
    losses.append(loss)
    e_loss, e_acc, e_info = evaluate(summarizer1, criterion, eval_chunk)
    print('Epoch:',epoch,'- Train Loss:',loss,'- Eval Loss:',e_loss,'- Eval Acc:',e_acc)
    # Checkpoint after each epoch; the file is overwritten every time, so it
    # always holds the most recently trained model. No extra save is needed
    # after the loop because the last epoch has already written it.
    torch.save(summarizer1, model_save_path)

print('All Epoch Train Loss:', losses)
print('Final Eval Accuracy:', e_acc)

In [None]:
# train_chunk, num_batches = get_data_chunk(model_data_folder, 'train', 1, batch_size)
# eval_chunk = train_chunk[:10]

# summarizer1, model_optimizer, loss, info = train(summarizer1, criterion, model_optimizer, train_chunk[:10])
# e_loss, e_acc, e_info = evaluate(summarizer1, criterion, eval_chunk)
# print('Epoch:',1,'- Train Loss:',loss,'- Eval Loss:',e_loss,'- Eval Acc:',e_acc)