In [1]:
from pointer_net import PointerNetwork
import sys
import numpy as np
if int(sys.version[0]) == 2:
    from io import open


def read_data(path):
    """Return the entire contents of the UTF-8 text file at ``path``."""
    with open(path, mode='r', encoding='utf-8') as handle:
        contents = handle.read()
    return contents
# end function


def build_map(data):
    """Build the character vocabulary for ``data``.

    The four special tokens take ids 0-3; every distinct character of
    ``data`` other than the newline separator follows in sorted order.

    Returns:
        ``(idx2char, char2idx)``: the id -> symbol dict and its inverse.
    """
    reserved = ['<GO>',  '<EOS>', '<PAD>', '<UNK>']
    # Newlines only delimit lines, so they never become vocabulary entries.
    alphabet = sorted({symbol for symbol in data if symbol != '\n'})
    vocabulary = reserved + alphabet
    idx2char = dict(enumerate(vocabulary))
    char2idx = {symbol: position for position, symbol in enumerate(vocabulary)}
    return idx2char, char2idx
# end function


def _encode_line(line, char2idx, max_len, line_no, kind):
    """Map ``line`` to vocabulary ids and right-pad it to ``max_len`` entries.

    Returns ``(ids, padded)`` where ``ids`` holds the raw character ids and
    ``padded`` is ``ids`` + ``<EOS>`` + as many ``<PAD>`` ids as are needed to
    reach ``max_len``.

    Raises:
        ValueError: if the line plus ``<EOS>`` does not fit into ``max_len``.
    """
    unk = char2idx['<UNK>']
    ids = [char2idx.get(char, unk) for char in line]
    n_pad = max_len - 1 - len(ids)
    if n_pad < 0:
        # A negative repeat count would silently produce *no* padding and a
        # row longer than max_len, i.e. a ragged batch further down.
        raise ValueError(
            "{} line {} has {} characters; at most {} fit into max_len={} "
            "once <EOS> is appended".format(
                kind, line_no, len(ids), max_len - 1, max_len))
    return ids, ids + [char2idx['<EOS>']] + [char2idx['<PAD>']] * n_pad


def preprocess_data(max_len, source_path='temp/letters_source.txt',
                    target_path='temp/letters_target.txt'):
    """Load the parallel source/target files and encode them as index arrays.

    The vocabulary is built from the source file only. Each source line is
    converted to character ids, terminated with ``<EOS>`` and padded with
    ``<PAD>`` to ``max_len``. Each target line is encoded the same way and
    then every symbol is replaced by the position of its first occurrence in
    the padded source row, because the positions are what gets predicted.

    NOTE: lines are obtained with ``split('\n')``, so a trailing newline in a
    file yields one final empty line that is encoded like any other line.

    Args:
        max_len: Row length of the encoded sequences, ``<EOS>`` included.
        source_path: Path of the source text file (one sequence per line).
        target_path: Path of the target text file (one sequence per line).

    Returns:
        ``(X_indices, X_seq_len, Y_indices, Y_seq_len, X_char2idx,
        X_idx2char)``: the padded source ids, the source lengths including
        ``<EOS>``, the target positions, the target lengths including
        ``<EOS>`` (all as NumPy arrays), and the two vocabulary dicts.

    Raises:
        ValueError: if a line is longer than ``max_len - 1`` characters, or
            if a target symbol does not occur in its padded source row.
    """
    X_data = read_data(source_path)
    Y_data = read_data(target_path)

    X_idx2char, X_char2idx = build_map(X_data)
    print("==> Word Index Built")

    X_indices = []
    X_seq_len = []
    Y_indices = []
    Y_seq_len = []

    line_pairs = zip(X_data.split('\n'), Y_data.split('\n'))
    for line_no, (x_line, y_line) in enumerate(line_pairs, 1):
        x_chars, _x_chars = _encode_line(
            x_line, X_char2idx, max_len, line_no, 'source')
        y_chars, _y_chars = _encode_line(
            y_line, X_char2idx, max_len, line_no, 'target')

        # First occurrence of every symbol in the padded source row; this is
        # what list.index() would return, computed once instead of per symbol.
        first_pos = {}
        for pos, idx in enumerate(_x_chars):
            first_pos.setdefault(idx, pos)
        try:
            target = [first_pos[idx] for idx in _y_chars]  # we are predicting the positions
        except KeyError as err:
            raise ValueError(
                "target line {} needs symbol {!r}, which does not occur in "
                "its padded source line".format(
                    line_no, X_idx2char[err.args[0]]))

        X_indices.append(_x_chars)
        Y_indices.append(target)
        X_seq_len.append(len(x_chars) + 1)  # +1 accounts for <EOS>
        Y_seq_len.append(len(y_chars) + 1)

    X_indices = np.array(X_indices)
    Y_indices = np.array(Y_indices)
    X_seq_len = np.array(X_seq_len)
    Y_seq_len = np.array(Y_seq_len)
    print("==> Sequence Padded")

    return X_indices, X_seq_len, Y_indices, Y_seq_len, X_char2idx, X_idx2char
# end function


def train_test_split(X_indices, X_seq_len, Y_indices, Y_seq_len, BATCH_SIZE):
    """Hold out the first ``BATCH_SIZE`` samples for testing.

    All four inputs are sliced identically: everything from position
    ``BATCH_SIZE`` onwards forms the training part, and the leading
    ``BATCH_SIZE`` entries form the test part.

    Returns:
        ``(train, test)``, each a 4-tuple ordered as
        ``(X, X_len, Y, Y_len)``.
    """
    columns = (X_indices, X_seq_len, Y_indices, Y_seq_len)
    held_out = slice(None, BATCH_SIZE)
    remainder = slice(BATCH_SIZE, None)

    train_part = tuple(column[remainder] for column in columns)
    test_part = tuple(column[held_out] for column in columns)
    return train_part, test_part
# end function


def main():
    """Encode the letter data, train a PointerNetwork and run three sample inferences."""
    BATCH_SIZE = 128
    MAX_LEN = 15

    (X_indices, X_seq_len, Y_indices, Y_seq_len,
     X_char2idx, X_idx2char) = preprocess_data(MAX_LEN)

    # Each split is an (X, X_len, Y, Y_len) tuple; the first BATCH_SIZE
    # samples are used as validation data.
    train_set, val_set = train_test_split(
        X_indices, X_seq_len, Y_indices, Y_seq_len, BATCH_SIZE)

    model = PointerNetwork(
        max_len=MAX_LEN,
        rnn_size=50,
        X_word2idx=X_char2idx,
        embedding_dim=15)

    model.fit(*train_set, val_data=val_set, batch_size=BATCH_SIZE, n_epoch=100)

    for sample in ('common', 'apple', 'zhedong'):
        model.infer(sample, X_idx2char)
# end main


# Run the training demo only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()


==> Word Index Built
==> Sequence Padded
Epoch 1/100 | Batch 0/77 | train_loss: 2.708 | test_loss: 2.703
Epoch 1/100 | Batch 50/77 | train_loss: 2.159 | test_loss: 2.113
Epoch 2/100 | Batch 0/77 | train_loss: 1.826 | test_loss: 1.775
Epoch 2/100 | Batch 50/77 | train_loss: 1.440 | test_loss: 1.345
Epoch 3/100 | Batch 0/77 | train_loss: 1.061 | test_loss: 1.059
Epoch 3/100 | Batch 50/77 | train_loss: 0.812 | test_loss: 0.769
Epoch 4/100 | Batch 0/77 | train_loss: 0.689 | test_loss: 0.695
Epoch 4/100 | Batch 50/77 | train_loss: 0.628 | test_loss: 0.599
Epoch 5/100 | Batch 0/77 | train_loss: 0.556 | test_loss: 0.558
Epoch 5/100 | Batch 50/77 | train_loss: 0.516 | test_loss: 0.490
Epoch 6/100 | Batch 0/77 | train_loss: 0.467 | test_loss: 0.460
Epoch 6/100 | Batch 50/77 | train_loss: 0.434 | test_loss: 0.415
Epoch 7/100 | Batch 0/77 | train_loss: 0.409 | test_loss: 0.391
Epoch 7/100 | Batch 50/77 | train_loss: 0.377 | test_loss: 0.361
Epoch 8/100 | Batch 0/77 | train_loss: 0.361 | test_loss

Epoch 63/100 | Batch 50/77 | train_loss: 0.054 | test_loss: 0.071
Epoch 64/100 | Batch 0/77 | train_loss: 0.069 | test_loss: 0.084
Epoch 64/100 | Batch 50/77 | train_loss: 0.050 | test_loss: 0.070
Epoch 65/100 | Batch 0/77 | train_loss: 0.071 | test_loss: 0.090
Epoch 65/100 | Batch 50/77 | train_loss: 0.049 | test_loss: 0.068
Epoch 66/100 | Batch 0/77 | train_loss: 0.057 | test_loss: 0.083
Epoch 66/100 | Batch 50/77 | train_loss: 0.059 | test_loss: 0.076
Epoch 67/100 | Batch 0/77 | train_loss: 0.076 | test_loss: 0.097
Epoch 67/100 | Batch 50/77 | train_loss: 0.054 | test_loss: 0.073
Epoch 68/100 | Batch 0/77 | train_loss: 0.058 | test_loss: 0.081
Epoch 68/100 | Batch 50/77 | train_loss: 0.049 | test_loss: 0.071
Epoch 69/100 | Batch 0/77 | train_loss: 0.054 | test_loss: 0.080
Epoch 69/100 | Batch 50/77 | train_loss: 0.045 | test_loss: 0.067
Epoch 70/100 | Batch 0/77 | train_loss: 0.055 | test_loss: 0.083
Epoch 70/100 | Batch 50/77 | train_loss: 0.046 | test_loss: 0.071
Epoch 71/100 | Ba