In [1]:
import numpy as np

from dataset import sequence
from common.optimizer import Adam
from common.trainer import Trainer
from common.util import eval_seq2seq
from attention import AttentionSeq2seq2
from ch7.seq2seq import Seq2seq
from ch7.peeky_seq2seq import PeekySeq2seq

In [2]:
# Load the date-conversion dataset and the character <-> id vocabulary tables.
(x_train, t_train), (x_test, t_test) = sequence.load_data("dataset/date.txt")
char_to_id, id_to_char = sequence.get_vocab()

print(x_train.shape, x_test.shape)

# Flip every input sequence along axis 1. The evaluation loop further down
# passes is_reverse=True to eval_seq2seq to account for this.
x_train = x_train[:, ::-1]
x_test = x_test[:, ::-1]

(45000, 29) (5000, 29)


In [3]:
# Sanity check: the input and target arrays must have the same number of rows.
print(x_train.shape, t_train.shape)
# Peek at the vocabulary: the mapping's type and the id assigned to the character "s".
type(char_to_id), char_to_id["s"]

(45000, 29) (45000, 11)


(dict, 0)

In [4]:
# Hyperparameters
vocab_size = len(char_to_id)  # number of distinct entries in the character vocabulary
wordvec_size = 16  # 2nd argument of AttentionSeq2seq2 (presumably the embedding width — confirm)
hidden_size = 256  # 3rd argument of AttentionSeq2seq2 (presumably the recurrent hidden width — confirm)
batch_size = 128  # mini-batch size handed to trainer.fit
max_epoch = 10  # number of passes of the outer train/evaluate loop
max_grad = 5.0  # forwarded to trainer.fit as max_grad (presumably a gradient-clipping threshold — confirm)

# Bare expression so the notebook displays the resulting vocabulary size.
vocab_size

59

In [5]:
# Build the attention-based seq2seq model from the sizes chosen above.
model = AttentionSeq2seq2(vocab_size, wordvec_size, hidden_size)
# Adam with its default settings (no arguments are overridden here).
optimizer = Adam()
# The trainer pairs the model with the optimizer; it is driven via trainer.fit below.
trainer = Trainer(model, optimizer)

In [6]:
%%time
acc_list = []
for epoch in range(max_epoch):
    trainer.fit(x_train, t_train, max_epoch=1, batch_size=batch_size, max_grad=max_grad)
    correct_num = 0
    for i in range(len(x_test)):
        question, correct = x_test[[i]], t_test[[i]]
        verbose = i < 10
        correct_num += eval_seq2seq(model, question, correct, id_to_char, verbose, is_reverse=True)

    acc = float(correct_num) / len(x_test)
    acc_list.append(acc)
    print("val acc %.3f%%" % (acc*100))

[Trainer] batch_x: (128, 29)
[Trainer] batch_t: (128, 11)
[Seq2seq.forward] xs: (128, 29), ts: (128, 11)
[AttentionDecoder.forward] out: (128, 10, 16)
[AttentionDecoder.forward] dec_hs: (128, 10, 256) <- この長さが理想の10倍だった
[AttentionDecoder.forward] c: (128, 10, 256)
[AttentionDecoder.forward] out: (128, 10, 512)
[TimeAffine.forward] x: (128, 10, 512)
[TimeAffine.forward] rx: (1280, 512)
[TimeAffine.forward] return: (128, 10, 59)
[AttentionDecoder.forward] score: (128, 10, 59)
[TimeSoftmaxWithLoss] xs: (128, 10, 59)
[Seq2seq] dout: (128, 10, 59)
[TimeAffine.backward] return dx: (128, 10, 512)
[TimeAttention.backward] dout: (128, 10, 256)
| epoch 1 |  iter 1 / 351 | time 0[s] | loss 4.08
[Trainer] batch_x: (128, 29)
[Trainer] batch_t: (128, 11)
[Seq2seq.forward] xs: (128, 29), ts: (128, 11)
[AttentionDecoder.forward] out: (128, 10, 16)
[AttentionDecoder.forward] dec_hs: (128, 10, 256) <- この長さが理想の10倍だった
[AttentionDecoder.forward] c: (128, 10, 256)
[AttentionDecoder.forward] out: (128, 10, 51

KeyboardInterrupt: 

In [None]:
# Left disabled on purpose; uncomment to call model.save_params().
# NOTE(review): presumably this persists the trained weights to disk — confirm
# the target path/behavior in the model class before enabling.
# model.save_params()