<a href="https://colab.research.google.com/github/ekity1002/DL4US_homeworks/blob/master/lesson4_cv10_attention.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Lesson4 ニューラル翻訳モデルを作ってみよう（Seq2Seq, Attention）

## Homework

RNNを用いて高精度な英日翻訳器を実装してみましょう。

ネットワークの形などは特に制限を設けませんし、今回のLessonで扱った内容以外の工夫も組み込んでもらって構いません。

精度上位者はリーダーボードに掲載させていただきます。（精度の評価はBLEUスコアによって行います。）

## 目標値

BLEU: 0.15

## ルール

- 以下のサンプルも参考にしながら翻訳文を生成しcsvファイルに出力して下さい。
- BLEUスコア(4-gramまで)で評価します。
- 学習データとテストデータの入力の系列長はpaddingで揃えてあります。

## 評価について

- テストデータ(x_test)に対する予測ラベルをcsvファイルで提出してください。
- ファイル名はsubmission.csvとしてください。
- 予測ラベルのy_testに対する精度 (BLEU)で評価します。
- 毎日24時にテストデータの一部に対する精度でLeader Boardを更新します。
- 最終的な評価はテストデータ全体に対する精度でおこないます。

## サンプルコード

次のセルで指定されているx_train, y_trainのみを使って学習させてください。　

In [0]:
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [0]:
import csv
from multiprocessing import Pool

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Model
from tensorflow.keras.layers import add, average, Activation, Bidirectional, CuDNNLSTM, concatenate, Dense, dot, Embedding, Input, LSTM
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from tensorflow.keras.callbacks import EarlyStopping, LearningRateScheduler, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras import backend as K
from sklearn.model_selection import KFold
import warnings
warnings.filterwarnings('ignore')

from functools import partial
np.load = partial(np.load, allow_pickle=True)  # monkey patch

def load_data():
    # 学習データ
    # x_train = np.load('/root/userspace/lesson4/data/x_train.npy')
    # y_train = np.load('/root/userspace/lesson4/data/y_train.npy')
    # tokenizer_en = np.load('/root/userspace/lesson4/data/tokenizer_en.npy').item()
    # tokenizer_ja = np.load('/root/userspace/lesson4/data/tokenizer_ja.npy').item()

    x_train = np.load('/content/drive/My Drive/Colab Notebooks/x_train.npy')
    y_train = np.load('/content/drive/My Drive/Colab Notebooks/y_train.npy')
    tokenizer_en = np.load('/content/drive/My Drive/Colab Notebooks/tokenizer_en.npy').item()
    tokenizer_ja = np.load('/content/drive/My Drive/Colab Notebooks/tokenizer_ja.npy').item()

    # テストデータ
    x_test = np.load('/content/drive/My Drive/Colab Notebooks/x_test.npy')

    return (x_train, y_train, tokenizer_en, tokenizer_ja, x_test)

x_train, y_train, tokenizer_en, tokenizer_ja, x_test = load_data()

In [0]:
emb_dim = 1024
hid_dim = 1024
att_dim = 1024

# LSTM mask_zero オプション使うので +1 する
en_vocab_size = len(tokenizer_en.word_index) + 1
ja_vocab_size = len(tokenizer_ja.word_index) + 1

seqX_len = len(x_train[0])
seqY_len = len(y_train[0])

encoder_states = []

train_target = np.hstack((y_train[:, 1:], np.zeros((len(y_train),1), dtype=np.int32)))
bos_eos = tokenizer_ja.texts_to_sequences(["<s>", "</s>"])

In [0]:
def build_attention_models():
    # 訓練用モデル
    # 符号化器
    layer_num = 2
    encoder_inputs = Input(shape=(seqX_len,))
    encoder_embedded = Embedding(en_vocab_size, emb_dim, mask_zero=True)(encoder_inputs)

#     encoder_states = []
#     encoded_seq, *states = LSTM(hid_dim, return_sequences=True, return_state=True)(encoder_embedded)
#     encoder_states.append(states)
#     encoded_seq, *states = LSTM(hid_dim, return_sequences=True, return_state=True)(encoded_seq)
#     encoder_states.append(states)
    encoded_seq, *encoder_states = LSTM(hid_dim, return_sequences=True, return_state=True, dropout=0.2, recurrent_dropout=0.2)(encoder_embedded)
    print(encoder_states)
    
    # 復号化器（encoder_statesを初期状態として指定）
    decoder_inputs = Input(shape=(seqY_len,))
    decoder_embedding = Embedding(ja_vocab_size, emb_dim)
    decoder_embedded = decoder_embedding(decoder_inputs)
    
#     decoder_lstm_1 = CuDNNLSTM(hid_dim, return_sequences=True, return_state=True)
#     decoder_lstm_2 = CuDNNLSTM(hid_dim, return_sequences=True, return_state=True)
    decoder_lstm = LSTM(hid_dim, return_sequences=True, return_state=True,dropout=0.2, recurrent_dropout=0.2)
#     decoded_seq, _, _ = decoder_lstm(decoder_embedded, initial_state=encoder_states)
#     for i, s in enumerate(encoder_states):
#         if i == 0:
#             decoded_seq, _, _ = decoder_lstm_1(decoder_embedded, initial_state=s)
#         else:
#             decoded_seq, _, _ = decoder_lstm_2(decoded_seq, initial_state=s)
    decoded_seq, _, _ = decoder_lstm(decoder_embedded, initial_state=encoder_states)

    # Attention
    print('decoded_seq: ', decoded_seq)
    score_dense = Dense(hid_dim)
    score = score_dense(decoded_seq) # shape: (seqY_len, hid_dim) -> (seqY_len, hid_dim)
    print('score, encoded_seq: ',score, encoded_seq)
    score = dot([score, encoded_seq], axes=(2,2))           # shape: [(seqY_len, hid_dim), (seqX_len, hid_dim)] -> (seqY_len, seqX_len)
    #print('dot score: ',score)
    attention = Activation('softmax')(score)                # shape: (seqY_len, seqX_len) -> (seqY_len, seqX_len)
    #print('attention', attention)
    context = dot([attention, encoded_seq], axes=(2,1))     # shape: [(seqY_len, seqX_len), (seqX_len, hid_dim)] -> (seqY_len, hid_dim)
    #print('context', context)
    concat = concatenate([context, decoded_seq], axis=2)    # shape: [(seqY_len, hid_dim), (seqY_len, hid_dim)] -> (seqY_len, 2*hid_dim)
    attention_dense = Dense(att_dim, activation='tanh')
    attentional = attention_dense(concat)                   # shape: (seqY_len, 2*hid_dim) -> (seqY_len, att_dim)
    output_dense = Dense(ja_vocab_size, activation='softmax')
    outputs = output_dense(attentional)                     # shape: (seqY_len, att_dim) -> (seqY_len, ja_vocab_size)

    model = Model([encoder_inputs, decoder_inputs], outputs)
    model.compile(optimizer='rmsprop', loss='sparse_categorical_crossentropy')

    # -----------------------------------
    # 生成用モデル
    encoder_model = Model(encoder_inputs, [encoded_seq]+encoder_states)
    #encoder_model = Model(encoder_inputs, [encoded_seq]+encoder_states[-1])

    #decoder_states_inputs1 = [Input(shape=(hid_dim,)), Input(shape=(hid_dim,))]
    #decoder_states_inputs2 = [Input(shape=(hid_dim,)), Input(shape=(hid_dim,))]
    decoder_states_inputs = [Input(shape=(hid_dim,)), Input(shape=(hid_dim,))]
    decoder_inputs = Input(shape=(1,))
    decoder_embedded = decoder_embedding(decoder_inputs)
    
#     for i, s in enumerate(encoder_states):
#         if i == 0:
#             decoder_seq, *decoder_states = decoder_lstm_1(decoder_embedded, initial_state=decoder_states_inputs1)
#         else:
#             decoder_seq, *decoder_states = decoder_lstm_2(decoder_seq, initial_state=decoder_states_inputs2)
    decoder_seq, *decoder_states = decoder_lstm(decoder_embedded, initial_state=decoder_states_inputs)
    print(decoder_seq)
    print(decoder_states)
    decoder_model = Model([decoder_inputs] + decoder_states_inputs, [decoder_seq] + decoder_states)

    # Attention
    encoded_seq_in, decoded_seq_in = Input(shape=(seqX_len, hid_dim)), Input(shape=(1, hid_dim))
    score =score_dense(decoded_seq_in)
    score = dot([score, encoded_seq_in], axes=(2,2))
    attention = Activation('softmax')(score)
    context = dot([attention, encoded_seq_in], axes=(2,1))
    concat = concatenate([context, decoded_seq_in], axis=2)
    attentional = attention_dense(concat)
    attention_outputs = output_dense(attentional)

    attention_model = Model([encoded_seq_in, decoded_seq_in], [attention_outputs, attention])
    return model, encoder_model, decoder_model, attention_model
_,_,_,_ = build_attention_models()

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
[<tf.Tensor 'lstm/while/Exit_3:0' shape=(?, 1024) dtype=float32>, <tf.Tensor 'lstm/while/Exit_4:0' shape=(?, 1024) dtype=float32>]
decoded_seq:  Tensor("lstm_1/transpose_1:0", shape=(?, 18, 1024), dtype=float32)
score, encoded_seq:  Tensor("dense/BiasAdd:0", shape=(?, 18, 1024), dtype=float32) Tensor("lstm/transpose_2:0", shape=(?, 18, 1024), dtype=float32)
Tensor("lstm_1_1/transpose_1:0", shape=(?, 1, 1024), dtype=float32)
[<tf.Tensor 'lstm_1_1/while/Exit_2:0' shape=(?, 1024) dtype=float32>, <tf.Tensor 'lstm_1_1/while/Exit_3:0' shape=(?, 1024) dtype=float32>]


In [0]:
def plot_acc_and_loss(history):
    """plot history"""
    fig, ax = plt.subplots(1,2, figsize=(15, 5))
    
#     acc = history.history['acc']
#     val_acc = history.history['val_acc']    
#     epochs=range(1, len(acc)+1)    
#     ax[0].plot(epochs, acc, label='Train')
#     ax[0].plot(epochs, val_acc, label='Val')
#     ax[0].legend()
#     ax[0].set_title('Accuracy')
    
    loss = history.history['loss']
    val_loss = history.history['val_loss']
    epochs=range(1, len(loss)+1)
    ax[1].plot(epochs, loss, label='Train')
    ax[1].plot(epochs, val_loss, label='Val')
    ax[1].legend()
    ax[1].set_title('Loss')
    plt.show()

In [0]:
epochs=1
batch_size=512
callbacks = [
    EarlyStopping(monitor='val_loss', patience=20),
    ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=10),
    #ModelCheckpoint(filepath=modelpath, monitor='val_acc', save_best_only=True)
]
train_model, encoder_model, decoder_model, attention_model = build_attention_models()
history = train_model.fit([x_train, y_train], np.expand_dims(train_target, -1), 
                    batch_size=batch_size,
                    epochs=epochs,
                    validation_split=0.2, 
                    callbacks=callbacks
                   )

[<tf.Tensor 'lstm_2/while/Exit_3:0' shape=(?, 1024) dtype=float32>, <tf.Tensor 'lstm_2/while/Exit_4:0' shape=(?, 1024) dtype=float32>]
decoded_seq:  Tensor("lstm_3/transpose_1:0", shape=(?, 18, 1024), dtype=float32)
score, encoded_seq:  Tensor("dense_3_1/BiasAdd:0", shape=(?, 18, 1024), dtype=float32) Tensor("lstm_2/transpose_2:0", shape=(?, 18, 1024), dtype=float32)
Tensor("lstm_3_1/transpose_1:0", shape=(?, 1, 1024), dtype=float32)
[<tf.Tensor 'lstm_3_1/while/Exit_2:0' shape=(?, 1024) dtype=float32>, <tf.Tensor 'lstm_3_1/while/Exit_3:0' shape=(?, 1024) dtype=float32>]
Train on 39200 samples, validate on 9800 samples


In [0]:
def decode_sequence(input_seq, bos_eos, max_output_length,
                    encoder_model, decoder_model, attention_model):
    """
    input_seq : 入力する文 array-like, shape=(1, seq_len)
    bos_eos : 文の最初と最後を表す特殊文字のインデックス list
    max_output_length : 最大出力語数
    """
    #states_value = encoder_model.predict(input_seq)
    encoded_seq, *states_value = encoder_model.predict(input_seq)
    #print(encoded_seq)
    #print(states_value)
    target_seq = np.array(bos_eos[0])
    output_seq = bos_eos[0][:]
    attention_seq = np.empty((0, len(input_seq[0]))) #初期化せずに配列生成
    #print("seqs:", target_seq, output_seq, attention_seq)

    while True:
        decoded_seq, *states_value = decoder_model.predict([target_seq] + states_value) #開始を表す一文字と encoder_model の状態のリスト
        output_tokens, attention = attention_model.predict([encoded_seq, decoded_seq])
        print("---------")
        print(np.array(output_tokens).shape, output_tokens)
        sampled_token_index = [np.argmax(output_tokens[0, -1, :])]
        output_seq += sampled_token_index
        attention_seq = np.append(attention_seq, attention[0], axis=0)
        
        if (sampled_token_index == bos_eos[1] or len(output_seq) > max_output_length):
            break

        target_seq = np.array(sampled_token_index)

    return output_seq, attention_seq

def mean_bleu(x_train, y_train, encoder_model, decoder_model, attention_model,
              start_i=0, end_i=None):
    """データでBELUの平均値確認"""
    out_seqs, att_seqs = [], []
    bos_eos = tokenizer_ja.texts_to_sequences(["<s>", "</s>"])
    if end_i is None:
        end_i = len(x_train)

    for i in range(len(x_train[:end_i])):
        out, att = decode_sequence(x_train[i][np.newaxis,:], bos_eos, 100,
                                   encoder_model, decoder_model, attention_model)
        out_seqs.append(out)
        att_seqs.append(att)
    tr_output = [out_seqs[i][1:-1] for i in range(len(x_train[:end_i]))]    

    pred_output = [[tokenizer_ja.index_word[idx] for idx in seq if idx != 0] for seq in tr_output]
    true_output = [[tokenizer_ja.index_word[idx] for idx in seq if idx != 0] for seq in y_train]
    bleu_scores = [sentence_bleu([true[1:-1]], pred) for true, pred in zip(true_output, pred_output)]

    print("Mean BLEU: ", np.mean(bleu_scores))

    return pred_output, true_output, bleu_scores

In [0]:
_, _, _ = mean_bleu(x_train, y_train, encoder_model, decoder_model, attention_model, start_i=0, end_i=10)

---------
(1, 1, 8777) [[[2.8475255e-05 3.6483220e-07 3.8614085e-05 ... 4.1695339e-06
   5.1268654e-07 1.0325260e-06]]]
---------
(1, 1, 8777) [[[5.5915542e-04 1.4668708e-09 3.7480233e-05 ... 3.7789775e-08
   1.2116634e-09 6.9678023e-09]]]
---------
(1, 1, 8777) [[[3.1767040e-06 1.4122185e-06 1.0480249e-05 ... 1.0595589e-05
   1.8599945e-06 3.1049542e-06]]]
---------
(1, 1, 8777) [[[3.9614958e-04 1.3407708e-09 2.4707962e-05 ... 3.2808437e-08
   9.9559727e-10 6.2034462e-09]]]
---------
(1, 1, 8777) [[[2.1115454e-06 2.8455624e-06 1.0275919e-05 ... 1.6357710e-05
   3.9208103e-06 5.6678000e-06]]]
---------
(1, 1, 8777) [[[4.0173464e-04 1.8034043e-09 2.5420295e-05 ... 4.2256229e-08
   1.3417391e-09 8.4188425e-09]]]
---------
(1, 1, 8777) [[[2.6684816e-06 3.5486041e-06 1.2104732e-05 ... 1.9054534e-05
   5.0599633e-06 7.0627693e-06]]]
---------
(1, 1, 8777) [[[3.9748143e-04 2.0554729e-09 2.4133853e-05 ... 4.7358501e-08
   1.5522479e-09 9.7137800e-09]]]
---------
(1, 1, 8777) [[[3.2902797e-06 

KeyboardInterrupt: ignored

In [0]:
# check prediction
def check_test_pred(x_test, encoder_model, decoder_model, attention_model,
                    start_i=0, end_i=None):
    """test データの文書と翻訳結果を見る"""
    out_seqs, att_seqs = [], []
    bos_eos = tokenizer_ja.texts_to_sequences(["<s>", "</s>"])
    if end_i is None:
        end_i = len(x_train)
    for i in range(len(x_test[:end_i])):
        out, att = decode_sequence(x_test[i][np.newaxis,:], bos_eos, 100,
                                   encoder_model, decoder_model, attention_model)
        out_seqs.append(out)
        att_seqs.append(att)

    pred_test = [out_seqs[i][1:-1] for i in range(len(x_test[:end_i]))]
    pred_test = [[tokenizer_ja.index_word[idx] for idx in seq if idx != 0] for seq in pred_test]
    test_eng = [[tokenizer_en.index_word[idx] for idx in seq if idx != 0] for seq in x_test]

    for test, pred in zip(test_eng, pred_test):
        print("----------------------------")
        print(f"eng  sentence: {test[1:-1]}")
        print(f"pred sentence: {pred}")
        print("----------------------------")
    return pred_test, test_eng
#pred_test, test_eng = check_test_pred(x_test, encoder_model, decoder_model, attention_model, end_i=10)

In [0]:
# csv出力
def write_csv(filepath, x_test, bos_eos,
              encoder_model, decoder_model, attention_model):
    out_seqs, att_seqs = [], []
    for i in range(x_test.shape[0]):
        out, att = decode_sequence(x_test[i][np.newaxis,:], bos_eos, 100,
                                   encoder_model, decoder_model, attention_model)
        out_seqs.append(out)
        att_seqs.append(att)
        #print(out)
    output = [out_seqs[i][1:-1] for i in range(len(x_test))]
    #output = [decode_sequence(x_test[i][np.newaxis,:], bos_eos, 100)[1:-1] for i in range(len(x_test))]
    #print(output)
    with open(filepath, 'w') as file:
        writer = csv.writer(file, lineterminator='\n')
        writer.writerows(output)

#filepath = 'colab_attention_exp.csv'
#write_csv(filepath, x_test, bos_eos)

In [0]:
SEED = 20000
def run_cv(x_train, y_train, test, params={}):
    N = 10
    kf = KFold(n_splits=N, random_state=SEED)
    fold_splits = kf.split(x_train, y_train)
    tr_scores = []
    val_scores = []
    #results = np.zeros((test.shape[0], N))
    train_models, encoder_models, decoder_models, attention_models = [], [], [], []
    i = 0
    max_mean = 0
    best_bleu, best_idx = 0, 0
    for tr_idx, val_idx in fold_splits:
        print(f'Start fold {i+1}/{N}')
#         if i!=2:
#             i+=1
#             continue
        tr_X, val_X = x_train[tr_idx, :], x_train[val_idx, :]
        tr_y, val_y = y_train[tr_idx, :], y_train[val_idx, :]
        print(tr_X.shape, val_X.shape)
        print(tr_y.shape, val_y.shape)

        epochs=1000
        batch_size=512
        callbacks = [
            EarlyStopping(monitor='val_loss', patience=8),
            ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5),
            #ModelCheckpoint(filepath=modelpath, monitor='val_acc', save_best_only=True)
        ]
        train_model, encoder_model, decoder_model, attention_model = build_attention_models()
        train_target = np.hstack((tr_y[:, 1:], np.zeros((len(tr_y),1), dtype=np.int32)))
        val_target = np.hstack((val_y[:, 1:], np.zeros((len(val_y),1), dtype=np.int32)))
        history = train_model.fit([tr_X, tr_y], np.expand_dims(train_target, -1), 
                            batch_size=batch_size,
                            epochs=epochs,
                            validation_data=([val_X, val_y], val_target),
                            callbacks=callbacks
                        )
        
        # BLEU 確認
        print("Calcurating Val BLEU...")
        _,_, bleu_scores = mean_bleu(val_X, val_y, encoder_model, decoder_model, attention_model, end_i=4000)
        if np.mean(bleu_scores) > best_bleu:
            best_idx = i
            best_bleu = np.mean(bleu_scores)
        i+=1
        #return train_model, encoder_model, decoder_model, attention_model
        train_models.append(train_model)
        encoder_models.append(encoder_model)
        decoder_models.append(decoder_model)
        attention_models.append(attention_model)
        
        
    #print('mean acc: ', sum(val_scores)/len(val_scores))
    print("-------------- finish CV --------------")
    print("best Mean BLEU: ", best_bleu)
    return train_models[best_idx], encoder_models[best_idx], decoder_models[best_idx], attention_models[best_idx]
best_train_model, best_encoder_model, best_decoder_model, best_attention_model = run_cv(x_train, y_train, x_test)

Start fold 1/10
(44100, 18) (4900, 18)
(44100, 18) (4900, 18)
[<tf.Tensor 'lstm_5/while/Exit_3:0' shape=(?, 1024) dtype=float32>, <tf.Tensor 'lstm_5/while/Exit_4:0' shape=(?, 1024) dtype=float32>]
decoded_seq:  Tensor("lstm_6/transpose_1:0", shape=(?, 18, 1024), dtype=float32)
score, encoded_seq:  Tensor("dense_6/BiasAdd:0", shape=(?, 18, 1024), dtype=float32) Tensor("lstm_5/transpose_2:0", shape=(?, 18, 1024), dtype=float32)
Tensor("lstm_6_1/transpose_1:0", shape=(?, 1, 1024), dtype=float32)
[<tf.Tensor 'lstm_6_1/while/Exit_2:0' shape=(?, 1024) dtype=float32>, <tf.Tensor 'lstm_6_1/while/Exit_3:0' shape=(?, 1024) dtype=float32>]
Train on 44100 samples, validate on 4900 samples
Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Calcurating Val

In [0]:
_,_, bleu_scores = mean_bleu(x_train, y_train, best_encoder_model, best_decoder_model, best_attention_model, end_i=10)

Mean BLEU:  0.947497172457975


In [0]:
best_train_model.save('/content/drive/My Drive/Colab Notebooks/train_model.h5', include_optimizer=False)
best_encoder_model.save('/content/drive/My Drive/Colab Notebooks/enc_model.h5', include_optimizer=False)
best_decoder_model.save('/content/drive/My Drive/Colab Notebooks/dec_model.h5', include_optimizer=False)
best_attention_model.save('/content/drive/My Drive/Colab Notebooks/att_model.h5', include_optimizer=False)

In [0]:
_, _ = check_test_pred(x_test, best_encoder_model, best_decoder_model, best_attention_model, end_i=100)

----------------------------
eng  sentence: ['he', 'could', 'not', 'believe', 'his', 'ears', '.']
pred sentence: ['彼', 'は', '自分', 'の', '言', 'う', 'こと', 'が', '信', 'じ', 'られ', 'な', 'かっ', 'た', '。']
----------------------------
----------------------------
eng  sentence: ['my', 'mother', 'is', 'to', 'meet', 'with', 'my', 'homeroom', 'teacher', 'tomorrow', '.']
pred sentence: ['母', 'は', '明日', 'の', '先生', 'と', '会', 'う', 'こと', 'に', 'な', 'っ', 'て', 'い', 'る', '。']
----------------------------
----------------------------
eng  sentence: ['i', 'don', "'t", 'want', 'to', 'be', 'involved', 'in', 'that', 'matter', '.']
pred sentence: ['その', '問題', 'に', 'は', 'これ', 'ら', 'な', 'く', 'て', 'は', 'な', 'ら', 'な', 'い', '。']
----------------------------
----------------------------
eng  sentence: ['it', 'took', 'me', 'two', 'hours', 'to', 'get', 'to', 'yokohama', '.']
pred sentence: ['私', 'は', '東京', 'へ', '行', 'く', 'の', 'に', '２', '時間', 'かか', 'っ', 'た', '。']
----------------------------
----------------------------
eng 

In [0]:
filepath = '/content/drive/My Drive/Colab Notebooks/cv10_attention.csv'
write_csv(filepath, x_test, bos_eos, best_encoder_model, best_decoder_model, best_attention_model)

In [0]:
best_train_model.save('submission3_train_model.h5', include_optimizer=False)
best_encoder_model.save('submission3_enc.h5', include_optimizer=False)
best_decoder_model.save('/content/drive/My Drive/Colab Notebooks/dec_model.h5', include_optimizer=False)
best_attention_model.save('/content/drive/My Drive/Colab Notebooks/att_model.h5', include_optimizer=False)

In [0]:
from math import log
from numpy import array
from numpy import argmax
 
# beam search
def beam_search_decoder(data, k):
	sequences = [[list(), 1.0]] #[[[], 1.0]]
	# walk over each step in sequence
	for row in data:
		all_candidates = list()
		# expand each current candidate
		for i in range(len(sequences)):
			seq, score = sequences[i] #[], 1.0
			for j in range(len(row)):
                # row = data[0,:]
				candidate = [seq + [j], score * -log(row[j])] # candidate=[[0], 1.0*-log(0.1) #ありえないものほどスコアがでかくなる
                #print('candidate', candidate)
				all_candidates.append(candidate) # all_candidate = [[[0], 1.0*-log(0.1)], [[1], 1.0-log(0.2)], .. ]
                #print('all_cand', all_candidates)
		# order all candidates by score
		ordered = sorted(all_candidates, key=lambda tup:tup[1])
		# select k best
		sequences = ordered[:k] #[ row_index(word_index), score]
	return sequences

 # define a sequence of 10 words over a vocab of 5 words
data = [[0.1, 0.2, 0.3, 0.4, 0.5],]
		# [0.5, 0.4, 0.3, 0.2, 0.1],
		# [0.1, 0.2, 0.3, 0.4, 0.5],
		# [0.5, 0.4, 0.3, 0.2, 0.1],
		# [0.1, 0.2, 0.3, 0.4, 0.5],
		# [0.5, 0.4, 0.3, 0.2, 0.1],
		# [0.1, 0.2, 0.3, 0.4, 0.5],
		# [0.5, 0.4, 0.3, 0.2, 0.1],
		#[0.1, 0.2, 0.3, 0.4, 0.5],
		#[0.5, 0.4, 0.3, 0.2, 0.1]]
data2 = []
data = array(data)
# decode sequence
result = beam_search_decoder(data, 3)
# print result
for seq in result:
	print(seq)

[[4], 0.6931471805599453]
[[3], 0.916290731874155]
[[2], 1.2039728043259361]
