# 5 Seq2Seq：机器自动翻译、Image Caption、Attention 机制

## Sequence to sequence

In [2]:
from keras.layers import Input, LSTM, Dense
from keras.models import Model, load_model
from keras.utils import plot_model
import pandas as pd
import numpy as np

In [3]:
def create_model(n_input, n_output, n_units):
    """Build an LSTM encoder-decoder and the two models used for step-wise decoding.

    Args:
        n_input: size of the last axis of the encoder input.
        n_output: size of the last axis of the decoder input; also the width
            of the softmax output layer.
        n_units: number of units in both the encoder and the decoder LSTM.

    Returns:
        A tuple ``(model, encoder_infer, decoder_infer)``:
          * ``model`` maps ``[encoder_input, decoder_input]`` to the softmax
            output at every decoder time step (used for training).
          * ``encoder_infer`` maps the encoder input to its ``[h, c]`` states.
          * ``decoder_infer`` maps ``[decoder_input, h, c]`` to
            ``[softmax output, h, c]``.
        The three models are built from the same layer objects.
    """
    # ---- encoder: only the final hidden/cell states are kept ----
    enc_in = Input(shape=(None, n_input))
    enc_lstm = LSTM(n_units, return_state=True)
    _, enc_h, enc_c = enc_lstm(enc_in)
    enc_states = [enc_h, enc_c]

    # ---- decoder: starts from the encoder states, emits one output per step ----
    dec_in = Input(shape=(None, n_output))
    dec_lstm = LSTM(n_units, return_sequences=True, return_state=True)
    dec_seq, _, _ = dec_lstm(dec_in, initial_state=enc_states)
    dec_softmax = Dense(n_output, activation='softmax')
    dec_seq = dec_softmax(dec_seq)

    # Training model.
    model = Model([enc_in, dec_in], dec_seq)

    # ---- inference models ----
    # Encoder side: input sequence -> [h, c].
    encoder_infer = Model(enc_in, enc_states)

    # Decoder side: the states arrive as explicit inputs (h first, then c) so
    # the caller can feed them back in on every step.
    state_inputs = [Input(shape=(n_units,)) for _ in range(2)]

    # Re-apply the same decoder LSTM and Dense layers to the new state inputs.
    infer_seq, *infer_states = dec_lstm(dec_in, initial_state=state_inputs)
    infer_seq = dec_softmax(infer_seq)

    decoder_infer = Model([dec_in] + state_inputs,
                          [infer_seq] + infer_states)

    return model, encoder_infer, decoder_infer

In [4]:
# Data: number of leading rows of the corpus file to use.
NUM_SAMPLES = 10000

# Model: LSTM state size shared by encoder and decoder.
N_UNITS = 256

# Training schedule.
EPOCH = 50
BATCH_SIZE = 64

# 数据下载
[下载地址](http://www.manythings.org/anki/)

# 数据读取

In [5]:
data_path = '../resource/cmn.txt'
# The file has no header row. `nrows` stops parsing after NUM_SAMPLES rows
# instead of loading the whole corpus and slicing it afterwards.
df = pd.read_table(data_path, header=None, nrows=NUM_SAMPLES)
df.columns=['inputs', 'targets', 'others']

# Wrap every target sentence in '\t' (start marker) and '\n' (end marker);
# the decoding loop later starts from '\t' and stops at '\n'.
df['targets'] = df['targets'].apply(lambda x: '\t'+x+'\n')

input_texts = df.inputs.values.tolist()
target_texts = df.targets.values.tolist()

# Character vocabularies, sorted so the index assignment is deterministic.
# ''.join builds the combined text in one pass; summing the Series
# concatenates string by string and slows down sharply as the corpus grows.
input_characters = sorted(set(''.join(input_texts)))
target_characters = sorted(set(''.join(target_texts)))

In [6]:
# Longest sentence (in characters) on each side; used as the time-axis length
# of the one-hot tensors.
# NOTE: "INUPT" is a typo for "INPUT", kept because other cells reference this name.
INUPT_LENGTH = max(len(text) for text in input_texts)
OUTPUT_LENGTH = max(len(text) for text in target_texts)

# Vocabulary sizes; used as the one-hot width on each side.
INPUT_FEATURE_LENGTH = len(input_characters)
OUTPUT_FEATURE_LENGTH = len(target_characters)

# 向量化

In [8]:
# One-hot buffers with shape (samples, time steps, vocabulary size).
# The sample axis is sized by the number of sentences actually loaded, so a
# corpus shorter than NUM_SAMPLES does not leave all-zero rows in the
# training set. float32 halves the memory of NumPy's default float64; the
# only values stored are 0 and 1, which float32 represents exactly.
encoder_input = np.zeros((len(input_texts), INUPT_LENGTH, INPUT_FEATURE_LENGTH), dtype='float32')
decoder_input = np.zeros((len(target_texts), OUTPUT_LENGTH, OUTPUT_FEATURE_LENGTH), dtype='float32')
decoder_output = np.zeros((len(target_texts), OUTPUT_LENGTH, OUTPUT_FEATURE_LENGTH), dtype='float32')

In [9]:
# index -> character tables, numbered in the (sorted) vocabulary order.
input_dict_reverse = dict(enumerate(input_characters))
target_dict_reverse = dict(enumerate(target_characters))

# character -> index tables, numbered the same way.
input_dict = dict(zip(input_characters, range(len(input_characters))))
target_dict = dict(zip(target_characters, range(len(target_characters))))

In [10]:
# One-hot encode every source sentence; positions past the end of a sentence
# stay all-zero.
for row, sentence in enumerate(input_texts):
    for step, symbol in enumerate(sentence):
        column = input_dict[symbol]
        encoder_input[row, step, column] = 1

In [11]:
# One-hot encode every target sentence. decoder_output holds the same
# characters shifted one step earlier, so its first step is the character
# that follows the leading start marker.
for row, sentence in enumerate(target_texts):
    for step, symbol in enumerate(sentence):
        column = target_dict[symbol]
        decoder_input[row, step, column] = 1.0
        if step >= 1:
            decoder_output[row, step - 1, column] = 1.0

# 观察向量化的数据

In [12]:
# Decode sample 0 back to text, skipping the all-zero (padding) steps.
''.join(input_dict_reverse[np.argmax(step)] for step in encoder_input[0] if step.max() != 0)

'Hi.'

In [13]:
# Decode the target of sample 0 back to text, skipping the all-zero (padding) steps.
''.join(target_dict_reverse[np.argmax(step)] for step in decoder_output[0] if step.max() != 0)

'嗨。\n'

# 创建模型

In [14]:
# Build the training model and the encoder/decoder inference models.
model_train, encoder_infer, decoder_infer = create_model(
    n_input=INPUT_FEATURE_LENGTH,
    n_output=OUTPUT_FEATURE_LENGTH,
    n_units=N_UNITS,
)

In [15]:
# Compile & run training
# The decoder ends in a softmax over one-hot targets, hence categorical
# cross-entropy as the loss.
model_train.compile(
    loss='categorical_crossentropy',
    optimizer='rmsprop',
)

In [16]:
# Print the layer-by-layer summary of the training model.
model_train.summary()

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, None, 72)     0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            (None, None, 2561)   0                                            
__________________________________________________________________________________________________
lstm_1 (LSTM)                   [(None, 256), (None, 336896      input_1[0][0]                    
__________________________________________________________________________________________________
lstm_2 (LSTM)                   [(None, None, 256),  2885632     input_2[0][0]                    
                                                                 lstm_1[0][1]               

In [17]:
# Print the layer-by-layer summary of the encoder inference model.
encoder_infer.summary()

Model: "model_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, None, 72)          0         
_________________________________________________________________
lstm_1 (LSTM)                [(None, 256), (None, 256) 336896    
Total params: 336,896
Trainable params: 336,896
Non-trainable params: 0
_________________________________________________________________


In [18]:
# Print the layer-by-layer summary of the decoder inference model.
decoder_infer.summary()

Model: "model_3"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            (None, None, 2561)   0                                            
__________________________________________________________________________________________________
input_3 (InputLayer)            (None, 256)          0                                            
__________________________________________________________________________________________________
input_4 (InputLayer)            (None, 256)          0                                            
__________________________________________________________________________________________________
lstm_2 (LSTM)                   [(None, None, 256),  2885632     input_2[0][0]                    
                                                                 input_3[0][0]              

In [19]:
# Fraction of the samples held out for validation.
validation_split = 0.2

# The decoder receives the target sentence as input and is trained to emit
# decoder_output at every time step.
model_train.fit(
    x=[encoder_input, decoder_input],
    y=decoder_output,
    batch_size=BATCH_SIZE,
    epochs=EPOCH,
    validation_split=validation_split,
)

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where

Train on 8000 samples, validate on 2000 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50

KeyboardInterrupt: 

# 预测序列

In [20]:
def predict_chinese(source,encoder_inference, decoder_inference, n_steps, features):
    """Greedily decode one source sample into a target string.

    Args:
        source: encoder input passed to ``encoder_inference.predict``.
        encoder_inference: model whose ``predict`` returns the decoder's
            initial state list.
        decoder_inference: model whose ``predict`` takes
            ``[one-hot step] + state`` and returns ``(output, h, c)``.
        n_steps: maximum number of characters to generate (the longest
            sentence length).
        features: width of the one-hot vector fed to the decoder.

    Returns:
        The decoded characters joined into a string. If the end marker
        '\\n' is produced it is included and decoding stops there.
    """
    def one_hot(index):
        # A (1, 1, features) tensor holding a single one-hot time step.
        step = np.zeros((1,1,features))
        step[0,0,index] = 1
        return step

    states = encoder_inference.predict(source)
    # Decoding always starts from the start-of-sequence marker.
    step_input = one_hot(target_dict['\t'])

    pieces = []
    for _ in range(n_steps):
        probs, h, c = decoder_inference.predict([step_input] + states)
        # Greedy choice: most probable character at the last time step.
        best = np.argmax(probs[0,-1,:])
        symbol = target_dict_reverse[best]
        pieces.append(symbol)
        # Feed the new states and the chosen character into the next step.
        states = [h, c]
        step_input = one_hot(best)
        if symbol == '\n':
            break
    return ''.join(pieces)

In [21]:
# Translate samples 1000..1099, printing each source sentence followed by
# the model's output.
for i in range(1000, 1100):
    # Slice (not index) so the leading batch axis of size 1 is kept.
    test = encoder_input[i:i + 1]
    out = predict_chinese(
        test,
        encoder_infer,
        decoder_infer,
        OUTPUT_LENGTH,
        OUTPUT_FEATURE_LENGTH,
    )
    print(input_texts[i])
    print(out)

Stop grumbling.
你的是我的。

Stop resisting!
你的在我的。

Summer is over.
我不是我的。

Take your time.
你是什麼？

Take your time.
你是什麼？

That was wrong.
你是我的。

That's a shame.
你是我的。

That's logical.
你是我的。

That's my coat.
你是我的。

That's perfect.
你是我的。

That's too bad.
你是我的。

That's too bad.
你是我的。

That's too bad.
你是我的。

The birds sang.
你是我的。

The flag is up.
你是我的。

The phone rang.
你是我的。

Their eyes met.
那是我的。

These are pens.
你是我的。

They hated Tom.
你是我的。

They have jobs.
你是我的。

They let me go.
你是我的。

They love that.
你是我的。

They trust Tom.
你是我的。

They want more.
那是我的。

They want this.
那是我的。

They were good.
你是我的。

This is a book.
那是我的。

This is my bag.
那是我的。

Tom can change.
那是我的。

Tom can't swim.
那是我的。

Tom has a plan.
那是我的。

Tom is a rabbi.
那是我的。

Tom is no fool.
那是我的。

Tom isn't dumb.
那是我的。

Tom looks pale.
那是我的。

Tom loves dogs.
那是我的。

Tom turned red.
那是我的。

Tom walked out.
他是我的。

Tom was crying.
他是我的。

Tom won't stop.
那是我的。

Tom's fearless.
那是我的。

Tom's laughing.
那是我的。

Tom's thrilled.
那是我的。

Turn on 