In [1]:
import numpy as np
from keras.models import Sequential
from keras.layers.core import Dense, Activation, RepeatVector
from keras.layers.recurrent import LSTM
from keras.layers.wrappers import TimeDistributed
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping

from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle

Using TensorFlow backend.


In [6]:
# Return a random integer made of 1 to `digits` decimal digits (3 by default)
def n(digits=3):
    """Draw a random non-negative integer built from random decimal digits.

    The digit count is drawn with ``np.random.randint(1, digits + 1)`` and
    each digit is then picked independently from '0'-'9'.  The digit string
    is converted with ``int``, so leading zeros vanish (e.g. '007' -> 7).
    """
    length = np.random.randint(1, digits + 1)
    symbols = list('0123456789')
    return int(''.join(np.random.choice(symbols) for _ in range(length)))

def padding(chars,maxlen):
    """Right-pad ``chars`` with spaces until it is ``maxlen`` characters long.

    A string that is already ``maxlen`` characters or longer is returned
    unchanged (it is never truncated).
    """
    return chars.ljust(maxlen)

In [19]:
# Generate the dataset of addition problems
N = 20000
N_train = int(N*0.9)
N_validation = N - N_train

digits = 3  # maximum number of digits per operand
input_digits = digits*2 + 1  # longest question, e.g. '123+456'
output_digits = digits + 1  # longest answer, e.g. 500+500=1000 needs 4 characters

# Operands range over 0 .. 10**digits - 1 and pairs are order-insensitive, so
# only this many distinct problems exist; asking for more would loop forever.
max_pairs = 10**digits * (10**digits + 1) // 2
if N > max_pairs:
    raise ValueError(
        'N={} exceeds the {} distinct pairs available for digits={}'.format(
            N, max_pairs, digits))

added = set()  # operand pairs already emitted
questions = []
answers = []

while len(questions) < N:
    # Pass `digits` explicitly so the operands always fit the padded widths
    # computed above, even when `digits` is changed.
    a, b = n(digits), n(digits)

    # Skip duplicates; a+b and b+a are treated as the same problem
    pair = tuple(sorted((a, b)))
    if pair in added:
        continue

    # Pad both strings with spaces to fixed widths
    question = padding('{}+{}'.format(a, b), input_digits)
    answer = padding(str(a + b), output_digits)

    added.add(pair)
    questions.append(question)
    answers.append(answer)

In [23]:
# Vocabulary: the ten digits, the plus sign, and the space used for padding
chars = '0123456789+ '
# Forward lookup: character -> integer index (its position in `chars`)
char_indices = {symbol: position for position, symbol in enumerate(chars)}
# Reverse lookup: integer index -> character
indices_char = {position: symbol for position, symbol in enumerate(chars)}

In [33]:
# Display both lookup tables: character -> index and index -> character
char_indices,indices_char

({' ': 11,
  '+': 10,
  '0': 0,
  '1': 1,
  '2': 2,
  '3': 3,
  '4': 4,
  '5': 5,
  '6': 6,
  '7': 7,
  '8': 8,
  '9': 9},
 {0: '0',
  1: '1',
  2: '2',
  3: '3',
  4: '4',
  5: '5',
  6: '6',
  7: '7',
  8: '8',
  9: '9',
  10: '+',
  11: ' '})

In [26]:
# One-hot tensors indexed as (sample, character position, character class).
# `int` is the concrete dtype the abstract np.integer used to resolve to;
# passing np.integer itself as a dtype is deprecated in NumPy.
X = np.zeros((len(questions), input_digits, len(chars)), dtype=int)
# Size the labels with output_digits so they stay in sync with the width the
# answers were padded to.
Y = np.zeros((len(questions), output_digits, len(chars)), dtype=int)

In [28]:
X.shape #(number of samples, input length, number of symbols)

(20000, 7, 12)

In [30]:
# One-hot encode every question into X and every answer into Y: for each
# character position t, set the entry at (sample, t, index of that character).
for i in range(N):
    q_codes = [char_indices[ch] for ch in questions[i]]
    a_codes = [char_indices[ch] for ch in answers[i]]
    # Paired index arrays address exactly one (position, class) cell per character
    X[i, np.arange(len(q_codes)), q_codes] = 1
    Y[i, np.arange(len(a_codes)), a_codes] = 1

In [31]:
# Display the encoded matrix of the first question (rows: positions, columns: symbols)
X[0]

array([[0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0],
       [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]])

In [36]:
# Look up the integer index assigned to the character '1'
char_indices['1']

1

In [37]:
# Split into N_train training samples; the remainder becomes the validation set
X_train, X_validation, Y_train, Y_validation = train_test_split(
    X,
    Y,
    train_size=N_train,
)

In [39]:
# Sequence-to-sequence model: LSTM encoder followed by an LSTM decoder
n_in = len(chars)
n_hidden = 128  # arbitrary choice
n_out = len(chars)

model = Sequential([
    # Encoder: consumes the encoded question and emits a single vector
    LSTM(n_hidden, input_shape=(input_digits, n_in)),
    # Decoder: repeat that vector once per output position and unroll an LSTM over it
    RepeatVector(output_digits),
    LSTM(n_hidden, return_sequences=True),
    # Per-position dense layer + softmax over the character set
    TimeDistributed(Dense(n_out)),
    Activation('softmax'),
])
model.compile(
    loss='categorical_crossentropy',
    optimizer=Adam(lr=1e-3, beta_1=0.9, beta_2=0.999),
    metrics=['accuracy'],
)
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_3 (LSTM)                (None, 128)               72192     
_________________________________________________________________
repeat_vector_2 (RepeatVecto (None, 4, 128)            0         
_________________________________________________________________
lstm_4 (LSTM)                (None, 4, 128)            131584    
_________________________________________________________________
time_distributed_2 (TimeDist (None, 4, 12)             1548      
_________________________________________________________________
activation_2 (Activation)    (None, 4, 12)             0         
Total params: 205,324
Trainable params: 205,324
Non-trainable params: 0
_________________________________________________________________


In [42]:
# Training
epochs = 1
batch_size = 200

for epoch in range(epochs):
    # Fit one epoch per outer iteration so predictions can be sampled after each
    model.fit(X_train, Y_train, batch_size=batch_size, epochs=1,
              validation_data=(X_validation, Y_validation))

    # Spot-check: pick 10 random validation problems and compare with the truth
    for i in range(10):
        index = np.random.randint(0, N_validation)
        # Index with a length-1 array to keep the leading batch dimension
        question = X_validation[np.array([index])]
        answer = Y_validation[np.array([index])]
        # Sequential.predict_classes() is deprecated and absent from newer
        # Keras/TensorFlow releases; argmax over predict() yields the same
        # class indices for a softmax output.
        prediction = model.predict(question, verbose=0).argmax(axis=-1)

        # Convert one-hot rows back to class indices
        question = question.argmax(axis=-1)
        answer = answer.argmax(axis=-1)

        # Decode index sequences back into strings
        q = ''.join(indices_char[k] for k in question[0])
        a = ''.join(indices_char[k] for k in answer[0])
        p = ''.join(indices_char[k] for k in prediction[0])

        print('-' * 10)
        print('Q:  ', q)
        # Note: 'A:' shows the model's prediction, not the true answer
        print('A:  ', p)
        print('T/F:', end=' ')
        if a == p:
            print('T')
        else:
            print('F')
    print('-' * 10)

Train on 18000 samples, validate on 2000 samples
Epoch 1/1
----------
Q:   18+30  
A:   11  
T/F: F
----------
Q:   2+706  
A:   11  
T/F: F
----------
Q:   16+381 
A:   113 
T/F: F
----------
Q:   263+62 
A:   113 
T/F: F
----------
Q:   445+3  
A:   14  
T/F: F
----------
Q:   616+9  
A:   11  
T/F: F
----------
Q:   33+22  
A:   13  
T/F: F
----------
Q:   6+992  
A:   113 
T/F: F
----------
Q:   47+964 
A:   113 
T/F: F
----------
Q:   2+944  
A:   11  
T/F: F
----------
