## Imports

In [1]:
import sys, pdb
import numpy as np
from keras.utils.np_utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.optimizers import RMSprop
from keras.layers.recurrent import LSTM

# Add the parent directory to PYTHONPATH so that we can use utils.py
sys.path.append('..')
import utils

%load_ext autoreload
%autoreload 2

Using TensorFlow backend.


## Load Vector Embeddings

In [2]:
d = 50 #dimensionality of word vectors
data = utils.build_word_vector_matrix('../data/embeddings/5/vectors_d{}.txt'.format(d))
embeddings, labels, id_to_move, move_to_id = data

## Load Move Dataset

In [3]:
with open('../data/test_moves.txt', 'r') as f:
    moves = f.read().split()
    
# reduce number of moves for now
moves = moves[:100000]
uniq_moves = list(set(moves))
print('{} unique moves in vector encoding'.format(len(labels)))
print('{} unique moves in training set'.format(len(uniq_moves)))

3469 unique moves in vector encoding
1951 unique moves in training set


In [4]:
window_size = 20
step = 1
input_moves = []
output_moves = []

for i in range(0, len(moves) - window_size, step):
    input_moves.append(moves[i:i + window_size])
    output_moves.append(moves[i + window_size])

output_move_ids = []
for m in output_moves:
    if m in move_to_id:
        output_move_ids.append(move_to_id[m])
    else:
        # unknown
        output_move_ids.append(-1)

y = to_categorical(output_move_ids, len(labels))
X = []
print('{} input sequences'.format(len(input_moves)))
unknown_moves = set()
for sequence in input_moves:
    seq = []
    for move in sequence:
        if move in move_to_id:
            #vec is of length 50
            vec = embeddings[move_to_id[move]]
            seq.append(vec.tolist())
        else:
            unknown_moves.add(move)
            #this zero array is of length 50
            seq.append(np.zeros((d,)).tolist())
    X.append(seq)

# really wierd fucking bug right here
w = np.array(X)
X = w

print('Moves not found in vector embedding dictionary:')
print(*unknown_moves)

99980 input sequences
Moves not found in vector embedding dictionary:
N5xh4 Nbxd3+ R4d2+ R2h6+ Rexb6 Raxf2+ Rdxd5+ Nce8+ N3e2+ R4xe6 Nba8 Naxc8 N6xa5 Rhf8+ axb1=Q Ne7# Rcg6+ g5# Qxh3# Ned2+ Raxd5+ Kxe6+ Rah6 Ndxf3+ N4xf3 Ngxf4+ Rhg6+ Qb6# Rdxf6+ Qe2# N8xh7 Rcxg7+ N4d3


In [None]:
model = Sequential()
model.add(LSTM(256, return_sequences=True, batch_input_shape=(None, window_size, d)))
model.add(Dropout(0.5))
model.add(LSTM(256, return_sequences=False))
model.add(Dropout(0.5))
model.add(Dense(len(labels)))
model.add(Activation('softmax'))
model.compile(loss='categorical_crossentropy', optimizer=RMSprop(lr=0.001, decay=0.0001), metrics=['accuracy'])
print(model.summary())

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
lstm_1 (LSTM)                    (None, 20, 256)       314368      lstm_input_1[0][0]               
____________________________________________________________________________________________________
dropout_1 (Dropout)              (None, 20, 256)       0           lstm_1[0][0]                     
____________________________________________________________________________________________________
lstm_2 (LSTM)                    (None, 256)           525312      dropout_1[0][0]                  
____________________________________________________________________________________________________
dropout_2 (Dropout)              (None, 256)           0           lstm_2[0][0]                     
___________________________________________________________________________________________

In [None]:
# more epochs is usually better, but training can be very slow if not on a GPU
epochs = 30
history = model.fit(X, y, batch_size=32, nb_epoch=epochs, validation_split=0.2, verbose=1)

Train on 79984 samples, validate on 19996 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
18080/79984 [=====>........................] - ETA: 62s - loss: 5.3968 - acc: 0.0950

In [None]:
utils.plot_model_results(history)   

In [None]:
predicted = model.predict(X)
ids = [np.argmax(p) for p in predicted]
moves = [id_to_move[i] for i in ids]

In [None]:
moves