In [17]:
import pandas as pd
import numpy as np
from keras import Sequential
from keras.layers import LSTM, Dense
from keras.utils import np_utils

## Data preparation

In [2]:

# Read the CSV and keep only its first 200 rows (all columns).
data = pd.read_csv('induce-data-2019-08-08.csv').head(200)
# Question-token vocabulary. A token's position in this list is the integer
# code produced by `vocab.index(...)`, so the order below must not change.
vocab = [
    'C_E_F_T', 'C_E_F_C', 'C_E_F_O',
    'A_E_F_T', 'A_E_F_O', 'A_E_F_C',
    'G_E_F_C', 'G_E_F_T', 'G_E_F_O',
    'A_E_M_T', 'A_E_M_O', 'A_E_M_C',
    'G_E_M_O', 'G_E_M_C', 'G_E_M_T',
    'C_E_M_O', 'C_E_M_C', 'C_E_M_T',
    'C_H_F_CO', 'C_H_F_CT', 'C_H_F_OT',
    'G_H_F_OT', 'G_H_F_CO', 'G_H_F_CT',
    'A_H_F_CT', 'A_H_F_OT', 'A_H_F_CO',
    'C_H_M_CO', 'C_H_M_CT', 'C_H_M_OT',
    'A_H_M_CT', 'A_H_M_OT', 'A_H_M_CO',
    'G_H_M_OT', 'G_H_M_CO', 'G_H_M_CT',
]

# Answer labels. Position in the list is the integer class id, so the order
# is significant here as well.
labels = ['correct', 'wrong', 'type', 'orientation', 'color']
def seq_to_int(qts, asw, vocab, labels):
    """Encode aligned question/answer sequences as pairs of integer codes.

    Parameters
    ----------
    qts : iterable of str
        Question tokens; every token must be an element of ``vocab``.
    asw : iterable
        Answers aligned position-by-position with ``qts``. Any iterable is
        accepted (pandas Series, list, tuple, array); a Series is read
        positionally, its index is ignored. Each value is passed through
        ``str`` before lookup, the same coercion ``split_sequence`` applies
        to its target label.
    vocab : list of str
        Vocabulary; a token's code is its position in this list.
    labels : list of str
        Label set; an answer's code is its position in this list.

    Returns
    -------
    list of [int, int]
        One ``[vocab_index, label_index]`` pair per element of ``qts``.

    Raises
    ------
    ValueError
        If a token is not in ``vocab`` or an answer is not in ``labels``.
    IndexError
        If ``asw`` has fewer elements than ``qts``.
    """
    # Materialise once so answers can be read by position regardless of the
    # container type (the previous implementation required `.iloc`).
    answers = list(asw)
    return [
        [vocab.index(token), labels.index(str(answers[pos]))]
        for pos, token in enumerate(qts)
    ]
def split_sequence(sequence, n_steps, vocab, labels, question_col=4, answer_col=5):
    """Cut a DataFrame into sliding windows and next-row targets.

    For every start row ``i`` such that row ``i + n_steps`` exists, the rows
    ``i .. i + n_steps - 1`` are encoded with ``seq_to_int`` to form one
    input window, and the answer label of row ``i + n_steps`` becomes the
    target.

    Parameters
    ----------
    sequence : pandas.DataFrame
        Source rows; columns are addressed by position via ``.iloc``.
    n_steps : int
        Number of consecutive rows in each input window (must be >= 1).
    vocab : list of str
        Vocabulary used to encode the question column.
    labels : list of str
        Label set used to encode the answer column.
    question_col : int, optional
        Positional index of the question-token column (default 4).
    answer_col : int, optional
        Positional index of the answer-label column (default 5).

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        ``X`` with shape ``(n_windows, n_steps, 2)`` and ``Y`` with shape
        ``(n_windows,)``. When ``sequence`` has ``n_steps`` rows or fewer the
        arrays are empty but keep those ranks, so callers can still read
        ``X.shape[1]``.

    Raises
    ------
    ValueError
        If ``n_steps`` is smaller than 1, or a value is missing from
        ``vocab`` / ``labels``.
    """
    if n_steps < 1:
        raise ValueError("n_steps must be a positive integer, got %r" % (n_steps,))

    # A window needs n_steps rows plus one more row to supply the target.
    n_windows = len(sequence) - n_steps
    if n_windows <= 0:
        # np.array([]) would have shape (0,), which breaks downstream
        # reshapes; return empties with the regular rank instead.
        return np.empty((0, n_steps, 2), dtype=int), np.empty((0,), dtype=int)

    X, Y = [], []
    for start in range(n_windows):
        end_idx = start + n_steps
        X.append(seq_to_int(sequence.iloc[start:end_idx, question_col],
                            sequence.iloc[start:end_idx, answer_col],
                            vocab, labels))
        # Target is the answer label of the row right after the window.
        Y.append(labels.index(str(sequence.iloc[end_idx, answer_col])))
    return np.array(X), np.array(Y)



### Test split_sequence


In [3]:
# Smoke check: window the first ten rows with five rows per window and print
# the resulting (X, Y) tuple.
print(split_sequence(data.head(10), n_steps=5, vocab=vocab, labels=labels))


(array([[[0, 0],
        [1, 0],
        [2, 0],
        [3, 0],
        [4, 0]],

       [[1, 0],
        [2, 0],
        [3, 0],
        [4, 0],
        [5, 1]],

       [[2, 0],
        [3, 0],
        [4, 0],
        [5, 1],
        [6, 0]],

       [[3, 0],
        [4, 0],
        [5, 1],
        [6, 0],
        [7, 0]],

       [[4, 0],
        [5, 1],
        [6, 0],
        [7, 0],
        [8, 0]]]), array([1, 0, 0, 0, 0]))


## The model


In [24]:
n_features = 2            # each time step is a [vocab index, label index] pair
n_steps = 5               # rows per input window
n_classes = len(labels)   # width of the one-hot target and of the output layer

X, Y = split_sequence(data.iloc[:100, :], n_steps, vocab, labels)
# Pin the one-hot width to the label set. Without num_classes the width is
# inferred as max(Y) + 1, which no longer matches the output layer whenever
# the highest-numbered label happens to be absent from this slice.
Y = np_utils.to_categorical(Y, num_classes=n_classes)
X = X.reshape((X.shape[0], X.shape[1], n_features))

model = Sequential()
model.add(LSTM(50, activation='relu', input_shape=(n_steps, n_features)))
model.add(Dense(n_classes))
# NOTE(review): the targets are one-hot class vectors, yet the output layer is
# linear and the loss is MSE, so predictions are unbounded scores rather than
# probabilities. A softmax output with categorical cross-entropy is the usual
# choice; left unchanged here so the recorded results stay comparable.
model.compile(optimizer='adam', loss='mse')

In [25]:
# Train for 200 epochs; verbose=2 logs one line per epoch.
model.fit(x=X, y=Y, epochs=200, verbose=2)

Epoch 1/200
 - 0s - loss: 5.6416
Epoch 2/200
 - 0s - loss: 4.0741
Epoch 3/200
 - 0s - loss: 2.6709
Epoch 4/200
 - 0s - loss: 1.6104
Epoch 5/200
 - 0s - loss: 0.7804
Epoch 6/200
 - 0s - loss: 0.3139
Epoch 7/200
 - 0s - loss: 0.3063
Epoch 8/200
 - 0s - loss: 0.4528
Epoch 9/200
 - 0s - loss: 0.4342
Epoch 10/200
 - 0s - loss: 0.3171
Epoch 11/200
 - 0s - loss: 0.2352
Epoch 12/200
 - 0s - loss: 0.2135
Epoch 13/200
 - 0s - loss: 0.2197
Epoch 14/200
 - 0s - loss: 0.2229
Epoch 15/200
 - 0s - loss: 0.2163
Epoch 16/200
 - 0s - loss: 0.2014
Epoch 17/200
 - 0s - loss: 0.1866
Epoch 18/200
 - 0s - loss: 0.1755
Epoch 19/200
 - 0s - loss: 0.1672
Epoch 20/200
 - 0s - loss: 0.1606
Epoch 21/200
 - 0s - loss: 0.1533
Epoch 22/200
 - 0s - loss: 0.1477
Epoch 23/200
 - 0s - loss: 0.1409
Epoch 24/200
 - 0s - loss: 0.1357
Epoch 25/200
 - 0s - loss: 0.1312
Epoch 26/200
 - 0s - loss: 0.1275
Epoch 27/200
 - 0s - loss: 0.1240
Epoch 28/200
 - 0s - loss: 0.1216
Epoch 29/200
 - 0s - loss: 0.1202
Epoch 30/200
 - 0s - lo

<keras.callbacks.callbacks.History at 0x63350b950>

In [26]:
# Demonstrate a single prediction.
# NOTE(review): rows 10:20 lie inside the 0:100 slice the model was fitted on,
# so this shows recall of training data, not generalisation. Consider drawing
# the demo window from rows the model has not seen.
# The window length is taken from n_steps (it was a literal 5) so it cannot
# drift away from the reshape below.
x_input, y_star = split_sequence(data.iloc[10:20, :], n_steps, vocab, labels)
x_input = x_input[0]
# Keras expects a leading batch axis: (1, n_steps, n_features).
x_input = x_input.reshape((1, n_steps, n_features))
yhat = model.predict(x_input, verbose=0)
print(yhat)       # raw output scores, one per entry of `labels`
print(y_star[0])  # true label index for the same window


[[ 0.69547296  0.18415695  0.07763203  0.07747096 -0.00537475]]
0
