In [27]:
import pandas as pd
import numpy as np
from keras import Sequential
from keras.layers import LSTM, Dense
from keras.utils import np_utils
from sklearn.model_selection import train_test_split

## Data preparation

In [28]:

data = pd.read_csv('induce-data-2019-08-08.csv').iloc[:200, :]
vocab = ['C_E_F_T',
         'C_E_F_C',
         'C_E_F_O',
         'A_E_F_T',
         'A_E_F_O',
         'A_E_F_C',
         'G_E_F_C',
         'G_E_F_T',
         'G_E_F_O',
         'A_E_M_T',
         'A_E_M_O',
         'A_E_M_C',
         'G_E_M_O',
         'G_E_M_C',
         'G_E_M_T',
         'C_E_M_O',
         'C_E_M_C',
         'C_E_M_T',
         'C_H_F_CO',
         'C_H_F_CT',
         'C_H_F_OT',
         'G_H_F_OT',
         'G_H_F_CO',
         'G_H_F_CT',
         'A_H_F_CT',
         'A_H_F_OT',
         'A_H_F_CO',
         'C_H_M_CO',
         'C_H_M_CT',
         'C_H_M_OT',
         'A_H_M_CT',
         'A_H_M_OT',
         'A_H_M_CO',
         'G_H_M_OT',
         'G_H_M_CO',
         'G_H_M_CT', ]
labels = ['correct',
          'wrong',
          'type',
          'orientation',
          'color']
def seq_to_int(qts, asw, vocab, labels):
    integ = list()
    for i,x in enumerate(qts):
        features = list()
        features.append(vocab.index(x))
        features.append(labels.index(asw.iloc[i]))
        integ.append(features)
    return integ
def split_sequence(sequence, n_steps, vocab, labels):
    X, Y = list(), list()
    for i in range(len(sequence)):
        end_idx = i + n_steps
        if end_idx > len(sequence)-1:
            break
        x = seq_to_int(sequence.iloc[i:end_idx, 4], sequence.iloc[i:end_idx, 5], vocab, labels)
        y = labels.index(str(sequence.iloc[end_idx, 5]))
        X.append(x)
        Y.append(y)
    return np.array(X), np.array(Y)



### Test split_sequence


In [3]:
print(split_sequence(data.iloc[:10, :], 5, vocab, labels))


(array([[[0, 0],
        [1, 0],
        [2, 0],
        [3, 0],
        [4, 0]],

       [[1, 0],
        [2, 0],
        [3, 0],
        [4, 0],
        [5, 1]],

       [[2, 0],
        [3, 0],
        [4, 0],
        [5, 1],
        [6, 0]],

       [[3, 0],
        [4, 0],
        [5, 1],
        [6, 0],
        [7, 0]],

       [[4, 0],
        [5, 1],
        [6, 0],
        [7, 0],
        [8, 0]]]), array([1, 0, 0, 0, 0]))


## The model


In [36]:
n_features = 2
n_steps = 5
X, y = split_sequence(data.iloc[:, :], n_steps, vocab, labels)
y = np_utils.to_categorical(y)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
X = X.reshape((X.shape[0], X.shape[1], n_features))
model = Sequential()
model.add(LSTM(50, activation='relu', input_shape=(n_steps, n_features)))
model.add(Dense(5))
model.compile(optimizer='adam', loss='mse', metrics=['accuracy'])

In [37]:
# fit model
model.fit(X_train, y_train, epochs=200, verbose=0)

<keras.callbacks.callbacks.History at 0x1a35ee2b90>

In [38]:
scores = model.evaluate(X_test, y_test, verbose=0)
print("Accuracy: %.2f%%" % (scores[1]*100))


Accuracy: 47.69%
