In [1]:
import pandas as pd
import numpy as np
from keras import Sequential
from keras.layers import LSTM, Dense
from keras.utils import np_utils
from sklearn.model_selection import train_test_split

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


## Data preparation

In [51]:

data = pd.read_csv('induce-data-2019-08-08.csv').iloc[:, :]
vocab = ['C_E_F_T',
         'C_E_F_C',
         'C_E_F_O',
         'A_E_F_T',
         'A_E_F_O',
         'A_E_F_C',
         'G_E_F_C',
         'G_E_F_T',
         'G_E_F_O',
         'A_E_M_T',
         'A_E_M_O',
         'A_E_M_C',
         'G_E_M_O',
         'G_E_M_C',
         'G_E_M_T',
         'C_E_M_O',
         'C_E_M_C',
         'C_E_M_T',
         'C_H_F_CO',
         'C_H_F_CT',
         'C_H_F_OT',
         'G_H_F_OT',
         'G_H_F_CO',
         'G_H_F_CT',
         'A_H_F_CT',
         'A_H_F_OT',
         'A_H_F_CO',
         'C_H_M_CO',
         'C_H_M_CT',
         'C_H_M_OT',
         'A_H_M_CT',
         'A_H_M_OT',
         'A_H_M_CO',
         'G_H_M_OT',
         'G_H_M_CO',
         'G_H_M_CT', ]
labels = ['correct',
          'wrong',
          'type',
          'orientation',
          'color']

types = ['INTRO',
         'CORE',
         'FLEX',
         'TRIK',
         'DELY'
]

feat = ['type',
        'color',
        'orientation',
        'dual'
]

def seq_to_int(qts,vocab, labels, types, feat):
    integ = list()
    for i,x in enumerate(qts):
        features = list()
        features.append(vocab.index(qts[i, 4]))
        features.append(labels.index(qts[i, 5]))
        features.append(qts[i, 2])
        features.append(types.index(qts[i, 9]))
        features.append(feat.index(qts[i, 10]))
        integ.append(features)
    return integ
def split_sequence(data, n_steps, vocab, labels, types, feat):
    X, Y = list(), list()
    users = list(dict.fromkeys(data.loc[:, "user"]))
    for u in users:
        sequence = data[data.user == u]
        for i in range(len(sequence)):
            end_idx = i + n_steps
            if end_idx > len(sequence)-1:
                break
            x = seq_to_int(sequence.iloc[i:end_idx, :].values, vocab, labels, types, feat)
            y = labels.index(str(sequence.iloc[end_idx, 5]))
            X.append(x)
            Y.append(y)
    return np.array(X), np.array(Y)



### Test split_sequence


In [52]:
print(split_sequence(data.iloc[:10, :], 5, vocab, labels, types, feat))


(array([[[0, 0, 0, 0, 0],
        [1, 0, 1, 0, 1],
        [2, 0, 2, 0, 2],
        [3, 0, 3, 0, 0],
        [4, 0, 4, 0, 2]],

       [[1, 0, 1, 0, 1],
        [2, 0, 2, 0, 2],
        [3, 0, 3, 0, 0],
        [4, 0, 4, 0, 2],
        [5, 1, 5, 0, 1]],

       [[2, 0, 2, 0, 2],
        [3, 0, 3, 0, 0],
        [4, 0, 4, 0, 2],
        [5, 1, 5, 0, 1],
        [6, 0, 6, 0, 1]],

       [[3, 0, 3, 0, 0],
        [4, 0, 4, 0, 2],
        [5, 1, 5, 0, 1],
        [6, 0, 6, 0, 1],
        [7, 0, 7, 0, 0]],

       [[4, 0, 4, 0, 2],
        [5, 1, 5, 0, 1],
        [6, 0, 6, 0, 1],
        [7, 0, 7, 0, 0],
        [8, 0, 8, 0, 2]]]), array([1, 0, 0, 0, 0]))


## The model


In [56]:
n_features = 5
n_steps = 10


X, y = split_sequence(data, n_steps, vocab, labels, types, feat)
y = np_utils.to_categorical(y)
X = X.reshape((X.shape[0], X.shape[1], n_features))
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
print('train set size: '+str(len(y_train)))
print('test set size: '+str(len(y_test)))


model = Sequential()
model.add(LSTM(50, activation='relu', input_shape=(n_steps, n_features)))
model.add(Dense(5))
model.compile(optimizer='adam', loss='mse', metrics=['categorical_accuracy'])

train set size :7870
test set size :3877


### Training

In [None]:
# fit model
model.fit(X_train, y_train, epochs=200, verbose=2)

Epoch 1/200
 - 11s - loss: 7.6883 - categorical_accuracy: 0.3738
Epoch 2/200
 - 6s - loss: 0.1621 - categorical_accuracy: 0.4615
Epoch 3/200
 - 6s - loss: 0.1414 - categorical_accuracy: 0.4821
Epoch 4/200
 - 6s - loss: 0.1346 - categorical_accuracy: 0.4936
Epoch 5/200
 - 6s - loss: 0.1288 - categorical_accuracy: 0.5022
Epoch 6/200
 - 6s - loss: 0.1249 - categorical_accuracy: 0.5055
Epoch 7/200
 - 7s - loss: 0.1257 - categorical_accuracy: 0.5113
Epoch 8/200
 - 7s - loss: 0.1231 - categorical_accuracy: 0.5158
Epoch 9/200
 - 7s - loss: 0.1213 - categorical_accuracy: 0.5280
Epoch 10/200
 - 7s - loss: 0.1200 - categorical_accuracy: 0.5286
Epoch 11/200
 - 7s - loss: 0.1184 - categorical_accuracy: 0.5370
Epoch 12/200
 - 6s - loss: 0.1177 - categorical_accuracy: 0.5384
Epoch 13/200
 - 6s - loss: 0.1177 - categorical_accuracy: 0.5346
Epoch 14/200
 - 7s - loss: 0.1159 - categorical_accuracy: 0.5515
Epoch 15/200
 - 6s - loss: 0.1162 - categorical_accuracy: 0.5478
Epoch 16/200
 - 7s - loss: 0.1158

### Validation

In [55]:
scores = model.evaluate(X_test, y_test, verbose=0)
print("Accuracy: %.2f%%" % (scores[1]*100))



Accuracy: 64.53%
