In [1]:
import pandas as pd
import numpy as np
from keras import Sequential
from keras.layers import LSTM, Dense, Dropout, Activation
from keras.optimizers import Adam
from keras.utils import np_utils
from sklearn.model_selection import train_test_split
from tqdm import tqdm
from keras.preprocessing.sequence import pad_sequences
import matplotlib.pyplot as plt

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
data = pd.read_csv('induce-data-2019-08-08.csv').iloc[:, :]
vocab = ['C_E_F_T',
         'C_E_F_C',
         'C_E_F_O',
         'A_E_F_T',
         'A_E_F_O',
         'A_E_F_C',
         'G_E_F_C',
         'G_E_F_T',
         'G_E_F_O',
         'A_E_M_T',
         'A_E_M_O',
         'A_E_M_C',
         'G_E_M_O',
         'G_E_M_C',
         'G_E_M_T',
         'C_E_M_O',
         'C_E_M_C',
         'C_E_M_T',
         'C_H_F_CO',
         'C_H_F_CT',
         'C_H_F_OT',
         'G_H_F_OT',
         'G_H_F_CO',
         'G_H_F_CT',
         'A_H_F_CT',
         'A_H_F_OT',
         'A_H_F_CO',
         'C_H_M_CO',
         'C_H_M_CT',
         'C_H_M_OT',
         'A_H_M_CT',
         'A_H_M_OT',
         'A_H_M_CO',
         'G_H_M_OT',
         'G_H_M_CO',
         'G_H_M_CT', ]

labels = ['correct',
          'wrong',
          'type',
          'orientation',
          'color']

types = ['INTRO',
         'CORE',
         'FLEX',
         'TRIK',
         'DELY'
]

topics = ['cards',
          'animals',
          'geometry'
    
]

feat = ['type',
        'color',
        'orientation',
        'dual'
]

age = ['8-10','11-13']

In [3]:
def seq_to_int(qts,vocab, labels, types, feat, topics, n_steps, age):
    integ = list()
    for i,x in enumerate(qts):
        if i != n_steps:
            features = list()
            features.append(vocab.index(qts[i, 4]))
            features.append(qts[i, 2])
            features.append(types.index(qts[i, 9]))
            features.append(feat.index(qts[i, 10]))
            features.append(topics.index(qts[i, 6]))
            features.append(age.index(qts[i, 7]))
            features.append(labels.index(qts[i, 5]))
        else:
            features = list()
            features.append(vocab.index(qts[i, 4]))
            features.append(qts[i, 2])
            features.append(types.index(qts[i, 9]))
            features.append(feat.index(qts[i, 10]))
            features.append(topics.index(qts[i, 6]))
            features.append(age.index(qts[i, 7]))
            features.append(-1)
        integ.append(features)
    return integ

def split_sequence(data, n_steps, vocab, labels, types, feat, topics, age):
    X, Y = list(), list()
    users = list(dict.fromkeys(data.loc[:, "user"]))
    for u in users:
        sequence = data[data.user == u]
        for i in range(len(sequence)):
            end_idx = i + n_steps
            if end_idx > len(sequence)-1:
                break
            x = seq_to_int(sequence.iloc[i:end_idx+1, :].values, vocab, labels, types, feat, topics, n_steps, age)
            y = labels.index(str(sequence.iloc[end_idx, 5]))
            X.append(x)
            Y.append(y)
    return np.array(X), np.array(Y)

In [50]:
data_train = data.iloc[:]
y_train = list()
X_train = list()


for i in tqdm(range(5, 10)):
    X_seq, y_seq = split_sequence(data_train, i, vocab, labels, types, feat, topics, age)
    for x in X_seq:
        X_train.append(x)
    for _y in y_seq:
        y_train.append(_y)
y_train = np_utils.to_categorical(y_train)
X_train = np.asarray(X_train)

100%|██████████| 5/5 [00:27<00:00,  5.43s/it]


In [51]:
print(X_train)
X_train = pad_sequences(X_train, value=99)
print(X_train.shape)

[array([[ 0,  0,  0,  0,  0,  1,  0],
       [ 1,  1,  0,  1,  0,  1,  0],
       [ 2,  2,  0,  2,  0,  1,  0],
       [ 3,  3,  0,  0,  1,  1,  0],
       [ 4,  4,  0,  2,  1,  1,  0],
       [ 5,  5,  0,  1,  1,  1, -1]])
 array([[ 1,  1,  0,  1,  0,  1,  0],
       [ 2,  2,  0,  2,  0,  1,  0],
       [ 3,  3,  0,  0,  1,  1,  0],
       [ 4,  4,  0,  2,  1,  1,  0],
       [ 5,  5,  0,  1,  1,  1,  1],
       [ 6,  6,  0,  1,  2,  1, -1]])
 array([[ 2,  2,  0,  2,  0,  1,  0],
       [ 3,  3,  0,  0,  1,  1,  0],
       [ 4,  4,  0,  2,  1,  1,  0],
       [ 5,  5,  0,  1,  1,  1,  1],
       [ 6,  6,  0,  1,  2,  1,  0],
       [ 7,  7,  0,  0,  2,  1, -1]])
 ...
 array([[21, 36,  3,  3,  2,  1,  3],
       [33, 37,  2,  3,  2,  1,  3],
       [18, 38,  3,  3,  0,  1,  3],
       [27, 39,  2,  3,  0,  1,  3],
       [24, 40,  3,  3,  1,  1,  2],
       [30, 41,  2,  3,  1,  1,  4],
       [21, 42,  4,  3,  2,  1,  3],
       [18, 43,  4,  3,  0,  1,  3],
       [24, 44,  4,  3,  1

In [42]:
n_hidden = int(2/3 * (X_train.shape[1]+X_train.shape[2]))

11


In [54]:
model = Sequential()
model.add(LSTM(units=256, return_sequences=False, input_shape=(X_train.shape[1], X_train.shape[2])))
#model.add(Dropout(0.2))
#model.add(LSTM(units=n_hidden, return_sequences=False))
#model.add(Dropout(0.2))
model.add(Dense(units=5))
amsgrad = Adam(amsgrad=False)
model.add(Activation('softmax'))
model.compile(optimizer=amsgrad, loss='categorical_crossentropy', metrics=['categorical_accuracy'])
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_11 (LSTM)               (None, 256)               270336    
_________________________________________________________________
dense_6 (Dense)              (None, 5)                 1285      
_________________________________________________________________
activation_6 (Activation)    (None, 5)                 0         
Total params: 271,621
Trainable params: 271,621
Non-trainable params: 0
_________________________________________________________________


In [55]:
history = model.fit(X_train, np.asarray(y_train), epochs=200, batch_size=3000, shuffle=True, verbose=1)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200

KeyboardInterrupt: 

In [None]:
plt.plot(history.history['loss'])

In [None]:
plt.plot(history.history['acc'])

In [None]:
#model.save("model.h5")
#from keras.models import load_model
#model = load_model("model.h5")