# Apply LSTM model to classify Spindles Datasets

## Packages that need to be installed

```shell
> conda install numpy pandas tensorflow-gpu scikit-learn
```

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

# Load the CSV data

In [2]:
# Relative path of the CSV file that holds the dataset; consumed by pd.read_csv below.
DATA_PATH = "data_excerpt1.csv"

In [3]:
# Read the CSV into a DataFrame: the first row supplies the column names
# and the first column supplies the row index.
data = pd.read_csv(
    DATA_PATH,
    index_col=0,
    header=0,
)

## Check if the data is correctly loaded
First 10 rows of the data

In [4]:
# Display the first 10 rows as a quick sanity check that the file was parsed as expected.
data.head(10)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,491,492,493,494,495,496,497,498,499,Label
0,-0.9562,-0.8962,-3.1877,-4.1783,-5.319,-6.1896,-7.8006,-6.9701,-0.7061,2.3058,...,6.1483,6.0882,1.1451,-2.5673,0.7148,6.9588,4.8775,-2.9575,-7.4704,5
1,-8.8313,-8.5511,-6.5198,-4.1983,2.2958,6.1183,5.2477,3.8268,2.6561,2.7161,...,3.6667,-1.2364,-6.6799,-4.0282,-1.8968,-4.3584,-3.758,-0.8362,-6.4598,5
2,-12.7638,-7.7906,-5.7993,-9.7518,-5.4891,-3.3878,-5.4791,-3.9381,-4.6186,-5.0589,...,-6.8,-6.3097,-4.8687,-2.7074,-1.0463,-2.3972,0.6148,5.4378,4.3371,5
3,0.5647,1.2752,2.1357,0.8049,3.6467,2.2358,-2.217,1.7355,4.7074,3.8268,...,2.9562,7.6292,1.3252,-3.0876,3.0463,-1.8468,-1.8168,-2.177,-6.6098,5
4,-3.9882,-5.1989,-10.8325,-11.8732,-8.7412,-7.9107,-4.7687,-4.1183,-7.0001,-5.249,...,2.3258,2.3459,-1.1364,-4.6386,-4.6486,-0.556,-0.7661,-3.8881,-4.3184,5
5,-6.4698,-8.9013,-8.0708,-3.5679,-4.1683,-7.6505,-5.6793,-2.2471,1.2051,-6.81,...,0.7749,2.3058,7.8594,3.7568,-4.2383,-2.6573,4.6873,3.2764,-0.7961,5
6,-2.3271,-6.3097,-6.7699,-4.6586,-2.6573,1.005,3.3165,4.7174,5.8581,5.5579,...,-2.0169,1.3152,0.5847,-2.6373,-5.8994,-4.0982,-4.9288,-10.1921,-11.4629,5
7,-9.7318,-7.2903,-3.3878,1.8656,2.9562,4.3872,12.3822,15.8545,13.8732,14.3035,...,10.5511,11.992,10.8112,8.6999,6.8787,2.4059,1.4653,2.7261,0.7949,5
8,2.2758,7.7793,12.022,9.9707,4.5072,0.4747,-3.3378,-6.5798,-8.481,-7.9307,...,-0.3359,4.0569,2.9562,-3.3578,-1.9369,-2.9275,-4.6486,-2.7674,-1.1764,5
9,-7.0801,-4.2884,1.2852,-2.187,-0.0257,2.596,2.9462,-1.2064,-3.0176,-0.0457,...,11.922,9.6505,6.9088,13.2828,13.0827,12.1021,11.942,9.2402,10.7712,5


## Separate features and labels

In [5]:
# Every column except the last one is a feature; the last column is the class label.
feature_columns = data.iloc[:, :-1]
label_column = data.iloc[:, -1]

# Convert both parts to NumPy arrays for scikit-learn / TensorFlow.
x = np.asarray(feature_columns)
y = np.asarray(label_column)

## Separate training data and test data

Using the `model_selection` utilities from the scikit-learn package.

For simplicity, I used `train_test_split` to split the data (k-fold cross-validation is not used yet).
I held out 1/4 of the data as test data (because there are only 4 samples of class 0).

For other cross-validation methods, please check this link: https://scikit-learn.org/stable/modules/cross_validation.html

In [6]:
# Hold out 25% of the samples for testing. The split is stratified on the labels
# and uses a fixed random_state so it is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.25,
    stratify=y,
    random_state=0,
)

In [7]:
# Shapes of the four arrays produced by the split, in the order
# (X_train, X_test, y_train, y_test).
tuple(split.shape for split in (X_train, X_test, y_train, y_test))

((270, 500), (90, 500), (270,), (90,))

# Model

## Constants for creating the model
Do not change

In [8]:
# Values derived from the loaded data; they size the model and the input pipeline.
NUM_FEATURES = x.shape[1]            # number of feature columns per sample
NUM_CLASSES = np.unique(y).size      # number of distinct label values
NUM_TRAIN_SAMPLE = len(y_train)      # samples in the training split
NUM_TEST_SAMPLE = len(y_test)        # samples in the test split

## Define the model
A simple model with only 1 LSTM layer

In [9]:
# Number of nodes (units) in the LSTM layer.
# Tunable hyper-parameter: you can change this.
LSTM_SIZE = 10

# Dropout rate passed to the Dropout layer that follows the LSTM
# (0.0 disables dropout).
# Tunable hyper-parameter: you can change it in range [0,1]
DROPOUT = 0.0

In [10]:
# Define the model: Reshape -> LSTM -> Dropout -> Dense(softmax)
model = tf.keras.Sequential()

# Each sample arrives as a flat vector of NUM_FEATURES values. The LSTM expects
# (timesteps, features_per_step), so every value is treated as one timestep
# carrying a single feature.
model.add(tf.keras.layers.Reshape((NUM_FEATURES, 1), input_shape=(NUM_FEATURES,)))
# return_sequences=False keeps only the output of the last timestep.
# No input_shape is given here: this is not the first layer, so its input shape
# is taken from the Reshape layer above.
model.add(tf.keras.layers.LSTM(LSTM_SIZE, return_sequences=False))
model.add(tf.keras.layers.Dropout(DROPOUT))
# One softmax output per class.
model.add(tf.keras.layers.Dense(NUM_CLASSES, activation='softmax'))

# summary() prints the table itself and returns None, so it is called directly;
# wrapping it in print() emitted an extra "None" line after the table.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
reshape (Reshape)            (None, 500, 1)            0         
_________________________________________________________________
lstm (LSTM)                  (None, 10)                480       
_________________________________________________________________
dropout (Dropout)            (None, 10)                0         
_________________________________________________________________
dense (Dense)                (None, 5)                 55        
Total params: 535
Trainable params: 535
Non-trainable params: 0
_________________________________________________________________
None


## Prepare data to train and test the model

In [11]:
# batch size
# you can change this
BATCH_SIZE = 50

# number of epochs to train the model
# you can change this
NUM_EPOCHS = 100

# Derived step counts -- do not change these.
# Training: the training dataset is repeated before batching, so the samples left
# over by floor division are seen in later steps rather than lost. max(1, ...)
# prevents a step count of 0 when the training split is smaller than BATCH_SIZE.
PER_EPOCH_STEPS = max(1, NUM_TRAIN_SAMPLE // BATCH_SIZE)
# Validation: round UP so the final, partially filled batch is evaluated too.
# With floor division (e.g. 90 // 50 == 1) only the first BATCH_SIZE test samples
# were ever used for validation.
TEST_PER_EPOCH_STEPS = max(1, -(-NUM_TEST_SAMPLE // BATCH_SIZE))

### Create tensorflow data to train and test the model

In [12]:
# Preview: the first 10 raw training labels, followed by their one-hot encoding.
print(y_train[:10])

# OneHotEncoder expects a 2-D input, so the labels are turned into a single column.
train_label_column = y_train.reshape(-1, 1)
encoder = OneHotEncoder(handle_unknown='ignore').fit(train_label_column)
y_new = encoder.transform(train_label_column).toarray()
y_new[:10]

[5 1 2 1 5 2 5 1 2 1]


array([[0., 0., 0., 0., 1.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 1.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 1.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 1., 0., 0., 0.]])

In [13]:
# Encode class labels as one-hot vectors.
# The encoder is fitted on the TRAINING labels only and then reused, unchanged,
# for the test labels. Re-fitting on the test labels (fit_transform) would build
# a second, independent mapping: if the test split lacked a class, its one-hot
# width/column order would no longer match what the model was trained on.
# NOTE: this cell overwrites y_train / y_test with their encoded versions, so
# re-run the train/test split cell before running this cell a second time.
encoder = OneHotEncoder(handle_unknown='ignore')
y_train = encoder.fit_transform(y_train.reshape(-1, 1)).toarray()
y_test = encoder.transform(y_test.reshape(-1, 1)).toarray()

# Training pipeline: shuffle over the whole training set, repeat indefinitely,
# then cut into batches.
train_dataset = (
    tf.data.Dataset.from_tensor_slices((X_train, y_train))
    .shuffle(NUM_TRAIN_SAMPLE)
    .repeat()
    .batch(BATCH_SIZE)
)

# Test pipeline: no shuffling; batch first, then repeat, so every pass over the
# test data yields the same sequence of batches.
test_dataset = (
    tf.data.Dataset.from_tensor_slices((X_test, y_test))
    .batch(BATCH_SIZE)
    .repeat()
)

### Configure the optimizer to train the model

In [14]:
# learning rate
# you can change this
LEARNING_RATE = 1e-3

# Use the Adam optimizer with categorical cross-entropy as the loss function
# (the labels are one-hot vectors) and report accuracy while training.
# tf.keras.optimizers.Adam replaces tf.train.AdamOptimizer, which exists only in
# TensorFlow 1.x and raises AttributeError on TensorFlow 2.x.
model.compile(optimizer=tf.keras.optimizers.Adam(LEARNING_RATE),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

## Training the model

In [15]:
# Train for NUM_EPOCHS epochs. Both datasets repeat indefinitely, so the number
# of batches drawn per epoch is set explicitly for training and for validation.
model.fit(
    train_dataset,
    steps_per_epoch=PER_EPOCH_STEPS,
    epochs=NUM_EPOCHS,
    validation_data=test_dataset,
    validation_steps=TEST_PER_EPOCH_STEPS,
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100


Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


<tensorflow.python.keras.callbacks.History at 0x2317d457a58>