### CNN-LSTM
### Edgar Acuna
### July 27, 2021

In [1]:
# cnn lstm model
from numpy import mean
from numpy import std, unique
from numpy import dstack
from pandas import read_csv
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers import Dropout
from keras.layers import LSTM
from keras.layers import TimeDistributed
from keras.layers.convolutional import Conv1D
from keras.layers.convolutional import MaxPooling1D
from keras.utils import to_categorical
from matplotlib import pyplot
 
# load a single file as a numpy array
def load_file(filepath):
    dataframe = read_csv(filepath, header=None)
    return dataframe.values

In [2]:
 # load a list of files and return as a 3d numpy array
def load_group(filenames, prefix=''):
    loaded = list()
    for name in filenames:
        data = load_file(prefix + name)
        loaded.append(data)
    # stack group so that features are the 3rd dimension
    loaded = dstack(loaded)
    return loaded

In [3]:
# load a dataset group, such as train or test
def load_dataset_group(group, prefix=''):
    filepath = prefix + group + '/Subs/'
    # load all 9 files as a single array
    filenames = list()
    # total acceleration
    filenames += ['X_'+group+'.csv']
    # load input data
    X = load_group(filenames, filepath)
    # load class output
    y = load_file(prefix + group + '/y_'+group+'.csv')
    return X, y

In [4]:
# load the dataset, returns train and test X and y elements
def load_dataset(prefix=''):
    # load all train
    trainX, trainy = load_dataset_group('train', prefix + 'NRLDataset/')
    print(trainX.shape, trainy.shape)
    # load all test
    testX, testy = load_dataset_group('test', prefix + 'NRLDataset/')
    print(testX.shape, testy.shape)
    # zero-offset class values
    trainy = trainy.astype(int)
    testy = testy.astype(int)
    #For PCA
    #trainy = trainy.astype(int)-1
    #testy = testy.astype(int)-1
    print(unique(testy))
    # one hot encode y
    trainy = to_categorical(trainy)
    testy = to_categorical(testy)
    print(trainX.shape, trainy.shape, testX.shape, testy.shape)
    return trainX, trainy, testX, testy


In [5]:
 # fit and evaluate a model
def evaluate_model(trainX, trainy, testX, testy):
    # define model
    verbose, epochs, batch_size = 1, 25, 256
    n_timesteps, n_features, n_outputs = trainX.shape[1], trainX.shape[2], trainy.shape[1]
    # reshape data into time steps of sub-sequences
    n_steps, n_length = 9, 189
    trainX = trainX.reshape((trainX.shape[0], n_steps, n_length, n_features))
    testX = testX.reshape((testX.shape[0], n_steps, n_length, n_features))
    print("number of features",n_features)
    # define model
    model = Sequential()
    model.add(TimeDistributed(Conv1D(filters=64, kernel_size=3, activation='relu'), input_shape=(None,n_length,n_features)))
    model.add(TimeDistributed(Conv1D(filters=64, kernel_size=3, activation='relu')))
    model.add(TimeDistributed(Dropout(0.5)))
    model.add(TimeDistributed(MaxPooling1D(pool_size=2)))
    model.add(TimeDistributed(Flatten()))
    model.add(LSTM(300))
    model.add(Dropout(0.5))
    model.add(Dense(300, activation='relu'))
    model.add(Dense(n_outputs, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    # fit network
    model.fit(trainX, trainy, epochs=epochs, batch_size=batch_size, verbose=verbose)
    # evaluate model
    _, accuracy = model.evaluate(testX, testy, batch_size=batch_size, verbose=0)
    #Computing F1-score
    import numpy as np
    import sklearn
    import pandas as pd
    from sklearn.metrics import classification_report, confusion_matrix, f1_score, precision_score, roc_auc_score
    train_features = np.array(trainX)
    test_features = np.array(testX)
    train_labels=np.array(trainy)
    #train_labels=pd.DataFrame(trainy)
    #n_values = train_labels.idxmax(axis=1)
    y_values=np.argmax(train_labels,axis=1)
    #print(y_values)
    test_labels=np.array(testy)
    yt_values=np.argmax(test_labels,axis=1)
    train_predictions_baseline = model.predict_classes(train_features, batch_size=150)
    prob2=pd.DataFrame(model.predict_proba(test_features,batch_size=150))
    a=prob2.max(axis=1)
    print('Probability of classification',(a[a>.80].shape[0])/prob2.shape[0])
    #f1_train=sklearn.metrics.f1_score(ytrain, train_predictions_baseline, average="weighted")
    test_predictions_baseline = model.predict_classes(test_features, batch_size=150)
    #print(test_predictions_baseline)
    #f1_test=sklearn.metrics.f1_score(test_labels, test_predictions_baseline, average="weighted")
    #print('f1_scores in testing set',f1_test)
    #Calculating metrics for each class
    print("EVALUATION ON TESTING DATA")
    print(classification_report(yt_values, test_predictions_baseline))
    return accuracy
 
# summarize scores
def summarize_results(scores):
    print(scores)
    m, s = mean(scores), std(scores)
    print('Accuracy: %.3f%% (+/-%.3f)' % (m, s))

In [6]:
# run an experiment
def run_experiment(repeats=1):
    # load data
    trainX, trainy, testX, testy = load_dataset()
    # repeat experiment
    scores = list()
    for r in range(repeats):
        score = evaluate_model(trainX, trainy, testX, testy)
        score = score * 100.0
        print('>#%d: %.3f' % (r+1, score))
        scores.append(score)
    # summarize results
    summarize_results(scores)
 
# run the experiment
run_experiment()

(39600, 1701, 1) (39600, 1)
(9900, 1701, 1) (9900, 1)
[ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48
 49 50 51 52 53 54 55]
(39600, 1701, 1) (39600, 56) (9900, 1701, 1) (9900, 56)
number of features 1
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
Instructions for updating:
Please use instead:* `np.argmax(model.predict(x), axis=-1)`,   if your model does multi-class classification   (e.g. if it uses a `softmax` last-layer activation).* `(model.predict(x) > 0.5).astype("int32")`,   if your model does binary classification   (e.g. if it uses a `sigmoid` last-layer activation).
Instructions for updating:
Please use `model.predict()` instead.
Probabilit