**Imports and Google Drive Data Path**

In [None]:
import os
import csv
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
sns.set_theme(style="darkgrid")
from collections import Counter
import numpy as np
from sklearn import metrics
import properties
import data_utils
import model_utils
import logger
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Activation, BatchNormalization, Dropout, Bidirectional, LSTM, TimeDistributed
from keras.optimizers import SGD, Adam, RMSprop
from sklearn.preprocessing import OneHotEncoder
from keras.utils.vis_utils import plot_model

# Echo the configured training-data root so the path in use is visible in the notebook output.
print(properties.DATA_HOME)



/content/drive/MyDrive/CSC/ECE542/Competition/data/TrainingData


**Data import and preprocessing (includes one-hot encoding of y)**

In [None]:
# Data roots are read from the project's `properties` module; the notebook
# shares a single logger named "ECE 542".
DATA_HOME, TEST_HOME = properties.DATA_HOME, properties.TEST_HOME
LOGGER = logger.get_logger("ECE 542")

# Record prefixes split by role. Every prefix has the form "subject_<SSS>_<NN>__".
def _expand_records(sessions_per_subject):
    """Turn (subject, sessions) pairs into record prefixes, preserving the given order."""
    names = []
    for subject, sessions in sessions_per_subject:
        for session in sessions:
            names.append("subject_%03d_%02d__" % (subject, session))
    return names


# Training: the leading sessions of subjects 1-8.
training_records = _expand_records([
    (1, range(1, 8)),
    (2, range(1, 5)),
    (3, range(1, 3)),
    (4, range(1, 2)),
    (5, range(1, 3)),
    (6, range(1, 3)),
    (7, range(1, 4)),
    (8, range(1, 2)),
])

# Validation: for subjects 1-7, the session that follows their training sessions.
validation_records = _expand_records([
    (1, [8]),
    (2, [5]),
    (3, [3]),
    (4, [2]),
    (5, [3]),
    (6, [3]),
    (7, [4]),
])

# Test: session 1 of subjects 9-12.
test_records = _expand_records([(subject, [1]) for subject in range(9, 13)])

# Lookup from a rate key (string) to its data_utils.SamplingRate object.
def _window_offsets(n_points, last_thousandths):
    """Return n_points ascending offsets, 0.025 apart, ending at last_thousandths / 1000.

    The sequence is built in integer thousandths and divided once, so every
    value is the same float as the equivalent hand-written literal
    (for example -0.045 or 0.005).
    """
    return [
        (last_thousandths - 25 * steps_back) / 1000.0
        for steps_back in range(n_points - 1, -1, -1)
    ]


# Each row: (key, number of offsets, last offset in thousandths,
#            second SamplingRate argument, third SamplingRate argument).
# NOTE(review): the meaning of SamplingRate's positional arguments after the
# offset list is not visible in this notebook; the values are passed through
# exactly as before, with the final argument fixed at 4 — confirm against data_utils.
_SAMPLING_SPECS = [
    ("1", 1, -20, 0, 0),
    ("2", 2, -20, 0, 1),
    ("4", 4, 5, -2, 1),
    ("6", 6, 5, -4, 1),
    ("10", 10, 5, -8, 1),
    ("30", 30, 5, -28, 1),
    ("60", 60, 5, -58, 1),
]

sampling_rates = {
    key: data_utils.SamplingRate(_window_offsets(n_points, last_thousandths), second_arg, third_arg, 4)
    for key, n_points, last_thousandths, second_arg, third_arg in _SAMPLING_SPECS
}

# Rate "30" was selected after several runs.
sampling_rate = sampling_rates["30"]

# Load the training and validation splits with the selected sampling rate
# (unshuffled, no class balancer, batch size 1), then preprocess each into
# inputs, labels and sample weights.
def _build_stream(records):
    """Resolve the data files for `records` under DATA_HOME and wrap them in a DataStreamer."""
    data_files = data_utils.get_data_files(DATA_HOME, records)
    stream = data_utils.DataStreamer(
        data_files,
        sample_deltas=sampling_rate,
        do_shuffle=False,
        class_balancer=None,
        batch_size=1,
    )
    return data_files, stream


training_data_files, training_stream = _build_stream(training_records)
train_x, train_y, train_sample_weights = training_stream.preprocess()

validation_data_files, validation_stream = _build_stream(validation_records)
# The validation preprocessing is given the training stream's class count.
valid_x, valid_y, valid_sample_weights = validation_stream.preprocess(
    n_classes=len(training_stream.classes)
)


[2021-04-21 16:11:59,030] [INFO] [data_utils.py] Loading data from files .... 
[2021-04-21 16:12:50,578] [INFO] [data_utils.py] Sampling data: Counter({'0': 200369, '3': 35542, '2': 15036, '1': 11325})
[2021-04-21 16:12:50,855] [INFO] [data_utils.py] Loading data from files .... 
[2021-04-21 16:13:04,719] [INFO] [data_utils.py] Sampling data: Counter({'0': 51364, '3': 16067, '2': 3231, '1': 2479})


In [None]:
# Show the shapes of the training inputs and labels, one per line.
print(train_x.shape, train_y.shape, sep="\n")

(262272, 30, 6)
(262272, 4)


**Neural Network Model**

In [None]:

# NN model: bidirectional LSTM over each input window, followed by a per-timestep
# dense layer, flattening, and a softmax classifier.
window_shape = tuple(train_x.shape[1:])  # per-sample input shape, i.e. train_x without its first axis
n_classes = train_y.shape[1]             # number of columns in the label matrix

# Initialize the sequential model
model = Sequential()

# Bidirectional LSTM layer. `input_shape` is passed to the Bidirectional wrapper
# (the layer Sequential actually receives), which is the form shown in the Keras
# documentation; this lets the model be built when it is assembled, so it can be
# inspected with model.summary() / plot_model before training starts.
model.add(
    Bidirectional(
        LSTM(units=128, return_sequences=True),
        input_shape=window_shape,
    )
)
# Dropout layer with rate 0.5
model.add(Dropout(rate=0.5))
# Time-distributed dense layer with ReLU activation
model.add(TimeDistributed(Dense(units=128, activation='relu')))
# Flatten the per-timestep outputs into one vector per sample
model.add(Flatten())
# Final dense layer with softmax activation, one unit per label column
model.add(Dense(n_classes, activation='softmax'))

# Compile with categorical cross-entropy loss, the Adam optimizer and an accuracy metric
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])

# Fit the model; sample weights are supplied for both the training and validation data
history = model.fit(
    train_x,
    train_y,
    batch_size=100,
    epochs=20,
    verbose=1,
    validation_data=(valid_x, valid_y, valid_sample_weights),
    sample_weight=train_sample_weights,
)


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
 474/2623 [====>.........................] - ETA: 1:58 - loss: 0.0745 - acc: 0.9489

0.8333082795143127

**Predictions**

In [None]:
# For every test record: load its features, predict with the trained model, and
# write the predicted class indices to "<record>y_prediction.csv" under TEST_HOME.
for test_record in test_records:
    LOGGER.info("Predicting for '%s' ... " % test_record)
    testing_data_file = data_utils.get_data_files(TEST_HOME, [test_record], skip_y=True)
    # Reuse the `sampling_rate` object chosen for training instead of re-indexing
    # sampling_rates with a literal key, so the prediction window cannot drift
    # from the training window if the selected rate is changed.
    testing_stream = data_utils.DataStreamer(testing_data_file, sample_deltas=sampling_rate, do_shuffle=False)
    # NOTE(review): training inputs came from DataStreamer.preprocess(), whereas the
    # test inputs are read from `.features` directly — confirm both yield the same layout.
    test_x = testing_stream.features
    y_predicted = model.predict(test_x)
    # Index of the highest-scoring output column for each sample
    y_test_int = np.argmax(y_predicted, axis=1)

    # Saves data to csv
    test_file_path = os.path.join(TEST_HOME, "%sy_prediction.csv" % test_record)
    data_utils.dump_labels_to_csv(y_test_int, test_file_path)

In [None]:
# NOTE(review): this cell repeats the data-loading steps of the preprocessing cell,
# but passes `class_balancer=balancer` for the training stream and then constructs a
# model_utils.SimpleLSTM. `balancer`, `batch_size` and `n_epochs` are not assigned in
# any cell visible in this notebook, so on a fresh kernel this cell would raise
# NameError unless they are defined elsewhere — TODO define them or remove the cell.
# It also rebinds train_x / train_y / valid_* after the model above has been trained.
training_data_files = data_utils.get_data_files(DATA_HOME, training_records)
training_stream = data_utils.DataStreamer(training_data_files, sample_deltas=sampling_rate, do_shuffle=False,
                                          class_balancer=balancer, batch_size=1)
train_x, train_y, train_sample_weights = training_stream.preprocess()
validation_data_files = data_utils.get_data_files(DATA_HOME, validation_records)
validation_stream = data_utils.DataStreamer(validation_data_files, sample_deltas=sampling_rate, do_shuffle=False,
                                            class_balancer=None, batch_size=1)
# The validation preprocessing is given the training stream's class count.
valid_x, valid_y, valid_sample_weights = validation_stream.preprocess(n_classes=len(training_stream.classes))
# Build the SimpleLSTM wrapper from the (x, y, weights) triples of both splits plus the
# sampling rate's window size, the stream's feature count and the class count.
# NOTE(review): the object is only constructed here; no training call is visible.
lstm = model_utils.SimpleLSTM((train_x, train_y, train_sample_weights), (valid_x, valid_y, valid_sample_weights),
                              sampling_rate.window_size, training_stream.n_features,
                                  len(training_stream.classes), batch_size=batch_size, epochs=n_epochs)

array([0, 0, 0, ..., 0, 0, 0])