In [63]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from copy import copy
import h5py

## Loading in Data

In [64]:
EP_data = pd.read_csv(r"C:\Users\shaik\OneDrive\Desktop\MNIST\MindBigData-EP-v1.0\EP1.01.txt", sep="\t", header=None)

In [65]:
EP_data.columns = ["ID", "Event ID", "Device", "Channel", "Label", "HzCaptured", "Signal"]

# Exploring Labels

In [66]:
# Loops through the data and extracts all of the unique labels.
def get_labels_in_set(dataset, labelcolname="Label"):
    listoflabels = []
    for i in range(len(dataset)):
        newlabel = dataset[labelcolname][i]
        if newlabel not in listoflabels:
            listoflabels.append(newlabel)
    return listoflabels

In [67]:
# Unique Labels in IN Dataset
listoflabels = get_labels_in_set(EP_data, "Label")
print("Labels In Data Set: ", sorted(listoflabels))

Labels In Data Set:  [-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9]


## Filtering Data by Hz

In [68]:
# Filters out all observations with <250 Hz Captured. Needed for the 250 nodes in the input layer of our neural network.
def filter_data(dataset, Hz_colname, filternum=250):
    data_filter = dataset[dataset[Hz_colname] >= filternum]
    print("lengths of dataset", len(dataset),
          "length of filtered set:", len(data_filter),
          "length that was filtered out:", len(dataset[dataset[Hz_colname] < filternum]))
    print("Percent of Original Data Retained:", round(len(data_filter) / len(dataset) * 100, 2), "%")
    return data_filter
    

In [69]:
EP_data_filter = filter_data(EP_data, "HzCaptured", 250)

lengths of dataset 910476 length of filtered set: 910056 length that was filtered out: 420
Percent of Original Data Retained: 99.95 %


In [70]:
del EP_data

## Filtering Data by -1 Label

In [71]:
def filter_negatives(dataset, label_colname):
    data_filter = dataset[dataset[label_colname] != -1]
    print("lengths of dataset", len(dataset),
          "length of filtered set", len(data_filter),
          "length that was filtered out", len(dataset[dataset[label_colname] == -1]))
    print("Percent of Original Data Retained", round(len(data_filter) / len(dataset) * 100, 2), "%")
    return data_filter
    

In [72]:
EP_data_filter = filter_negatives(EP_data_filter, "Label")

lengths of dataset 910056 length of filtered set 907830 length that was filtered out 2226
Percent of Original Data Retained 99.76 %


## Converting Signal Strings to Arrays of Floats

In [73]:
EP_data_array = EP_data_filter.to_numpy()
print(EP_data_array.shape)

(907830, 7)


In [74]:
# This converts the string signal data into an array and then converts each string element into a float element.
def string_to_float(stringed_signal_data):
    float_signal_data = np.empty((len(stringed_signal_data)), dtype=object)
    for n in range(len(float_signal_data)):
        single_signal_observation = stringed_signal_data[n][6].split(",")
        single_signal_observation = [float(i) for i in single_signal_observation]        
        float_signal_data[n] = single_signal_observation
    print("The Shape of the Array we created after converting to floats:", float_signal_data.shape)
    print("The Shape of the Original Array of Stringed Signal Data:     ", stringed_signal_data[:,6].shape)

    return float_signal_data

In [75]:
EP_signal_float = string_to_float(EP_data_array)

The Shape of the Array we created after converting to floats: (907830,)
The Shape of the Original Array of Stringed Signal Data:      (907830,)


In [76]:
del EP_data_filter

## Cropping the Signal Data

In [77]:
def crop_signal(signal_float):
    signal_cropped = np.empty((len(signal_float)), dtype=object)
    for n in range(len(signal_cropped)):
        signal_cropped[n] = np.array(signal_float[n][:250])
    print("Shape of 1st sample in signal", signal_cropped[0].shape)
    print("Shape of all signal data", signal_cropped.shape)
    return signal_cropped

In [78]:
EP_signal_cropped = crop_signal(EP_signal_float)

Shape of 1st sample in signal (250,)
Shape of all signal data (907830,)


In [79]:
del EP_signal_float

## Grouping X (by event)

In [80]:
# This groups everything. First, create an empty array, then create mini arrays of c observations grouped together.
def group_x(x_,channels):
    grouped_x = np.empty((len(x_)//channels), dtype=object)
    temp_x = []
    for i in range(len(x_)):
        temp_x.append(x_[i])
        if (i+1) % channels == 0:
            j = ((i+1)//channels)-1
            grouped_x[j] = np.array(temp_x)
            temp_x = []
    
    print("Check if everything is of type array:", type(grouped_x), type(grouped_x[10]), type(grouped_x[15][9]))
    print("Shape of Grouped X:", grouped_x.shape, "\nShape of Original X Divided by C:", len(x_)//channels)
    
    return grouped_x

In [81]:
EP_grouped_x = group_x(EP_signal_cropped, 14)

Check if everything is of type array: <class 'numpy.ndarray'> <class 'numpy.ndarray'> <class 'numpy.ndarray'>
Shape of Grouped X: (64845,) 
Shape of Original X Divided by C: 64845


In [82]:
del EP_signal_cropped

In [83]:
EP_grouped_x.dtype # Tensorflow can't handle Objects "O" dtypes. Luckily this is automatically fixed when flattening.

dtype('O')

## Checking Input Shape

In [84]:
def change_shape(x_, new_shape_order):
    if new_shape_order == "channel-time":
        copy_x = []
        for i in x_:
            copy_x.append(i)
        copy_x = np.array(copy_x)
    elif new_shape_order == "time-channel":
        copy_x = []
        for i in x_:
            copy_x.append(i.T)
        copy_x = np.array(copy_x)
    elif new_shape_order == "flattened":
        copy_x = []
        for i in x_:
            copy_x.append(i.T.flatten())
        copy_x = np.array(copy_x)
    
    print("New Shape:", copy_x.shape)
    print("Sample of X:", copy_x[13])
    return copy_x

In [85]:
EP_x = change_shape(EP_grouped_x, "flattened")

New Shape: (64845, 3500)
Sample of X: [4387.692307 4537.435897 4528.205128 ... 4647.692307 3988.717949
 4062.051282]


In [86]:
del EP_grouped_x 

## Feature Scaling

In [87]:
print(EP_x.shape)

(64845, 3500)


In [88]:
temp_x = np.array(([1.0,2.0,3.0],
          [4.0,5.0,6.0],
          [7.0,1.0,14.0],
          [10.0,11.0,12.0]))
print(temp_x.shape)
print(temp_x)

(4, 3)
[[ 1.  2.  3.]
 [ 4.  5.  6.]
 [ 7.  1. 14.]
 [10. 11. 12.]]


In [89]:
def scale_features(x_prescaled):
    for feature in range(x_prescaled.T.shape[0]):
        feature_min = min(x_prescaled.T[feature])
        feature_max = max(x_prescaled.T[feature])
        feature_range = feature_max - feature_min
        x_prescaled.T[feature] = ( x_prescaled.T[feature] - feature_min ) / feature_range
    return x_prescaled

In [90]:
# Verified that this works properly
scaled_x = scale_features(EP_x)

In [91]:
scaled_x.shape

(64845, 3500)

In [92]:
del EP_x

## Mean Normalization

In [93]:
def mean_normalize(x_prenormalized):
    for feature in range(x_prenormalized.T.shape[0]):
        feature_mean = sum(x_prenormalized.T[feature]) / len(x_prenormalized.T[feature])
        x_prenormalized.T[feature] = x_prenormalized.T[feature] - feature_mean
    return x_prenormalized

In [94]:
# Verified that this works properly.
normalized_x = mean_normalize(scaled_x)

In [95]:
del scaled_x

## Reshaping X

In [96]:
def reshape_x(pre_x, channels, timepoints):
    new_x = np.reshape(pre_x, (pre_x.shape[0], channels, timepoints))
    return new_x

In [97]:
EP_x = reshape_x(normalized_x, 14, 250)

In [98]:
del normalized_x

## 1-Hot Encoding Labels

In [99]:
def create_y(dataarray_set, c):
    y_preprocess = dataarray_set[:,4]
    print("Y Original Length:", len(y_preprocess), "\nExamples of Y:", y_preprocess[:30])
    
    y_divided = [y_preprocess[i] for i in range(len(y_preprocess)) if (i+1)%c == 0] # Extract 1 label per event instead of c
    print("\n Y Length after filtering out event duplicates (dividing by c):", len(y_divided))
    print("Examples of Y after filtering:", y_divided[:30])
    
    return y_divided

In [100]:
# This creates an equivalent array of arrays using the encoding system.
def encode_hot_y(dataarray_set, labelslist, c):
    
    y_empty = np.empty((len(dataarray_set)//c), dtype=object)
    
    for i in range(len(y_empty)):
        y_empty[i] = np.zeros((len(labelslist)-1), int)
        
    y_ = create_y(dataarray_set, c)
    
    print("\n What y array looks like before assigning 1s: \n", y_empty[:10])
    
    for i in range(len(y_empty)): # This encodes the 1 for each label
        n = y_[i]
        y_empty[i][n] = 1
        
    print("What y array looks like after assigning 1s: \n", y_empty[:10])
    
    return y_empty
    
    

In [101]:
EP_encode_y = encode_hot_y(EP_data_array, listoflabels, 14)

Y Original Length: 907830 
Examples of Y: [6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7 7 7 7 7 7 7 7 7 9 9]

 Y Length after filtering out event duplicates (dividing by c): 64845
Examples of Y after filtering: [6, 7, 9, 9, 0, 0, 8, 6, 6, 5, 6, 3, 9, 4, 2, 8, 9, 0, 7, 2, 7, 1, 1, 9, 5, 0, 1, 2, 6, 1]

 What y array looks like before assigning 1s: 
 [array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
 array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
 array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
 array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
 array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
 array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
 array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
 array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
 array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
 array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])]
What y array looks like after assigning 1s: 
 [array([0, 0, 0, 0, 0, 0, 1, 0, 0, 0])
 array([0, 0, 0, 0, 0, 0, 0, 1, 0, 0])
 array([0, 0, 0, 0, 0, 0, 0, 0, 0, 1])
 array([0, 0, 0, 0, 0, 0, 0, 0, 0, 1])
 array([1, 0, 0, 0, 0, 0, 0, 0, 0, 0])
 array([1, 0, 0, 0, 0,

In [102]:
del EP_data_array

In [103]:
def fix_dtype(y_):
    print("Previous Dtype:", y_.dtype)
    if y_.dtype == 'O':
        y_ = np.vstack(y_[:]).astype(np.float)
        print("Fixed. New Dtye:", y_.dtype)
        return y_
    else:
        return "Not Object Type:", y_dtype

In [104]:
EP_y = fix_dtype(EP_encode_y)

Previous Dtype: object
Fixed. New Dtye: float64


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_ = np.vstack(y_[:]).astype(np.float)


In [105]:
del EP_encode_y

## Saving X & Y

In [106]:
print(EP_x.nbytes/(1024**3), "GB")
print(EP_y.nbytes/(1024**3), "GB")

1.6909651458263397 GB
0.004831328988075256 GB


In [107]:
def save_data(x, y, dataName):
    with h5py.File(dataName + '_x.h5', 'w') as hf:
        hf.create_dataset(dataName + "_x_dataset", data=x)
        
    with h5py.File(dataName + "_y.h5", "w") as hf:
        hf.create_dataset(dataName + "_y_dataset", data=y)

In [108]:
save_data(EP_x, EP_y, "EP_preprocessed")

In [109]:
del EP_x
del EP_y

In [110]:
import numpy as np
import h5py
import time
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.callbacks import TensorBoard

In [None]:
Functions

In [111]:
def load_data(filename, dataname):
    with h5py.File(filename, 'r') as hf:
        x = hf[dataname][:]
    with h5py.File('EP_preprocessed_y.h5', 'r') as hf:
        y = hf['EP_preprocessed_y_dataset'][:]
    return x,y

In [112]:
def reshape_x(pre_x, channels, timepoints):
    new_x = np.reshape(pre_x, (pre_x.shape[0], channels, timepoints))
    return new_x

In [113]:
def train_test_split(file, data):
    x_,y_ = load_data(file, data)
    
    x_train, x_test = x_[:int(len(x_)*.80)], x_[-int(len(x_)*.20):]
    y_train, y_test = y_[:int(len(y_)*.80)], y_[-int(len(y_)*.20):]
    
    print("X train shape: ", x_train.shape)
    print("X test shape: ", x_test.shape)
    print("Y train shape: ", y_train.shape)
    print("Y test shape: ", y_test.shape)
    
    return x_train, x_test, y_train, y_test

In [152]:
def build_CNN(convLayer, denseLayer, nodes):
    model = keras.Sequential()

    model.add(keras.layers.Conv1D(nodes, 1, input_shape=(x_train.shape[1:]), data_format="channels_last", activation="relu"))
    model.add(keras.layers.MaxPooling1D(pool_size=2, padding='same'))

    for l in range(convLayer-1):
        model.add(keras.layers.Conv1D(nodes, 1, activation="relu"))
        model.add(keras.layers.MaxPooling1D(pool_size=2, padding='same'))

    model.add(keras.layers.Dropout(.5))
    model.add(keras.layers.Flatten())

    for l in range(denseLayer-1):
        model.add(keras.layers.Dense(nodes, activation="relu"))

    model.add(keras.layers.Dense(10, activation="softmax"))
    return model

In [153]:
def get_tensorboard(model_name, logdir):
    tensorboard = TensorBoard(log_dir=f'{logdir}\\{model_name}')
    #tensorboard = TensorBoard(log_dir=f'logs\\{model_name}')
    print("Model Name:", model_name)
    return tensorboard

In [154]:
conv_layers = [3,2]
dense_layers = [2,1]
layer_sizes = [256,64,16]

In [155]:
# Data Files and Data Sets
data_files = ["EP_preprocessed_x.h5"]

data_sets = ["EP_preprocessed_x_dataset"]

In [156]:
for i in range(len(data_files)):
    x_train, x_test, y_train, y_test = train_test_split(data_files[i], data_sets[i])
    
    for conv_layer in conv_layers:
        for dense_layer in dense_layers:
            for layer_size in layer_sizes:

                # --- Tensorboard Callback ---
                NAME = f"CNN-{conv_layer}-conv-{dense_layer}-dense-{layer_size}-nodes-{int(time.time())}"
                tensorboard = get_tensorboard(model_name=NAME, logdir='logs_ConvNet')

                # --- Convolutional Neural Network ---
                model = build_CNN(conv_layer, dense_layer, layer_size)
                model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])    
                model.fit(x_train, y_train, epochs=10, validation_data=(x_test,y_test), callbacks=[tensorboard])

X train shape:  (51876, 14, 250)
X test shape:  (12969, 14, 250)
Y train shape:  (51876, 10)
Y test shape:  (12969, 10)
Model Name: CNN-3-conv-2-dense-256-nodes-1650307316
Epoch 1/10
Instructions for updating:
use `tf.profiler.experimental.stop` instead.
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Model Name: CNN-3-conv-2-dense-64-nodes-1650307685
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Model Name: CNN-3-conv-2-dense-16-nodes-1650307748
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Model Name: CNN-3-conv-1-dense-256-nodes-1650307782
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Model Name: CNN-3-conv-1-dense-64-nodes-1650307899
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10


Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Model Name: CNN-3-conv-1-dense-16-nodes-1650307952
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Model Name: CNN-2-conv-2-dense-256-nodes-1650307985
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Model Name: CNN-2-conv-2-dense-64-nodes-1650308121
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Model Name: CNN-2-conv-2-dense-16-nodes-1650308174
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Model Name: CNN-2-conv-1-dense-256-nodes-1650308207
Epoch 1/10


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Model Name: CNN-2-conv-1-dense-64-nodes-1650308309
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Model Name: CNN-2-conv-1-dense-16-nodes-1650308360
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
