In [1]:
# What version of Python do you have?
import sys

import tensorflow.keras
import pandas as pd
import sklearn as sk
import tensorflow as tf

print(f"Tensor Flow Version: {tf.__version__}")
print(f"Keras Version: {tensorflow.keras.__version__}")
print()
print(f"Python {sys.version}")
print(f"Pandas {pd.__version__}")
print(f"Scikit-Learn {sk.__version__}")
gpu = len(tf.config.list_physical_devices('GPU'))>0
print("GPU is", "available" if gpu else "NOT AVAILABLE")

Init Plugin
Init Graph Optimizer
Init Kernel
Tensor Flow Version: 2.5.0
Keras Version: 2.5.0

Python 3.9.7 (default, Sep 16 2021, 23:53:23) 
[Clang 12.0.0 ]
Pandas 1.3.5
Scikit-Learn 1.0.2
GPU is available


In [2]:
import pyarrow.feather as feather
import matplotlib.pyplot as plt
import numpy as np

In [3]:
# loading in the training data
path = '/Users/asgnxt/mne-miniconda/mne_data/train_val_16/train_16/eyesclosed_train.feather'
df_closed = feather.read_feather(path)
path = '/Users/asgnxt/mne-miniconda/mne_data/train_val_16/train_16/eyesopen_train.feather'
df_open = feather.read_feather(path)
path = '/Users/asgnxt/mne-miniconda/mne_data/train_val_16/train_16/mathematic_train.feather'
df_math = feather.read_feather(path)
path = '/Users/asgnxt/mne-miniconda/mne_data/train_val_16/train_16/memory_train.feather'
df_memory = feather.read_feather(path)
path = '/Users/asgnxt/mne-miniconda/mne_data/train_val_16/train_16/music_train.feather'
df_music = feather.read_feather(path)

In [4]:
# is data normalized?
print(df_closed.iloc[:, :-1].mean())
print(df_closed.iloc[:, :-1].std())

0       -3.576279e-07
1        4.172325e-07
2        1.192093e-07
3       -7.152557e-07
4       -8.344650e-07
             ...     
29994    8.940697e-07
29995    7.152557e-07
29996    3.576279e-07
29997    5.960464e-07
29998   -1.192093e-07
Length: 29999, dtype: float16
0        0.0
1        0.0
2        0.0
3        0.0
4        0.0
        ... 
29994    0.0
29995    0.0
29996    0.0
29997    0.0
29998    0.0
Length: 29999, dtype: float16


Apparently mean and sd of df_closed are 0 at this precision level!

In [5]:
# loading in the validation data
path = '/Users/asgnxt/mne-miniconda/mne_data/train_val_16/val_16/eyesclosed_val.feather'
df_closed_val = feather.read_feather(path)
path = '/Users/asgnxt/mne-miniconda/mne_data/train_val_16/val_16/eyesopen_val.feather'
df_open_val = feather.read_feather(path)
path = '/Users/asgnxt/mne-miniconda/mne_data/train_val_16/val_16/mathematic_val.feather'
df_math_val = feather.read_feather(path)
path = '/Users/asgnxt/mne-miniconda/mne_data/train_val_16/val_16/memory_val.feather'
df_memory_val = feather.read_feather(path)
path = '/Users/asgnxt/mne-miniconda/mne_data/train_val_16/val_16/music_val.feather'
df_music_val = feather.read_feather(path)


In [6]:
# determine the number of samples in each dataframe
print(df_closed.shape)
print(df_open.shape)
print(df_math.shape)
print(df_memory.shape)
print(df_music.shape)
print(df_closed_val.shape)
print(df_open_val.shape)
print(df_math_val.shape)
print(df_memory_val.shape)
print(df_music_val.shape)

(7076, 30000)
(7076, 30000)
(7259, 30000)
(7259, 30000)
(7076, 30000)
(854, 30000)
(854, 30000)
(854, 30000)
(854, 30000)
(915, 30000)


The training data is either 7076 or 7259 rows x 30000 columns. Given there are 61 channels of EEG, there are 7076 / 61 = 116 / 119 distinct recordings of 300 sec each (100 Hz sampling).

Imagining an EEG 'frame' of 61 x 61 (61 channels x 610 ms); each row can be thought of as a movie with ~492 frames. Each activity has a training set of 492 x 116 or 492 x 119 frames of data from a subset of subjects and sessions

In [28]:
print((30000/61)*116*5) # number of training epochs
print((854/61)*(30000/61)) # number of validation epochs

285245.9016393443
6885.245901639344


In [46]:
# defining parameters for the model

batch_size = 32

img_width = 61 

img_height = 61

num_channels = 61
print(f'Number of channels: {num_channels}')
# defining the number of samples

num_samples = 30000
print(f'Number of samples: {num_samples}')

# defining the number of frames
num_frames = num_samples/num_channels
print(f'image_size = 61 x 61')
print(f'Number of images per row: {num_frames}')

# defining the number of classes
num_classes = 5
print(f'Number of classes: {num_classes}')

# defining the number of epochs
num_training_epochs = num_frames * 116 * num_classes
print(f'num_training_epochs = {num_training_epochs.__round__()}')

Number of channels: 61
Number of samples: 30000
image_size = 61 x 61
Number of images per row: 491.8032786885246
Number of classes: 5
num_training_epochs = 285246


In [79]:
# create labels for each dataframe with float16 precision
df_closed['label'] = 0
df_open['label'] = 1
df_math['label'] = 2
df_memory['label'] = 3
df_music['label'] = 4
df_closed_val['label'] = 0
df_open_val['label'] = 1
df_math_val['label'] = 2
df_memory_val['label'] = 3
df_music_val['label'] = 4

In [80]:
# force the labels to be float16 precision
df_closed['label'] = df_closed['label'].astype('float16')
df_open['label'] = df_open['label'].astype('float16')
df_math['label'] = df_math['label'].astype('float16')
df_memory['label'] = df_memory['label'].astype('float16')
df_music['label'] = df_music['label'].astype('float16')
df_closed_val['label'] = df_closed_val['label'].astype('float16')
df_open_val['label'] = df_open_val['label'].astype('float16')
df_math_val['label'] = df_math_val['label'].astype('float16')
df_memory_val['label'] = df_memory_val['label'].astype('float16')
df_music_val['label'] = df_music_val['label'].astype('float16')


In [70]:
# ensure that the dataframes are correctly labeled
df_music.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,29991,29992,29993,29994,29995,29996,29997,29998,29999,label
0,1.6e-05,-2e-05,-2.8e-05,-1.597404e-05,-3.4e-05,-2.3e-05,2e-05,-2e-06,-2.2e-05,-2e-06,...,-1.3e-05,-1.8e-05,-4e-06,4e-06,-3e-05,-4.2e-05,-2.2e-05,-1.3e-05,-1.7e-05,4.0
1,2.4e-05,-2.7e-05,-1.5e-05,-1.591444e-05,-1.9e-05,-2.1e-05,-1.9e-05,-5e-06,-2.5e-05,-2.8e-05,...,-1.7e-05,-1.5e-05,-7e-06,-7e-06,-2.9e-05,-3.2e-05,-2.7e-05,-1.8e-05,-2.2e-05,4.0
2,-1.1e-05,1.1e-05,-2.1e-05,9.536743e-07,2.3e-05,7e-06,3e-05,3e-05,-6e-06,4e-06,...,-3e-06,-2e-06,1.3e-05,1.5e-05,-9e-06,-2.3e-05,-5e-06,-2e-06,-1.1e-05,4.0
3,2.4e-05,-2.7e-05,-1.6e-05,-1.943111e-05,-2.3e-05,-1.7e-05,-1.6e-05,-7e-06,-1.4e-05,-1.5e-05,...,-1.6e-05,-1.6e-05,-1.2e-05,-9e-06,-2.3e-05,-2.2e-05,-2.2e-05,-1.8e-05,-2e-05,4.0
4,1.4e-05,-1.6e-05,-9e-06,-1.251698e-05,-1.6e-05,-1.6e-05,-2e-05,-1.7e-05,-2.6e-05,-2.8e-05,...,-9e-06,-9e-06,-3e-06,-3e-06,-1.6e-05,-1.7e-05,-1.5e-05,-1.1e-05,-1.2e-05,4.0


In [82]:
# Creating lists from each dataframe, each list contains one frame of data
list_df_closed = np.array_split(df_closed, 116)
print(list_df_closed[0].shape)
list_df_open = np.array_split(df_open, 116)
print(list_df_open[0].shape)
list_df_math = np.array_split(df_math, 119)
print(list_df_math[0].shape)
list_df_memory = np.array_split(df_memory, 119)
print(list_df_memory[0].shape)
list_df_music = np.array_split(df_music, 116)
print(list_df_music[0].shape)
list_df_closed_val = np.array_split(df_closed_val, 14)
print(list_df_closed_val[0].shape)
list_df_open_val = np.array_split(df_open_val, 14)
print(list_df_open_val[0].shape)
list_df_math_val = np.array_split(df_math_val, 14)
print(list_df_math_val[0].shape)
list_df_memory_val = np.array_split(df_memory_val, 14)
print(list_df_memory_val[0].shape)
list_df_music_val = np.array_split(df_music_val, 15)
print(list_df_music_val[0].shape)


(61, 30001)
(61, 30001)
(61, 30001)
(61, 30001)
(61, 30001)
(61, 30001)
(61, 30001)
(61, 30001)
(61, 30001)
(61, 30001)


In [72]:
# Create a training dataset with multiple sessions / subjects
training_examples = []
for i in range(116):
    training_examples.append(list_df_closed[i])
    training_examples.append(list_df_open[i])
    training_examples.append(list_df_math[i])
    training_examples.append(list_df_memory[i])
    training_examples.append(list_df_music[i])
    

In [83]:
# Create a validation dataset with multiple sessions / subjects
validation_examples = []
for i in range(14):
    validation_examples.append(list_df_closed_val[i])
    validation_examples.append(list_df_open_val[i])
    validation_examples.append(list_df_math_val[i])
    validation_examples.append(list_df_memory_val[i])
    validation_examples.append(list_df_music_val[i])

In [47]:
# defining parameters for the model

batch_size = 32

img_width = 61 

img_height = 61

num_channels = 61
print(f'Number of channels: {num_channels}')
# defining the number of samples

num_samples = 30000
print(f'Number of samples: {num_samples}')

# defining the number of frames
num_frames = num_samples/num_channels
print(f'image_size = 61 x 61')
print(f'Number of images per row: {num_frames}')

# defining the number of classes
num_classes = 5
print(f'Number of classes: {num_classes}')

# defining the number of epochs
num_training_epochs = num_frames * 116 * num_classes
print(f'num_training_epochs = {num_training_epochs.__round__()}')


Number of channels: 61
Number of samples: 30000
image_size = 61 x 61
Number of images per row: 491.8032786885246
Number of classes: 5
num_training_epochs = 285246


In [73]:
# creating a single training dataframe
training_examples = pd.concat(training_examples)
print(training_examples.shape)

(35380, 30001)


In [84]:
# creating a single validation dataframe
validation_examples = pd.concat(validation_examples)
print(validation_examples.shape)

(4270, 30001)


In [74]:
# ensuring uniform dtype
training_examples.dtypes

0        float16
1        float16
2        float16
3        float16
4        float16
          ...   
29996    float16
29997    float16
29998    float16
29999    float16
label    float16
Length: 30001, dtype: object

In [76]:
# create a separate target dataframe
target = training_examples.pop('label')
print(target.shape)
print(target.head())

(35380,)
0    0.0
1    0.0
2    0.0
3    0.0
4    0.0
Name: label, dtype: float16


In [None]:
train_ds = tf.keras.utils.Sequence(training_examples, batch_size=batch_size, num_channels=num_channels, img_height=img_height, img_width=img_width, num_classes=num_classes)

In [78]:
# convert the dataframe to a tensor
training_examples = tf.convert_to_tensor(training_examples)

Metal device set to: Apple M1

systemMemory: 16.00 GB
maxCacheSize: 5.33 GB



2022-03-24 14:26:07.637631: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2022-03-24 14:26:07.637866: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [134]:
# Create a model
model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(30000, 1)),
    tf.keras.layers.Dense(128, activation=tf.nn.relu),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(10, activation=tf.nn.softmax)
])

    

In [135]:
# Compile the model
model.compile(optimizer='adam',
                loss='sparse_categorical_crossentropy',
                metrics=['accuracy'])


In [136]:
# Train the model
model.fit(training_examples, target, epochs=5, batch_size=32)


Epoch 1/5
   1/1106 [..............................] - ETA: 4:23 - loss: 2.3026 - accuracy: 0.1250

2022-03-24 15:38:09.827059: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x142946d60>