In [0]:
#from google.colab import drive
#drive.mount('/content/drive')

In [0]:
import sys
import h5py
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from tensorflow.keras import datasets, layers, models

np.random.seed(0)

# Utils




In [0]:

def get_dataset_name(file_name_with_dir):
    """Derive the HDF5 dataset key from a data file path.

    The key is the last '/'-separated component of the path with its final
    '_'-separated field removed, e.g. 'dir/rest_105923_1.h5' -> 'rest_105923'.
    A file name that contains no underscore yields an empty string.
    """
    # Everything after the last '/' (the whole string when there is none).
    base_name = file_name_with_dir.rpartition('/')[2]
    # Everything before the last '_' ('' when there is none).
    return base_name.rpartition('_')[0]
    
    

# Data

In [0]:
# INTRA train 

# Location of the intra-subject training files and the subject they belong to.
directory = "/content/drive/My Drive/DL-final_project/Data/Intra/train/"
subject_id = 105923

# Class labels; a class id is the index of its name in this list.
class_names = ['rest', 'task_motor', 'task_story_math', 'task_working_memory']

n_sensors = 248         # FIXED: No. of features (sensors) in the signal
steps_per_file = 35624  # FIXED

# Sliding-window settings: window length and stride, both in rows of the
# (already subsampled) recording.
WINDOW_SIZE, WINDOW_SHIFT = 1000, 50

# Normalization statistics: comma-separated tables whose columns are indexed
# by class id in normalize().
maxs_file_path = "/content/drive/My Drive/DL-final_project/data_stats/intra_train_maxs.txt"
mins_file_path = "/content/drive/My Drive/DL-final_project/data_stats/intra_train_mins.txt"
maxs_array, mins_array = (
    np.loadtxt(stats_path, delimiter=',')
    for stats_path in (maxs_file_path, mins_file_path)
)

In [0]:
def normalize(matrix, class_id):
  """Min-max scale every row of `matrix` with the precomputed class statistics.

  Args:
    matrix: 2-D array whose rows line up with the rows of `maxs_array` /
      `mins_array`.
    class_id: column of the statistics tables to use.

  Returns:
    Array of the same shape as `matrix`, computed as (matrix - min) / (max - min)
    row by row. Rows whose recorded max equals their min are divided by 1
    instead of 0, so they come out as (matrix - min) rather than NaN/inf.

  NOTE(review): the statistics are selected by the class label, so the scaling
  applied to a sample depends on its label - confirm this is intended, since
  the label is not available for unseen data.
  """
  # Column vectors so they broadcast across the columns of `matrix`.
  maxs = maxs_array[:, class_id].reshape(-1, 1)
  mins = mins_array[:, class_id].reshape(-1, 1)

  value_range = maxs - mins
  # A zero range would turn the whole row into NaN/inf and poison training.
  value_range = np.where(value_range == 0, 1.0, value_range)
  return (matrix - mins) / value_range

def subsample(matrix, skip=5):
  """Keep every `skip`-th column of `matrix`, starting with column 0."""
  every_nth = slice(None, None, skip)
  return matrix[:, every_nth]


def get_chunk(chunk_id, subject_id=105923, val_split=0.2, shuffle=False, return_train=False):
  '''
  Load one chunk file per class and cut it into overlapping, labelled windows.

  For every class in `class_names` the file
  `<directory>/<class_name>_<subject_id>_<chunk_id>.h5` is read, normalized
  with that class's statistics, subsampled (every 3rd column), transposed and
  sliced into windows of WINDOW_SIZE rows, one window every WINDOW_SHIFT rows.
  Each window is labelled with the index of its class in `class_names`.

  Args:
    chunk_id: chunk number used in the file name.
    subject_id: subject number used in the file name.
    val_split: fraction of all windows placed in the validation split
      (the tail of the window list).
    shuffle: if True, permute the windows with the global NumPy RNG before
      splitting.
    return_train: if True, also return the training split.

  Returns:
    (val_data_X, val_data_Y) by default, or
    (train_data_X, train_data_Y, val_data_X, val_data_Y) when `return_train`
    is True. X arrays are float16, Y arrays are int8.

  Note:
    Without shuffling the windows stay ordered class by class, so the
    validation tail is taken from the end of that ordering and does not mix
    the classes evenly. Because consecutive windows overlap, a shuffled split
    places near-identical windows on both sides of the split.
  '''

  # Initialize the dataset
  data_X = []
  data_Y = []

  for class_id, class_name in enumerate(class_names):

    filename_path = directory + '/' + class_name + '_' + str(subject_id) + '_' + str(chunk_id) + '.h5'

    ### Supplied code
    with h5py.File(filename_path,'r') as f:
      dataset_name = get_dataset_name(filename_path)
      matrix = f.get(dataset_name)[()]
    ###

    matrix = normalize(matrix, class_id)
    matrix = subsample(matrix, skip=3)
    # Cast once per file, before windowing: the overlapping windows are then
    # stacked directly in float16 instead of being materialized at full
    # precision first. The resulting values are identical.
    matrix = matrix.T.astype(np.float16)
    # `i` is the index of the last row of each window.
    for i in range(WINDOW_SIZE-1, matrix.shape[0], WINDOW_SHIFT):
      data_X.append(matrix[i-WINDOW_SIZE+1: i+1, :])
      data_Y.append(class_id)

  data_X = np.asarray(data_X, dtype=np.float16)
  data_Y = np.asarray(data_Y).astype(np.int8)

  if shuffle:
    indices = np.arange(data_X.shape[0])
    np.random.shuffle(indices)
    data_X = data_X[indices,:,:]
    data_Y = data_Y[indices]

  n_instances = data_X.shape[0]
  n_instances_train = round((1-val_split) * n_instances)
  train_data_X = data_X[:n_instances_train,:,:]
  train_data_Y = data_Y[:n_instances_train]
  val_data_X = data_X[n_instances_train:,:,:]
  val_data_Y = data_Y[n_instances_train:]

  # `.nbytes` reports the size of the array data; sys.getsizeof() on a slice
  # (a view) only counts the small array header.
  print("Dataset info ---")
  print("train_data_X shape:", train_data_X.shape)
  print("train_data_X size in bytes:", train_data_X.nbytes)
  print("train_data_X data type:", train_data_X.dtype)
  print("train_data_Y shape:", train_data_Y.shape)
  print("\n")
  print("val_data_X shape:", val_data_X.shape)
  print("val_data_X size in bytes:", val_data_X.nbytes)
  print("val_data_X data type:", val_data_X.dtype)
  print("val_data_Y shape:", val_data_Y.shape)
  print("----------------")

  if return_train:
    return train_data_X, train_data_Y, val_data_X, val_data_Y
  return val_data_X, val_data_Y


def build_Dataset(train_val_split=0.8, batch_size=16, chunk_id=None):
    """Build batched tf.data training and validation datasets from one chunk.

    Args:
        train_val_split: fraction of the loaded windows used for training.
        batch_size: batch size of both datasets.
        chunk_id: chunk to load. When None, the notebook-level `chunk_id` is
            used if it exists, otherwise chunk 1.

    Returns:
        (train_data, val_data): the training dataset is cached, shuffled and
        batched; the validation dataset is only batched.

    Note:
        get_chunk() hands back only its validation portion of the chunk, so
        both datasets are carved out of that portion.
    """
    if chunk_id is None:
        # Previously this read a global that is only defined on the
        # non-generator path and raised NameError otherwise.
        chunk_id = globals().get('chunk_id', 1)

    # Shuffle inside get_chunk: its unshuffled output is ordered class by
    # class, and the tail it returns would not contain all classes.
    data_X, data_Y = get_chunk(chunk_id, shuffle=True)

    n_train = int(train_val_split * data_Y.shape[0])
    train_data_X, train_data_Y = data_X[:n_train,:,:], data_Y[:n_train]
    val_data_X, val_data_Y = data_X[n_train:,:,:], data_Y[n_train:]

    train_data = tf.data.Dataset.from_tensor_slices((train_data_X, train_data_Y))
    train_data = train_data.cache().shuffle(data_Y.shape[0]).batch(batch_size)

    val_data = tf.data.Dataset.from_tensor_slices((val_data_X, val_data_Y))
    val_data = val_data.batch(batch_size)

    return train_data, val_data



def data_generator(subject_id=105923, mode='training', val_split=0.2, shuffle=False, n_chunks=8):
  '''
  Endlessly yield (X, Y) arrays, one chunk per step, cycling over chunks 1..n_chunks.

  Each step loads the chunk file of every class in `class_names`, normalizes
  and subsamples it (every 3rd column), cuts it into windows of WINDOW_SIZE
  rows taken every WINDOW_SHIFT rows, and yields either the leading
  (1 - val_split) share of the windows ('training') or the remaining tail
  ('validation'). X is float16, Y is int8 class ids.

  Args:
    subject_id: subject number used in the file names.
    mode: 'training' or 'validation'.
    val_split: fraction of each chunk's windows reserved for validation.
    shuffle: if True, permute the windows with the global NumPy RNG before
      splitting.
    n_chunks: number of chunk files per class; after the last one the
      generator wraps around to chunk 1.

  Raises:
    ValueError: on the first `next()` call if `mode` is not recognised
      (previously the loop spun forever without yielding anything).

  Note:
    With shuffle=True every generator instance draws its own permutations from
    the shared global RNG, so a 'training' and a 'validation' generator do not
    produce complementary splits of a chunk.
  '''
  if mode not in ('training', 'validation'):
    raise ValueError("mode must be 'training' or 'validation', got {!r}".format(mode))

  chunk_id = 1

  while True:
    # Initialize the dataset
    data_X = []
    data_Y = []

    for class_id, class_name in enumerate(class_names):
      filename_path = directory + '/' + class_name + '_' + str(subject_id) + '_' + str(chunk_id) + '.h5'

      ### Supplied code
      with h5py.File(filename_path,'r') as f:
        dataset_name = get_dataset_name(filename_path)
        matrix = f.get(dataset_name)[()]
      ###

      matrix = normalize(matrix, class_id)
      matrix = subsample(matrix, skip=3)
      # Cast once per file so the overlapping windows are stacked directly in
      # float16; the values are the same as casting after stacking.
      matrix = matrix.T.astype(np.float16)
      # `i` is the index of the last row of each window.
      for i in range(WINDOW_SIZE-1, matrix.shape[0], WINDOW_SHIFT):
        data_X.append(matrix[i-WINDOW_SIZE+1: i+1, :])
        data_Y.append(class_id)

    data_X = np.asarray(data_X, dtype=np.float16)
    data_Y = np.asarray(data_Y).astype(np.int8)

    if shuffle:
      indices = np.arange(data_X.shape[0])
      np.random.shuffle(indices)
      data_X = data_X[indices,:,:]
      data_Y = data_Y[indices]

    n_instances = data_X.shape[0]
    n_instances_train = round((1-val_split)*n_instances)

    if mode == 'training':
      print("\nYielding: Chunk {} (training)".format(chunk_id))
      yield data_X[:n_instances_train,:,:], data_Y[:n_instances_train]
    else:
      yield data_X[n_instances_train:,:,:], data_Y[n_instances_train:]

    # Wrap around so the generator never runs dry.
    chunk_id += 1
    if chunk_id > n_chunks:
      chunk_id = 1


# Model

In [0]:
def CNN_model(time_window_size, n_conv_layers, kernel_sizes, hyper_params=None):
    """Build and compile a 1-D CNN classifier with four softmax outputs.

    The network is one input Conv1D + MaxPooling1D stage followed by
    `n_conv_layers` further Conv1D + MaxPooling1D stages (32 filters each),
    then Flatten, Dense(64, relu) and Dense(4, softmax).

    Args:
        time_window_size: number of time steps per input window; the input
            shape is (time_window_size, 248).
        n_conv_layers: number of Conv1D/MaxPooling1D stages added after the
            input stage.
        kernel_sizes: kernel sizes of the conv layers in order, input layer
            first. Layers without an entry use 3 and surplus entries are
            ignored. This argument used to be ignored entirely; a list made
            of 3s (or None) reproduces the previous architecture.
        hyper_params: optional dict; 'lr' is the Adam learning rate
            (default 0.001).

    Returns:
        The compiled model (its summary is printed as a side effect).
    """
    if hyper_params is None:
        hyper_params = {}
    learning_rate = hyper_params.get('lr', 0.001)

    # One kernel size per conv layer, padded with the former fixed value 3.
    n_total_conv = max(n_conv_layers, 0) + 1
    conv_kernel_sizes = list(kernel_sizes or [])[:n_total_conv]
    conv_kernel_sizes += [3] * (n_total_conv - len(conv_kernel_sizes))

    tf.random.set_seed(0)
    tf.keras.backend.clear_session()

    model = models.Sequential()
    model.add(layers.Conv1D(filters=32, kernel_size=conv_kernel_sizes[0], strides=1,
                            activation='relu',
                            input_shape=(time_window_size, 248)))
    model.add(layers.MaxPooling1D(pool_size=2))

    for kernel_size in conv_kernel_sizes[1:]:
        model.add(layers.Conv1D(filters=32, kernel_size=kernel_size, strides=1, activation='relu'))
        model.add(layers.MaxPooling1D(pool_size=2))

    model.add(layers.Flatten())
    model.add(layers.Dense(units=64, activation='relu'))
    model.add(layers.Dense(units=4, activation='softmax'))

    # Sparse loss: labels are integer class ids, not one-hot vectors.
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    model.summary()
    return model

In [0]:
# Get data
USE_GENERATOR = True

if not USE_GENERATOR:
  chunk_id = 1  # 1 to 8 for INTRA training set
  # get_chunk() returns only the validation pair; unpacking four values from
  # it raised ValueError.
  val_data_X, val_data_Y = get_chunk(chunk_id)
  # data_generator() starts at chunk 1 and, with the same defaults (no
  # shuffle, val_split=0.2), yields the complementary leading part of each
  # chunk. Stepping it `chunk_id` times reaches the requested chunk, at the
  # cost of loading the earlier chunks on the way.
  train_chunks = data_generator(mode='training')
  for _ in range(chunk_id):
    train_data_X, train_data_Y = next(train_chunks)

else:
  # Use generators
  train_generator = data_generator(mode='training', shuffle=True)
  val_generator = data_generator(mode='validation', shuffle=True)
  

In [8]:
# The model's input length must equal the window length used to cut the data.
baseline = CNN_model(WINDOW_SIZE, 3, [3,3], hyper_params={'lr':0.001})

# NOTE(review): these validation windows come from chunk 1 under a fresh
# shuffle, while the training data also includes chunk 1 under an independent
# shuffle, so the two sets overlap and the validation metrics are optimistic.
val_data_X, val_data_Y = get_chunk(chunk_id=1, shuffle=True)

if not USE_GENERATOR:

  baseline.fit(train_data_X, train_data_Y,
               batch_size=8,
               validation_data=(val_data_X, val_data_Y),
               epochs=10)

else:
  # Every generator step yields one whole chunk, so 8 steps make one pass over
  # the 8 chunks. `validation_steps` was dropped: the validation data is a
  # pair of in-memory arrays and is evaluated in full each epoch.
  baseline.fit(train_generator,
               steps_per_epoch=8,
               validation_data=(val_data_X, val_data_Y),
               epochs=50)


Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d (Conv1D)              (None, 998, 32)           23840     
_________________________________________________________________
max_pooling1d (MaxPooling1D) (None, 499, 32)           0         
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 497, 32)           3104      
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 248, 32)           0         
_________________________________________________________________
conv1d_2 (Conv1D)            (None, 246, 32)           3104      
_________________________________________________________________
max_pooling1d_2 (MaxPooling1 (None, 123, 32)           0         
_________________________________________________________________
conv1d_3 (Conv1D)            (None, 121, 32)           3

In [9]:
# Per-window class probabilities from the softmax output layer; column k is
# the probability of class_names[k].
baseline.predict(val_data_X)

array([[3.34193082e-08, 2.81708144e-05, 9.99971747e-01, 4.97601604e-08],
       [1.70386245e-03, 3.60451802e-03, 6.00777392e-04, 9.94090736e-01],
       [1.25989843e-07, 1.87193640e-04, 9.99812543e-01, 1.25740655e-07],
       [4.00258529e-07, 3.63835687e-04, 9.99635100e-01, 6.36183302e-07],
       [1.25175148e-07, 1.30394721e-04, 9.99869347e-01, 1.66939287e-07],
       [9.99981999e-01, 1.97020981e-07, 1.59304932e-07, 1.75614041e-05],
       [1.47442927e-06, 9.99969363e-01, 2.90979606e-05, 1.57698238e-07],
       [1.25336260e-06, 9.99976277e-01, 2.23155057e-05, 1.28497760e-07],
       [1.59374089e-03, 3.82253504e-03, 1.35401648e-03, 9.93229628e-01],
       [2.40235158e-06, 9.99957681e-01, 3.96591677e-05, 2.97316802e-07],
       [1.48489823e-06, 1.38765574e-03, 9.98609304e-01, 1.57426462e-06],
       [1.89013690e-06, 9.99948859e-01, 4.90177517e-05, 2.14802313e-07],
       [2.36080851e-07, 2.03636053e-04, 9.99795854e-01, 2.94102563e-07],
       [9.99994159e-01, 3.91838064e-08, 3.15451203e

In [10]:
# Integer class ids of the validation windows (index into class_names), in the
# same order as the rows of val_data_X.
val_data_Y

array([2, 3, 2, 2, 2, 0, 1, 1, 3, 1, 2, 1, 2, 0, 1, 1, 1, 1, 0, 2, 2, 1,
       1, 2, 3, 2, 0, 1, 3, 3, 1, 3, 0, 0, 0, 1, 0, 0, 0, 0, 3, 0, 0, 1,
       2, 0, 0, 3, 2, 0, 3, 2, 0, 1, 0, 3, 0, 3, 2, 1, 2, 1, 0, 1, 2, 2,
       1, 1, 3, 1, 2, 0, 0, 3, 3, 0, 2, 1, 2, 1, 3, 1, 3, 0, 0, 0, 3, 0,
       3, 1, 0, 0, 2, 2, 2, 1, 0, 0, 1, 3, 0, 1, 0, 1, 1, 3, 2, 0, 2, 1,
       1, 1, 3, 2, 2, 0, 3, 0, 3, 0, 0, 2, 2, 3, 0, 1, 3, 2, 2, 2, 3, 1,
       1, 1, 0, 3, 3, 3, 0, 0, 2, 1, 3, 3, 0, 3, 0, 2, 3, 3, 0, 0, 2, 2,
       3, 1, 1, 2, 2, 0, 2, 3, 3, 1, 3, 0, 1, 3, 3, 3, 0, 2, 2, 3],
      dtype=int8)

In [0]:
# The target path was an empty string; give the saved model an explicit file
# name (the .h5 suffix selects Keras' single-file HDF5 format).
baseline.save("baseline_cnn.h5")