In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
cd drive/MyDrive/C247Project/project/

In [None]:
!pip install tensorflow

In [None]:
import tensorflow as tf
from tensorflow.keras.metrics import categorical_accuracy
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Permute, Bidirectional, BatchNormalization, Conv2D, GRU, Dense, Dropout, ELU, Flatten, MaxPool2D, TimeDistributed, Dense
from tensorflow.keras.regularizers import L1L2
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.initializers import lecun_uniform
from tensorflow.keras.optimizers import Adam
import numpy as np
import matplotlib.pyplot as plt

In [None]:
tf.test.gpu_device_name()

'/device:GPU:0'

## Import Datasets

In [None]:
X_test = np.load("Dataset/X_test.npy")
y_test = np.load("Dataset/y_test.npy")
X_train_valid = np.load("Dataset/X_train_valid.npy")
y_train_valid = np.load("Dataset/y_train_valid.npy")
person_train_valid = np.load("Dataset/person_train_valid.npy")
person_test = np.load("Dataset/person_test.npy")
X_synthetic = np.load("synthetic_data.npy")

## Adjusting the labels 
y_train_valid -= 769
y_test -= 769

print ('Training/Valid data shape: {}'.format(X_train_valid.shape))
print ('Test data shape: {}'.format(X_test.shape))
print ('Training/Valid target shape: {}'.format(y_train_valid.shape))
print ('Test target shape: {}'.format(y_test.shape))
print ('Person train/valid shape: {}'.format(person_train_valid.shape))
print ('Person test shape: {}'.format(person_test.shape))
print ('Synthetic data shape: {}'.format(X_synthetic.shape))
print ('Synthetic target shape: {}'.format(y_train_valid.shape))

Training/Valid data shape: (2115, 22, 1000)
Test data shape: (443, 22, 1000)
Training/Valid target shape: (2115,)
Test target shape: (443,)
Person train/valid shape: (2115, 1)
Person test shape: (443, 1)
Fake data shape: (2115, 22, 1000)
Fake target shape: (2115,)


In [None]:
y_synthetic = y_train_valid
print(y_synthetic[0:10])

[2 3 0 0 0 0 2 1 3 3]


## Function Definitions

In [None]:
def data_prep(X,y,sub_sample,average,noise):
    total_X = None
    total_y = None
    
    # Trimming the data (sample,22,1000) -> (sample,22,500)
    X = X[:,:,0:500]

    # Maxpooling the data (sample,22,1000) -> (sample,22,500/sub_sample)
    X_max = np.max(X.reshape(X.shape[0], X.shape[1], -1, sub_sample), axis=3)
    
    total_X, total_y = X_max, y
    
    # Averaging + noise 
    X_average = np.mean(X.reshape(X.shape[0], X.shape[1], -1, average),axis=3)
    X_average = X_average + np.random.normal(0.0, 0.5, X_average.shape)
    
    total_X = np.vstack((total_X, X_average))
    total_y = np.hstack((total_y, y))
    
    # Subsampling
    for i in range(sub_sample):
        X_subsample = X[:, :, i::sub_sample] + \
                            (np.random.normal(0.0, 0.5, X[:, :,i::sub_sample].shape) if noise else 0.0)
        total_X = np.vstack((total_X, X_subsample))
        total_y = np.hstack((total_y, y))
    
    return total_X,total_y
    

def print_shapes(title, x_1, y_1, x_2, y_2):
    print("Shape of [", title, "] data")
    print(x_1)
    print(y_1)
    print(x_2)
    print(y_2)

In [None]:
def sub_selection(idx, X_train, y_train, X_test, y_test, person_train_valid, person_test):
    train_idx = np.where(person_train_valid == idx)[0]
    test_idx = np.where(person_test == idx)[0]
    X_train = X_train[train_idx]
    y_train = y_train[train_idx]
    X_test = X_test[test_idx]
    y_test = y_test[test_idx]
    return X_train, y_train, X_test, y_test


def train(model, params, data, idx=None):
    epoch_num, batch_size = params
    x_train, y_train, x_test, y_test, person_train_valid, person_test = data

    # subject split
    if idx is not None:
        x_train, y_train, x_test, y_test = sub_selection(idx, x_train, y_train, x_test, y_test, person_train_valid, person_test)

    model.fit(x_train, y_train, epochs=epoch_num, batch_size=batch_size, 
              validation_data=(x_test, y_test), shuffle=True, verbose=1)
    train_score = model.evaluate(x_train, y_train)
    test_score = model.evaluate(x_test, y_test)

    print('train {:s}: {:.3f}%'.format(model.metrics_names[1], train_score[1]*100))
    print('test {:s}: {:.3f}%'.format(model.metrics_names[1], test_score[1]*100))
    
    return train_score, test_score

## Preprocess Data

In [None]:
#Stack real and fake data

X_train_valid = np.vstack((X_train_valid, X_synthetic))
y_train_valid = np.hstack((y_train_valid, y_synthetic))

In [None]:
print(X_train_valid.shape)
print(y_train_valid.shape)

(4230, 22, 1000)
(4230,)


In [None]:
## Preprocessing the dataset
X_train_valid_prep, y_train_valid_prep = data_prep(X_train_valid,y_train_valid,2,2,True)
X_test_prep, y_test_prep = data_prep(X_test,y_test,2,2,True)
print_shapes("preprocessed X_train, X_test", X_train_valid_prep.shape, y_train_valid_prep.shape, X_test_prep.shape, y_test_prep.shape)

## Random splitting and reshaping the data

# First generating the training and validation indices using random splitting
ind_valid = np.random.choice(8460, 1500, replace=False)
ind_train = np.array(list(set(range(8460)).difference(set(ind_valid))))

# Creating the training and validation sets using the generated indices
(x_train, x_valid) = X_train_valid_prep[ind_train], X_train_valid_prep[ind_valid]
(y_train, y_valid) = y_train_valid_prep[ind_train], y_train_valid_prep[ind_valid]
print_shapes("train and validation", x_train.shape, y_train.shape, x_valid.shape, y_valid.shape)

# Converting the labels to categorical variables for multiclass classification
y_train = to_categorical(y_train, 4)
y_valid = to_categorical(y_valid, 4)
y_test = to_categorical(y_test_prep, 4)

# Adding width of the segment to be 1
x_train = x_train.reshape(x_train.shape[0], x_train.shape[1], x_train.shape[2], 1)
x_valid = x_valid.reshape(x_valid.shape[0], x_valid.shape[1], x_train.shape[2], 1)
x_test = X_test_prep.reshape(X_test_prep.shape[0], X_test_prep.shape[1], X_test_prep.shape[2], 1)
print("Add width info")
print('Training set:', x_train.shape)
print('Validation set:', x_valid.shape)
print('Test set:', x_test.shape)

# Reshaping the training and validation dataset
x_train = np.swapaxes(x_train, 1,3)
x_train = np.swapaxes(x_train, 1,2)
x_valid = np.swapaxes(x_valid, 1,3)
x_valid = np.swapaxes(x_valid, 1,2)
x_test = np.swapaxes(x_test, 1,3)
x_test = np.swapaxes(x_test, 1,2)
print("Dimension reshaping")
print('Training set',x_train.shape)
print('Validation set',x_valid.shape)
print('Test set',x_test.shape)

Shape of [ preprocessed X_train, X_test ] data
(16920, 22, 250)
(16920,)
(1772, 22, 250)
(1772,)
Shape of [ train and validation ] data
(6960, 22, 250)
(6960,)
(1500, 22, 250)
(1500,)
Add width info
Training set: (6960, 22, 250, 1)
Validation set: (1500, 22, 250, 1)
Test set: (1772, 22, 250, 1)
Dimension reshaping
Training set (6960, 250, 1, 22)
Validation set (1500, 250, 1, 22)
Test set (1772, 250, 1, 22)


## GRU model Set up

In [None]:
## Hyper Parameters
learning_rate = 1e-3
epoch_num = 50
batch_size = 30
dropout_rate = 0.3

# Constants
num_classes = 4

In [None]:
def GRU_model(kernel_initializer, params):
    lr, dropout_rate = params

    channels_in_1, channels_out_1, kernel_size_1, stride_1 = (250,1,22), 25, (10, 1), 1
    pool_size_1, pool_stride_1 = (3, 1), 1
    channels_out_2, kernel_size_2, stride_2 = 50, (10, 1), 1
    pool_size_2, pool_stride_2 = (3, 1), 1
    channels_out_3, kernel_size_3, stride_3 = 100, (10, 1), 1
    pool_size_3, pool_stride_3 = (3, 1), 1
    channels_out_4, kernel_size_4, stride_4 = 200, (10, 1), 1
    pool_size_4, pool_stride_4 = (3, 1), 1
    hiddendim_1, hiddendim_2, hiddendim_3 = 100, 50, 25
    dropout_rate = 0.5
    num_classes = 4

    model = Sequential(
        [
         
        Conv2D(filters=channels_out_1, kernel_size=kernel_size_1, padding='same', activation='elu', input_shape=channels_in_1),
        MaxPool2D(pool_size=pool_size_1, padding='same'),
        BatchNormalization(),
        Dropout(dropout_rate),

        Conv2D(filters=channels_out_2, kernel_size=kernel_size_2, padding='same', activation='elu'),
        MaxPool2D(pool_size=pool_size_2, padding='same'),
        BatchNormalization(),
        Dropout(dropout_rate),

        Conv2D(filters=channels_out_3, kernel_size=kernel_size_3, padding='same', activation='elu'),
        MaxPool2D(pool_size=pool_size_3, padding='same'),
        BatchNormalization(),
        Dropout(dropout_rate),

        Conv2D(filters=channels_out_4, kernel_size=kernel_size_4, padding='same', activation='elu'),
        MaxPool2D(pool_size=pool_size_4, padding='same'),
        BatchNormalization(),
        Dropout(dropout_rate),
         
        Permute((2, 3, 1)),
        TimeDistributed(Flatten()),
        Bidirectional(GRU(hiddendim_1, kernel_initializer=kernel_initializer, return_sequences=True)),
        Bidirectional(GRU(hiddendim_2, kernel_initializer=kernel_initializer, return_sequences=True)),
        Bidirectional(GRU(hiddendim_3, kernel_initializer=kernel_initializer)),
        Dropout(dropout_rate),

        Dense(num_classes, activation='softmax'),

        ]
    )
    model.compile(loss='categorical_crossentropy', 
                  optimizer=Adam(learning_rate=lr),
                  metrics=[categorical_accuracy])
    return model

## Train the GRU Models

In [None]:
test_acc = []
gru_model = GRU_model(lecun_uniform(seed=42), [learning_rate, dropout_rate])
train_score, test_score = train(gru_model,
                                [epoch_num, batch_size],
                                [x_train, y_train, x_test, y_test, person_train_valid, person_test]
                                )

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
train categorical_accuracy: 62.701%
test categorical_accuracy: 62.754%


In [None]:

print('test {:s}: {:.3f}%'.format(gru_model.metrics_names[1], test_score[1]*100))
print(test_score[1]*100)

test categorical_accuracy: 62.754%
62.75395154953003


## Tune the parameters

In [None]:
test_acc = []
learning_rates = [5e-4, 1e-3, 5e-3]
dropout_rates = [0.2, 0.3, 0.4]
batch_sizes = [60,65]
for learning_rate in learning_rates:
  for dropout_rate in dropout_rates:
    for batch_size in batch_sizes:
      gru_model = GRU_model(lecun_uniform(seed=42), [learning_rate, dropout_rate])
      train_score, test_score = train(gru_model,
                                      [30, batch_size],
                                      [x_train, y_train, x_test, y_test, person_train_valid, person_test]
                                      )
      
      test_acc.append((learning_rate, dropout_rate, batch_size, test_score[1]*100))
print(test_acc)


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
train categorical_accuracy: 57.184%
test categorical_accuracy: 61.512%
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
train categorical_accuracy: 58.391%
test categorical_accuracy: 64.503%
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/

In [None]:
print(test_acc)

[(0.0005, 0.2, 60, 63.31828236579895), (0.0005, 0.2, 65, 63.37471604347229), (0.0005, 0.3, 60, 62.30248212814331), (0.0005, 0.3, 65, 59.87584590911865), (0.0005, 0.4, 60, 63.03611993789673), (0.0005, 0.4, 65, 61.34311556816101), (0.001, 0.2, 60, 64.33408856391907), (0.001, 0.2, 65, 64.16478753089905), (0.001, 0.3, 60, 63.20541501045227), (0.001, 0.3, 65, 64.22122120857239), (0.001, 0.4, 60, 63.82618546485901), (0.001, 0.4, 65, 64.84198570251465), (0.005, 0.2, 60, 61.51241660118103), (0.005, 0.2, 65, 58.182841539382935), (0.005, 0.3, 60, 60.21444797515869), (0.005, 0.3, 65, 50.45146942138672), (0.005, 0.4, 60, 59.48081016540527), (0.005, 0.4, 65, 58.239275217056274)]


In [None]:
print(test_acc)

[(0.0005, 0.2, 60, 61.51241660118103), (0.0005, 0.2, 65, 64.50338363647461), (0.0005, 0.3, 60, 63.48758339881897), (0.0005, 0.3, 65, 62.97968626022339), (0.0005, 0.4, 60, 64.44694995880127), (0.0005, 0.4, 65, 61.11738085746765), (0.001, 0.2, 60, 59.81941223144531), (0.001, 0.2, 65, 58.80361199378967), (0.001, 0.3, 60, 60.27088165283203), (0.001, 0.3, 65, 58.521443605422974), (0.001, 0.4, 60, 60.04514694213867), (0.001, 0.4, 65, 63.43114972114563), (0.005, 0.2, 60, 47.91196286678314), (0.005, 0.2, 65, 45.9367960691452), (0.005, 0.3, 60, 43.623024225234985), (0.005, 0.3, 65, 48.1376975774765), (0.005, 0.4, 60, 46.78329527378082), (0.005, 0.4, 65, 46.16252779960632)]
