In [1]:
# Read the dataset
import scipy.io as spio
import numpy as np

BahramFace = spio.loadmat(file_name='/home/arasdar/datasets/bci-project-data-RAW/BahramFace.mat')
DJFace = spio.loadmat(file_name='/home/arasdar/datasets/bci-project-data-RAW/DJFace.mat')
NickFace = spio.loadmat(file_name='/home/arasdar/datasets/bci-project-data-RAW/NickFace.mat')
RoohiFace = spio.loadmat(file_name='/home/arasdar/datasets/bci-project-data-RAW/RoohiFace.mat')
SarahFace = spio.loadmat(file_name='/home/arasdar/datasets/bci-project-data-RAW/SarahFace.mat')

AllData = np.concatenate((BahramFace['Intensification_Data'],
                            DJFace['Intensification_Data'],
                            NickFace['Intensification_Data'],
                            RoohiFace['Intensification_Data'],
                            SarahFace['Intensification_Data']), axis=0)

AllLabels = np.concatenate((BahramFace['Intensification_Label'],
                            DJFace['Intensification_Label'],
                            NickFace['Intensification_Label'],
                            RoohiFace['Intensification_Label'],
                            SarahFace['Intensification_Label']), axis=0)

print(AllData.shape, AllData.dtype, AllLabels.shape, AllLabels.dtype)
print(np.mean(AllLabels==0), np.mean(AllLabels==1), np.mean(AllLabels==2), np.mean(AllLabels==3))
print((AllLabels +  1).max(axis=0))
print(AllData[:1, :1, :2], AllLabels[:10])

(18720, 205, 16) float64 (18720, 1) uint8
0.833333333333 0.166666666667 0.0 0.0
[2]
[[[ 6.87143564  6.26277733]]] [[1]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [1]
 [0]]


In [2]:
# Shuffle the data before anything
np.random.shuffle(AllData)
np.random.shuffle(AllLabels)

print(AllData.shape, AllData.dtype, AllLabels.shape, AllLabels.dtype)
print(np.mean(AllLabels==0), np.mean(AllLabels==1), np.mean(AllLabels==2), np.mean(AllLabels==3))
print((AllLabels +  1).max(axis=0))
print(AllData[:1, :1, :2], AllLabels[:10])

(18720, 205, 16) float64 (18720, 1) uint8
0.833333333333 0.166666666667 0.0 0.0
[2]
[[[-1.14882886  1.12322485]]] [[0]
 [0]
 [1]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]]


In [3]:
# Solving the overfitting problem by adding linearly transformed and separable data
# linearly transformed and separable data augmentation
AllLabelOne = (AllLabels==1).reshape(-1)
AllData_LabelOne = AllData[AllLabelOne]
AllLabels_LabelOne = AllLabels[AllLabelOne]

AllLabelZero = (AllLabels==0).reshape(-1)
AllData_LabelZero = AllData[AllLabelZero]
AllLabels_LabelZero = AllLabels[AllLabelZero]

# print(AllData_LabelOne.shape, AllData_LabelZero.shape, AllLabelZero.shape, AllLabelOne.shape)
print(np.mean(AllLabelZero==0), np.mean(AllLabelZero==1), 
      np.mean(AllLabelOne==0), np.mean(AllLabelOne==1))
# print(AllLabelOne[:10])
print(AllData_LabelOne.shape, AllData_LabelZero.shape, AllLabels_LabelZero.shape, AllLabels_LabelOne.shape)
print(np.mean(AllLabels_LabelZero==0), np.mean(AllLabels_LabelZero==1), 
      np.mean(AllLabels_LabelOne==0), np.mean(AllLabels_LabelOne==1))

0.166666666667 0.833333333333 0.833333333333 0.166666666667
(3120, 205, 16) (15600, 205, 16) (15600, 1) (3120, 1)
1.0 0.0 0.0 1.0


In [4]:
# Train and test split
# test data size 30% of all data
test_size = int((AllData.shape[0] * 0.3)//2)  #is the test data size for each class tgt or non tgt
print(test_size, AllData.shape[0])

X_train_valid1, Y_train_valid1 = AllData_LabelZero[:-test_size], AllLabels_LabelZero[:-test_size]
X_test1, Y_test1 = AllData_LabelZero[-test_size:], AllLabels_LabelZero[-test_size:]
X_train_valid2, Y_train_valid2 = AllData_LabelOne[:-test_size], AllLabels_LabelOne[:-test_size]
X_test2, Y_test2 = AllData_LabelOne[-test_size:], AllLabels_LabelOne[-test_size:]

print(X_train_valid1.shape, X_test1.shape, Y_train_valid1.shape, Y_test1.shape)
print(X_train_valid1.shape, X_test1.shape, Y_train_valid1.shape, Y_test1.shape)

X_train_valid = np.concatenate((X_train_valid1, X_train_valid2), axis=0)
Y_train_valid = np.concatenate((Y_train_valid1, Y_train_valid2), axis=0)
X_test = np.concatenate((X_test1, X_test2), axis=0)
Y_test = np.concatenate((Y_test1, Y_test2), axis=0)

print(X_train_valid.shape, X_test.shape, Y_train_valid.shape, Y_test.shape)
print(np.mean(Y_train_valid==0), np.mean(Y_train_valid==1)) 
print('np.mean(Y_test==0), np.mean(Y_test==1)', np.mean(Y_test==0), np.mean(Y_test==1))

2808 18720
(12792, 205, 16) (2808, 205, 16) (12792, 1) (2808, 1)
(12792, 205, 16) (2808, 205, 16) (12792, 1) (2808, 1)
(13104, 205, 16) (5616, 205, 16) (13104, 1) (5616, 1)
0.97619047619 0.0238095238095
np.mean(Y_test==0), np.mean(Y_test==1) 0.5 0.5


In [5]:
# Preparing for data augmentation
AllData, AllLabels = X_train_valid, Y_train_valid

In [6]:
# Solving the overfitting problem by adding linearly transformed and separable data
# linearly transformed and separable data augmentation
AllLabelOne = (AllLabels==1).reshape(-1)
AllData_LabelOne = AllData[AllLabelOne]
AllLabels_LabelOne = AllLabels[AllLabelOne]

AllLabelZero = (AllLabels==0).reshape(-1)
AllData_LabelZero = AllData[AllLabelZero]
AllLabels_LabelZero = AllLabels[AllLabelZero]

print(AllData_LabelOne.shape[0], AllData_LabelZero.shape[0])

# Number of times, we need to generate target data for uniform distribution
print(int(AllData_LabelZero.shape[0]/ AllData_LabelOne.shape[0]))
num_DataLabelOneNew = int(AllData_LabelZero.shape[0]/ AllData_LabelOne.shape[0])
print(num_DataLabelOneNew)

312 12792
41
41


In [7]:
# Linear synthetic data augmentatiion or creation or generation
AllData_LabelOneNew_list, AllLabels_LabelOneNew_list = [], []

# w*data+b: translation, rotation, and scaling
# linear transformation of the target data
w, b = 1.0, 0.0
for _ in range(num_DataLabelOneNew):
    AllData_LabelOneNew = (w * AllData_LabelOne) + b
    AllData_LabelOneNew_list.append(AllData_LabelOneNew)
    w *= 0.9 # 1.0, 0.9, 0.81, 0.729, 0.6561
    b += 0.1 # 0.0, 0.1, 0.2, 0.3, 0.4
    AllLabels_LabelOneNew_list.append(AllLabels_LabelOne)

AllData_LabelOneNew_total = np.array(AllData_LabelOneNew_list, 
                                      dtype=AllData_LabelOne.dtype).reshape(-1, 205, 16)
AllLabels_LabelOneNew_total = np.array(AllLabels_LabelOneNew_list, 
                                      dtype=AllLabels_LabelOne.dtype).reshape(-1)
print(AllData_LabelOneNew_total.shape, AllLabels_LabelOneNew_total.shape)
print(AllData_LabelOneNew_total.dtype, AllLabels_LabelOneNew_total.dtype)


AllDataNew = np.concatenate((AllData_LabelOneNew_total, AllData_LabelZero), axis=0)
print(AllData_LabelOneNew_total.shape, AllData_LabelZero.shape)

print(AllLabels_LabelOneNew_total.shape, AllLabels_LabelZero.shape)
AllLabelsNew = np.concatenate((AllLabels_LabelOneNew_total, AllLabels_LabelZero.reshape(-1)), axis=0)

print(AllDataNew.shape, AllDataNew.dtype, AllLabelsNew.shape, AllLabelsNew.dtype)
print(np.mean(AllLabelsNew==0), np.mean(AllLabelsNew==1), np.mean(AllLabelsNew==2), np.mean(AllLabelsNew==3))
print((AllLabelsNew +  1).max(axis=0))
print('np.mean(AllLabelsNew==0), np.mean(AllLabelsNew==1)', np.mean(AllLabelsNew==0), np.mean(AllLabelsNew==1))

(12792, 205, 16) (12792,)
float64 uint8
(12792, 205, 16) (12792, 205, 16)
(12792,) (12792, 1)
(25584, 205, 16) float64 (25584,) uint8
0.5 0.5 0.0 0.0
2
np.mean(AllLabelsNew==0), np.mean(AllLabelsNew==1) 0.5 0.5


In [8]:
# Implementing Yalda's seggestion
#  AllData and Alllabels are limited to train-valid data only
X_train_valid, Y_train_valid = AllDataNew, AllLabelsNew
print(AllDataNew.shape, AllDataNew.dtype, AllLabelsNew.shape, AllLabelsNew.dtype)
print(np.mean(AllLabelsNew==0), np.mean(AllLabelsNew==1), np.mean(AllLabelsNew==2), np.mean(AllLabelsNew==3))
print((AllLabelsNew +  1).max(axis=0))

(25584, 205, 16) float64 (25584,) uint8
0.5 0.5 0.0 0.0
2


In [9]:
# Preparing input and output data
from utilities import *

# Normalizing/standardizing the input data features
X_train_valid_norm, X_test_norm = standardize(test=X_test, train=X_train_valid)

# Onehot encoding/vectorizing the output data labels
print(np.mean((Y_train_valid+1).reshape(-1)==0), np.mean((Y_train_valid+1).reshape(-1)==1),
     np.mean((Y_train_valid+1).reshape(-1)==2), np.mean((Y_train_valid+1).reshape(-1)==3))

print(np.mean((Y_test+1).reshape(-1)==0), np.mean((Y_test+1).reshape(-1)==1),
     np.mean((Y_test+1).reshape(-1)==2), np.mean((Y_test+1).reshape(-1)==3))

Y_train_valid_onehot = one_hot(labels=(Y_train_valid+1).reshape(-1), n_class=2) 
Y_test_onehot = one_hot(labels=(Y_test+1).reshape(-1), n_class=2) 

print(Y_train_valid_onehot.shape, Y_train_valid_onehot.dtype, 
      Y_test_onehot.shape, Y_test_onehot.dtype)

0.0 0.5 0.5 0.0
0.0 0.5 0.5 0.0
(25584, 2) float64 (5616, 2) float64


In [10]:
# Train and valid split
from sklearn.model_selection import train_test_split
X_train_norm, X_valid_norm, Y_train_onehot, Y_valid_onehot = train_test_split(X_train_valid_norm, 
                                                                              Y_train_valid_onehot,
                                                                              test_size=0.30)

print(X_train_norm.shape, X_valid_norm.shape, Y_train_onehot.shape, Y_valid_onehot.shape)

(17908, 205, 16) (7676, 205, 16) (17908, 2) (7676, 2)


In [11]:
## Hyperparameters
# Input data
batch_size = X_train_norm.shape[0]// 100 # minibatch size & number of minibatches
seq_len = X_train_norm.shape[1] # Number of steps: each trial length
n_channels = X_train_norm.shape[2] # number of channels in each trial
print('batch_size, seq_len, n_channels', batch_size, seq_len, n_channels)

# Output labels
n_classes = Y_train_valid.max(axis=0)+1
assert Y_train_valid.max(axis=0) == Y_test.max(axis=0)
print('n_classes', n_classes)

# learning parameters
learning_rate = 0.0001 #1e-4
epochs = 10 # num iterations for updating model
keep_prob = 0.50 # 90% neurons are kept and 10% are dropped out

batch_size, seq_len, n_channels 179 205 16
n_classes 2


In [12]:
# GPUs or CPU
import tensorflow as tf

# Check TensorFlow Version
print('TensorFlow Version: {}'.format(tf.__version__))

# Check for a GPU
print('Default GPU Device: {}'.format(tf.test.gpu_device_name()))

TensorFlow Version: 1.3.0
Default GPU Device: /gpu:0


In [13]:
# Feed the data from python/numpy to tensorflow framework
inputs_ = tf.placeholder(tf.float32, [None, seq_len, n_channels], name = 'inputs_')
labels_ = tf.placeholder(tf.float32, [None, n_classes], name = 'labels_')
keep_prob_ = tf.placeholder(tf.float32, name = 'keep_prob_')
learning_rate_ = tf.placeholder(tf.float32, name = 'learning_rate_')

In [14]:
# batch_size, seq_len, n_channels 179 205 16
# (batch, 205, 16) --> (batch, 102, 32)
# conv valid: (205-2+0)/1 + 1 = (203/1)+1 = 203 + 1=204
# pool same: (204-2+0)/2 + 1 = (202/2)+1 = 101 + 1=102
conv1 = tf.layers.conv1d(inputs=inputs_, filters=32, kernel_size=2, strides=1, padding='valid', 
                         activation = tf.nn.relu)
max_pool_1 = tf.layers.max_pooling1d(inputs=conv1, pool_size=2, strides=2, padding='same')
# max_pool_1 = tf.nn.dropout(max_pool_1, keep_prob=keep_prob_)
print('inputs_.shape, conv1.shape, max_pool_1.shape', inputs_.shape, conv1.shape, max_pool_1.shape)

# (batch, 102, 32) --> (batch, 51, 64)
# conv same
# pool same: (102-2+0)/2 + 1 = (100/2)+1 = 50 + 1=51
conv2 = tf.layers.conv1d(inputs=max_pool_1, filters=64, kernel_size=2, strides=1, padding='same', 
                         activation = tf.nn.relu)
max_pool_2 = tf.layers.max_pooling1d(inputs=conv2, pool_size=2, strides=2, padding='same')
# max_pool_2 = tf.nn.dropout(max_pool_2, keep_prob=keep_prob_)
print('max_pool_1.shape, conv2.shape, max_pool_2.shape', max_pool_1.shape, conv2.shape, max_pool_2.shape)

# (batch, 51, 64) --> (batch, 25, 128)
# conv valid: (51-2+0)/1 + 1 = (49/1)+1 = 49 + 1=50
# pool same: (50-2+0)/2 + 1 = (48/2)+1 = 24 + 1=25
conv3 = tf.layers.conv1d(inputs=max_pool_2, filters=128, kernel_size=2, strides=1, padding='valid', 
                         activation = tf.nn.relu)
max_pool_3 = tf.layers.max_pooling1d(inputs=conv3, pool_size=2, strides=2, padding='same')
# max_pool_3 = tf.nn.dropout(max_pool_3, keep_prob=keep_prob_)
print('max_pool_2.shape, conv3.shape, max_pool_3.shape', max_pool_2.shape, conv3.shape, max_pool_3.shape)

# (batch, 25, 128) --> (batch, 12, 256)
# conv valid: (25-2+0)/1 + 1 = (23/1)+1 = 23 + 1=24
# pool same: (24-2+0)/2 + 1 = (22/2)+1 = 11 + 1=12
conv4 = tf.layers.conv1d(inputs=max_pool_3, filters=256, kernel_size=2, strides=1, padding='valid', 
                         activation = tf.nn.relu)
max_pool_4 = tf.layers.max_pooling1d(inputs=conv4, pool_size=2, strides=2, padding='same')
# max_pool_4 = tf.nn.dropout(max_pool_4, keep_prob=keep_prob_)
print('max_pool_3.shape, conv4.shape, max_pool_4.shape', max_pool_3.shape, conv4.shape, max_pool_4.shape)

# Flatten and add dropout + predicted output
flat = tf.reshape(max_pool_4, (-1, 12*256))
flat = tf.nn.dropout(flat, keep_prob=keep_prob_)
logits = tf.layers.dense(flat, n_classes)
print('max_pool_4.shape, flat.shape, logits.shape', max_pool_4.shape, flat.shape, logits.shape)

inputs_.shape, conv1.shape, max_pool_1.shape (?, 205, 16) (?, 204, 32) (?, 102, 32)
max_pool_1.shape, conv2.shape, max_pool_2.shape (?, 102, 32) (?, 102, 64) (?, 51, 64)
max_pool_2.shape, conv3.shape, max_pool_3.shape (?, 51, 64) (?, 50, 128) (?, 25, 128)
max_pool_3.shape, conv4.shape, max_pool_4.shape (?, 25, 128) (?, 24, 256) (?, 12, 256)
max_pool_4.shape, flat.shape, logits.shape (?, 12, 256) (?, 3072) (?, 2)


In [15]:
# Backward pass: error backpropagation
# Cost function
cost_tensor = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels_)
cost = tf.reduce_mean(input_tensor=cost_tensor)
print('cost_tensor, cost', cost_tensor, cost)

# Optimizer
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate_).minimize(cost)
print('optimizer', optimizer)

# Accuracy
correct_pred = tf.equal(tf.argmax(logits, 1), tf.argmax(labels_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32), name='accuracy')
print('correct_pred, accuracy', correct_pred, accuracy)

# Confusion matrix
confusion_matrix = tf.confusion_matrix(predictions=tf.argmax(logits, 1),
                                       labels=tf.argmax(labels_, 1))
print('confusion_matrix', confusion_matrix)

cost_tensor, cost Tensor("Reshape_3:0", shape=(?,), dtype=float32) Tensor("Mean:0", shape=(), dtype=float32)
optimizer name: "Adam"
op: "NoOp"
input: "^Adam/update_conv1d/kernel/ApplyAdam"
input: "^Adam/update_conv1d/bias/ApplyAdam"
input: "^Adam/update_conv1d_1/kernel/ApplyAdam"
input: "^Adam/update_conv1d_1/bias/ApplyAdam"
input: "^Adam/update_conv1d_2/kernel/ApplyAdam"
input: "^Adam/update_conv1d_2/bias/ApplyAdam"
input: "^Adam/update_conv1d_3/kernel/ApplyAdam"
input: "^Adam/update_conv1d_3/bias/ApplyAdam"
input: "^Adam/update_dense/kernel/ApplyAdam"
input: "^Adam/update_dense/bias/ApplyAdam"
input: "^Adam/Assign"
input: "^Adam/Assign_1"

correct_pred, accuracy Tensor("Equal:0", shape=(?,), dtype=bool) Tensor("accuracy:0", shape=(), dtype=float32)
confusion_matrix Tensor("confusion_matrix/SparseTensorDenseAdd:0", shape=(?, ?), dtype=int32)


In [None]:
train_acc, train_loss = [], []
valid_acc, valid_loss = [], []

# Save the training result or trained and validated model params
saver = tf.train.Saver()

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
   
    # Loop over epochs
    for e in range(epochs):
        
        # Loop over batches
        for x, y in get_batches(X_train_norm, Y_train_onehot, batch_size):
            
            ######################## Training
            # Feed dictionary
            feed = {inputs_ : x, labels_ : y, keep_prob_ : keep_prob, learning_rate_ : learning_rate}
            
            # Loss
            loss, _ , acc = sess.run([cost, optimizer, accuracy], feed_dict = feed)
            train_acc.append(acc)
            train_loss.append(loss)
            
            ################## Validation
            acc_batch = []
            loss_batch = []    
            # Loop over batches
            for x, y in get_batches(X_valid_norm, Y_valid_onehot, batch_size):

                # Feed dictionary
                feed = {inputs_ : x, labels_ : y, keep_prob_ : 1.0}

                # Loss
                loss, acc = sess.run([cost, accuracy], feed_dict = feed)
                acc_batch.append(acc)
                loss_batch.append(loss)

            # Store
            valid_acc.append(np.mean(acc_batch))
            valid_loss.append(np.mean(loss_batch))
            
        # Print info for every iter/epoch
        print("Epoch: {}/{}".format(e+1, epochs),
              "Train loss: {:6f}".format(np.mean(train_loss)),
              "Valid loss: {:.6f}".format(np.mean(valid_loss)),
              "Train acc: {:6f}".format(np.mean(train_acc)),
              "Valid acc: {:.6f}".format(np.mean(valid_acc)))
                
    saver.save(sess,"checkpoints_/dcnn-face-yalda.ckpt")

Epoch: 1/10 Train loss: 0.481470 Valid loss: 0.471453 Train acc: 0.762793 Valid acc: 0.766024
Epoch: 2/10 Train loss: 0.371572 Valid loss: 0.358994 Train acc: 0.837123 Valid acc: 0.839918
Epoch: 3/10 Train loss: 0.316479 Valid loss: 0.303622 Train acc: 0.868492 Valid acc: 0.871753
Epoch: 4/10 Train loss: 0.283021 Valid loss: 0.269910 Train acc: 0.886634 Valid acc: 0.890437
Epoch: 5/10 Train loss: 0.259822 Valid loss: 0.247278 Train acc: 0.898659 Valid acc: 0.902812
Epoch: 6/10 Train loss: 0.242894 Valid loss: 0.230777 Train acc: 0.906862 Valid acc: 0.911622
Epoch: 7/10 Train loss: 0.229474 Valid loss: 0.218175 Train acc: 0.913384 Valid acc: 0.918269
Epoch: 8/10 Train loss: 0.218514 Valid loss: 0.208136 Train acc: 0.918666 Valid acc: 0.923456
Epoch: 9/10 Train loss: 0.209592 Valid loss: 0.199877 Train acc: 0.922924 Valid acc: 0.927629


In [None]:
import matplotlib.pyplot as mplot

mplot.plot(train_loss, label='Face train_loss')
mplot.plot(valid_loss, label='Face valid_loss')
mplot.legend()
mplot.show()

In [None]:
# import matplotlib.pyplot as mplot
mplot.plot(train_acc, label='Face train_acc')
mplot.plot(valid_acc, label='Face valid_acc')
mplot.legend()
mplot.show()

In [None]:
test_acc, test_loss = [], []

with tf.Session() as sess:
    # Restore the validated model
    saver.restore(sess, tf.train.latest_checkpoint('checkpoints_/'))
    
    ################## Test
    acc_batch = []
    loss_batch = []    
    # Loop over batches
    for x, y in get_batches(X_test_norm, Y_test_onehot, batch_size):

        # Feed dictionary
        feed = {inputs_ : x, labels_ : y, keep_prob_ : 1.0}

        # Loss
        loss, acc = sess.run([cost, accuracy], feed_dict = feed)
        acc_batch.append(acc)
        loss_batch.append(loss)

    # Store
    test_acc.append(np.mean(acc_batch))
    test_loss.append(np.mean(loss_batch))

    # Print info for every iter/epoch
    print("Test loss: {:6f}".format(np.mean(test_loss)),
          "Test acc: {:.6f}".format(np.mean(test_acc)))