In [1]:
"""
Data loading
"""
import numpy as np
import pandas as pd 
import os
import random
from tqdm import tqdm
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objs as go
from plotly.offline import init_notebook_mode, iplot
from plotly import tools
from scipy.stats import mannwhitneyu

init_notebook_mode(connected=True) ## plotly init
seed = 123
random.seed = seed
%matplotlib inline

In [3]:
"""
training dataset
"""

filenames_list = os.listdir('data/Train/') ## list of file names in the directory
EEG_data_train = pd.DataFrame({})    ## create an empty df that will hold data from each file

for file_name in tqdm(filenames_list):
    temp_df = pd.read_csv('data/Train/' + file_name) ## read from the file to df
    EEG_data_train = EEG_data_train.append(temp_df)                           ## add the file data to the main df
    
EEG_data_train = EEG_data_train.drop(['Unnamed: 0'], axis=1)                  ## remove the unused column
EEG_data_train.loc[EEG_data_train['matching condition'] == 'S2 nomatch,', 'matching condition'] =  'S2 nomatch' ## remove comma sign from stimulus name

100%|████████████████████████████████████████████████████████████████████████████████| 468/468 [04:54<00:00,  1.21s/it]


In [4]:
"""
test dataset
"""
filenames_list = os.listdir('data/Test/') ## list of file names in the directory
EEG_data = pd.DataFrame({})    ## create an empty df that will hold data from each file

for file_name in tqdm(filenames_list):
    temp_df = pd.read_csv('data/Test/' + file_name) ## read from the file to df
    EEG_data = EEG_data.append(temp_df)                           ## add the file data to the main df
    
EEG_data = EEG_data.drop(['Unnamed: 0'], axis=1)                  ## remove the unused column
EEG_data.loc[EEG_data['matching condition'] == 'S2 nomatch,', 'matching condition'] =  'S2 nomatch'

100%|████████████████████████████████████████████████████████████████████████████████| 480/480 [05:15<00:00,  1.26s/it]


In [5]:
EEG_data_train.head(10)

Unnamed: 0,trial number,sensor position,sample num,sensor value,subject identifier,matching condition,channel,name,time
0,0,FP1,0,-8.921,a,S1 obj,0,co2a0000364,0.0
1,0,FP1,1,-8.433,a,S1 obj,0,co2a0000364,0.003906
2,0,FP1,2,-2.574,a,S1 obj,0,co2a0000364,0.007812
3,0,FP1,3,5.239,a,S1 obj,0,co2a0000364,0.011719
4,0,FP1,4,11.587,a,S1 obj,0,co2a0000364,0.015625
5,0,FP1,5,14.028,a,S1 obj,0,co2a0000364,0.019531
6,0,FP1,6,11.587,a,S1 obj,0,co2a0000364,0.023438
7,0,FP1,7,6.704,a,S1 obj,0,co2a0000364,0.027344
8,0,FP1,8,1.821,a,S1 obj,0,co2a0000364,0.03125
9,0,FP1,9,-1.109,a,S1 obj,0,co2a0000364,0.035156


In [32]:
df_a = EEG_data_train[EEG_data_train['subject identifier'] == 'a']
df_c = EEG_data_train[EEG_data_train['subject identifier'] == 'c']

In [40]:
"""
rebuild the dataset
"""
sensor_pos_array = []

#Getting a set of sensor position
for sensor_pos in df_a['sensor position']:
    if sensor_pos not in sensor_pos_array:
        sensor_pos_array.append(sensor_pos)

np_a = df_a[['sensor value','sensor position','time']]
np_c = df_c[['sensor value','sensor position','time']]

# Converting all sensor position from 0-63 instead of their alpha form.
for sensor_replace in range(len(sensor_pos_array)):
    np_a.loc[np_a['sensor position'] == str(sensor_pos_array[sensor_replace]), 'sensor position'] = sensor_replace
    np_c.loc[np_c['sensor position'] == str(sensor_pos_array[sensor_replace]), 'sensor position'] = sensor_replace
    
np_a = np.array(np_a)  # transform the pandas dataframe to numpy array
np_a = np.expand_dims(np_a, axis = 1)
np_a = np_a[0:3850000, :]

np_c = np.array(np_c)  # transform the pandas dataframe to numpy array
np_c = np.expand_dims(np_c, axis = 1)
np_c = np_c[0:3810000, :]

#batches
np_a = np.reshape(np_a, [3850, 3000])
np_c = np.reshape(np_c, [3810, 3000])
print(np_a.shape)  # 3850000
print(np_c.shape)  # 3810000
X = np.concatenate((np_a, np_c), axis = 0)
print(X.shape)

(3850, 3000)
(3810, 3000)
(7660, 3000)


In [None]:
print(np_a)

In [None]:
"""
build the labels
"""
Y = np.ones(X.shape[0])               
Y[385:] = 0                        # label a: 1, label c: 0
Y = np.expand_dims(Y, axis = 1)
print(X.shape)
print(Y.shape)

In [None]:
# train_set = np.hstack((X, Y))
# print(train_set.shape)

# 80% train, 20% test
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.20, random_state=42) 

In [None]:
import tensorflow as tf

feature_count = X_train.shape[1]     # 1000
label_count = Y_train.shape[1]

training_epochs = 200
learning_rate = 0.001
hidden_layers = feature_count - 1
cost_history = np.empty(shape=[1],dtype=float)

X = tf.placeholder(tf.float32,[None, feature_count])
Y = tf.placeholder(tf.float32,[None, label_count])
is_training=tf.Variable(True,dtype=tf.bool)

In [None]:
# models

initializer = tf.contrib.layers.xavier_initializer()
h0 = tf.layers.dense(X, hidden_layers, activation=tf.nn.relu, kernel_initializer=initializer)
# h0 = tf.nn.dropout(h0, 0.95)
h1 = tf.layers.dense(h0, label_count, activation=None)

cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(labels=Y, logits=h1)
cost = tf.reduce_mean(cross_entropy)
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

predicted = tf.nn.sigmoid(h1)
correct_pred = tf.equal(tf.round(predicted), Y)
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

In [41]:
Test_new = EEG_data
name_array = []
for names in Test_new['name']:
    if names not in name_array:
        name_array.append(names)

temp = Test_new['name'].value_counts()

Test_new = Test_new[['sensor value','sensor position','time','name','subject identifier']]

for sensor_replace in range(len(sensor_pos_array)):
    Test_new.loc[Test_new['sensor position'] == str(sensor_pos_array[sensor_replace]), 'sensor position'] = sensor_replace

P = []                          # patient
P.append(Test_new[Test_new['name'] == name_array[0]])
test0 = np.array(P[0]['sensor value'])
test1 = np.array(P[0]['sensor position'])
test2 = np.array(P[0]['time'])

rows = int(test0.shape[0]/3000)

X_test = np.reshape(test0[:rows*3000], (rows,3000))

print(X_test)

label_temp = np.array(P[0]['subject identifier'])
label_temp = np.expand_dims(label_temp, axis = 1)

if label_temp[0,:] == 'a':
    Y_test = np.ones((rows, 1))
elif label_temp[0,:] == 'c':
    Y_test = np.zeros((rows, 1))

print(Y_test)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    for step in range(training_epochs + 1):
        temp_X = np.transpose(np.expand_dims(X_train[step,:], axis = 1))
        temp_Y = np.transpose(np.expand_dims(Y_train[step,:], axis = 1))
        sess.run(optimizer, feed_dict={X: temp_X, Y: temp_Y})
        loss, _, acc = sess.run([cost, optimizer, accuracy], feed_dict={
                                 X: X_train, Y: Y_train})
#         cost_history = np.append(cost_history, acc)
        if step % 50 == 0:
            print("Step: {:5}\tLoss: {:.3f}\tAcc: {:.2%}".format(step, loss, acc))

    print('Test Accuracy of Patient No. P'+str(name_array[0]), sess.run(accuracy, feed_dict={X: X_test, Y: Y_test}))
        
        
        
        
        
        
    #Test model and check accuracy
    #print('Test Accuracy:', sess.run([accuracy, tf.round(predicted)], feed_dict={X: X_test, Y: Y_test}))   
    
#     Test_new = EEG_data
#     name_array = []
#     for names in Test_new['name']:
#         if names not in name_array:
#             name_array.append(names)
#     # print(name_array)
#     temp = Test_new['name'].value_counts()
#     # print(temp)

#     P = []                          # patient
#     Accuracy =[]
#     for i in range(len(name_array)):
#         P.append(Test_new[Test_new['name'] == name_array[i]])

#         test0 = np.array(P[i]['sensor value'])
#         print(test0)
#         rows = int(test0.shape[0]/1000)
#         X_test = np.reshape(test0[:rows*1000,], (rows, 1000))

#         label_temp = np.array(P[i]['subject identifier'])
#         label_temp = np.expand_dims(label_temp, axis = 1)
        
#         P[i]['subject identifier'].describe()

#         if label_temp[0,:] == 'a':
#             Y_test = np.zeros((rows, 1))
#         elif label_temp[0,:] == 'c':
#             Y_test = np.zeros((rows, 1))
        
        #print('Test Accuracy of Patient No. P'+str(name_array[i]), sess.run(accuracy, feed_dict={X: X_test, Y: Y_test}))
        
#       Accuracy.append(accuracy)
        
# for i in range(len(name_array)):
#    print('Test Accuracy of Patient No. P-'+str(name_array[i]), Accuracy[i])

(491520,)
491520
[]
[]


TypeError: unhashable type: 'numpy.ndarray'

In [52]:
Test_new = EEG_data
name_array = []
for names in Test_new['name']:
    if names not in name_array:
        name_array.append(names)
# print(name_array)
temp = Test_new['name'].value_counts()
# print(temp)

Test_new = Test_new[['sensor value','sensor position','time','name','subject identifier']]


P = []                          # patient
P.append(Test_new[Test_new['name'] == name_array[0]][['sensor value', 'sensor position','time']])
print(P)
test0 = np.asarray(P)
print(test0.shape)

rows = int(test0.shape[0]/1000)
X_test = np.reshape(test[:rows*1000], (rows,1000))

print(X_test.shape)
label_temp = np.array(P[0]['subject identifier'])
label_temp = np.expand_dims(label_temp, axis = 1)

if label_temp[0,:] == 'a':
    Y_test = np.ones((rows, 1))
elif label_temp[0,:] == 'c':
    Y_test = np.zeros((rows, 1))
    
#print('Test Accuracy of Patient No. P'+str(name_array[i]), sess.run(accuracy, feed_dict={X: X_test, Y: Y_test}))



# for i in range(len(name_array)):
#     P.append(Test_new[Test_new['name'] == name_array[i]])

#     test0 = np.array(P[i][['sensor value'],['sensor position'], ['time']])
#     print(test0)
#     rows = int(test0.shape[0]/1000)
#     X_test = np.reshape(test0[:rows*1000,], (rows, 1000))

#     label_temp = np.array(P[i]['subject identifier'])
#     label_temp = np.expand_dims(label_temp, axis = 1)

#     P[i]['subject identifier'].describe()

#     if label_temp[0,:] == 'a':
#         Y_test = np.ones((rows, 1))
#     elif label_temp[0,:] == 'c':
#         Y_test = np.zeros((rows, 1))

    #print('Test Accuracy of Patient No. P'+str(name_array[i]), sess.run(accuracy, feed_dict={X: X_test, Y: Y_test}))

[       sensor value sensor position      time
0            -3.550             FP1  0.000000
1            -5.015             FP1  0.003906
2            -5.503             FP1  0.007812
3            -3.550             FP1  0.011719
4            -0.621             FP1  0.015625
5             1.821             FP1  0.019531
6             2.309             FP1  0.023438
7             0.844             FP1  0.027344
8             0.844             FP1  0.031250
9             3.286             FP1  0.035156
10            7.680             FP1  0.039062
11           10.122             FP1  0.042969
12            9.145             FP1  0.046875
13            5.239             FP1  0.050781
14            0.844             FP1  0.054688
15           -0.621             FP1  0.058594
16            2.309             FP1  0.062500
17            6.215             FP1  0.066406
18            7.192             FP1  0.070312
19            4.262             FP1  0.074219
20           -1.109             F

ValueError: cannot copy sequence with size 491520 to array axis with dimension 3

In [31]:
print(Y_test)

[]
