In [1]:
# Imports
import scipy.io as sio
from scipy import stats
from sklearn import linear_model
import numpy as np
import matplotlib.pyplot as plt
import sys
import numpy as np
from sklearn import datasets
import scipy
import pandas as pd
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.linear_model import Ridge, RidgeCV, LogisticRegression
from sklearn.metrics import mean_squared_error
from sklearn.metrics import confusion_matrix
import h5py
import random
import utils
import csv

sys.path.append("../") # go to parent dir
from mrcode.utils.file_utils import fileFinder, folderFinder

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [57]:
def std_windows(eeg, time_window = 50):
    """Standard deviation over consecutive, non-overlapping windows.

    Axis 1 of every epoch is cut into blocks of ``time_window`` samples and
    the standard deviation of each block is computed separately for every
    index along axis 2.

    Parameters
    ----------
    eeg : np.ndarray
        3-D array indexed as (epoch, sample, channel).
        NOTE(review): axis 1 is presumed to be time and axis 2 channels, going
        by the parameter and variable names -- confirm against the layout the
        caller actually passes in.
    time_window : int
        Number of samples per window. Must divide ``eeg.shape[1]`` exactly.

    Returns
    -------
    np.ndarray
        Array of shape ``(n_epochs, n_windows * eeg.shape[2])``. Within a row
        the values are ordered window-major, channel-minor.

    Raises
    ------
    ValueError
        If ``eeg`` is not 3-D or ``eeg.shape[1]`` is not a multiple of
        ``time_window``.
    """
    n_epochs, n_samples, n_channels = eeg.shape
    if n_samples % time_window != 0:
        # Fail with an explicit message instead of an opaque reshape error.
        raise ValueError(
            "eeg.shape[1] (%d) must be a multiple of time_window (%d)"
            % (n_samples, time_window)
        )
    n_windows = n_samples // time_window

    # One reshape for the whole batch replaces the Python loop over epochs:
    # (epoch, window, sample-in-window, channel).
    windows = eeg.reshape(n_epochs, n_windows, time_window, n_channels)
    return windows.std(axis=2).reshape(n_epochs, n_windows * n_channels)

def std_overlap_windows(eeg, time_window = 80, overlap = 0.5):
    """Standard deviation over overlapping sliding windows.

    Windows of ``time_window`` samples are laid along axis 1 of every epoch.
    Consecutive windows share ``round(time_window * overlap)`` samples, so the
    window start advances by ``time_window - round(time_window * overlap)``
    samples (the hop) and the windows span the whole axis. Trailing samples
    that do not fill a complete window are dropped.

    Parameters
    ----------
    eeg : np.ndarray
        3-D array indexed as (epoch, sample, channel).
        NOTE(review): axis 1 is presumed to be time and axis 2 channels, going
        by the variable names -- confirm against the caller's layout.
    time_window : int
        Number of samples per window.
    overlap : float
        Fraction of a window shared with the next one, ``0 <= overlap < 1``.
        ``0`` gives non-overlapping windows.

    Returns
    -------
    np.ndarray
        Array of shape ``(n_epochs, eeg.shape[2] * n_windows)``. Within a row
        the values are ordered channel-major, window-minor (all windows of
        channel 0, then all windows of channel 1, ...).

    Raises
    ------
    ValueError
        If ``eeg`` is not 3-D, if ``overlap`` does not yield a hop between 1
        and ``time_window`` samples, or if an epoch is shorter than one window.
    """
    n_epochs, n_samples, n_channels = eeg.shape

    hop = time_window - int(round(time_window * overlap))
    if hop < 1 or hop > time_window:
        raise ValueError(
            "overlap must satisfy 0 <= overlap < 1 (got %r)" % (overlap,)
        )
    if n_samples < time_window:
        raise ValueError(
            "eeg.shape[1] (%d) is shorter than time_window (%d)"
            % (n_samples, time_window)
        )

    # Start index of every window, stepping by the hop across the whole axis.
    starts = np.arange(0, n_samples - time_window + 1, hop)
    # Row w holds the sample indices that belong to window w.
    sample_idx = starts[:, np.newaxis] + np.arange(time_window)[np.newaxis, :]

    windows = eeg[:, sample_idx, :]      # (epoch, window, sample-in-window, channel)
    window_std = windows.std(axis=2)     # (epoch, window, channel)

    # Channel-major flattening, the same layout the function has always returned.
    return window_std.transpose(0, 2, 1).reshape(n_epochs, n_channels * len(starts))

def load_data(ii,data_path,feature = 'raw'):
    """Read the EEG array, image semantics and image order of one folder.

    Parameters
    ----------
    ii : str
        Name of the folder inside ``data_path`` to read from.
    data_path : str
        Directory that contains the folder ``ii``.
    feature : str
        How the transposed ``eeg_events`` array is post-processed:
        ``'raw'`` flattens its last two axes into one, ``'std'`` passes it to
        ``std_windows`` and ``'std_overlap'`` to ``std_overlap_windows``.
        Any other value returns the transposed array unchanged.

    Returns
    -------
    tuple
        ``(eeg, image_semantics, image_info)``: the processed EEG array, the
        transposed ``image_semantics`` array, and ``image_order.txt`` parsed
        as a tab-separated DataFrame whose columns are all read as objects.
    """
    folder = data_path + '/' + ii

    eeg = sio.loadmat(folder + '/eeg_events.mat')['eeg_events'].transpose()

    if feature == 'raw':
        # Collapse axes 1 and 2 into a single feature axis.
        # (An arcsinh normalisation, np.arcsinh(eeg*1000), was tried here and is disabled.)
        leading, middle, trailing = eeg.shape[0], eeg.shape[1], eeg.shape[2]
        eeg = eeg.reshape(leading, trailing * middle)
    elif feature == 'std':
        eeg = std_windows(eeg)
    elif feature == 'std_overlap':
        eeg = std_overlap_windows(eeg)

    semantics_mat = sio.loadmat(folder + '/image_semantics.mat')
    image_semantics = semantics_mat['image_semantics'].transpose()

    image_info = pd.read_csv(folder + '/image_order.txt', delimiter='\t', dtype=object)

    return eeg, image_semantics, image_info

In [81]:
# Where the per-experiment folders live and what to pull out of each of them.
data_path = '../data/experiment_data'
experiment_folders = folderFinder(data_path)
target = 'image_id'   # column of image_order.txt collected into `imageids`
loadmode = 'raw'      # feature mode forwarded to load_data

# Gather the pieces of every folder first and stack them once afterwards;
# stacking inside the loop re-copies everything loaded so far on each pass.
eeg_parts = []
semantics_parts = []
imageid_parts = []
for ii in experiment_folders:
    eeg, image_semantics, image_info = load_data(ii, data_path, feature = loadmode)
    eeg_parts.append(eeg)
    semantics_parts.append(image_semantics)
    # Series.values replaces Series.as_matrix(), which pandas removed in 1.0.
    imageid_parts.append(list(image_info[target].values))

# With no folders found the three results stay empty lists, as before.
# Otherwise `imageids` is always a 1-D numpy array, also for a single folder.
X_temp = np.vstack(eeg_parts) if eeg_parts else []
Y_temp = np.vstack(semantics_parts) if semantics_parts else []
imageids = np.hstack(imageid_parts) if imageid_parts else []

## DO NOT RUN: Make data ready for CNN

In [82]:
# Normalize - this has to run before the reshape cell, while X_temp is still 2-D.
# The scaler is fitted on the training rows only, so that the statistics of the
# test and validation samples do not leak into the preprocessing; the fitted
# scaler is then applied to every row.
test_rows = np.load('../data/pred_image_semantics/test_split.npy')
val_rows = np.load('../data/pred_image_semantics/val_split.npy')
# Mirror the split cell: test_split indexes the full array, val_split indexes
# what remains after the test rows have been removed.
train_rows = np.delete(np.delete(np.arange(X_temp.shape[0]), test_rows), val_rows)
xScale = StandardScaler().fit(X_temp[train_rows])
X_temp = xScale.transform(X_temp)

In [83]:
# Shape check after scaling; the recorded run shows a flat 2-D matrix of (10350, 17600).
X_temp.shape

(10350, 17600)

In [84]:
# Reshape data: every flat row of 32*550 values is read as a (32, 550) matrix,
# transposed to (550, 32) and given a trailing singleton axis -> (550, 32, 1).
N_CHANNELS = 32      # presumably the EEG channel count -- TODO confirm
N_TIMEPOINTS = 550   # presumably the samples per epoch -- TODO confirm

# Re-running this cell on the already reshaped 4-D array would not fail (each
# (550, 32, 1) sample can itself be reshaped to (32, 550)) but would silently
# scramble the data, so anything other than the flat 2-D input is refused.
if X_temp.ndim != 2:
    raise ValueError(
        "X_temp must be the flat 2-D matrix, got shape %r; "
        "reload and rescale the data before reshaping again" % (X_temp.shape,)
    )

n_rows = X_temp.shape[0]
# Whole-batch equivalent of reshaping and transposing row by row.
X_temp_new = (
    X_temp.reshape(n_rows, N_CHANNELS, N_TIMEPOINTS)
    .transpose(0, 2, 1)
    .reshape(n_rows, N_TIMEPOINTS, N_CHANNELS, 1)
)
X_temp = X_temp_new

In [85]:
# Shape check after the reshape; the recorded run shows (10350, 550, 32, 1).
X_temp.shape

(10350, 550, 32, 1)

In [86]:
# Load the stored arrays that select the test rows and the validation rows
test_split, val_split = (
    np.load(split_file)
    for split_file in (
        '../data/pred_image_semantics/test_split.npy',
        '../data/pred_image_semantics/val_split.npy',
    )
)

In [87]:
# Test set: the rows of X_temp selected by test_split
X_test = X_temp[test_split, :, :]

# Everything that is left once the test rows are removed
X_train = np.delete(arr=X_temp, obj=test_split, axis=0)

# Validation set: val_split indexes into X_train, i.e. after the test rows are gone
X_val = X_train[val_split, :, :]

# Final training data: X_train without the validation rows
X_trainVal = np.delete(arr=X_train, obj=val_split, axis=0)

In [88]:
# Report the shape of each array, in the order test / train / val / trainVal
for split_array in (X_test, X_train, X_val, X_trainVal):
    print(split_array.shape)

(345, 550, 32, 1)
(10005, 550, 32, 1)
(345, 550, 32, 1)
(9660, 550, 32, 1)


### DO NOT RUN: Save data

In [89]:
# Write the three arrays to disk (np.save adds the .npy extension itself).
# Note that the file named X_trainCNN holds X_trainVal, the training rows
# without the validation rows.
for out_path, split_array in (
    ('../data/pred_image_semantics/X_testCNN', X_test),
    ('../data/pred_image_semantics/X_valCNN', X_val),
    ('../data/pred_image_semantics/X_trainCNN', X_trainVal),
):
    np.save(out_path, split_array)

## Testing the reshape

In [54]:
# Toy array of shape (2, 4, 3); every row repeats one value, so rows stay recognisable
a = np.array([
    [[value] * 3 for value in (1, 2, 3, 7)],
    [[value] * 3 for value in (4, 5, 6, 8)],
])
a

array([[[1, 1, 1],
        [2, 2, 2],
        [3, 3, 3],
        [7, 7, 7]],

       [[4, 4, 4],
        [5, 5, 5],
        [6, 6, 6],
        [8, 8, 8]]])

In [55]:
# Flatten the last two axes of each item, the same way load_data does in 'raw' mode
n_items, n_rows, n_cols = a.shape
new_a = a.reshape(n_items, n_cols * n_rows)
new_a

array([[1, 1, 1, 2, 2, 2, 3, 3, 3, 7, 7, 7],
       [4, 4, 4, 5, 5, 5, 6, 6, 6, 8, 8, 8]])

In [56]:
# Undo the flattening row by row, taking the target shape from `a` itself.
a_new = np.asarray([flat_row.reshape(a.shape[1], a.shape[2]) for flat_row in new_a])
# Fail loudly instead of relying on a visual comparison of the printed arrays.
assert np.array_equal(a_new, a), "flatten -> reshape round trip changed the data"
a_new

array([[[1, 1, 1],
        [2, 2, 2],
        [3, 3, 3],
        [7, 7, 7]],

       [[4, 4, 4],
        [5, 5, 5],
        [6, 6, 6],
        [8, 8, 8]]])