In [1]:
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.functional as F
import torch.optim as optim
import torch.utils.data
import pickle
import numpy as np
import pandas as pd
import scipy.io as sio
import time
from torchvision import transforms, datasets
from torch.utils.data import DataLoader, TensorDataset

In [36]:
# Hyper Parameters
N_EPOCHS = 70
BATCH_SIZE = 32 # how to choose batch_size? too big
LR = 1e-4 # learning rate
HIDDEN_SIZE = 125
INPUT_SIZE = 125
OUTPUT_SIZE = 1
NUM_LAYERS = 4
WINDOW_SIZE = 128

np.random.seed(123)

# Trial 1: Normalizing and using Keras

## Data Preprocessing

In [3]:
dataset_file = "/Users/zouhao/Desktop/EEGResearch/Dataset/s01.mat"
print("Processing",dataset_file,"..........")
data_file_in = sio.loadmat(dataset_file)
data_in = data_file_in["data"].transpose(0,2,1) #tuple # 40*40*8064
window_size = WINDOW_SIZE
# 0 valence, 1 arousal, 2 dominance, 3 liking
label = 1
label_in= data_file_in["labels"][:,label]>5
label_inter = np.empty([0])
data_inter = np.empty([0, window_size, 32])
trials = data_in.shape[0] # trials = 40

Processing /Users/zouhao/Desktop/EEGResearch/Dataset/s01.mat ..........


In [4]:
# normalizing first then split into train and test sets
def feature_normalize(data):
    mean = data[data.nonzero()].mean()
    sigma = data[data.nonzero()].std()
    data_normalized = data
    data_normalized[data_normalized.nonzero()] = (data_normalized[data_normalized.nonzero()]-mean)/sigma
    return data_normalized

def norm_dataset(dataset):
    norm_dataset = np.zeros([dataset.shape[0], 32])
    for i in range(dataset.shape[0]):
        norm_dataset[i] = feature_normalize(dataset[i])
    return norm_dataset

# get the data and the label
def windows(data, size):
    start = 0
    while((start+size) < data.shape[0]):
        yield int(start), int(start + size)
        start += size
        
def segment_signal(data,label,label_index,window_size):
    for (start, end) in windows(data, window_size):
        if((len(data[start:end]) == window_size)):
            if(start == 0):
                segments = data[start:end]
                segments = np.vstack([segments, data[start:end]])
                
                labels = np.array(label[label_index])
                labels = np.append(labels, np.array(label[label_index]))
            else:
                segments = np.vstack([segments, data[start:end]])
                labels = np.append(labels, np.array(label[label_index]))
    return segments, labels

In [5]:
for trial in range(0, trials):
    base_signal = (data_in[trial, 0:128, 0:32]+data_in[trial,128:256,0:32]+data_in[trial,256:384,0:32])/3
    data = data_in[trial,384:8064,0:32]
    # compute the deviation
    for i in range(0, 60):
        data[i*128:(i+1)*128,0:32]=data[i*128:(i+1)*128,0:32]-base_signal
    label_index = trial
    
    # feature normalization
    data = norm_dataset(data)
    data, label = segment_signal(data,label_in,label_index,window_size)
    # data reshape
    data = data.reshape(int(data.shape[0]/window_size), window_size, 32) # features=32
    # append new data and label
    data_inter = np.vstack([data_inter, data])
    label_inter = np.append(label_inter, label)

In [6]:
# shuffle data
index = np.array(range(0, len(label_inter)))
np.random.shuffle(index)
shuffled_data = data_inter[index]
shuffled_label = label_inter[index]

In [7]:
print("total data size: ", shuffled_data.shape)
print("total label size: ", shuffled_label.shape)

total data size:  (2400, 128, 32)
total label size:  (2400,)


In [8]:
# already reshaped here
X = shuffled_data # 3D array(samples * window_size(lookback) * features)

## Split the data

In [4]:
from keras import optimizers, Sequential
from keras.layers import LSTM
from keras.layers import Dense
from keras.layers import RepeatVector
from keras.layers import TimeDistributed
import tensorflow as tf
from keras.callbacks import ModelCheckpoint, TensorBoard
from keras import regularizers
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, precision_recall_curve
from sklearn.metrics import recall_score, classification_report, auc, roc_curve
from sklearn.metrics import precision_recall_fscore_support, f1_score

from numpy.random import seed
seed(1)
from tensorflow import set_random_seed
set_random_seed(2)
SEED = 123
DATA_SPLIT_PCT = 0.2

In [11]:
# do we have to divide the test data? valid data?
X_train, X_test = train_test_split(X, test_size=DATA_SPLIT_PCT, random_state=SEED)
X_train, X_valid = train_test_split(X_train, test_size=DATA_SPLIT_PCT, random_state=SEED)

## Standardize the data

In [12]:
# standardized data (transformed to Gaussian, mean 0 and variance 1) 
# better for autoencoders

# if we just normalize the entire data and then split into train-test, 
# that is incorrect since Test data should be completely unseen 
# to anything during the modeling
# therefore, normalize the training data, and use its summary statistics 
# to normalize the test data 
# (for normalization, these statistics are the mean and variances of each feature)

# standardization
for trial in range(0, X_train.shape[0]):
    scaler = StandardScaler().fit(X_train[trial])
    X_train[trial] = scaler.transform(X_train[trial])
    
for trial in range(0, X_valid.shape[0]):
    scaler = StandardScaler().fit(X_valid[trial])
    X_valid[trial] = scaler.transform(X_valid[trial])
    
for trial in range(0, X_test.shape[0]):
    scaler = StandardScaler().fit(X_test[trial])
    X_test[trial] = scaler.transform(X_test[trial])

## Define the model in Keras

In [17]:
samples = shuffled_data.shape[0] # 2400
timesteps = shuffled_data.shape[1] # 128
features = shuffled_data.shape[2] # 32

In [45]:
from keras.layers import LSTM, Dropout

In [58]:
model = Sequential()
# encoder
model.add(LSTM(100, activation='tanh',
               input_shape=(timesteps, features), return_sequences=True))
model.add(Dropout(0.5))
model.add(LSTM(80, activation='tanh', return_sequences=True))
model.add(Dropout(0.5))
# decoder
model.add(LSTM(100, activation='tanh', return_sequences=True))
model.add(Dropout(0.3))
model.add(LSTM(32, activation='tanh', return_sequences=True))
model.add(Dropout(0.3))
model.add(TimeDistributed(Dense(features, activation='sigmoid'))) # two trials: with & without SHOULD have
model.add(Dropout(0.1))
model.compile(optimizer='adam', loss='mse')
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_27 (LSTM)               (None, 128, 100)          53200     
_________________________________________________________________
dropout_22 (Dropout)         (None, 128, 100)          0         
_________________________________________________________________
lstm_28 (LSTM)               (None, 128, 80)           57920     
_________________________________________________________________
dropout_23 (Dropout)         (None, 128, 80)           0         
_________________________________________________________________
lstm_29 (LSTM)               (None, 128, 100)          72400     
_________________________________________________________________
dropout_24 (Dropout)         (None, 128, 100)          0         
_________________________________________________________________
lstm_30 (LSTM)               (None, 128, 32)           17024     
__________

In [22]:
# training
# BATCH_SIZE = 32
adam = optimizers.Adam(LR)
model.compile(metrics=['accuracy'], loss='mse', optimizer=adam)

cp = ModelCheckpoint(filepath="/Users/zouhao/Desktop/EEGResearch/lstm_autoencoder_classifier.h5",
                               save_best_only=True,
                               verbose=0)
tb = TensorBoard(log_dir='/Users/zouhao/Desktop/EEGResearch/logs',
                histogram_freq=0,
                write_graph=True,
                write_images=True)
# (X, X)
history = model.fit(X_train, X_train, 
                                                epochs=N_EPOCHS, 
                                                batch_size=BATCH_SIZE, 
                                                validation_data=(X_valid, X_valid),
                                                verbose=2).history

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Train on 1536 samples, validate on 384 samples
Epoch 1/70
 - 9s - loss: 0.9999 - acc: 0.0342 - val_loss: 0.9998 - val_acc: 0.0353
Epoch 2/70
 - 6s - loss: 0.9997 - acc: 0.0354 - val_loss: 0.9995 - val_acc: 0.0370
Epoch 3/70
 - 6s - loss: 0.9994 - acc: 0.0374 - val_loss: 0.9993 - val_acc: 0.0390
Epoch 4/70
 - 7s - loss: 0.9991 - acc: 0.0385 - val_loss: 0.9989 - val_acc: 0.0390
Epoch 5/70
 - 6s - loss: 0.9988 - acc: 0.0396 - val_loss: 0.9985 - val_acc: 0.0409
Epoch 6/70
 - 8s - loss: 0.9983 - acc: 0.0406 - val_loss: 0.9979 - val_acc: 0.0421
Epoch 7/70
 - 7s - loss: 0.9975 - acc: 0.0422 - val_loss: 0.9970 - val_acc: 0.0423
Epoch 8/70
 - 6s - loss: 0.9964 - acc: 0.0428 - val_loss: 0.9956 - val_acc: 0.0426
Epoch 9/70
 - 10s - loss: 0.9994 - acc: 0.0431 - val_loss: 1.1017 - val_acc: 0.0432
Epoch 10/70
 - 7s - loss: 25.7210 - acc: 0.0429 - val_loss: 1.7329 - val_acc: 0.0438
Epoch 11/70
 - 8s - loss

In [16]:
# training
# BATCH_SIZE = 32
# loss func: cross entropy
adam = optimizers.Adam(LR)
model.compile(metrics=['accuracy'], loss='categorical_crossentropy', optimizer=adam)

cp = ModelCheckpoint(filepath="/Users/zouhao/Desktop/EEGResearch/lstm_autoencoder_classifier.h5",
                               save_best_only=True,
                               verbose=0)
tb = TensorBoard(log_dir='/Users/zouhao/Desktop/EEGResearch/logs',
                histogram_freq=0,
                write_graph=True,
                write_images=True)
# (X, X)
history = model.fit(X_train, X_train, 
                                                epochs=N_EPOCHS, 
                                                batch_size=BATCH_SIZE, 
                                                validation_data=(X_valid, X_valid),
                                                verbose=2).history

Train on 1536 samples, validate on 384 samples
Epoch 1/70
 - 28s - loss: -9.6027e+00 - acc: 0.0376 - val_loss: -1.2419e+01 - val_acc: 0.0306
Epoch 2/70
 - 21s - loss: -1.0649e+01 - acc: 0.0303 - val_loss: -1.0054e+01 - val_acc: 0.0275
Epoch 3/70
 - 23s - loss: -7.4455e+00 - acc: 0.0280 - val_loss: -6.2108e+00 - val_acc: 0.0285
Epoch 4/70
 - 21s - loss: -6.5002e+00 - acc: 0.0283 - val_loss: -6.9014e+00 - val_acc: 0.0286
Epoch 5/70
 - 21s - loss: -7.0454e+00 - acc: 0.0290 - val_loss: -8.4449e+00 - val_acc: 0.0288
Epoch 6/70
 - 20s - loss: -9.6897e+00 - acc: 0.0288 - val_loss: -1.0658e+01 - val_acc: 0.0286
Epoch 7/70
 - 23s - loss: -9.4404e+00 - acc: 0.0293 - val_loss: -6.7202e+00 - val_acc: 0.0289
Epoch 8/70
 - 21s - loss: -6.6543e+00 - acc: 0.0292 - val_loss: -8.1017e+00 - val_acc: 0.0276
Epoch 9/70
 - 21s - loss: -1.1055e+01 - acc: 0.0271 - val_loss: -1.3584e+01 - val_acc: 0.0278
Epoch 10/70
 - 21s - loss: -1.5368e+01 - acc: 0.0279 - val_loss: -1.6663e+01 - val_acc: 0.0277
Epoch 11/70


# Trial 2: Split the data without normalization

Flatten and scale functions

The reason we do not normalize the original 2D data and then create the 3D arrays:

In that way, we will split the data into train and test, followed by their normalization. However, we lose the initial rows of samples.

Define the function

Split the data first without normalizing them

In this way, when we create the 3D arrays on the test data, we will not lose the initial rows of samples

In [8]:
dataset_file = "/Users/zouhao/Desktop/EEGResearch/Dataset/s01.mat"
print("Processing",dataset_file,"..........")
data_file_in = sio.loadmat(dataset_file)
data_in = data_file_in["data"].transpose(0,2,1) #tuple # 40*40*8064
window_size = WINDOW_SIZE
# 0 valence, 1 arousal, 2 dominance, 3 liking
label = 1
label_in= data_file_in["labels"][:,label]>5
label_inter = np.empty([0])
data_inter = np.empty([0, window_size, 32])
trials = data_in.shape[0] # trials = 40

Processing /Users/zouhao/Desktop/EEGResearch/Dataset/s01.mat ..........


## standardized data (transformed to Gaussian, mean 0 and variance 1) 

1. better for autoencoders

2. if we just normalize the entire data and then split into train-test, that is incorrect since Test data should be completely unseen to anything during the modeling

3. therefore, normalize the training data, and use its summary statistics to normalize the test data (for normalization, these statistics are the mean and variances of each feature)

In [9]:
# get the data and the label
def windows(data, size):
    start = 0
    while((start+size) < data.shape[0]):
        yield int(start), int(start + size)
        start += size
        
def segment_signal(data,label,label_index,window_size):
    for (start, end) in windows(data, window_size):
        if((len(data[start:end]) == window_size)):
            if(start == 0):
                segments = data[start:end]
                segments = np.vstack([segments, data[start:end]])
                
                labels = np.array(label[label_index])
                labels = np.append(labels, np.array(label[label_index]))
            else:
                segments = np.vstack([segments, data[start:end]])
                labels = np.append(labels, np.array(label[label_index]))
    return segments, labels

In [10]:
for trial in range(0, trials):
    base_signal = (data_in[trial, 0:128, 0:32]+data_in[trial,128:256,0:32]+data_in[trial,256:384,0:32])/3
    data = data_in[trial,384:8064,0:32]
    # compute the deviation
    for i in range(0, 60):
        data[i*128:(i+1)*128,0:32]=data[i*128:(i+1)*128,0:32]-base_signal
    label_index = trial
    data, label = segment_signal(data,label_in,label_index,window_size)
    # data reshape
    data = data.reshape(int(data.shape[0]/window_size), window_size, 32) # features=32
    # append new data and label
    data_inter = np.vstack([data_inter, data])
    label_inter = np.append(label_inter, label)

In [11]:
# shuffle data
index = np.array(range(0, len(label_inter)))
np.random.shuffle(index)
shuffled_data = data_inter[index]
shuffled_label = label_inter[index]

In [12]:
X = shuffled_data
y = shuffled_label

In [13]:
# X matrices are 3D
# FUNCTION: Flattern and scale

def flatten(X):
    flattened_X = np.empty((X.shape[0], X.shape[2]))
    for i in range(X.shape[0]):
        flattened_X[i] = X[i, (X.shape[1]-1), :]
    return(flattened_X)

def scale(X, scaler):
    for i in range(X.shape[0]):
        X[i, :, :] = scaler.transform(X[i, :, :])
    return X

## Split the data

In [39]:
X_train, X_valid = train_test_split(X, test_size=DATA_SPLIT_PCT, random_state=SEED)

## Standardize the data

In [15]:
# Initialize a scaler using the training data.
scaler = StandardScaler().fit(flatten(X_train))

# scaler is the fitted transform object
# use scale func to stanbdardize X_train
X_train_scaled = scale(X_train, scaler)

In [25]:
# the means and variances of each column of the flattened X_train should be 0 and 1
a = flatten(X_train_scaled)
print('colwise mean', np.mean(a, axis=0).round(6))
print('colwise variance', np.var(a, axis=0))
# scaling is correct

colwise mean [-0.  0. -0. -0.  0.  0. -0. -0.  0.  0.  0. -0. -0.  0. -0. -0.  0.  0.
 -0.  0.  0.  0. -0. -0.  0. -0. -0.  0.  0.  0.  0.  0.]
colwise variance [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1.]


In [16]:
# scale the validation and test sets
X_valid_scaled = scale(X_valid, scaler)
X_test_scaled = scale(X_test, scaler)

## Train the model in Keras

In [40]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 128, 64)           24832     
_________________________________________________________________
lstm_2 (LSTM)                (None, 128, 12)           3696      
_________________________________________________________________
lstm_3 (LSTM)                (None, 128, 64)           19712     
_________________________________________________________________
lstm_4 (LSTM)                (None, 128, 32)           12416     
_________________________________________________________________
time_distributed_1 (TimeDist (None, 128, 32)           1056      
Total params: 61,712
Trainable params: 61,712
Non-trainable params: 0
_________________________________________________________________


In [50]:
BATCH_SIZE = 64

In [None]:
# train the autoencoder
adam = optimizers.Adam(LR)
# model.compile(metrics=['accuracy'], loss='mse', optimizer=adam)

cp = ModelCheckpoint(filepath="/Users/zouhao/Desktop/EEGResearch/lstm_autoencoder_classifier_2.h5",
                               save_best_only=True,
                               verbose=0)
tb = TensorBoard(log_dir='/Users/zouhao/Desktop/EEGResearch/logs_2',
                histogram_freq=0,
                write_graph=True,
                write_images=True)
history = model.fit(X_train, X_train, 
                                                epochs=N_EPOCHS, 
                                                batch_size=BATCH_SIZE, 
                                                validation_data=(X_valid, X_valid),
                                                verbose=2).history
change = model.predict(X_train_scaled)
change_of_shape = change.shape()
print(change_of_shape)

Train on 1920 samples, validate on 480 samples
Epoch 1/70
 - 49s - loss: 30.8916 - val_loss: 30.5934
Epoch 2/70
 - 35s - loss: 30.2782 - val_loss: 30.0831
Epoch 3/70
 - 35s - loss: 29.8942 - val_loss: 29.7508
Epoch 4/70
 - 59s - loss: 29.4858 - val_loss: 29.0965
Epoch 5/70
 - 44s - loss: 28.9758 - val_loss: 28.7728
Epoch 6/70


In [32]:
change = model.predict(X_train_scaled)
change_of_shape = change.shape
print(change_of_shape)

(1536, 128, 32)


In [33]:
print(change[0][1])

[-0.00789138 -0.01395999 -0.01712207 -0.0041155   0.00869718 -0.01039931
  0.03027246  0.03548006  0.04182019  0.03787667  0.04163924  0.01351074
  0.03702186  0.00867695  0.03207814  0.04093201 -0.02381737 -0.04121871
 -0.03078729 -0.02385878 -0.03886621 -0.0243677  -0.03821497 -0.00139337
 -0.02982756 -0.02474514 -0.02675743  0.00376714 -0.01561087 -0.0260346
 -0.00886956  0.02279284]


In [34]:
print(X_train_scaled[0][1])

[-0.59663674 -0.27213707 -0.29902665 -0.38292201 -0.30673741  0.02038107
  0.69990596 -0.28272983  0.34664124  1.79032472  0.32668407 -0.66205099
  0.34384265 -0.03411891 -0.02661511  1.56028603 -0.95095796 -1.09503233
 -0.15762596 -0.53550895 -0.59742585 -0.10158882  0.22013979  0.61311379
  0.59853711 -0.81920563  0.73327508  2.22692641  0.56704558 -0.2173332
  0.00233403 -0.38315393]


In [22]:
BATCH_SIZE = 32

In [23]:
# train the autoencoder
adam = optimizers.Adam(LR)
model.compile(metrics=['accuracy'], loss='categorical_crossentropy', optimizer=adam)

cp = ModelCheckpoint(filepath="/Users/zouhao/Desktop/EEGResearch/lstm_autoencoder_classifier_2.h5",
                               save_best_only=True,
                               verbose=0)
tb = TensorBoard(log_dir='/Users/zouhao/Desktop/EEGResearch/logs_2',
                histogram_freq=0,
                write_graph=True,
                write_images=True)
history = model.fit(X_train_scaled, X_train_scaled, 
                                                epochs=N_EPOCHS, 
                                                batch_size=BATCH_SIZE, 
                                                validation_data=(X_valid_scaled, X_valid_scaled),
                                                verbose=2).history

Train on 1536 samples, validate on 384 samples
Epoch 1/70
 - 28s - loss: 3.7809 - acc: 0.0399 - val_loss: 3.2096 - val_acc: 0.0384
Epoch 2/70
 - 26s - loss: 2.8861 - acc: 0.0404 - val_loss: 2.7955 - val_acc: 0.0383
Epoch 3/70
 - 22s - loss: 2.6340 - acc: 0.0402 - val_loss: 2.3522 - val_acc: 0.0383
Epoch 4/70
 - 22s - loss: 1.4635 - acc: 0.0400 - val_loss: 1.2178 - val_acc: 0.0385
Epoch 5/70
 - 22s - loss: 0.4817 - acc: 0.0400 - val_loss: -4.5278e-02 - val_acc: 0.0384
Epoch 6/70
 - 21s - loss: -1.6046e-01 - acc: 0.0394 - val_loss: -8.4352e-01 - val_acc: 0.0382
Epoch 7/70
 - 21s - loss: -1.3779e+00 - acc: 0.0395 - val_loss: -1.2795e+00 - val_acc: 0.0380
Epoch 8/70
 - 21s - loss: -1.2786e+00 - acc: 0.0383 - val_loss: -3.6933e-01 - val_acc: 0.0362
Epoch 9/70
 - 25s - loss: -5.7622e-01 - acc: 0.0365 - val_loss: -5.2402e-01 - val_acc: 0.0358
Epoch 10/70
 - 31s - loss: 2.7561 - acc: 0.0358 - val_loss: 9.9059 - val_acc: 0.0357
Epoch 11/70
 - 28s - loss: 12.0900 - acc: 0.0362 - val_loss: 8.9544

# Trial 3: Redefine the model in PyTorch

In [14]:
# create tensor and variable
featuresTrain = torch.from_numpy(X_train_scaled)
targetsTrain = torch.from_numpy(y_train).type(torch.LongTensor)

featuresTest = torch.from_numpy(X_test_scaled)
targetsTest = torch.from_numpy(y_test).type(torch.LongTensor)

train = TensorDataset(featuresTrain,targetsTrain)
test = TensorDataset(featuresTest,targetsTest)

In [15]:
# data loader
train_loader = DataLoader(train, batch_size = BATCH_SIZE, shuffle = True)
test_loader = DataLoader(test, batch_size = BATCH_SIZE, shuffle = True)

In [16]:
for step, (data, target) in enumerate(train_loader):
    print(data)

tensor([[[-7.7702e-01, -1.8116e-01, -5.3123e-01,  ..., -6.7786e-02,
          -1.0660e-01, -3.2383e-02],
         [-1.9911e+00, -1.2891e+00, -1.3798e+00,  ...,  4.6592e-01,
           9.0882e-01,  9.8246e-01],
         [-1.8771e+00, -1.3307e+00, -1.3995e+00,  ...,  2.6868e-01,
           6.3808e-01,  7.2395e-01],
         ...,
         [-1.3859e+00, -5.6743e-01, -3.5615e-01,  ..., -5.2401e-01,
          -3.6399e-02, -5.3801e-01],
         [-6.6513e-01, -1.1431e-01,  1.2924e-01,  ...,  4.9394e-01,
          -1.9172e-01, -2.2182e-01],
         [ 6.6213e-01,  7.2583e-01,  7.9492e-01,  ..., -1.1366e-01,
          -1.3477e-01,  4.0836e-02]],

        [[ 1.3988e+00,  1.0932e+00,  2.5204e-01,  ...,  3.7086e-01,
          -2.0436e-01,  3.8643e-01],
         [ 9.7913e-01,  6.5122e-01,  2.9241e-02,  ...,  3.5880e-01,
           9.7444e-02,  2.1237e-01],
         [ 5.0088e-01, -2.8536e-01, -4.7126e-01,  ..., -7.0332e-01,
           3.4533e-01,  2.3809e-01],
         ...,
         [ 7.9972e-01,  2

tensor([[[-0.1849,  0.1148,  0.2156,  ...,  0.2371, -0.0334,  0.4114],
         [-0.3902,  0.1006, -0.0528,  ...,  0.4093,  0.4494,  0.4543],
         [ 0.3867,  0.4224, -0.0385,  ...,  0.9692,  0.4434,  0.4120],
         ...,
         [-0.0260,  0.0534,  0.2172,  ...,  0.0815, -0.1010, -0.3784],
         [ 0.0381,  0.2771,  0.5873,  ..., -0.7659, -0.2702, -0.7691],
         [ 0.0783,  0.2604,  0.3825,  ..., -0.4437, -0.0111, -0.3373]],

        [[ 1.8812,  1.7429,  1.3321,  ..., -2.6641, -2.1558, -2.2221],
         [ 1.6741,  1.2588,  0.7084,  ..., -2.0546, -1.7793, -1.7174],
         [ 0.4969,  0.1893, -0.0684,  ..., -1.7726, -0.9042, -0.6783],
         ...,
         [-1.3383, -1.2822, -1.3490,  ...,  2.0533,  1.7994,  1.8123],
         [-0.6212, -0.5746, -0.7122,  ...,  1.2300,  0.4876,  0.4341],
         [ 0.1258,  0.1196, -0.0538,  ..., -0.9140, -0.8960, -0.9394]],

        [[-0.3218, -0.2579, -0.1121,  ..., -0.0265, -0.1520, -0.2050],
         [-0.7524, -0.3900, -0.0996,  ..., -0

tensor([[[-9.3205e-01, -5.7469e-01, -2.5751e-01,  ..., -8.3682e-01,
          -9.7873e-01, -6.5994e-01],
         [ 8.6066e-03,  5.7670e-02, -6.4626e-02,  ..., -1.3583e+00,
          -1.1262e+00, -9.0557e-01],
         [ 1.2103e+00,  5.9511e-01,  9.0424e-02,  ..., -9.2013e-01,
          -7.9105e-01, -8.6715e-01],
         ...,
         [ 1.9682e+00,  9.9382e-01,  5.0718e-01,  ...,  2.9383e-01,
          -4.7521e-01, -1.0114e+00],
         [ 2.6286e+00,  1.5969e+00,  7.8830e-01,  ..., -1.4875e-01,
          -7.7207e-01, -1.4195e+00],
         [ 1.7336e+00,  1.6942e+00,  1.2084e+00,  ...,  4.8411e-01,
          -1.0740e+00, -1.6507e+00]],

        [[ 2.4381e-01, -1.2494e-04, -2.1999e-01,  ...,  8.3196e-01,
          -1.1735e-01, -2.6139e-01],
         [ 1.0679e+00,  3.5009e-01,  1.5648e-01,  ...,  6.0141e-02,
          -6.8987e-01, -9.8186e-01],
         [ 1.6246e+00,  1.0527e+00,  7.9171e-01,  ..., -6.2539e-01,
          -1.3134e+00, -1.5664e+00],
         ...,
         [ 9.2108e-01,  6

tensor([[[-1.2598e-01, -5.8884e-02, -2.0510e-02,  ...,  7.6775e-01,
          -4.8797e-01, -7.9509e-02],
         [-1.0241e+00, -1.0695e+00, -9.7016e-01,  ...,  1.3512e+00,
           4.2093e-01,  3.3645e-01],
         [-1.1963e+00, -1.5375e+00, -1.2794e+00,  ...,  1.0724e+00,
           7.9902e-01,  7.6680e-01],
         ...,
         [ 7.2953e-01,  6.4427e-01,  3.1279e-01,  ..., -8.7787e-02,
          -8.7595e-01, -8.6491e-01],
         [-3.9245e-01, -5.4548e-01, -6.8330e-01,  ...,  6.8733e-01,
          -9.1156e-01, -8.0618e-01],
         [-7.2570e-01, -8.5788e-01, -9.2394e-01,  ..., -1.1212e-01,
          -5.3884e-01, -5.8845e-01]],

        [[-5.9502e-01, -5.8461e-01, -4.1195e-03,  ..., -5.1158e-02,
          -1.8320e-02,  4.4156e-03],
         [-9.1412e-01, -1.2267e+00, -5.8991e-01,  ...,  5.8228e-02,
           1.3396e-01,  2.6999e-03],
         [-2.8110e-01, -9.5427e-01, -5.4418e-01,  ...,  1.8008e-01,
          -2.6095e-01, -1.0229e-01],
         ...,
         [-6.1683e-01, -6

In [17]:
data.shape

torch.Size([12, 64, 32])

In [24]:
data = data.resize_([12, 64, 125])

In [26]:
model(data)

RuntimeError: Expected hidden[0] size (8, 12, 125), got (4, 12, 125)

In [19]:
class EncoderRNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, bidirectional):
        super(EncoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True,
                           dropout=0.5, bidirectional=bidirectional)
        self.encoder = nn.Sequential(
            nn.Linear(32, 64), #(in_features, out_features)
            nn.ReLU(inplace=True),
            nn.Linear(64, 12), # compress
        )
        
        nn.init.orthogonal_(self.lstm.weight_ih_l0, gain=np.sqrt(2))
        nn.init.orthogonal_(self.lstm.weight_hh_l0, gain=np.sqrt(2))
        
    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size)
        
        out, _ = self.lstm(x, (h0, c0))  
        # out: (batch_size, seq_length, hidden_size)
        
        return out[:, -1, :].unsqueeze(1)
    
class DecoderRNN(nn.Module):
    def __init__(self, hidden_size, output_size, num_layers, bidirectional):
        super(DecoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(hidden_size, output_size, num_layers, batch_first=True,
                            dropout=0.5, bidirectional=bidirectional)
        self.decoder = nn.Sequential(
            nn.Linear(12, 64),
            nn.ReLU(inplace=True),
            nn.Linear(64, 32),
            nn.Sigmoid(),
        )
        
        nn.init.orthogonal_(self.lstm.weight_ih_l0, gain=np.sqrt(2))
        nn.init.orthogonal_(self.lstm.weight_hh_l0, gain=np.sqrt(2))
        
    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.output_size)
        c0 = torch.zeros(self.num_layers, x.size(0), self.output_size)

        out, _ = self.lstm(x, (h0, c0))  
        # out: (batch_size, seq_length, hidden_size)

        return out
    
class AutoEncoderRNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, bidirectional=False):
        super(AutoEncoderRNN, self).__init__()
        self.encoder = EncoderRNN(input_size, hidden_size, num_layers, bidirectional)
        self.decoder = DecoderRNN(hidden_size, input_size, num_layers, bidirectional)

    def forward(self, x):
        encoded_x = self.encoder(x)
        decoded_x = self.decoder(encoded_x)

        return decoded_x

In [20]:
model = AutoEncoderRNN(input_size=INPUT_SIZE, 
                       hidden_size=HIDDEN_SIZE, 
                       num_layers=NUM_LAYERS, 
                       # output_size=OUTPUT_SIZE, 
                       bidirectional=True)
crit = nn.CrossEntropyLoss() # nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LR) # optim.SGD(model.parameters(), lr = LR)
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, 0.5) # lr decay

In [21]:
model

AutoEncoderRNN(
  (encoder): EncoderRNN(
    (lstm): LSTM(125, 125, num_layers=4, batch_first=True, dropout=0.5, bidirectional=True)
    (encoder): Sequential(
      (0): Linear(in_features=32, out_features=64, bias=True)
      (1): ReLU(inplace=True)
      (2): Linear(in_features=64, out_features=12, bias=True)
    )
  )
  (decoder): DecoderRNN(
    (lstm): LSTM(125, 125, num_layers=4, batch_first=True, dropout=0.5, bidirectional=True)
    (decoder): Sequential(
      (0): Linear(in_features=12, out_features=64, bias=True)
      (1): ReLU(inplace=True)
      (2): Linear(in_features=64, out_features=32, bias=True)
      (3): Sigmoid()
    )
  )
)

In [22]:
def binary_accuracy(preds, y):
    rounded_preds = torch.round(torch.sigmoid(preds))
    correct = (rounded_preds == y).float()
    acc = correct.sum() / len(correct)
    return acc

def train(model, train_loader, optimizer, crit):
    epoch_loss, epoch_acc = 0., 0.
    model.train()
    total_len = 0.
    # for batch in iterator:
    for step, (data, target) in enumerate(train_loader):
        print(data.shape)
        preds = model(data) # squeeze() # [batch_size] # out
        loss = crit(preds, target)
        acc = binary_accuracy(preds, target)
        
        # SGD
        optimizer.zero_grad()
        loss_backward()
        optimizer.step()
        
        epoch_loss += loss_item() * len(target)
        epoch_acc += acc.item() * len(target)
        total_len += len(target)
        
    return epoch_loss / total_len, epoch_acc / total_len

def evaluate(model, test_loader, crit):
    epoch_loss, epoch_acc = 0., 0.
    model.eval()
    total_len = 0.
    for step, (data, target) in enumerate(test_loader):
        preds = model(data) # .squeeze()
        loss = crit(preds, target)
        acc = binary_accuracy(preds, target)
        
        epoch_loss += loss_item() * len(target)
        epoch_acc += acc.item() * len(target)
        total_len += len(target)
    model.train()
        
    return epoch_loss / total_len, epoch_acc / total_len

In [23]:
# training and testing
valid_losses = []

for epoch in range(N_EPOCHS):
    start_time = time.time()
    
    train_loss, train_acc = train(model, train_loader, optimizer, crit)
    valid_loss, valid_acc = evaluate(model, test_loader, crit)
    
    end_time = time.time()
    epoch_mins, epoch_secs = epoch_time(start_time, end_time)
    
    if i % 1 == 0:
        if len(val_losses) == 0 or valid_loss < min(valid_losses):
            torch.save(model.state_dict(), "/Users/zouhao/Desktop/EEGResearch/lstm_autoencoder_classifier_3.pth")
            print("best model saved to lstm_autoencoder_classifier_3.pth")
        else:
            # lr decay
            scheduler.step()
            optimizer = torch.optim.Adam(model.parameters(), lr=LR)
        val_losses.append(val_loss)
            
    print(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')
    print("Epoch", epoch, "Train Loss", train_loss, "Train Acc", train_acc)
    print("Epoch", epoch, "Valid_loss", valid_loss, "Valid_acc", valid_acc)

torch.Size([32, 128, 32])


RuntimeError: input.size(-1) must be equal to input_size. Expected 125, got 32

In [None]:
# Plotting the result
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker

plt.figure()
plt.plot(val_losses)

In [37]:
data.shape

torch.Size([32, 128, 32])

In [55]:
data.shape

(123, 64, 32)

In [58]:
data.shape

torch.Size([12, 64, 32])

In [60]:
data2 = data.resize_([12, 64, 125])

In [42]:
data2.size(-1)

125

# Trial 4: Overlapping preprocessing put in keras

# Overlapping the dataset

### Preprocessing

In [3]:
dataset_file = "/Users/zouhao/Desktop/EEGResearch/Dataset/s01.mat"
print("Processing",dataset_file,"..........")
data_file_in = sio.loadmat(dataset_file)
data_in = data_file_in["data"].transpose(0,2,1) #tuple # 40*40*8064
window_size = 64
# 0 valence, 1 arousal, 2 dominance, 3 liking
label = 1
label_in= data_file_in["labels"][:,label]>5
label_inter = np.empty([0])
data_inter = np.empty([0, window_size, 32])
trials = data_in.shape[0] # trials = 40

Processing /Users/zouhao/Desktop/EEGResearch/Dataset/s01.mat ..........


### Overlapping

In [85]:
data_inter.shape

(0, 64, 32)

In [28]:
data = data_in[trial,384:8064,0:32] # 60 ((8064-384) / 128)

In [42]:
data.shape

(7680, 32)

In [29]:
len(data)

7680

In [31]:
r = np.arange(len(data))

In [32]:
r

array([   0,    1,    2, ..., 7677, 7678, 7679])

In [33]:
s = r[::64]

In [35]:
len(s)

120

In [53]:
def windows(data, size):
    start = 0
    size = 0.5 * size
    while((start+size) < data.shape[0]):
        yield int(start), int(start + size)
        start += size

In [55]:
for (start, end) in windows(data, window_size):
        print((start, end))

(0, 64)
(64, 128)
(128, 192)
(192, 256)
(256, 320)
(320, 384)
(384, 448)
(448, 512)
(512, 576)
(576, 640)
(640, 704)
(704, 768)
(768, 832)
(832, 896)
(896, 960)
(960, 1024)
(1024, 1088)
(1088, 1152)
(1152, 1216)
(1216, 1280)
(1280, 1344)
(1344, 1408)
(1408, 1472)
(1472, 1536)
(1536, 1600)
(1600, 1664)
(1664, 1728)
(1728, 1792)
(1792, 1856)
(1856, 1920)
(1920, 1984)
(1984, 2048)
(2048, 2112)
(2112, 2176)
(2176, 2240)
(2240, 2304)
(2304, 2368)
(2368, 2432)
(2432, 2496)
(2496, 2560)
(2560, 2624)
(2624, 2688)
(2688, 2752)
(2752, 2816)
(2816, 2880)
(2880, 2944)
(2944, 3008)
(3008, 3072)
(3072, 3136)
(3136, 3200)
(3200, 3264)
(3264, 3328)
(3328, 3392)
(3392, 3456)
(3456, 3520)
(3520, 3584)
(3584, 3648)
(3648, 3712)
(3712, 3776)
(3776, 3840)
(3840, 3904)
(3904, 3968)
(3968, 4032)
(4032, 4096)
(4096, 4160)
(4160, 4224)
(4224, 4288)
(4288, 4352)
(4352, 4416)
(4416, 4480)
(4480, 4544)
(4544, 4608)
(4608, 4672)
(4672, 4736)
(4736, 4800)
(4800, 4864)
(4864, 4928)
(4928, 4992)
(4992, 5056)
(5056, 5

In [None]:
# num.Trial = 40
# time_samples = 63 sec (8054 / 128 fs)
# features(channel) = 32

In [6]:
# get the data and the label
def windows(data, size):
    start = 0
    size = 0.5 * size
    while((start+size) < data.shape[0]):
        yield int(start), int(start + size)
        start += size
        
def segment_signal(data,label,label_index,window_size):
    for (start, end) in windows(data, window_size):
        if((len(data[start:end]) == 0.5 * window_size)):#64
            if(start == 0):
                segments = data[start:end]
                segments = np.vstack([segments, data[start:end]])
                
                labels = np.array(label[label_index])
                labels = np.append(labels, np.array(label[label_index]))
            else:
                segments = np.vstack([segments, data[start:end]])
                labels = np.append(labels, np.array(label[label_index]))
    return segments, labels

In [7]:
for trial in range(0, trials):
    base_signal = (data_in[trial, 0:64, 0:32]+data_in[trial,64:128,0:32]+data_in[trial,128:192,0:32])/3
    data = data_in[trial,192:8064,0:32]
    # compute the deviation
    for i in range(0, 123):
        data[i*64:(i+1)*64,0:32]=data[i*64:(i+1)*64,0:32]-base_signal
    label_index = trial
    data, label = segment_signal(data,label_in,label_index,window_size)
    # data reshape
    data = data.reshape(int(data.shape[0]/64), 64, 32) # features=32
    # append new data and label
    data_inter = np.vstack([data_inter, data])
    label_inter = np.append(label_inter, label)

ValueError: all the input array dimensions for the concatenation axis must match exactly, but along dimension 1, the array at index 0 has size 128 and the array at index 1 has size 64

In [47]:
data_inter.shape

(4920, 64, 32)

In [10]:
label_inter.shape

(9840,)

In [6]:
# shuffle data
index = np.array(range(0, len(data_inter)))
np.random.shuffle(index)
shuffled_data = data_inter[index]
shuffled_label = label_inter[index]

In [7]:
X = shuffled_data
y = shuffled_label

In [11]:
X_train, X_test = train_test_split(X, test_size=DATA_SPLIT_PCT, random_state=SEED)
X_train, X_valid = train_test_split(X_train, test_size=DATA_SPLIT_PCT, random_state=SEED)

y_train, y_test = train_test_split(y, test_size=DATA_SPLIT_PCT, random_state=SEED)
y_train, y_valid = train_test_split(y_train, test_size=DATA_SPLIT_PCT, random_state=SEED)

### Standardizing

In [12]:
def flatten(X):
    flattened_X = np.empty((X.shape[0], X.shape[2]))
    for i in range(X.shape[0]):
        flattened_X[i] = X[i, (X.shape[1]-1), :]
    return(flattened_X)

def scale(X, scaler):
    for i in range(X.shape[0]):
        X[i, :, :] = scaler.transform(X[i, :, :])
    return X

In [13]:
# Initialize a scaler using the training data.
scaler = StandardScaler().fit(flatten(X_train))

# scaler is the fitted transform object
# use scale func to stanbdardize X_train
X_train_scaled = scale(X_train, scaler)
# scale the validation and test sets
X_valid_scaled = scale(X_valid, scaler)
X_test_scaled = scale(X_test, scaler)

### Define and train the model

In [63]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 128, 64)           24832     
_________________________________________________________________
lstm_2 (LSTM)                (None, 128, 12)           3696      
_________________________________________________________________
lstm_3 (LSTM)                (None, 128, 64)           19712     
_________________________________________________________________
lstm_4 (LSTM)                (None, 128, 32)           12416     
_________________________________________________________________
time_distributed_1 (TimeDist (None, 128, 32)           1056      
Total params: 61,712
Trainable params: 61,712
Non-trainable params: 0
_________________________________________________________________


In [18]:
BATCH_SIZE = 200

In [19]:
adam = optimizers.Adam(LR)
model.compile(metrics=['accuracy'], loss='categorical_crossentropy', optimizer=adam)

cp = ModelCheckpoint(filepath="/Users/zouhao/Desktop/EEGResearch/lstm_autoencoder_classifier_4.h5",
                               save_best_only=True,
                               verbose=0)
tb = TensorBoard(log_dir='/Users/zouhao/Desktop/EEGResearch/logs_4',
                histogram_freq=0,
                write_graph=True,
                write_images=True)
history = model.fit(X_train_scaled, X_train_scaled, 
                                                epochs=N_EPOCHS, 
                                                batch_size=BATCH_SIZE, 
                                                validation_data=(X_valid_scaled, X_valid_scaled),
                                                verbose=2).history


Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where

Train on 3148 samples, validate on 788 samples
Epoch 1/70
 - 8s - loss: -5.0385e+00 - acc: 0.0204 - val_loss: -5.6117e+00 - val_acc: 0.0224
Epoch 2/70
 - 6s - loss: -5.7105e+00 - acc: 0.0222 - val_loss: -5.6929e+00 - val_acc: 0.0244
Epoch 3/70
 - 6s - loss: -5.2380e+00 - acc: 0.0250 - val_loss: -4.9087e+00 - val_acc: 0.0271
Epoch 4/70
 - 6s - loss: -5.5844e+00 - acc: 0.0249 - val_loss: -5.2523e+00 - val_acc: 0.0230
Epoch 5/70
 - 6s - loss: -4.6700e-01 - acc: 0.0222 - val_loss: 8.6963 - val_acc: 0.0240
Epoch 6/70
 - 6s - loss: 8.5782 - acc: 0.0279 - val_loss: 6.7657 - val_acc: 0.0335
Epoch 7/70
 - 6s - loss: 4.8211 - acc: 0.0367 - val_loss: 2.1170 - val_acc: 0.0410
Epoch 8/70
 - 6s - loss: -3.4466e-01 - acc: 0.0431 - val_loss: -2.2388e+00 - val_acc: 0.0443
Epoch 9/70
 - 5s - loss: -3.3949e+00 - acc: 0.0457 - val_loss: -5.3219e+00 - val_acc: 0.0492
Epoch 10/70
 - 5s - loss: -7.0796e+00 - acc: 

In [20]:
BATCH_SIZE = 32

In [21]:
adam = optimizers.Adam(LR)
model.compile(metrics=['accuracy'], loss='categorical_crossentropy', optimizer=adam)

cp = ModelCheckpoint(filepath="/Users/zouhao/Desktop/EEGResearch/lstm_autoencoder_classifier_4.h5",
                               save_best_only=True,
                               verbose=0)
tb = TensorBoard(log_dir='/Users/zouhao/Desktop/EEGResearch/logs_4',
                histogram_freq=0,
                write_graph=True,
                write_images=True)
history = model.fit(X_train_scaled, X_train_scaled, 
                                                epochs=N_EPOCHS, 
                                                batch_size=BATCH_SIZE, 
                                                validation_data=(X_valid_scaled, X_valid_scaled),
                                                verbose=2).history

Train on 3148 samples, validate on 788 samples
Epoch 1/70
 - 23s - loss: 2.2489 - acc: 0.0247 - val_loss: 2.1674 - val_acc: 0.0223
Epoch 2/70
 - 19s - loss: -2.2314e+00 - acc: 0.0188 - val_loss: 7.7126 - val_acc: 0.0187
Epoch 3/70
 - 20s - loss: 0.4941 - acc: 0.0175 - val_loss: -2.1747e+00 - val_acc: 0.0181
Epoch 4/70
 - 19s - loss: -4.0131e+00 - acc: 0.0170 - val_loss: 4.0840 - val_acc: 0.0176
Epoch 5/70
 - 19s - loss: 4.3228 - acc: 0.0183 - val_loss: 2.6694 - val_acc: 0.0267
Epoch 6/70
 - 19s - loss: -2.7257e+00 - acc: 0.0281 - val_loss: -2.6560e+00 - val_acc: 0.0293
Epoch 7/70
 - 19s - loss: -4.7049e+00 - acc: 0.0292 - val_loss: -5.6257e+00 - val_acc: 0.0293
Epoch 8/70
 - 19s - loss: -6.4444e+00 - acc: 0.0293 - val_loss: -6.7927e+00 - val_acc: 0.0302
Epoch 9/70
 - 19s - loss: -8.0359e+00 - acc: 0.0281 - val_loss: -7.3658e+00 - val_acc: 0.0256
Epoch 10/70
 - 19s - loss: 5.0064 - acc: 0.0225 - val_loss: 9.0745 - val_acc: 0.0238
Epoch 11/70
 - 19s - loss: 8.6549 - acc: 0.0257 - val_los