### Import Library

In [164]:
import torch
from torch.autograd import Variable
import os
from collections import defaultdict
import _pickle as cPickle
import numpy as np
import time
from scipy.stats import kurtosis, skew

### Data preprocessing

In [2]:
def preprocessing(data):
    result = []
    mean = np.mean(data)
    median = np.median(data)
    maximum = np.amax(data)
    minimum = np.amin(data)
    std_dev = np.std(data)
    var = np.var(data)
    ran = np.ptp(data)
    skewness = skew(data)
    kurto = kurtosis(data)
    result=[mean, median, maximum, minimum,
                   std_dev, var, ran, skewness, kurto]
    return result

In [3]:
dirname = './data_preprocessed_python'
filenames = os.listdir(dirname)
data_dic = defaultdict(lambda:[])
for i, filename in enumerate(filenames):
    full_filename = os.path.join(dirname, filename)
    x = cPickle.load(open(full_filename, 'rb'), encoding='ISO-8859-1')
    data_dic[filename[:-4]] = x

In [42]:
new_data_dic = defaultdict(lambda:defaultdict(lambda:[]))
for key in data_dic.keys():
    par = int(key[1:])
    
    for i, vid in enumerate(data_dic[key]['data']):
        new_data_dic[key]['data'].append([])
        for chnn in vid[:-8]:
            l =preprocessing(np.array(chnn))
            for batch in range(0,10):
                l = l+preprocessing(np.array(chnn[batch*807:max((batch+1)*807, 8064)]))
            new_data_dic[key]['data'][i].append(np.array([par, i+1]+l))
    for i,  vid in enumerate(data_dic[key]['labels']):
        l2 = []
        new_data_dic[key]['labels'].append([])
        for value in vid:
            l2.append(value)
        new_data_dic[key]['labels'][i] += l2

In [43]:
data_loader = []
idx = 0
print(len(new_data_dic.keys()))
for par in new_data_dic.keys():
    if par != 's02':
        for i, vid in enumerate(new_data_dic[par]['data']):
            data_loader.append((np.array(vid), new_data_dic[par]['labels'][i]))
            idx +=1

32


#### Set train & test data set

In [87]:
train_data = []
train_label = []
test_data = []
test_label = []
for i in new_data_dic:
    print("process {}".format(i))
    if i != 's02':
        
        for j in range(len(new_data_dic[i]['labels'])):
            temp = np.array(new_data_dic[i]['data'][j]).reshape(-1, 32*101)[0]
            train_data.append(temp)
            train_label.append(new_data_dic[i]['labels'][j])
    else:
        for j in range(len(new_data_dic[i]['labels'])):
            temp = np.array(new_data_dic[i]['data'][j]).reshape(-1, 32*101)[0]
            test_data.append(temp)
            test_label.append(new_data_dic[i]['labels'][j])
print("Finished!")

process s01
process s02
process s03
process s04
process s05
process s06
process s07
process s08
process s09
process s10
process s11
process s12
process s13
process s14
process s15
process s16
process s17
process s18
process s19
process s20
process s21
process s22
process s23
process s24
process s25
process s26
process s27
process s28
process s29
process s30
process s31
process s32
Finished!


### Define DNN model
- nodes : 3232 -> 5000 -> 500 -> 1000 -> 4
- activate function : ReLU
- Dropout probability : 0.25(input), 0.5(hidden layer)
- Loss function : MSELoss()
- optimizer : RMSprop

In [84]:
linear1 = torch.nn.Linear(3232, 5000, bias=True)
linear2 = torch.nn.Linear(5000, 500, bias=True)
linear3 = torch.nn.Linear(500, 1000, bias=True)
linear4 = torch.nn.Linear(1000, 4, bias=True)
relu = torch.nn.ReLU()
model_1 = torch.nn.Sequential(linear1, torch.nn.Dropout(0.25), relu, 
                           linear2, torch.nn.Dropout(0.5), relu,
                           linear3, torch.nn.Dropout(0.5), relu,
                           linear4)
print(model_1)

Sequential(
  (0): Linear(in_features=3232, out_features=5000, bias=True)
  (1): Dropout(p=0.25)
  (2): ReLU()
  (3): Linear(in_features=5000, out_features=500, bias=True)
  (4): Dropout(p=0.5)
  (5): ReLU()
  (6): Linear(in_features=500, out_features=1000, bias=True)
  (7): Dropout(p=0.5)
  (8): ReLU()
  (9): Linear(in_features=1000, out_features=4, bias=True)
)


#### let's train it!

In [106]:
loss_function = torch.nn.MSELoss()

optimizer = torch.optim.RMSprop(model_1.parameters(), lr=learning_rate)
batch_size = 310
for epoch in range(training_epochs):
    avg_cost = 0
    total_batch = 1240 // batch_size

    for i in range(len(train_data)):
        X = Variable(torch.FloatTensor(train_data[i]))
        Y = Variable(torch.FloatTensor(train_label[i]))
        optimizer.zero_grad()
        Y_prediction = model_1(X)
#         print(X, Y_prediction, Y)
        loss = loss_function(Y_prediction, Y)
        loss.backward()
        optimizer.step()
        avg_cost += loss / total_batch
#         print(avg_cost)
    print("[Epoch: {:>4}] cost = {:>.9}".format(epoch+1, avg_cost.data[0]))

print('Learning Finished~!#@')



[Epoch:    1] cost = 59268.625
[Epoch:    2] cost = 18937.832
[Epoch:    3] cost = 12786.5693
[Epoch:    4] cost = 10477.3994
[Epoch:    5] cost = 9731.93066
[Epoch:    6] cost = 10046.6348
[Epoch:    7] cost = 7328.01367
[Epoch:    8] cost = 9021.86133
[Epoch:    9] cost = 11640.5078
[Epoch:   10] cost = 9779.1377
[Epoch:   11] cost = 7328.354
[Epoch:   12] cost = 7731.64453
[Epoch:   13] cost = 7987.19287
[Epoch:   14] cost = 9754.41895
[Epoch:   15] cost = 7438.60889
[Epoch:   16] cost = 7943.44482
[Epoch:   17] cost = 8137.98682
[Epoch:   18] cost = 7677.93213
[Epoch:   19] cost = 8466.42676
[Epoch:   20] cost = 8196.48242
[Epoch:   21] cost = 10652.7861
[Epoch:   22] cost = 10716.8955
[Epoch:   23] cost = 8147.56836
[Epoch:   24] cost = 10065.4395
[Epoch:   25] cost = 6176.28223
[Epoch:   26] cost = 7714.74072
[Epoch:   27] cost = 9396.22754
[Epoch:   28] cost = 7256.9209
[Epoch:   29] cost = 9431.3418
[Epoch:   30] cost = 7411.97412
[Epoch:   31] cost = 7138.22461
[Epoch:   32] c

#### Calculate accuracy
- 2 classes and 3 classes

In [78]:
total = 160

all_predict = []
all_answer = []
all_accu = 0
for i in range(10):
    correct = 0
    for index, i in enumerate(new_data_dic['s02']['data']):
        x_test = Variable(torch.Tensor(i).view(-1, 32*101))
        y_test = model(x_test)
#         print(y_test)
        predict = []
        answer = []
        for j in y_test[0]:
            if float(j)>5:
                predict.append(1)
            else:
                predict.append(0)
        for j in new_data_dic['s02']['labels'][index]:
#             print(j)
            if j>5:
                answer.append(1)
            else:
                answer.append(0)
#         print(predict, answer)
        all_predict.append(predict)
        all_answer.append(answer)

    # print(np.array(all_predict).reshape(-1, 40*4), np.array(all_answer).reshape(-1, 40*4))

    for i in range(len(np.array(all_predict).reshape(-1, 40*4)[0])):
#         print(np.array(all_predict).reshape(-1, 40*4)[0][i], np.array(all_answer).reshape(-1, 40*4)[0][i])
        if(np.array(all_predict).reshape(-1, 40*4)[0][i] == np.array(all_answer).reshape(-1, 40*4)[0][i]):
            correct += 1
    all_accu += correct/total
    print(correct, total, correct/total)
print("Accuracy: {}%".format((all_accu/10) * 100))

85 160 0.53125
85 160 0.53125
85 160 0.53125
85 160 0.53125
85 160 0.53125
85 160 0.53125
85 160 0.53125
85 160 0.53125
85 160 0.53125
85 160 0.53125
Accuracy: 53.125%


In [122]:
total = 160

all_predict = []
all_answer = []
all_accu = 0
for i in range(10):
    correct = 0
    for index, i in enumerate(new_data_dic['s02']['data']):
        x_test = Variable(torch.Tensor(i).view(-1, 32*101))
        y_test = model_1(x_test)
        predict = []
        answer = []
        for j in y_test[0]:
            if float(j)>6:
                predict.append(1)
            elif float(j) <= 6 and float(j) > 4:
                predict.append(0)
            else:
                predict.append(-1)
        for j in new_data_dic['s02']['labels'][index]:
            if j>6:
                answer.append(1)
            elif j <= 6 and j>4:
                answer.append(0)
            else:
                answer.append(-1)
#         print(predict, answer)
        all_predict.append(predict)
        all_answer.append(answer)


    # print(np.array(all_predict).reshape(-1, 40*4), np.array(all_answer).reshape(-1, 40*4))

    for i in range(len(np.array(all_predict).reshape(-1, 40*4)[0])):
#         print(np.array(all_predict).reshape(-1, 40*4)[0][i], np.array(all_answer).reshape(-1, 40*4)[0][i])
        if(np.array(all_predict).reshape(-1, 40*4)[0][i] == np.array(all_answer).reshape(-1, 40*4)[0][i]):
            correct += 1
    all_accu += correct/total
    print(correct, total, correct/total)
print("Accuracy: {}%".format((all_accu/10) * 100))

45 160 0.28125
45 160 0.28125
45 160 0.28125
45 160 0.28125
45 160 0.28125
45 160 0.28125
45 160 0.28125
45 160 0.28125
45 160 0.28125
45 160 0.28125
Accuracy: 28.125%


### Result
the 2 classes accuracy is : 49~53% and the 3 classes accuracy is : 28~33%<br>
before we use EEG data sets the accuracy is : 69~73%
<img src="before.png">