### Import Library

In [3]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [4]:
import torch 
import itertools
import pandas as pd 
import pickle 
import numpy as np
import mne
torch.manual_seed(0)

<torch._C.Generator at 0x7fdb3e5f3d90>

### Select Dataset 


In [None]:
def loaddat(filename):        
    with open(filename, 'rb') as handle:
        b = pickle.load(handle)          
    return b

In [None]:
dname = dict(BNCI2014004 = 'aBNCI2014004R.pickle',
             BNCI2014001 = 'aBNCI2014001R.pickle',
             Weibo2014   = 'aWeibo2014R.pickle',
             Physionet   = 'aPhysionetRR.pickle')

In [None]:
# itemname is one of : ['BNCI2014004', 'BNCI2014001', 'Weibo2014', 'Physionet']
itemname = 'BNCI2014001'
filename = dname[itemname]

iname = itemname + '__'    

In [None]:
data = loaddat(filename)

In [None]:
data[0]['right_hand'].plot();

In [None]:
data

### Load subject specific data

In [None]:
from nu_smrutils import subject_specific, augment_dataset, crop_data

In [None]:
subjectIndex = list(range(108))
class_name = ['left_hand', 'right_hand']

datt = subject_specific(data, subjectIndex, class_name, 
                        normalize = True, test_size = 0.15)

### Subspe Data augmentation 

In [None]:
for dat in datt:    
    print(dat['xtrain'].shape)
datt[0].keys()

In [None]:
# augment only the training data 
augmdata = dict(std_dev = 0.01,  multiple = 2)

for ii, dat in enumerate(datt):    
    xtrain, ytrain = augment_dataset(dat['xtrain'], dat['ytrain'], std_dev = augmdata['std_dev'], 
                                     multiple = augmdata['multiple'])
    print(xtrain.shape)
    datt[ii]['xtrain'], datt[ii]['ytrain'] = xtrain, ytrain

### Subspe Data Cropping

In [None]:
fs = 80 # sampling frequency 
crop_len = 1.5 #or None
crop = dict(fs = fs, crop_len = crop_len)

In [None]:
for ii, dat in enumerate(datt):
    if crop['crop_len']:
       X_train, y_train = crop_data(crop['fs'], crop['crop_len'], dat['xtrain'], dat['ytrain'])
       X_valid, y_valid = crop_data(crop['fs'], crop['crop_len'], dat['xvalid'], dat['yvalid'])
       X_test,  y_test  = crop_data(crop['fs'], crop['crop_len'], dat['xtest'],  dat['ytest'])
       
       print(X_train.shape)
       datt[ii] = dict(xtrain = X_train, xvalid = X_valid, xtest = X_test,
                       ytrain = y_train, yvalid = y_valid, ytest = y_test)                      

### Pytorch dataloaders 

In [None]:
from torch.utils.data import TensorDataset, DataLoader  

def get_data_loaders(dat, batch_size, EEGNET = None):    
    # convert data dimensions to into to gray scale image format
    if EEGNET: ### EEGNet model requires the last dimension to be 1 
        ff = lambda dat: torch.unsqueeze(dat, dim = -1)    
    else:
        ff = lambda dat: torch.unsqueeze(dat, dim = 1)    
        
    x_train, x_valid, x_test =  map(ff,(dat['xtrain'],dat['xvalid'],dat['xtest']))    
    y_train, y_valid, y_test =  dat['ytrain'], dat['yvalid'], dat['ytest']
    print('Input data shape', x_train.shape)       
    
    # TensorDataset & Dataloader    
    train_dat = TensorDataset(x_train, y_train) 
    val_dat   = TensorDataset(x_valid, y_valid) 
    
    train_loader = DataLoader(train_dat, batch_size = batch_size, shuffle = True)
    val_loader   = DataLoader(val_dat,   batch_size = batch_size, shuffle = False)

    output = dict(dset_loaders = {'train': train_loader, 'val': val_loader}, 
                  dset_sizes  =  {'train': len(x_train), 'val': len(x_valid)},
                  test_data   =  {'x_test' : x_test, 'y_test' : y_test})          
    return output 

### Subspe dataloaders

In [None]:
for ii, dat in enumerate(datt): #for each dataset 
    datt[ii] = get_data_loaders(dat, batch_size = 64)

In [None]:
datt[0].keys()

## CNN constructor

In [None]:
import torch.nn as nn
class CNN2D(torch.nn.Module):  
    def __init__(self, input_size, kernel_size, conv_channels, dense_size, dropout):         
        super(CNN2D, self).__init__()        
        self.cconv   = []  
        self.MaxPool = nn.MaxPool2d((1, 2), (1, 2))  
        self.ReLU    = nn.ReLU()
        self.Dropout = nn.Dropout(dropout)        
        self.batchnorm = []        
        # ############ batchnorm ###########
        for jj in conv_channels:
            self.batchnorm.append(nn.BatchNorm2d(jj, eps=0.001, momentum=0.01,
                                                 affine=True, track_running_stats=True).cuda())         
        # ############ define CONV layer architecture: #########
        ii = 0
        for in_channels, out_channels in zip(conv_channels, conv_channels[1:]):                           
            conv_i = torch.nn.Conv2d(in_channels  = in_channels, out_channels = out_channels,
                                     kernel_size  = kernel_size[ii], #stride = (1, 2),
                                     padding      = (kernel_size[ii][0]//2, kernel_size[ii][1]//2))
            
            self.cconv.append(conv_i)                
            self.add_module('CNN_K{}_O{}'.format(kernel_size[ii], out_channels), conv_i)
            ii += 1 
        ##########################################################    
        self.flat_dim = self.get_output_dim(input_size, self.cconv)    
        self.fc1 = torch.nn.Linear(self.flat_dim, dense_size)
        self.fc2 = torch.nn.Linear(dense_size, 2)     

    def get_output_dim(self, input_size, cconv):        
        with torch.no_grad():
            input = torch.ones(1,*input_size)              
            for conv_i in cconv:                
                input = self.MaxPool(conv_i(input))        
                flatout = int(np.prod(input.size()[1:]))
                print("Flattened output ::", flatout, input.shape)                
        return flatout 
        
    def forward(self, input):        
        for jj, conv_i in enumerate(self.cconv):
            #conv_i.cuda()            
            input = conv_i(input)
            input = self.batchnorm[jj+1](input)
            input = self.ReLU(input)        
            input = self.MaxPool(input)                   
        # flatten the CNN output     
        out = input.view(-1, self.flat_dim) 
        out = self.fc1(out)                       
        out = self.Dropout(out)        
        out = self.fc2(out)      
        return out        

### Hyperparameter settings

In [12]:
dev = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [13]:
print('Your GPU device name :', torch.cuda.get_device_name())    

Your GPU device name : GeForce GTX 1050 Ti


In [14]:
from nu_train_utils import train_model

In [15]:
num_epochs = 150 
learning_rate = 1e-3
weight_decay = 1e-4  
batch_size = 64
verbose = 2

# define kernel size in terms of ms length 
timE = 100 #ms
width = timE*fs//1000    

# ker = 8 #timelength//chans 
h, w = 3, 1  #hight and width of a rectangular kernel      

if itemname == 'BNCI2014001':
        kernel_size = [(h, w*width), (h, w*width), (h, w*width),(h, w*width),(h, w*width),(h, w*width)]
        conv_chan   = [1, 64, 32, 16, 8] 
elif itemname == 'Weibo2014':
        kernel_size = [(h, w*width), (h, w*width), (h, w*width),(h, w*width),(h, w*width),(h, w*width)]
        conv_chan   = [1, 64, 32, 16, 8]            
elif itemname == 'Physionet':
        kernel_size = [(h, w*width), (h, w*width), (h, w*width),(h, w*width),(h, w*width),(h, w*width)]
        conv_chan   = [1, 8, 16, 32, 64]

NameError: name 'fs' is not defined

In [37]:
#% used to save the results table 
results = {}        
table = pd.DataFrame(columns = ['Train_Acc', 'Val_Acc', 'Test_Acc', 'Epoch']) 

### Subject select

In [38]:
print("Number of subjects :", len(datt))

Number of subjects : 9


In [39]:
subject = 4
description = 'Subject'+str(subject+1)

dat = datt[subject]

#% get input size (channel x timepoints)
input_size = (1, dat['test_data']['x_test'].shape[-2], 
                 dat['test_data']['x_test'].shape[-1])
print(input_size)

(1, 22, 180)


### Model architecture

In [42]:
# Define the architecture
model = CNN2D(input_size    = input_size, 
              kernel_size   = kernel_size, 
              conv_channels = conv_chan,
              dense_size    = 256, 
              dropout       = 0.5)               

# optimizer and the loss function definition 
optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate, weight_decay = weight_decay)
criterion = torch.nn.CrossEntropyLoss()

model.to(dev)  
criterion.to(dev)       

print("Model architecture >>>", model)

torch.Size([1, 64, 22, 90])
Flattened output :: 126720
torch.Size([1, 32, 22, 45])
Flattened output :: 31680
torch.Size([1, 16, 22, 23])
Flattened output :: 8096
torch.Size([1, 8, 22, 12])
Flattened output :: 2112
Model architecture >>> CNN2D(
  (MaxPool): MaxPool2d(kernel_size=(1, 2), stride=(1, 2), padding=0, dilation=1, ceil_mode=False)
  (ReLU): ReLU()
  (Dropout): Dropout(p=0.5, inplace=False)
  (CNN_K(3, 8)_O64): Conv2d(1, 64, kernel_size=(3, 8), stride=(1, 1), padding=(1, 4))
  (CNN_K(3, 8)_O32): Conv2d(64, 32, kernel_size=(3, 8), stride=(1, 1), padding=(1, 4))
  (CNN_K(3, 8)_O16): Conv2d(32, 16, kernel_size=(3, 8), stride=(1, 1), padding=(1, 4))
  (CNN_K(3, 8)_O8): Conv2d(16, 8, kernel_size=(3, 8), stride=(1, 1), padding=(1, 4))
  (fc1): Linear(in_features=2112, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=2, bias=True)
)


### Training loop 

In [43]:
#******** Training loop *********    
best_model, train_losses, val_losses, train_accs, val_accs, info =\
    train_model(model, dat['dset_loaders'], dat['dset_sizes'], 
                criterion, optimizer, dev, lr_scheduler = None, num_epochs = num_epochs, verbose = verbose)    

test_samples = 50
x_test = dat['test_data']['x_test'][:test_samples,:,:,:] 
y_test = dat['test_data']['y_test'][:test_samples] 
print(x_test.shape)

# predict test data 
preds = best_model(x_test.to(dev)) 
preds_class = preds.data.max(1)[1]

# get the accuracy 
corrects = torch.sum(preds_class == y_test.data.to(dev))     
test_acc = corrects.cpu().numpy()/x_test.shape[0]
print("Test Accuracy :", test_acc) 

# save results       
tab = dict(Train_Acc= train_accs[info['best_epoch']],
           Val_Acc  = val_accs[info['best_epoch']],   
           Test_Acc = test_acc, Epoch = info['best_epoch'] + 1)         

table.loc[description] = tab  
val_acc = np.max(val_accs)

print(table)
results[description] = dict(train_accs = train_accs, val_accs =  val_accs,                                
                            ytrain = info['ytrain'], yval= info['yval'])      

fname = iname + 'CNN_POOLED' + description + '_' + str(val_acc)[:4]
torch.save(best_model.state_dict(), fname) 

Epoch 1/150
train loss: 0.0312, acc: 0.4795
val loss: 0.0180, acc: 0.4796
Epoch 2/150
train loss: 0.0168, acc: 0.5410
val loss: 0.0175, acc: 0.4694
Epoch 3/150
train loss: 0.0129, acc: 0.5615
val loss: 0.0159, acc: 0.5000
Epoch 4/150
train loss: 0.0115, acc: 0.6103
val loss: 0.0168, acc: 0.4796
Epoch 5/150
train loss: 0.0100, acc: 0.6667
val loss: 0.0166, acc: 0.5204
Epoch 6/150
train loss: 0.0095, acc: 0.7192
val loss: 0.0180, acc: 0.5000
Epoch 7/150
train loss: 0.0083, acc: 0.7782
val loss: 0.0203, acc: 0.5000
Epoch 8/150
train loss: 0.0056, acc: 0.8526
val loss: 0.0244, acc: 0.4898
Epoch 9/150
train loss: 0.0034, acc: 0.9231
val loss: 0.0315, acc: 0.5000
Epoch 10/150
train loss: 0.0022, acc: 0.9590
val loss: 0.0293, acc: 0.5204
Epoch 11/150
train loss: 0.0027, acc: 0.9346
val loss: 0.0342, acc: 0.5000
Epoch 12/150
train loss: 0.0013, acc: 0.9744
val loss: 0.0401, acc: 0.5102
Epoch 13/150
train loss: 0.0006, acc: 0.9910
val loss: 0.0453, acc: 0.5204
Epoch 14/150
train loss: 0.0002, a

### Results

In [44]:
# save all the results in one file 
rTable = dict(table = table)
ij = str(np.random.randint(101))
filename = iname + "_CNNSUBSPERES_"+description +ij+ itemname        

with open(filename, 'wb') as ffile:
    pickle.dump(rTable, ffile)   

In [46]:
filename

'BNCI2014001___CNNSUBSPERES_Subject542BNCI2014001'