In [1]:
import torch
from torch import nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.utils.data import sampler

import torchvision.transforms as T
import numpy as np
import h5py

In [2]:
class EEGDataset(torch.utils.data.Dataset):
    """Map-style dataset that pairs each sample in ``x`` with a scalar label.

    Args:
        x: Indexable collection of samples (e.g. a NumPy array). Its first
            dimension must match the number of labels.
        y: Array-like of labels exposing ``.size``; typically shaped ``(N, 1)``,
            but any layout holding exactly ``N`` values (such as ``(N,)``) is
            accepted and flattened.
        train: Flag stored unchanged on ``self.train`` for callers that want to
            distinguish splits.

    Raises:
        AssertionError: If the number of samples and labels differ.
    """

    def __init__(self, x, y, train):
        # Zero-argument super() actually runs Dataset.__init__; the one-argument
        # form `super(EEGDataset)` only creates an unbound proxy object.
        super().__init__()
        assert x.shape[0] == y.size
        self.x = x
        # Materialise all labels with a single read instead of indexing row by
        # row (one read per row is very slow when `y` is backed by an HDF5 file).
        self.y = list(np.asarray(y).reshape(-1))
        self.train = train

    def __getitem__(self, key):
        """Return the ``(sample, label)`` pair stored at position ``key``."""
        return (self.x[key], self.y[key])

    def __len__(self):
        """Return the number of labelled samples."""
        return len(self.y)

In [3]:
# Load EEG data
NUM_TRAIN = 700  # not referenced again in this cell

transform = T.Compose([
                T.ToTensor()
            ])  # not applied to the EEG arrays below


def _read_mat_dataset(path, key):
    """Read dataset `key` from the HDF5-backed .mat file at `path` into memory.

    The file is closed before returning, so the result is a plain NumPy array
    that stays valid afterwards (an open h5py dataset would not).
    """
    with h5py.File(path, 'r') as mat_file:
        return mat_file[key][()]


# Each sample is reshaped to (1, 24, 256): a single-channel 24 x 256 "image".
x_train = np.reshape(_read_mat_dataset('child_mind_x_train.mat', 'X_train'),
                     (-1, 1, 24, 256))
print('X_train shape: ' + str(x_train.shape))
y_train = _read_mat_dataset('child_mind_y_train.mat', 'Y_train')
print('Y_train shape: ' + str(y_train.shape))
train_data = EEGDataset(x_train, y_train, True)
# NOTE(review): no shuffle/sampler is passed, so batches follow file order.
loader_train = DataLoader(train_data, batch_size=64)

x_val = np.reshape(_read_mat_dataset('child_mind_x_val.mat', 'X_val'),
                   (-1, 1, 24, 256))
print('X_val shape: ' + str(x_val.shape))
y_val = _read_mat_dataset('child_mind_y_val.mat', 'Y_val')
print('Y_val shape: ' + str(y_val.shape))
# train=True here: check_accuracy reports a truthy `train` flag as
# "validation set" and a falsy one as "test set".
val_data = EEGDataset(x_val, y_val, True)
loader_val = DataLoader(val_data, batch_size=64)

X_train shape: (51097, 1, 24, 256)
Y_train shape: (51097, 1)
X_val shape: (29274, 1, 24, 256)
Y_val shape: (29274, 1)


In [5]:
# Flatten the (N, 1) label column into a list with one bulk read rather than
# N single-row lookups.
labels = list(np.asarray(y_train).reshape(-1))

In [8]:
# Bin the labels (np.histogram defaults to 10 equal-width bins) to inspect the
# class balance; returns (counts, bin_edges).
np.histogram(labels)

(array([41944,     0,     0,     0,     0,     0,     0,     0,     0,
         9153]),
 array([0. , 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1. ]))

In [11]:
# Peek at the first training sample to eyeball its value range and dtype.
x_train[0]

array([[[ 84.08093  ,  20.43338  ,  20.751617 , ...,  -1.90271  ,
          22.990479 ,  29.13623  ],
        [  0.5722656,  14.500916 ,  20.264587 , ...,  -8.235443 ,
         -32.589264 ,  46.441437 ],
        [ 42.12967  , -10.080566 ,  41.760773 , ..., -10.332001 ,
          -1.9408264,  44.158447 ],
        ...,
        [-62.94983  , -11.334473 , -37.815186 , ...,   3.7601318,
          -4.9518433, -21.518494 ],
        [  2.3652954,   0.5593872,   0.7571411, ..., -33.310913 ,
          -7.4692383, -71.837524 ],
        [-73.627686 , -23.33606  , -22.46582  , ...,  10.324524 ,
          16.434875 , -24.301025 ]]], dtype=float32)

In [17]:
# Test with MNIST
import torchvision.datasets as dset

# The first NUM_TRAIN images of the MNIST training split are used for training;
# the remainder of that split is held out for validation.
NUM_TRAIN = 40000

# Centre-crop each digit to 24 x 24, then pad 116 px on the left and on the
# right (0 px top/bottom) so the tensor is 1 x 24 x 256, the same layout the
# EEG samples are reshaped to.
transform = T.Compose([
                T.CenterCrop(24),
                T.Pad((116,0)),
                T.ToTensor(),
            ])
mnist_train = dset.MNIST('./mnist', train=True, download=True,
                             transform=transform)
assert NUM_TRAIN < len(mnist_train), 'NUM_TRAIN leaves no images for validation'
loader_train = DataLoader(mnist_train, batch_size=64,
                          sampler=sampler.SubsetRandomSampler(range(NUM_TRAIN)))

# Validation draws from the *training* split (train=True) on purpose: it is the
# tail of that split that the training sampler never visits.
mnist_val = dset.MNIST('./mnist', train=True, download=True,
                           transform=transform)
# Upper bound comes from the dataset itself instead of a hard-coded 60000.
loader_val = DataLoader(mnist_val, batch_size=64,
                        sampler=sampler.SubsetRandomSampler(range(NUM_TRAIN, len(mnist_val))))

mnist_test = dset.MNIST('./mnist', train=False, download=True,
                            transform=transform)
loader_test = DataLoader(mnist_test, batch_size=64)

In [21]:
# Number of batches the validation loader yields per pass.
len(loader_val)

313

In [27]:
USE_GPU = True

# Floating-point dtype that inputs are cast to before they reach the model.
dtype = torch.float32

# Use CUDA only when it is both requested and available; otherwise stay on CPU.
device = torch.device('cuda' if USE_GPU and torch.cuda.is_available() else 'cpu')

# Number of training iterations between loss / accuracy reports.
print_every = 100

print('using device:', device)

using device: cpu


In [28]:
def check_accuracy(loader, model):
    """Evaluate classification accuracy of `model` over every batch in `loader`.

    Reads the module-level `device` and `dtype` settings. The model is switched
    to evaluation mode and is left in that mode when the function returns.

    Inputs:
    - loader: Iterable of (x, y) batches whose `dataset.train` flag selects the
      printed header (truthy -> "validation set", falsy -> "test set").
    - model: A PyTorch Module mapping a batch of inputs to per-class scores.

    Returns: The fraction of correct predictions as a float in [0, 1]
    (0.0 when the loader yields no samples). The same figures are printed.
    """
    if loader.dataset.train:
        print('Checking accuracy on validation set')
    else:
        print('Checking accuracy on test set')
    num_correct = 0
    num_samples = 0
    model.eval()  # set model to evaluation mode
    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=device, dtype=dtype)  # move to device, e.g. GPU
            y = y.to(device=device, dtype=torch.long)
            scores = model(x)
            _, preds = scores.max(1)
            # .item() keeps the running count a plain Python int rather than a
            # tensor living on `device`.
            num_correct += (preds == y).sum().item()
            num_samples += preds.size(0)
    # Guard against an empty loader instead of raising ZeroDivisionError.
    acc = float(num_correct) / num_samples if num_samples else 0.0
    print('Got %d / %d correct (%.2f)' % (num_correct, num_samples, 100 * acc))
    return acc

In [29]:
def train(model, optimizer, epochs=1):
    """
    Fit a model on the batches of the module-level `loader_train`.

    Every `print_every` iterations (counted from 0 within each epoch) the
    current loss is printed and `check_accuracy` is run on `loader_val`.

    Inputs:
    - model: A PyTorch Module giving the model to train.
    - optimizer: An Optimizer object that updates the model's parameters.
    - epochs: (Optional) A Python integer giving the number of passes over
      `loader_train`.

    Returns: Nothing, but prints loss and accuracy during training.
    """
    model = model.to(device=device)  # parameters live on the configured device
    for _epoch in range(epochs):
        for step, (inputs, targets) in enumerate(loader_train):
            # Re-enter training mode on every step: the periodic accuracy check
            # below may leave the model in evaluation mode.
            model.train()
            inputs = inputs.to(device=device, dtype=dtype)
            targets = targets.to(device=device, dtype=torch.long)

            # Clear gradients left over from the previous step.
            optimizer.zero_grad()

            # Forward pass and loss.
            logits = model(inputs)
            loss = F.cross_entropy(logits, targets)

            # Backward pass, then a parameter update from the fresh gradients.
            loss.backward()
            optimizer.step()

            if step % print_every != 0:
                continue
            print('Iteration %d, loss = %.4f' % (step, loss.item()))
            check_accuracy(loader_val, model)
            print()

In [33]:
def _conv_stage(in_channels, out_channels, kernel_size, pool):
    """Return the layers of one stage: Conv2d -> ReLU -> `pool` -> Dropout(0.25)."""
    return [
        nn.Conv2d(in_channels, out_channels, kernel_size),
        nn.ReLU(),
        pool,
        nn.Dropout(0.25),
    ]


# Four conv stages, two bare convolutions, then a two-layer linear head.
# For a 1 x 24 x 256 input the flattened feature size is 100 * 1 * 19 = 1900.
model = nn.Sequential(
    *_conv_stage(1, 100, 3, nn.MaxPool2d(2, 2)),
    *_conv_stage(100, 100, 3, nn.MaxPool2d(2, 2)),
    *_conv_stage(100, 300, (2, 3), nn.MaxPool2d(2, 2)),
    *_conv_stage(300, 300, (1, 7), nn.MaxPool2d((1, 2), stride=1)),
    # NOTE(review): no activation between these two convolutions, nor between
    # the two Linear layers below; each such pair composes to a single linear
    # map. Confirm this is intended.
    nn.Conv2d(300, 100, (1, 3)),
    nn.Conv2d(100, 100, (1, 3)),
    nn.Flatten(),
    nn.Linear(1900, 6144),
    nn.Linear(6144, 10),
)

# Smoke test: push the inputs of one training batch through the network.
sample_batch = next(iter(loader_train))
pred = model(sample_batch[0])

In [34]:
# Expect (batch_size, 10): one score per output class for every input in the batch.
print(pred.shape)

torch.Size([64, 10])


In [22]:
# Adam over every model parameter, then a single pass over the training loader.
optimizer = optim.Adam(model.parameters(), lr=2e-3)
train(model, optimizer, epochs=1)

Iteration 0, loss = 2.8361
Checking accuracy on validation set
Got 2050 / 20000 correct (10.25)

Iteration 100, loss = 2.3367
Checking accuracy on validation set
Got 1933 / 20000 correct (9.66)

Iteration 200, loss = 2.3048
Checking accuracy on validation set
Got 1933 / 20000 correct (9.66)

Iteration 300, loss = 2.2937
Checking accuracy on validation set
Got 2179 / 20000 correct (10.90)

Iteration 400, loss = 2.2976
Checking accuracy on validation set
Got 2179 / 20000 correct (10.90)

Iteration 500, loss = 2.3124
Checking accuracy on validation set
Got 1991 / 20000 correct (9.96)

Iteration 600, loss = 2.3102
Checking accuracy on validation set
Got 1999 / 20000 correct (9.99)



In [23]:
# `best_model` is a second reference to `model` (no copy is made), so this
# reports the accuracy of the model's current weights on `loader_test`.
best_model = model
check_accuracy(loader_test, best_model)

Checking accuracy on test set
Got 1028 / 10000 correct (10.28)
