In [1]:
import datetime
import numpy as np
import os
import pandas as pd
from pandas import Series

#from lstm.ivie_data import BiRNN
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset
from torch.autograd import Variable

from sklearn.metrics import accuracy_score, confusion_matrix
from lstm import ivie_data

In [25]:
cuda_enabled = torch.cuda.is_available()
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
cuda_enabled

True

In [4]:
torch.FloatTensor([0])

tensor([0.])

In [5]:
BASELINE_START = "baselinestart"
BASELINE_END = "baselineend"
EASY_START = "easystart"
EASY_END = "easyend"
HARD_START = "hardstart"
HARD_END = "hardend"

INPUT_SIZE = 352 # determined by the shortest test sample

EASY_DIFFICULTY = 0
HARD_DIFFICULTY = 1

In [6]:
"""Gets the row blocks for easy and hard tasks
"""
def read_data(fnirs_path, marker_path):
    fnirs_df =  pd.read_csv(fnirs_path, sep='\t', skiprows=range(4), index_col=False)
    marker_df = pd.read_csv(marker_path, sep='\t', skiprows=range(4), index_col=False)
    
    merged_df = pd.merge(fnirs_df, marker_df, on="Matlab_now", how="left")
    
    return merged_df

In [7]:
def get_row_blocks(merged_df):
    easy_start_rows = merged_df.index[merged_df.Stimulus_Label == EASY_START].tolist()
    easy_end_rows = merged_df.index[merged_df.Stimulus_Label == EASY_END].tolist()
    hard_start_rows = merged_df.index[merged_df.Stimulus_Label == HARD_START].tolist()
    hard_end_rows = merged_df.index[merged_df.Stimulus_Label == HARD_END].tolist()
    
    easy_rows = list(zip(easy_start_rows, easy_end_rows))
    hard_rows = list(zip(hard_start_rows, hard_end_rows))
    
    return (easy_rows, hard_rows)

In [8]:
"""Return subset of df determined by the indices of the row blocks
"""
def get_subsets(merged_df, row_blocks, difficulty):
    tables = []
    column_names = ["Matlab_now", "A-DC1", "A-DC2", "A-DC3", "A-DC4", "A-DC5",
                    "A-DC6", "A-DC7", "A-DC8", "B-DC1", "B-DC2", "B-DC3", 
                    "B-DC4", "B-DC5", "B-DC6", "B-DC7", "B-DC8"]
    column_indices = [merged_df.columns.get_loc(c) for c in column_names]
    for row_block in row_blocks:
        df = merged_df.iloc[row_block[0]:row_block[1], column_indices]
        start_time = df.iloc[0]["Matlab_now"]
        df["Matlab_now"] = df["Matlab_now"] - start_time
        df["Difficulty"] = difficulty

        tables.append(df.iloc[:INPUT_SIZE])
    return tables

In [9]:
"""Extract features from given dataset
    :param data_path: Directory containing the files
    
    :return: gets all the easy and hard features from a given dataset
"""
def get_data(data_path):
    fnirs_path = os.path.join(os.getcwd(), data_path, "fNIRSdata.txt")
    marker_path = os.path.join(os.getcwd(), data_path, "markers.txt")
    merged_df = read_data(fnirs_path, marker_path)
    easy_rows, hard_rows = get_row_blocks(merged_df)
    
    easy_tables = get_subsets(merged_df, easy_rows, EASY_DIFFICULTY)
    hard_tables = get_subsets(merged_df, hard_rows, HARD_DIFFICULTY)

    return easy_tables, hard_tables

In [10]:
easy_tables_902, hard_tables_902 = get_data("data/S902/2015-02-26_11-24-48-120")
easy_tables_903, hard_tables_903 = get_data("data/S903/2015-02-27_13-20-42-120/")

In [11]:
train = easy_tables_902 + easy_tables_903 + hard_tables_902 + hard_tables_903

In [12]:
columns = ["A-DC1", "A-DC2", "A-DC3", "A-DC4", "A-DC5",
           "A-DC6", "A-DC7", "A-DC8", "B-DC1", "B-DC2", "B-DC3", 
           "B-DC4", "B-DC5", "B-DC6", "B-DC7", "B-DC8", "Difficulty"]

In [13]:
train_x = [a.iloc[:,1:-1] for a in train]
train_y = [a.iloc[0,-1] for a in train]

torch.tensor(targets_df['targets'].values)

In [14]:
torch.tensor(train_x[0].values).shape

torch.Size([352, 16])

In [15]:
torch.tensor(train_y[0])

tensor(0)

In [16]:
train_x[0].shape

(352, 16)

In [18]:
params = {'shuffle': True,
          'num_workers': 6}

In [65]:
class fnirs(Dataset):
    #Characterizes a dataset for PyTorch
    def __init__(self, data, labels):
        #Initialization
        self.data = data
        self.labels = labels

    def __len__(self):
        #Denotes the total number of samples
        return len(self.data)

    def __getitem__(self, index):
        #Generates one sample of data
        x = torch.tensor(self.data[index].values).to(device)
        y = torch.tensor([self.labels[index]]).to(device)

        return x, y

In [73]:
class BiRNN(nn.Module):

    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(BiRNN, self).__init__()
        self.is_training = False
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.num_classes = num_classes
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers,
                            batch_first=True, bidirectional=True)
        #self.fc = nn.Dropout(p=0.75, inplace=False)
        self.fc = nn.Dropout(p=0.5, inplace=False)
        self.linear = nn.Linear(self.hidden_size*2, self.num_classes)

        if cuda_enabled:
            self.lstm = self.lstm.cuda()
            self.fc = self.fc.cuda()
            self.linear = self.linear.cuda()

    def forward(self, x): 
        # Set initial states
        h0 = Variable(torch.zeros(self.num_layers*2, x.size(0), self.hidden_size)).to(device) # 2 for bidirection
        c0 = Variable(torch.zeros(self.num_layers*2, x.size(0), self.hidden_size)).to(device)
        if cuda_enabled:
            h0 = h0.cuda()  # 2 for bidirection
            c0 = c0.cuda()

        # Forward propagate RNN
        out, _ = self.lstm(x, (h0, c0))
        
        # Decode hidden state of last time step
        if self.is_training:
            out = self.fc(out[:, -1, :]) 
        else:
            out = out[:, -1, :]

        out = F.log_softmax(self.linear(out), dim=1)
        return out 

In [61]:
hidden_size = 8
num_layers = 2
num_classes = 2
learning_rate = 0.0001
num_epochs = 5
sequence_length = 16 # column size. get rid of time unless time difference is consistent
input_size = 352 # longest length of ti
rnn = BiRNN(input_size, hidden_size, num_layers, num_classes)
rnn.is_training = True

data = fnirs(train_x, train_y)
train_loader = torch.utils.data.DataLoader(data, batch_size=2, shuffle=True)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(rnn.parameters(), lr=learning_rate)

epoch_loss = 5.
    # Train it

for epoch in range(num_epochs):
    loss_total = 0.
    iteration_count = 0.
    for i, (data, label) in enumerate(train_loader):
        data = Variable(data.view(-1, sequence_length, input_size))
        label = Variable(label)

        # Forward + Backward + Optimize
        optimizer.zero_grad()
        outputs = rnn(data)

        loss = criterion(outputs, label)
        loss_total += loss.data[0]
        loss.backward()
        optimizer.step()

    current_epoch_loss = loss_total / iteration_count
    # Optimise training epochs: only continue training while the loss drops
    if current_epoch_loss >= epoch_loss:
        break
    epoch_loss = current_epoch_loss
        


RuntimeError: cuDNN error: CUDNN_STATUS_BAD_PARAM