In [2]:
import datetime
import numpy as np
import os
import pandas as pd
from pandas import Series

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset
from torch.autograd import Variable

from sklearn.metrics import accuracy_score, confusion_matrix
from lstm import ivie_data

In [106]:
cuda_enabled = torch.cuda.is_available()

In [107]:
cuda_enabled

True

In [150]:
torch.FloatTensor([0])

tensor([0.])

In [7]:
BASELINE_START = "baselinestart"
BASELINE_END = "baselineend"
EASY_START = "easystart"
EASY_END = "easyend"
HARD_START = "hardstart"
HARD_END = "hardend"

EASY_DIFFICULTY = 0
HARD_DIFFICULTY = 1

In [8]:
"""Gets the row blocks for easy and hard tasks
"""
def read_data(fnirs_path, marker_path):
    fnirs_df =  pd.read_csv(fnirs_path, sep='\t', skiprows=range(4), index_col=False)
    marker_df = pd.read_csv(marker_path, sep='\t', skiprows=range(4), index_col=False)
    
    merged_df = pd.merge(fnirs_df, marker_df, on="Matlab_now", how="left")
    
    return merged_df

In [9]:
def get_row_blocks(merged_df):
    easy_start_rows = merged_df.index[merged_df.Stimulus_Label == EASY_START].tolist()
    easy_end_rows = merged_df.index[merged_df.Stimulus_Label == EASY_END].tolist()
    hard_start_rows = merged_df.index[merged_df.Stimulus_Label == HARD_START].tolist()
    hard_end_rows = merged_df.index[merged_df.Stimulus_Label == HARD_END].tolist()
    
    easy_rows = list(zip(easy_start_rows, easy_end_rows))
    hard_rows = list(zip(hard_start_rows, hard_end_rows))
    
    return (easy_rows, hard_rows)

In [10]:
"""Return subset of df determined by the indices of the row blocks
"""
def get_subsets(merged_df, row_blocks, difficulty):
    tables = []
    column_names = ["Matlab_now", "A-DC1", "A-DC2", "A-DC3", "A-DC4", "A-DC5",
                    "A-DC6", "A-DC7", "A-DC8", "B-DC1", "B-DC2", "B-DC3", 
                    "B-DC4", "B-DC5", "B-DC6", "B-DC7", "B-DC8"]
    column_indices = [merged_df.columns.get_loc(c) for c in column_names]
    for row_block in row_blocks:
        df = merged_df.iloc[row_block[0]:row_block[1], column_indices]
        start_time = df.iloc[0]["Matlab_now"]
        df["Matlab_now"] = df["Matlab_now"] - start_time
        df["Difficulty"] = difficulty

        tables.append(df.iloc[:INPUT_SIZE])
    return tables

In [11]:
"""Extract features from given dataset
    :param data_path: Directory containing the files
    
    :return: gets all the easy and hard features from a given dataset
"""
def get_data(data_path):
    fnirs_path = os.path.join(os.getcwd(), data_path, "fNIRSdata.txt")
    marker_path = os.path.join(os.getcwd(), data_path, "markers.txt")
    merged_df = read_data(fnirs_path, marker_path)
    easy_rows, hard_rows = get_row_blocks(merged_df)
    
    easy_tables = get_subsets(merged_df, easy_rows, EASY_DIFFICULTY)
    hard_tables = get_subsets(merged_df, hard_rows, HARD_DIFFICULTY)

    return easy_tables, hard_tables

In [15]:
easy_tables_902, hard_tables_902 = get_data("data/S902/2015-02-26_11-24-48-120")
easy_tables_903, hard_tables_903 = get_data("data/S903/2015-02-27_13-20-42-120/")

In [62]:
train_sets = [easy_tables_902, easy_tables_903, hard_tables_902, hard_tables_903]
type(train_sets[0])

list

In [61]:
X = [a.iloc[:,1:-1] for a in train_sets]
y = [a.iloc[0,-1] for a in train_sets]

AttributeError: 'list' object has no attribute 'iloc'

In [72]:
train = easy_tables_902 + easy_tables_903 + hard_tables_902 + hard_tables_903

In [75]:
columns = ["A-DC1", "A-DC2", "A-DC3", "A-DC4", "A-DC5",
           "A-DC6", "A-DC7", "A-DC8", "B-DC1", "B-DC2", "B-DC3", 
           "B-DC4", "B-DC5", "B-DC6", "B-DC7", "B-DC8", "Difficulty"]

In [76]:
train_x = [a.iloc[:,1:-1] for a in train]
train_y = [a.iloc[0,-1] for a in train]

In [151]:
batch_size = 32

In [152]:
params = {'shuffle': True,
          'num_workers': 6}

In [172]:
class MyDataset(Dataset):
  'Characterizes a dataset for PyTorch'
  def __init__(self, X, Y):
        'Initialization'
        self.X = X
        self.Y = Y

  def __len__(self):
        'Denotes the total number of samples'
        return len(self.X)

  def __getitem__(self, index):
        'Generates one sample of data'
        # Select sample
        #ID = self.data.iloc[index]

        # Load data and get label
        X = torch.FloatTensor(Series.tolist(self.X[index]))
        Y = torch.FloatTensor([self.Y[index]])
        #Y = self.Y[index]
        #import pdb; pdb.set_trace()

        return X, Y

In [173]:
data = MyDataset(train_x, train_y)

In [174]:
train_loader = torch.utils.data.DataLoader(data, batch_size=batch_size, shuffle=True)

In [164]:
hidden_size = 64
num_layers = 2
num_classes = 2
learning_rate = 0.0001
num_epochs = 5
sequence_length = 16 # column size. get rid of time unless time difference is consistent
input_size = 352 # longest length of ti

In [177]:
rnn = BiRNN(input_size, hidden_size, num_layers, num_classes)
rnn.is_training = True

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(rnn.parameters(), lr=learning_rate)

epoch_loss = 5.
    # Train it

for epoch in range(num_epochs):
    loss_total = 0.
    iteration_count = 0.
    for i, (data, label) in enumerate(train_loader):
        data = Variable(data.view(-1, sequence_length, 352))
        label = Variable(label)

        # Forward + Backward + Optimize
        optimizer.zero_grad()
        outputs = rnn(data)

        loss = criterion(outputs, label)
        loss_total += loss.data[0]
        loss.backward()
        optimizer.step()

        if (i + 1) % 10 == 0:
            print('Epoch [%d/%d], Step [%d/%d], Loss: %.4f'
                % (epoch + 1, num_epochs, i + 1, len(train) // batch_size, loss.data[0]))
    current_epoch_loss = loss_total / iteration_count
    # Optimise training epochs: only continue training while the loss drops
    if current_epoch_loss >= epoch_loss:
        break
    epoch_loss = current_epoch_loss
        


ValueError: only one element tensors can be converted to Python scalars

In [176]:
class BiRNN(nn.Module):

    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(BiRNN, self).__init__()
        self.is_training = False
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.num_classes = num_classes
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers,
                            batch_first=True, bidirectional=True)
        #self.fc = nn.Dropout(p=0.75, inplace=False)
        self.fc = nn.Dropout(p=0.5, inplace=False)
        self.linear = nn.Linear(self.hidden_size*2, self.num_classes)

        if not cuda_enabled:
            self.lstm = self.lstm.cuda()
            self.fc = self.fc.cuda()
            self.linear = self.linear.cuda()

    def forward(self, x):
        # Set initial states
        h0 = Variable(torch.zeros(self.num_layers*2, x[0].item(), self.hidden_size)) # 2 for bidirection
        c0 = Variable(torch.zeros(self.num_layers*2, x[0].item(), self.hidden_size))
        #h0 = Variable(torch.zeros(self.num_layers*2, x.size(0), self.hidden_size, dtype=torch.float64)) # 2 for bidirection
        #c0 = Variable(torch.zeros(self.num_layers*2, x.size(0), self.hidden_size, dtype=torch.float64))
        if not cuda_enabled:
            h0 = h0.cuda()  # 2 for bidirection
            c0 = c0.cuda()

        # Forward propagate RNN
        out, _ = self.lstm(x, (h0, c0))
        
        # Decode hidden state of last time step
        if self.is_training:
            out = self.fc(out[:, -1, :])
        else:
            out = out[:, -1, :]

        out = F.log_softmax(self.linear(out), dim=1)
        return out

In [None]:
class BiRNN2(nn.Module):

    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(BiRNN, self).__init__()
        self.is_training = False
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.num_classes = num_classes
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers,
                            batch_first=True, bidirectional=True)
        #self.fc = nn.Dropout(p=0.75, inplace=False)
        self.fc = nn.Dropout(p=0.5, inplace=False)
        self.linear = nn.Linear(self.hidden_size*2, self.num_classes)

        if not cuda_enabled:
            self.lstm = self.lstm.cuda()
            self.fc = self.fc.cuda()
            self.linear = self.linear.cuda()

    def forward(self, x):
        # Set initial states
        h0 = Variable(torch.zeros(self.num_layers*2, x.size(0), self.hidden_size)) # 2 for bidirection
        c0 = Variable(torch.zeros(self.num_layers*2, x.size(0), self.hidden_size))
        if not cuda_enabled:
            h0 = h0.cuda()  # 2 for bidirection
            c0 = c0.cuda()

        # Forward propagate RNN
        out, _ = self.lstm(x, (h0, c0))
        
        # Decode hidden state of last time step
        if self.is_training:
            out = self.fc(out[:, -1, :])
        else:
            out = out[:, -1, :]

        out = F.log_softmax(self.linear(out), dim=1)
        return out