In [105]:
import datetime
import numpy as np
import os
import pandas as pd
from pandas import Series

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset
from torch.autograd import Variable

from sklearn.metrics import accuracy_score, confusion_matrix
from lstm import ivie_data

In [148]:
INPUT_SIZE = 352

In [2]:
cuda_enabled = torch.cuda.is_available()

In [3]:
cuda_enabled

False

In [116]:
class MyDataset(Dataset):
  'Characterizes a dataset for PyTorch'
  def __init__(self, data, columns):
        'Initialization'
        self.data = data
        self.columns = columns

  def __len__(self):
        'Denotes the total number of samples'
        return self.data.shape[0]

  def __getitem__(self, index):
        'Generates one sample of data'
        # Select sample
        #ID = self.data.iloc[index]

        # Load data and get label
        X = torch.FloatTensor(Series.tolist(self.data[columns[:-1]].iloc[index]))
        y = self.data[columns[-1]].iloc[index]

        return X, y

In [4]:
BASELINE_START = "baselinestart"
BASELINE_END = "baselineend"
EASY_START = "easystart"
EASY_END = "easyend"
HARD_START = "hardstart"
HARD_END = "hardend"

EASY_DIFFICULTY = 0
HARD_DIFFICULTY = 1

In [5]:
"""Gets the row blocks for easy and hard tasks
"""
def read_data(fnirs_path, marker_path):
    fnirs_df =  pd.read_csv(fnirs_path, sep='\t', skiprows=range(4), index_col=False)
    marker_df = pd.read_csv(marker_path, sep='\t', skiprows=range(4), index_col=False)
    
    merged_df = pd.merge(fnirs_df, marker_df, on="Matlab_now", how="left")
    
    return merged_df

In [6]:
def get_row_blocks(merged_df):
    easy_start_rows = merged_df.index[merged_df.Stimulus_Label == EASY_START].tolist()
    easy_end_rows = merged_df.index[merged_df.Stimulus_Label == EASY_END].tolist()
    hard_start_rows = merged_df.index[merged_df.Stimulus_Label == HARD_START].tolist()
    hard_end_rows = merged_df.index[merged_df.Stimulus_Label == HARD_END].tolist()
    
    easy_rows = list(zip(easy_start_rows, easy_end_rows))
    hard_rows = list(zip(hard_start_rows, hard_end_rows))
    
    return (easy_rows, hard_rows)

In [149]:
"""Return subset of df determined by the indices of the row blocks
"""
def get_subsets(merged_df, row_blocks, difficulty):
    tables = []
    column_names = ["Matlab_now", "A-DC1", "A-DC2", "A-DC3", "A-DC4", "A-DC5",
                    "A-DC6", "A-DC7", "A-DC8", "B-DC1", "B-DC2", "B-DC3", 
                    "B-DC4", "B-DC5", "B-DC6", "B-DC7", "B-DC8"]
    column_indices = [merged_df.columns.get_loc(c) for c in column_names]
    for row_block in row_blocks:
        df = merged_df.iloc[row_block[0]:row_block[1], column_indices]
        start_time = df.iloc[0]["Matlab_now"]
        df["Matlab_now"] = df["Matlab_now"] - start_time
        df["Difficulty"] = difficulty

        tables.append(df.iloc[:INPUT_SIZE])
    return tables

In [150]:
"""Extract features from given dataset
    :param data_path: Directory containing the files
    
    :return: gets all the easy and hard features from a given dataset
"""
def get_data(data_path):
    fnirs_path = os.path.join(os.getcwd(), data_path, "fNIRSdata.txt")
    marker_path = os.path.join(os.getcwd(), data_path, "markers.txt")
    merged_df = read_data(fnirs_path, marker_path)
    easy_rows, hard_rows = get_row_blocks(merged_df)
    
    easy_tables = get_subsets(merged_df, easy_rows, EASY_DIFFICULTY)
    hard_tables = get_subsets(merged_df, hard_rows, HARD_DIFFICULTY)

    return easy_tables, hard_tables

In [151]:
easy_tables_902, hard_tables_902 = get_data("/Users/sjjin/workspace/hci_lab/data/S902/2015-02-26_11-24-48-120")
easy_tables_903, hard_tables_903 = get_data("/Users/sjjin/workspace/hci_lab/data/S903/2015-02-27_13-20-42-120/")

In [152]:
train = pd.concat(easy_tables_902 + easy_tables_903 + hard_tables_902 + hard_tables_903)

In [153]:
train_x = train[:-1]
#dev = easy_tables_902[1]
#test = easy_tables_902[2][:-1]
print(train_x.shape)
print(dev.shape)
print(test.shape)
input_size = train.shape[0]

(15487, 18)
(353, 18)
(353, 18)


In [163]:
15488/352

44.0

In [58]:
columns = ["Matlab_now", "A-DC1", "A-DC2", "A-DC3", "A-DC4", "A-DC5",
           "A-DC6", "A-DC7", "A-DC8", "B-DC1", "B-DC2", "B-DC3", 
           "B-DC4", "B-DC5", "B-DC6", "B-DC7", "B-DC8", "Difficulty"]

In [20]:
batch_size = 32

In [21]:
params = {'shuffle': True,
          'num_workers': 6}

In [117]:
data = MyDataset(train, columns)

In [164]:
type(data)

torch.Tensor

In [118]:
train_loader = torch.utils.data.DataLoader(data, batch_size=batch_size, shuffle=True)

In [161]:
hidden_size = 128
num_layers = 2
num_classes = 2
learning_rate = 0.0001
num_epochs = 20
sequence_length = 16 # column size. get rid of time unless time difference is consistent
#input_size = 0 # longest length of ti

In [162]:
rnn = ivie_data.BiRNN(input_size, hidden_size, num_layers, num_classes)
rnn.is_training = True

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(rnn.parameters(), lr=learning_rate)

epoch_loss = 5.
    # Train it

for epoch in range(num_epochs):
    loss_total = 0.
    iteration_count = 0.
    for i, (data, label) in enumerate(train_loader):
        data = Variable(data.view(-1, sequence_length, INPUT_SIZE))
        label = Variable(label)

        # Forward + Backward + Optimize
        optimizer.zero_grad()
        outputs = rnn(data)

        loss = criterion(outputs, label)
        loss_total += loss.data[0]
        loss.backward()
        optimizer.step()

        if (i + 1) % 10 == 0:
            print('Epoch [%d/%d], Step [%d/%d], Loss: %.4f'
                % (epoch + 1, num_epochs, i + 1, len(train) // batch_size, loss.data[0]))
    current_epoch_loss = loss_total / iteration_count
    # Optimise training epochs: only continue training while the loss drops
    if current_epoch_loss >= epoch_loss:
        break
    epoch_loss = current_epoch_loss
        


RuntimeError: shape '[-1, 16, 352]' is invalid for input of size 544