In [1]:
import datetime
import numpy as np
import os
import pandas as pd

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset
from torch.autograd import Variable

from sklearn.metrics import accuracy_score, confusion_matrix
from lstm import ivie_data

In [2]:
cuda_enabled = torch.cuda.is_available()

In [3]:
cuda_enabled

True

In [4]:
BASELINE_START = "baselinestart"
BASELINE_END = "baselineend"
EASY_START = "easystart"
EASY_END = "easyend"
HARD_START = "hardstart"
HARD_END = "hardend"

EASY_DIFFICULTY = 0
HARD_DIFFICULTY = 1

In [5]:
"""Gets the row blocks for easy and hard tasks
"""
def read_data(fnirs_path, marker_path):
    fnirs_df =  pd.read_csv(fnirs_path, sep='\t', skiprows=range(4), index_col=False)
    marker_df = pd.read_csv(marker_path, sep='\t', skiprows=range(4), index_col=False)
    
    merged_df = pd.merge(fnirs_df, marker_df, on="Matlab_now", how="left")
    
    return merged_df

In [6]:
def get_row_blocks(merged_df):
    easy_start_rows = merged_df.index[merged_df.Stimulus_Label == EASY_START].tolist()
    easy_end_rows = merged_df.index[merged_df.Stimulus_Label == EASY_END].tolist()
    hard_start_rows = merged_df.index[merged_df.Stimulus_Label == HARD_START].tolist()
    hard_end_rows = merged_df.index[merged_df.Stimulus_Label == HARD_END].tolist()
    
    easy_rows = list(zip(easy_start_rows, easy_end_rows))
    hard_rows = list(zip(hard_start_rows, hard_end_rows))
    
    return (easy_rows, hard_rows)

In [7]:
"""Return subset of df determined by the indices of the row blocks
"""
def get_subsets(merged_df, row_blocks, difficulty):
    tables = []
    column_names = ["Matlab_now", "A-DC1", "A-DC2", "A-DC3", "A-DC4", "A-DC5",
                    "A-DC6", "A-DC7", "A-DC8", "B-DC1", "B-DC2", "B-DC3", 
                    "B-DC4", "B-DC5", "B-DC6", "B-DC7", "B-DC8"]
    column_indices = [merged_df.columns.get_loc(c) for c in column_names]
    for row_block in row_blocks:
        df = merged_df.iloc[row_block[0]:row_block[1], column_indices]
        start_time = df.iloc[0]["Matlab_now"]
        df["Matlab_now"] = df["Matlab_now"] - start_time
        df["Diffculty"] = difficulty

        tables.append(df)
    return tables

In [8]:
"""Extract features from given dataset
    :param data_path: Directory containing the files
    
    :return: gets all the easy and hard features from a given dataset
"""
def get_mfcc(data_path):
    fnirs_path = os.path.join(os.getcwd(), data_path, "fNIRSdata.txt")
    marker_path = os.path.join(os.getcwd(), data_path, "markers.txt")
    merged_df = read_data(fnirs_path, marker_path)
    easy_rows, hard_rows = get_row_blocks(merged_df)
    
    easy_tables = get_subsets(merged_df, easy_rows, EASY_DIFFICULTY)
    hard_tables = get_subsets(merged_df, hard_rows, HARD_DIFFICULTY)

    return easy_tables, hard_tables

In [9]:
easy_tables_902, hard_tables_902 = get_mfcc("/home/jin/workspace/hci_lab/data/S902/2015-02-26_11-24-48-120")

In [10]:
train = easy_tables_902[0][:-1]
dev = easy_tables_902[1]
test = easy_tables_902[2][:-1]
print(train.shape)
print(dev.shape)
print(test.shape)
input_size = train.shape[0]

(353, 18)
(353, 18)
(353, 18)


In [11]:
batch_size = 64
kwargs = {"num_workers": 1, "pin_memory": True}
if not cuda_enabled:
    kwargs["pin_memory"] = False
    batch_size = 32

In [12]:
train_loader = torch.utils.data.DataLoader(train, batch_size=batch_size, shuffle=True, **kwargs)
test_loader = torch.utils.data.DataLoader(test, batch_size=batch_size, shuffle=False, **kwargs)

In [13]:
hidden_size = 128
num_layers = 2
num_classes = 2
learning_rate = 0.0001
num_epochs = 20

In [16]:
for x in train_loader:
    print(x)

KeyError: 'Traceback (most recent call last):\n  File "/home/jin/anaconda3/envs/fastai/lib/python3.6/site-packages/pandas/core/indexes/base.py", line 2656, in get_loc\n    return self._engine.get_loc(key)\n  File "pandas/_libs/index.pyx", line 108, in pandas._libs.index.IndexEngine.get_loc\n  File "pandas/_libs/index.pyx", line 132, in pandas._libs.index.IndexEngine.get_loc\n  File "pandas/_libs/hashtable_class_helper.pxi", line 1601, in pandas._libs.hashtable.PyObjectHashTable.get_item\n  File "pandas/_libs/hashtable_class_helper.pxi", line 1608, in pandas._libs.hashtable.PyObjectHashTable.get_item\nKeyError: 316\n\nDuring handling of the above exception, another exception occurred:\n\nTraceback (most recent call last):\n  File "/home/jin/anaconda3/envs/fastai/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 138, in _worker_loop\n    samples = collate_fn([dataset[i] for i in batch_indices])\n  File "/home/jin/anaconda3/envs/fastai/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 138, in <listcomp>\n    samples = collate_fn([dataset[i] for i in batch_indices])\n  File "/home/jin/anaconda3/envs/fastai/lib/python3.6/site-packages/pandas/core/frame.py", line 2927, in __getitem__\n    indexer = self.columns.get_loc(key)\n  File "/home/jin/anaconda3/envs/fastai/lib/python3.6/site-packages/pandas/core/indexes/base.py", line 2658, in get_loc\n    return self._engine.get_loc(self._maybe_cast_indexer(key))\n  File "pandas/_libs/index.pyx", line 108, in pandas._libs.index.IndexEngine.get_loc\n  File "pandas/_libs/index.pyx", line 132, in pandas._libs.index.IndexEngine.get_loc\n  File "pandas/_libs/hashtable_class_helper.pxi", line 1601, in pandas._libs.hashtable.PyObjectHashTable.get_item\n  File "pandas/_libs/hashtable_class_helper.pxi", line 1608, in pandas._libs.hashtable.PyObjectHashTable.get_item\nKeyError: 316\n'

In [None]:
rnn = ivie_data.BiRNN(input_size, hidden_size, num_layers, num_classes)
rnn.is_training = True

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(rnn.parameters(), lr=learning_rate)

epoch_loss = 5000.
    # Train it

# for epoch in range(num_epochs):
#     loss_total = 0.
#     iteration_count = 0.
#     for i, (mfcc, labels) in enumerate(train_loader):
#         print(labels)
#         iteration_count += 1.
#         mfcc = Variable(mfcc.view(-1, sequence_length, input_size))
#         labels = Variable(labels)
#         if cuda_enabled:
#             mfcc = mfcc.cuda()
#             labels = labels.cuda()

#         # Forward + Backward + Optimize
#         optimizer.zero_grad()
#         outputs = rnn(mfcc)

#         loss = criterion(outputs, labels)
#         loss_total += loss.data[0]
#         loss.backward()
#         optimizer.step()

#         if (i + 1) % 10 == 0:
#             print('Epoch [%d/%d], Step [%d/%d], Loss: %.4f'
#                     % (epoch + 1, num_epochs, i + 1, len(train) // batch_size, loss.data[0]))
#     current_epoch_loss = loss_total / iteration_count
#         # Optimise training epochs: only continue training while the loss drops
#     if current_epoch_loss >= epoch_loss:
#         break
#     epoch_loss = current_epoch_loss
