In [4]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


Libraries

In [70]:
import os
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np
from verbio import readers, preprocessing, temporal, features, settings
from sklearn.metrics import f1_score
from collections import defaultdict

Constants

In [138]:
# --- Cohort -----------------------------------------------------------------
# Participant identifiers P001-P073; they are used as directory names under DATA_DIR.
SUBJECT_LABELS = [f'P{p:03d}' for p in range(1, 74, 1)]
# Session folders whose data is used for fitting vs. for evaluation.
TRAIN_SESSIONS = ['TEST01','TEST02','TEST03','TEST04']
TEST_SESSIONS = ['TEST05','TEST06','TEST07','TEST08']

# --- Optimisation -----------------------------------------------------------
BATCH_SIZE = 16
PRETRAIN_EPOCHS = 5   # epochs on the pooled data of all other participants
TRAIN_EPOCHS = 200    # epochs on the target participant's own training sessions

# --- Data location ----------------------------------------------------------
# Root of the raw dataset, laid out as <DATA_DIR>/<participant>/<session>/<file>.
# Set the VERBIO_DATA_DIR environment variable to run on another machine;
# the original author's path is kept as the default.
DATA_DIR = os.environ.get('VERBIO_DATA_DIR', '/home/jason/hubbs/project_verbio/data/raw/')
EDA_FILENAME = 'E4_EDA_PPT.xlsx'
HR_FILENAME = 'E4_HR_PPT.xlsx'
ANNOTATION_FILENAME = 'MANUAL_ANNOTATION_PPT.xlsx'

# --- Windowing --------------------------------------------------------------
# Window length and stride handed to the verbio windowing helpers
# (presumably in seconds -- TODO confirm against the verbio package).
WIN_LEN = 10
WIN_STRIDE = 5

Helper functions

In [66]:
def get_data(participant, session):
    """Load one participant/session as a feature matrix and a label vector.

    Reads the EDA, HR and annotation workbooks found under
    ``DATA_DIR/participant/session``, turns each into windowed features and
    trims all three to their common length.

    Args:
        participant: Participant directory name (e.g. ``'P004'``).
        session: Session directory name (e.g. ``'TEST01'``).

    Returns:
        ``None`` when any of the three files is missing. Otherwise a tuple
        ``(x, y)``: ``x`` is a numpy array whose columns are
        ``SCR_Peaks``, ``SCR_Amplitude``, ``SCL`` and ``HR`` (one row per
        window) and ``y`` holds the windowed annotation labels for the same
        rows.
    """
    session_dir = os.path.join(DATA_DIR, participant, session)
    eda_filepath = os.path.join(session_dir, EDA_FILENAME)
    hr_filepath = os.path.join(session_dir, HR_FILENAME)
    annotation_filepath = os.path.join(session_dir, ANNOTATION_FILENAME)

    # A session is only usable when all three recordings are present.
    required_files = (eda_filepath, hr_filepath, annotation_filepath)
    if not all(os.path.exists(path) for path in required_files):
        return None

    eda_fx = get_eda_fx(readers.read_excel(eda_filepath))
    hr_fx = get_hr_fx(readers.read_excel(hr_filepath))
    annotation_fx = get_annotation_fx(readers.read_excel(annotation_filepath))

    # The three streams can yield a different number of windows; keep the
    # leading windows they all share.
    min_len = min(len(annotation_fx), len(eda_fx), len(hr_fx))
    y = annotation_fx[:min_len]

    # .assign returns a new frame, so the HR column is never written into a
    # slice of the EDA feature frame (which triggers SettingWithCopyWarning).
    x_df = eda_fx.iloc[:min_len].assign(HR=hr_fx[:min_len])

    return x_df.to_numpy(), y
    
def get_eda_fx(eda_df):
    """Extract the windowed EDA features used as model inputs.

    Args:
        eda_df: Frame with an ``'EDA'`` column and a ``settings.time_key``
            timestamp column.

    Returns:
        The ``SCR_Peaks``, ``SCR_Amplitude`` and ``SCL`` columns of the
        result of ``features.eda_features``.
    """
    selected_columns = ['SCR_Peaks', 'SCR_Amplitude', 'SCL']
    # The signal and its timestamps are handed over as plain numpy arrays.
    all_eda_features = features.eda_features(
        signal=eda_df['EDA'].to_numpy(),
        times=eda_df[settings.time_key].to_numpy(),
        sr=settings.e4_eda_sr,
        win_len=WIN_LEN,
        win_stride=WIN_STRIDE,
    )
    return all_eda_features[selected_columns]

def get_hr_fx(hr_df):
    """Reduce the heart-rate recording to one mean value per window.

    Args:
        hr_df: Frame with an ``'HR'`` column and a ``settings.time_key``
            timestamp column.

    Returns:
        numpy array built from the per-window means produced by
        ``preprocessing.window_timed``.
    """
    def _window_mean(x):
        # Aggregate applied to the samples of each window.
        return np.mean(x)

    windowed_hr = preprocessing.window_timed(
        x=hr_df['HR'].to_numpy(),
        times=hr_df[settings.time_key].to_numpy(),
        win_len=WIN_LEN,
        win_stride=WIN_STRIDE,
        win_fn=_window_mean,
    )
    return np.array(windowed_hr)

def get_annotation_fx(annotation_df):
    """Turn the manual ratings into one binary label per window.

    The four rater columns ``R1``, ``R2``, ``R4`` and ``R5`` are averaged
    sample by sample, each window's mean is binarized at 2.5, and the label
    sequence is then shifted by ``WIN_LEN // WIN_STRIDE`` positions.

    Args:
        annotation_df: Frame with the four rater columns and a
            ``settings.time_key`` timestamp column.

    Returns:
        Integer numpy array of window labels; it is ``WIN_LEN // WIN_STRIDE``
        entries shorter than the number of windows because of the shift.
    """
    rater_columns = ('R1', 'R2', 'R4', 'R5')
    # One row per rater, one column per sample; average over the raters.
    ratings = np.vstack([annotation_df[column].to_numpy() for column in rater_columns])
    annotation_times = annotation_df[settings.time_key].to_numpy()
    mean_rating = np.mean(ratings, axis=0)

    def _binarized_window_mean(x):
        # Window label: mean rating inside the window, thresholded at 2.5.
        return preprocessing.binarize(np.mean(x), threshold=2.5)

    window_labels = preprocessing.window_timed(
        x=mean_rating,
        times=annotation_times,
        win_len=WIN_LEN,
        win_stride=WIN_STRIDE,
        win_fn=_binarized_window_mean,
    )
    window_labels = np.array(window_labels, dtype='int')

    # The shift is expressed in whole strides, so the window length has to be
    # (at least approximately) a multiple of the stride.
    assert WIN_LEN % WIN_STRIDE < 0.1
    shift_len = -int(WIN_LEN//WIN_STRIDE)
    # Shift the labels back in time, then drop the trailing entries the shift
    # left without real data.
    shifted_labels = temporal.shift(window_labels, shift_len)
    return shifted_labels[:shift_len]

In [135]:
class VerBIODataset(Dataset):
    """Map-style dataset that pairs every feature row with its label."""

    def __init__(self, X, y):
        """Store references to the features ``X`` and labels ``y`` (nothing is copied)."""
        self.X, self.y = X, y

    def __len__(self):
        """Return the number of samples, taken from the label container."""
        return len(self.y)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at position ``idx``."""
        sample_features = self.X[idx]
        sample_label = self.y[idx]
        return sample_features, sample_label

class NeuralNetwork(nn.Module):
    """Two-layer perceptron: Linear -> ReLU -> Linear, returning raw logits."""

    def __init__(self, input_size, hidden_size, output_size):
        """Build the layer stack.

        Args:
            input_size: Number of input features per sample.
            hidden_size: Width of the single hidden layer.
            output_size: Number of output logits.
        """
        super().__init__()
        hidden_layer = nn.Linear(input_size, hidden_size)
        output_layer = nn.Linear(hidden_size, output_size)
        # Attribute name and layer positions are kept so state_dict keys stay
        # 'linear_relu_stack.0.*' and 'linear_relu_stack.2.*'.
        self.linear_relu_stack = nn.Sequential(hidden_layer, nn.ReLU(), output_layer)

    def forward(self, x):
        """Return the logits for the batch ``x`` (no softmax is applied)."""
        return self.linear_relu_stack(x)
    
def train(dataloader, model, loss_fn, optimizer, log_every=100):
    """Run one optimisation epoch over ``dataloader``.

    Args:
        dataloader: Iterable of ``(X, y)`` batches that also exposes
            ``.dataset`` (only its length is used, for the progress message).
        model: ``nn.Module`` to optimise; it is switched to training mode.
        loss_fn: Callable ``loss_fn(pred, y)`` returning a scalar tensor.
        optimizer: Optimizer already bound to ``model``'s parameters.
        log_every: Print the current batch loss every ``log_every`` batches.
            The default of 100 reproduces the original output; pass ``0`` or
            ``None`` to silence logging (useful for long runs).

    Returns:
        float: Mean of the per-batch losses of this epoch, or ``nan`` when the
        dataloader produced no batches.
    """
    size = len(dataloader.dataset)

    # Send batches to wherever the model actually lives instead of trusting a
    # notebook-level global; the global is only a fallback for models that
    # have no parameters.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else device

    model.train()
    loss_sum = 0.0  # becomes a 0-dim tensor after the first batch
    num_batches = 0
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(target_device), y.to(target_device)

        # Compute prediction error
        pred = model(X)
        batch_loss = loss_fn(pred, y)

        # Backpropagation
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

        # Accumulate on-device so that only logging steps force a host sync.
        loss_sum = loss_sum + batch_loss.detach()
        num_batches += 1

        if log_every and batch % log_every == 0:
            loss, current = batch_loss.item(), batch * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

    if num_batches == 0:
        return float('nan')
    return float(loss_sum) / num_batches
            
def test(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct, test_f1 = 0, 0, 0
    tp, tn, fp, fn = 0, 0, 0, 0
    ps, ns = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            print(pred.argmax(1))
            test_loss += loss_fn(pred, y).item()
            test_f1 += f1_score(y.cpu(), pred.argmax(1).cpu())
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
            tp += ((pred.argmax(1) == 1) & (pred.argmax(1) == y)).type(torch.float).sum().item()
            tn += ((pred.argmax(1) == 0) & (pred.argmax(1) == y)).type(torch.float).sum().item()
            fp += ((pred.argmax(1) == 1) & (pred.argmax(1) != y)).type(torch.float).sum().item()
            fn += ((pred.argmax(1) == 0) & (pred.argmax(1) != y)).type(torch.float).sum().item()
            ps += (y == 1).type(torch.float).sum().item()
            ns += (y == 0).type(torch.float).sum().item()
    test_loss /= num_batches
    test_f1 /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} Avg F1: {test_f1:>8f}")
    print(f"tp: {tp} -- tn: {tn} -- fp: {fp} -- fn: {fn}")
    print(f"ps: {ps} -- ns: {ns} \n ")

Grab raw data from VerBIO dataset for training and testing sessions

In [80]:
def _load_sessions(participant, sessions):
    """Load every session in ``sessions`` for ``participant``.

    Returns the list of ``get_data`` results in session order, or ``None`` as
    soon as one session is unavailable (the remaining ones are not read).
    """
    loaded = []
    for session in sessions:
        session_data = get_data(participant, session)
        if session_data is None:
            return None
        loaded.append(session_data)
    return loaded


# participant -> list of (x, y) tuples, one per session
train_dict = {}
test_dict = {}

for p in SUBJECT_LABELS:
    participant_train = _load_sessions(p, TRAIN_SESSIONS)
    if participant_train is None:
        # Already excluded: skip reading this participant's test sessions.
        continue

    participant_test = _load_sessions(p, TEST_SESSIONS)
    if participant_test is None:
        continue

    # Only participants with every train AND test session are kept.
    print(f'Valid participant {p}')
    train_dict[p] = participant_train
    test_dict[p] = participant_test

Valid participant P004
Valid participant P005
Valid participant P008
Valid participant P016
Valid participant P020
Valid participant P021
Valid participant P023
Valid participant P032
Valid participant P035
Valid participant P037
Valid participant P039
Valid participant P041
Valid participant P042
Valid participant P044
Valid participant P047
Valid participant P050
Valid participant P051
Valid participant P053
Valid participant P060
Valid participant P061
Valid participant P062
Valid participant P065
Valid participant P071
Valid participant P073


Initialize the PyTorch device and loss function

In [81]:
# Seed the RNGs so weight initialisation and DataLoader shuffling repeat
# across "Restart Kernel -> Run All".
SEED = 0
np.random.seed(SEED)
torch.manual_seed(SEED)

# Prefer the GPU when one is visible to PyTorch.
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Using {} device".format(device))

# Cross-entropy over the model's two output logits.
loss_fn = nn.CrossEntropyLoss()

Using cuda device


Run experiment loop

In [137]:
def _stack_sessions(sessions):
    """Concatenate per-session ``(x, y)`` pairs into one float32 matrix and one int vector."""
    x = np.concatenate([session_x for session_x, _ in sessions], axis=0).astype(np.float32)
    y = np.concatenate([session_y for _, session_y in sessions], axis=0).astype(int)
    return x, y


def _class_counts(labels):
    """Return ``(number of 0 labels, number of 1 labels)`` as plain ints."""
    return int(np.sum(labels == 0)), int(np.sum(labels == 1))


# Leave-one-participant-out protocol: pretrain on the training sessions of all
# other participants, fine-tune on the target's training sessions, then
# evaluate on the target's held-out sessions.
for target_p in train_dict:

    # A list (not a set) keeps the pretraining order identical on every run;
    # set iteration order over strings changes with hash randomisation.
    aux_participants = [p for p in train_dict if p != target_p]

    x_pretrain, y_pretrain = _stack_sessions(
        [session for p in aux_participants for session in train_dict[p]]
    )
    x_train, y_train = _stack_sessions(train_dict[target_p])
    x_test, y_test = _stack_sessions(test_dict[target_p])

    pretrain_c0, pretrain_c1 = _class_counts(y_pretrain)
    train_c0, train_c1 = _class_counts(y_train)
    test_c0, test_c1 = _class_counts(y_test)
    print(f'Pretrain: c0: {pretrain_c0} | c1: {pretrain_c1}')
    print(f'Train: c0: {train_c0} | c1: {train_c1}')
    print(f'Test: c0: {test_c0} | c1: {test_c1}')

    pretrain_dataset = VerBIODataset(x_pretrain, y_pretrain)
    train_dataset = VerBIODataset(x_train, y_train)
    test_dataset = VerBIODataset(x_test, y_test)

    pretrain_dataloader = DataLoader(pretrain_dataset, batch_size=BATCH_SIZE, shuffle=True)
    train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    # Evaluation does not need shuffling; a fixed order keeps batch-level
    # statistics repeatable.
    test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

    # Fresh model per target participant; the input width follows the feature matrix.
    model = NeuralNetwork(input_size=x_train.shape[1], hidden_size=4, output_size=2).to(device)

    # One optimizer is shared by both phases, so Adam's moment estimates carry
    # over from pretraining into fine-tuning.
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    print(f'===For participant {target_p}===')
    for _ in range(PRETRAIN_EPOCHS):
        train(pretrain_dataloader, model, loss_fn, optimizer)

    for _ in range(TRAIN_EPOCHS):
        train(train_dataloader, model, loss_fn, optimizer)

    print('===Testing===')
    test(test_dataloader, model, loss_fn)
    

Pretrain: c0: 2365 | c1: 1638
Train: c0: 113 | c1: 148
Test: c0: 86 | c1: 158
===For participant P004===
loss: 14.602558  [    0/ 4003]
loss: 12.391428  [ 1600/ 4003]
loss: 13.960346  [ 3200/ 4003]
loss: 11.480155  [    0/ 4003]
loss: 6.366040  [ 1600/ 4003]
loss: 9.317552  [ 3200/ 4003]
loss: 6.441248  [    0/ 4003]
loss: 7.825154  [ 1600/ 4003]
loss: 8.125319  [ 3200/ 4003]
loss: 5.393022  [    0/ 4003]
loss: 3.639854  [ 1600/ 4003]
loss: 3.441907  [ 3200/ 4003]
loss: 4.432309  [    0/ 4003]
loss: 3.326151  [ 1600/ 4003]
loss: 1.773094  [ 3200/ 4003]
loss: 1.151577  [    0/ 4003]
loss: 0.713610  [ 1600/ 4003]
loss: 0.675676  [ 3200/ 4003]
loss: 0.643300  [    0/ 4003]
loss: 0.644369  [ 1600/ 4003]
loss: 0.682473  [ 3200/ 4003]
loss: 0.717164  [    0/ 4003]
loss: 0.580322  [ 1600/ 4003]
loss: 0.658578  [ 3200/ 4003]
loss: 0.633845  [    0/ 4003]
loss: 0.623145  [ 1600/ 4003]
loss: 0.609320  [ 3200/ 4003]
loss: 0.709663  [    0/ 4003]
loss: 0.626706  [ 1600/ 4003]
loss: 0.674079  [ 320

KeyboardInterrupt: 