In [1]:
import os
import numpy as np
import pandas as pd
import h5py
from data_helpers import timeseries_shuffler, split_data
%matplotlib inline

In [2]:
def group_consecutives(vals, step=1):
    """Split ``vals`` into runs whose neighbouring elements differ by ``step``.

    The values are walked in the order given. A value extends the current run
    when it equals the previous value plus ``step``; otherwise it opens a new
    run. The result always contains at least one run, so an empty ``vals``
    yields ``[[]]``.
    """
    groups = [[]]
    expected = None
    for value in vals:
        # The very first value (expected is None) always joins the initial run.
        if expected is not None and value != expected:
            groups.append([])
        groups[-1].append(value)
        expected = value + step
    return groups

def get_turn_peaks(dx,threshold):
    """Return one peak index per run of consecutive supra-threshold samples.

    Samples where ``abs(dx) > threshold`` are located along the first axis and
    grouped into runs of indices that are exactly one apart. For each run the
    index with the largest ``abs(dx)`` is kept.

    Parameters
    ----------
    dx : numpy.ndarray
        Signal to search. Presumably 1-D or a single column (samples x 1);
        the original note read "samples x electrode channels" -- TODO confirm.
    threshold : float
        Magnitude a sample must strictly exceed to count as a crossing.

    Returns
    -------
    list
        Peak indices in order of occurrence; an empty list when no sample
        exceeds ``threshold``.
    """
    crossings = np.where(abs(dx) > threshold)[0]
    # With no crossings there is no run to search; np.argmax would raise a
    # ValueError on the empty selection, so report "no peaks" instead.
    if crossings.size == 0:
        return []

    # Start a new run wherever neighbouring crossing indices are not exactly
    # one apart.
    run_breaks = np.where(np.diff(crossings) != 1)[0] + 1
    peaks = []
    for run in np.split(crossings, run_breaks):
        center = run[np.argmax(abs(dx[run]))]
        peaks.append(center)

    return peaks

def read_data_pair(index):
    """Load the X and y arrays stored under the same file name.

    Reads every top-level entry of 'datasets/X/<index>' and
    'datasets/y/<index>' into memory. For X only the first entry (in key
    order) is returned; for y all entries are stacked into one array and the
    stack is transposed.

    Parameters
    ----------
    index : str or int
        File name shared by the X and y HDF5 files.

    Returns
    -------
    x_temp : numpy.ndarray
        First entry of the X file.
    y_temp : numpy.ndarray
        Transposed stack of all entries of the y file.
    """
    # The context managers close the HDF5 handles, which were previously left
    # open. np.asarray reads each dataset into memory inside the block, so the
    # returned arrays stay valid after the files are closed.
    with h5py.File('datasets/X/{}'.format(index), 'r') as x_file:
        x_temp = np.asarray([np.asarray(x_file[key]) for key in x_file.keys()])[0]

    with h5py.File('datasets/y/{}'.format(index), 'r') as y_file:
        y_temp = np.asarray([np.asarray(y_file[key]) for key in y_file.keys()]).T

    return x_temp, y_temp

def get_labels(X,y,threshold=.75):
    """Label each detected peak of ``y`` by its sign and pick the matching rows of ``X``.

    Parameters
    ----------
    X : numpy.ndarray
        Array indexed along its first axis by the same sample indices as ``y``.
    y : numpy.ndarray
        Signal searched for peaks with ``get_turn_peaks``.
    threshold : float, optional
        Magnitude passed to ``get_turn_peaks``; defaults to the previously
        hard-coded 0.75. Must be non-negative.

    Returns
    -------
    labels : numpy.ndarray
        Integer array with +1 for each positive peak and -1 for each negative peak.
    X_corr : numpy.ndarray
        Rows of ``X`` at the peak indices.
    y_peaks : numpy.ndarray
        Values of ``y`` at the peak indices.

    Raises
    ------
    ValueError
        If ``threshold`` is negative.
    """
    # A negative threshold would let samples equal to zero count as peaks;
    # those get no label below, leaving labels shorter than X_corr / y_peaks.
    if threshold < 0:
        raise ValueError('threshold must be non-negative')

    peaks = get_turn_peaks(y,threshold=threshold)
    y_peaks = y[peaks]
    X_corr = X[peaks]

    labels = []
    for peak in peaks:
        if y[peak] > 0:
            labels.append(1)
        elif y[peak] < 0:
            labels.append(-1)
    # Explicit dtype keeps the array integer even when no peaks were found.
    labels = np.array(labels, dtype=int)
    return labels, X_corr, y_peaks

# Run the pipeline on one recording: reads 'datasets/X/1' and 'datasets/y/1',
# then extracts the peak labels together with the matching X rows and y values.
x, y = read_data_pair(1)
labels, X_corr, y_peaks = get_labels(x,y)

In [9]:
# Collect the peak labels of every recording found in datasets/y.
# os.listdir returns names in arbitrary order, so sort them to make the
# concatenation order reproducible across runs and machines.
dataset_idxs = sorted(os.listdir('datasets/y'))
label_chunks = []
for idx in dataset_idxs:
    x, y = read_data_pair(idx)
    labels, X_corr, y_peaks = get_labels(x,y)
    label_chunks.append(labels)

# Concatenate once instead of regrowing the array on every iteration. The
# leading empty float array keeps the float dtype the accumulator always had
# and keeps the call valid when the directory is empty.
all_labels = np.concatenate([np.array([])] + label_chunks)

all_labels.shape

(203,)