## Imports

In [1]:
import pickle
import pathlib as pl

In [2]:
import numpy as np
import pandas as pd

## Functions

In [3]:
def walk_forward(df, x_labels, y_labels, n, n_input_labels,
                 split_per=None, split_data=False, y_offset=0):
    """Cut a frame into overlapping, one-row-stride windows of length ``n``.

    Window ``i`` takes rows ``i .. i+n-1`` of the ``x_labels`` columns as input
    and rows ``i+y_offset .. i+n-1`` of the ``y_labels`` columns as target.

    Parameters
    ----------
    df : pandas.DataFrame
        Source frame; rows are consumed in positional order.
    x_labels, y_labels : list
        Column labels selected for the inputs and the targets.
    n : int
        Window length in rows; must be positive.
    n_input_labels : int
        Size of the feature axis used when reshaping ``x`` (only used when
        ``split_data`` is true).
    split_per : float, optional
        Fraction of the windows assigned to the training part. Required when
        ``split_data`` is true.
    split_data : bool
        When true, reshape and split into train/test parts.
    y_offset : int
        Number of leading rows of each window dropped from the target;
        must satisfy ``0 <= y_offset < n``.

    Returns
    -------
    (x, y) when ``split_data`` is false:
        ``x`` has shape ``(windows, n, len(x_labels))`` and ``y`` has shape
        ``(windows, n - y_offset, len(y_labels))``.
    (X_train, X_test, y_train, y_test) when ``split_data`` is true:
        ``x`` is reshaped to ``(-1, n, n_input_labels, 1)`` and ``y`` to
        ``(-1, n - y_offset)`` before both are cut at
        ``int(len(x) * split_per)``. The ``y`` reshape folds the label axis
        into the rows, so it only keeps one row per window for a single
        target column.

    Raises
    ------
    ValueError
        If ``n`` is not positive, ``y_offset`` is outside ``[0, n)``, or
        ``split_data`` is true without a ``split_per``.

    Notes
    -----
    ``len(df) - n`` windows are produced, i.e. the window starting at row
    ``len(df) - n`` is not emitted. This matches the previous behaviour and is
    kept so that existing array shapes do not change.
    """
    if n <= 0:
        raise ValueError("n must be a positive window length, got %r" % (n,))
    if not 0 <= y_offset < n:
        # A negative offset would index from the end of the frame (wrap-around),
        # and an offset >= n leaves no target rows at all.
        raise ValueError("y_offset must satisfy 0 <= y_offset < n, got %r" % (y_offset,))
    if split_data and split_per is None:
        raise ValueError("split_per must be given when split_data=True")

    n_windows = max(df.shape[0] - n, 0)

    # Broadcast a column of window starts against a row of in-window offsets to
    # get an explicit integer index matrix. This avoids building a Python list
    # of range objects and is unambiguous for NumPy fancy indexing.
    starts = np.arange(n_windows)[:, None]
    x_indices = starts + np.arange(n)
    y_indices = starts + np.arange(y_offset, n)

    x = df.loc[:, x_labels].values[x_indices]
    y = df.loc[:, y_labels].values[y_indices]

    if split_data:
        split_index = int(len(x) * split_per)
        x = x.reshape(-1, n, n_input_labels, 1)
        y = y.reshape(-1, n - y_offset)
        return x[:split_index], x[split_index:], y[:split_index], y[split_index:]

    return x, y

In [4]:
def batch_sequence_split(df, x_labels, y_labels, n, n_input_labels, split_index):
    """Split a frame at a row position and fold each side into blocks of ``n`` rows.

    Rows before ``split_index`` form the training part and the remaining rows
    the test part. Inputs are reshaped to ``(-1, n, n_input_labels, 1)`` and
    targets to ``(-1, n)``, so each side must hold a number of values that
    fits those shapes or NumPy raises on the reshape.

    Returns ``X_train, X_test, y_train, y_test``.
    """
    input_shape = (-1, n, n_input_labels, 1)
    target_shape = (-1, n)

    inputs = df[x_labels]
    targets = df[y_labels]

    # Inputs first, then targets; each is cut at the same row position.
    X_train = inputs.iloc[:split_index].values.reshape(*input_shape)
    X_test = inputs.iloc[split_index:].values.reshape(*input_shape)
    y_train = targets.iloc[:split_index].values.reshape(*target_shape)
    y_test = targets.iloc[split_index:].values.reshape(*target_shape)

    return X_train, X_test, y_train, y_test

In [5]:
def offset_acceleration(df, n=100):
    """Append ``n``-row-delayed copies of every column whose name contains 'acc'.

    The first ``n`` rows of ``df`` are dropped and the remainder is re-indexed
    from 0. For each matching column ``c`` a new column ``c + '_delayed'`` is
    added whose row ``i`` holds the value ``c`` had in original row ``i``,
    i.e. ``n`` rows before the value now sitting in ``c`` at row ``i``.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame; column labels are expected to be strings.
    n : int
        Delay in rows.

    Returns
    -------
    pandas.DataFrame
        Frame with ``len(df) - n`` rows, the original columns followed by the
        ``*_delayed`` columns, on a fresh 0-based index.
    """
    acc_cols = [col for col in df.columns if 'acc' in col]

    # Rows 0 .. len(df)-n-1 supply the delayed values.
    n_kept = max(len(df) - n, 0)
    delayed_acc = (
        df[acc_cols]
        .iloc[:n_kept]
        # join() aligns on index labels. Re-index the delayed slice from 0 so it
        # lines up with the shifted frame whatever index df arrived with.
        .reset_index(drop=True)
        .rename(columns={old: old + '_delayed' for old in acc_cols})
    )

    shifted = df.iloc[n:, :].reset_index(drop=True)
    return shifted.join(delayed_acc)

## Get Data

In [6]:
# Disabled alternative data source; the active dataset is loaded in the next cell.
# data = pickle.load(open('./data/data.pkl', 'rb'))

In [6]:
# Load the working dataset. read_pickle unpickles the file, so only point this
# at files from a trusted source.
data = pd.read_pickle('./data/sam/joined_data.pkl')

In [8]:
# Preview the first rows of the loaded frame.
data.head()

Unnamed: 0,exc_5noise,acc_test,exc_2noise,exc_3noise,exc_1noise,exc_0noise,exc_20noise,exc_30noise,exc,exc_10noise,acc
0,0.236538,0.249363,0.255575,0.226719,0.223894,0.226107,0.366302,0.115219,-0.258322,0.236538,-0.321452
1,-0.063274,-0.073471,-0.04301,-0.070809,-0.076227,-0.076232,0.09901,-0.03588,-0.256361,-0.063274,-0.266683
2,0.141593,0.143289,0.15836,0.133242,0.130273,0.130466,0.239521,0.115698,0.061794,0.141593,0.057487
3,0.110416,0.095285,0.130844,0.101641,0.098399,0.099507,0.227559,0.073316,-0.406225,0.110416,-0.374379
4,-0.130217,-0.13996,-0.104979,-0.134769,-0.141432,-0.140754,0.011251,-0.092754,-0.092476,-0.130217,-0.032465


In [9]:
# Row count of the loaded frame.
len(data)

100000

## Preprocess

In [11]:
# Add the delayed acceleration columns to the full dataset.
# To work on a subset instead, slice first, e.g. data.iloc[:100000, :].
preproc_data = offset_acceleration(data)

In [13]:
# Preview the preprocessed frame, including the added *_delayed columns.
preproc_data.head()

Unnamed: 0,exc_5noise,acc_test,exc_2noise,exc_3noise,exc_1noise,exc_0noise,exc_20noise,exc_30noise,exc,exc_10noise,acc,acc_test_delayed,acc_delayed
0,0.180216,0.103993,0.196273,0.166338,0.165389,0.167413,0.272335,0.105922,0.019063,0.180216,-0.029829,0.249363,-0.321452
1,0.237776,0.147726,0.253809,0.228406,0.224434,0.228757,0.32738,0.147517,0.385786,0.237776,0.291223,-0.073471,-0.266683
2,-0.084718,-0.191789,-0.074569,-0.108948,-0.108562,-0.10928,0.019878,-0.080495,0.185781,-0.084718,0.049348,0.143289,0.057487
3,-0.000339,-0.058572,0.019585,-0.009102,-0.013938,-0.013581,0.131675,-0.003485,0.249846,-0.000339,0.086298,0.095285,-0.374379
4,0.186426,0.142308,0.20531,0.168084,0.17985,0.181015,0.290906,0.092922,-0.037129,0.186426,-0.194077,-0.13996,-0.032465


In [14]:
# Some config parameters
data_shape = preproc_data.shape
td_l = data_shape[0]  # number of rows in the preprocessed frame

# Columns fed to walk_forward as inputs and as targets.
x_labels = ['exc', 'acc_delayed']
y_labels = ['acc']

n = 100  # window length in rows
n_input_labels = len(x_labels)
y_offset = 1  # leading rows of each window dropped from the target
train_percentage = 0.7
# Use the row count here. len(data_shape) is the number of dimensions (always 2
# for a DataFrame), which made split_index come out as 1.
length_train_data = td_l
# NOTE(review): batch_sequence_split reshapes each side into blocks of n rows,
# so it needs a split_index that is a multiple of n - round it if used there.
split_index = int(length_train_data*train_percentage)

In [15]:
# Build the sliding-window train/test arrays. The split fraction is taken from
# train_percentage in the config cell rather than repeated here as a literal.
X_train, X_test, y_train, y_test = walk_forward(
    preproc_data,
    x_labels,
    y_labels,
    n,
    n_input_labels,
    split_per=train_percentage,
    split_data=True,
    y_offset=y_offset,
)

In [16]:
# Persist each split to its own pickle file. The with-block closes every file
# handle (flushing it to disk) even if pickle.dump raises.
for pkl_path, split_array in (
    ('./data/X_train.pkl', X_train),
    ('./data/X_test.pkl', X_test),
    ('./data/y_train.pkl', y_train),
    ('./data/y_test.pkl', y_test),
):
    with open(pkl_path, 'wb') as pkl_file:
        pickle.dump(split_array, pkl_file)