## Imports

In [1]:
import pickle
import pathlib as pl

In [2]:
import numpy as np
import pandas as pd

## Functions

In [3]:
def walk_forward(df, x_labels, y_labels, n, n_input_labels, 
                 split_per=None, split_data=False, y_offset=0):
    
    td_shape = df.shape
    td_l = td_shape[0]
    
    x_indices = [range(i, i+n) for i in range(td_l-n)]
    y_indices = [range(i+y_offset, i+n) for i in range(td_l-n)]
    
    x = df.loc[:, x_labels].values[x_indices]
    y = df.loc[:, y_labels].values[y_indices]
    
    if split_data:
        split_index = int(len(x)*split_per)
        x = x.reshape(-1, n, n_input_labels, 1)
        y = y.reshape(-1, n-y_offset)
        X_train, X_test, y_train, y_test = (x[:split_index], x[split_index:], y[:split_index], y[split_index:])
        return X_train, X_test, y_train, y_test
    
    return x, y

In [5]:
def batch_sequence_split(df, x_labels, y_labels, n, n_input_labels, split_index):
    
    X_train = df[x_labels].iloc[:split_index].values.reshape(-1, n, n_input_labels, 1)
    X_test = df[x_labels].iloc[split_index:].values.reshape(-1, n, n_input_labels, 1)
    y_train = df[y_labels].iloc[:split_index].values.reshape(-1, n)
    y_test = df[y_labels].iloc[split_index:].values.reshape(-1, n)
    
    return X_train, X_test, y_train, y_test

In [20]:
def offset_acceleration(df, n=100):
    acc_cols = [x for x in df.columns if 'acc' in x]
    delayed_acc = df[acc_cols][:len(df)-n]
    delayed_acc = delayed_acc.rename({old:(old + '_delayed') for old in acc_cols}, axis=1)
    return df.iloc[n:, :].reset_index(drop=True).join(delayed_acc)

## Get Data

In [6]:
data = pickle.load(open('./data/data.pkl', 'rb'))

## Preprocess

In [23]:
preproc_data = data.iloc[:100000, :]
preproc_data = offset_acceleration(preproc_data)

In [28]:
# Some config parameters
data_shape = preproc_data.shape
td_l = data_shape[0]

x_labels = ['exc', 'del_acc']
y_labels = ['acc']

n = 100
n_input_labels = len(x_labels)
y_offset = 1
train_percentage = 0.7
length_train_data = len(data_shape)
split_index = int(length_train_data*train_percentage)

In [29]:
X_train, X_test, y_train, y_test = walk_forward(preproc_data, 
                                                x_labels, y_labels, 
                                                n, n_input_labels, y_offset=y_offset,
                                                split_per=0.7, split_data=True)

Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
  return self._getitem_tuple(key)


In [30]:
pickle.dump(X_train, open('./data/X_train.pkl', 'wb'))
pickle.dump(X_test, open('./data/X_test.pkl', 'wb'))
pickle.dump(y_train, open('./data/y_train.pkl', 'wb'))
pickle.dump(y_test, open('./data/y_test.pkl', 'wb'))