# Several Train & Test splits

## Preparations

### options & variables

In [1]:
# (Re)load IPython's autoreload extension and switch it to mode 2, so that
# imported modules are reloaded automatically before each cell executes.
%reload_ext autoreload
%autoreload 2

In [2]:
# Folder holding the train/test arrays. The trailing slash matters: file paths
# in this notebook are built by plain string concatenation.
dir_data = '../data/power_consumption/'

# Window sizes handed to create_Xy: length of the input window and of the
# output (forecast) window.
n_input, n_out = 14, 7

### modules

In [3]:
import pandas as pd  # data mangling and transforming
import numpy as np  # handling vectors and matrices
from preproc_functions import create_Xy, list_combine  # own preprocessing functions

### Load data

In [4]:
# Read the already separated train and test arrays from dir_data.
train, test = (np.load(dir_data + fname) for fname in ('train.npy', 'test.npy'))

## Splitting

We always want to predict the next `n_out` days (output) based on the last `n_input` days (input).

Therefore we create three types of datasets, described below.

#### 1. **S**tandard weeks (Monday to Sunday)

In [5]:
# Training samples for the standard-week split.
# NOTE(review): n_timesteps=7 presumably moves the window start forward by a
# whole week between samples — confirm in preproc_functions.create_Xy.
train_Xs, train_ys = create_Xy(
    train,
    n_input=n_input,
    n_out=n_out,
    n_timesteps=7,
)

In [6]:
# Test samples for the standard-week split, built with the same settings as
# the training samples.
# NOTE(review): n_timesteps=7 presumably moves the window start forward by a
# whole week between samples — confirm in preproc_functions.create_Xy.
test_Xs, test_ys = create_Xy(
    test,
    n_input=n_input,
    n_out=n_out,
    n_timesteps=7,
)

In [7]:
# Report the shape of each standard-week array, one per line.
print(*(arr.shape for arr in (train_Xs, train_ys, test_Xs, test_ys)), sep='\n')

(142, 14, 8)
(142, 7)
(59, 14, 8)
(59, 7)


#### 2. **M**ulti-variate case (overlapping weeks)

In [8]:
# Training samples with n_timesteps left at create_Xy's default.
# NOTE(review): the default presumably slides the window one step at a time,
# producing overlapping samples — confirm in preproc_functions.create_Xy.
train_Xm, train_ym = create_Xy(
    train,
    n_input=n_input,
    n_out=n_out,
)

In [9]:
# Test samples with n_timesteps left at create_Xy's default.
# NOTE(review): the default presumably slides the window one step at a time,
# producing overlapping samples — confirm in preproc_functions.create_Xy.
test_Xm, test_ym = create_Xy(
    test,
    n_input=n_input,
    n_out=n_out,
)

In [10]:
# Report the shape of each multivariate array, one per line.
print(*(arr.shape for arr in (train_Xm, train_ym, test_Xm, test_ym)), sep='\n')

(988, 14, 8)
(988, 7)
(407, 14, 8)
(407, 7)


#### 3. **U**ni-variate case (overlapping & only past target variable as input)

In [11]:
# Univariate inputs: keep only feature column 0 of the multivariate windows.
# Slicing with `:1` (instead of indexing with 0) keeps the last axis, so the
# result stays three-dimensional.
# NOTE(review): column 0 is assumed to be the target variable — confirm.
train_Xu, test_Xu = train_Xm[:, :, :1], test_Xm[:, :, :1]

# The targets are the very same arrays as in the multivariate case (aliases,
# not copies).
train_yu, test_yu = train_ym, test_ym

In [12]:
# Report the shape of each univariate array, one per line.
print(*(arr.shape for arr in (train_Xu, train_yu, test_Xu, test_yu)), sep='\n')

(988, 14, 1)
(988, 7)
(407, 14, 1)
(407, 7)


In [13]:
# Sanity check: compare the final input value of sample 1 with the first
# target value of sample 0.
train_Xu[1, -1, 0] == train_yu[0, 0]

True

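The last element of the second X sample is equal to the first element of the first y sample, i.e. consecutive samples are shifted by one time step and the first feature column holds the target variable.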

### save all of them locally

In [14]:
# Name fragments: data split, array role (inputs / targets) and dataset type
# (standard, multivariate, univariate).
l1, l2, l3 = ['train', 'test'], ['_X', '_y'], ['s', 'm', 'u']

In [15]:
# Combine the fragments into full names; they are used both as variable names
# and as file names when saving.
# NOTE(review): assumes list_combine concatenates every pairing of its two
# lists (e.g. 'train' + '_X' + 's') — confirm in preproc_functions.
file_names = list_combine(
    list_combine(l1, l2),
    l3,
)

In [16]:
# Persist every array in dir_data under its own variable name.
# Each array is fetched by name from the notebook's global namespace instead of
# through eval(), which would execute any expression contained in file_names.
# np.save appends the '.npy' extension when the given path lacks it.
for item in file_names:
    np.save(dir_data+item, globals()[item])