In [53]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import numpy as np

In [46]:
# read data, drop the first column (datetime)
df = pd.read_csv('./raw_data/CAD_JPY.csv')
df = df.to_numpy()[:,1:]
df

array([[94.464, 94.745, 94.323, 94.476],
       [94.745, 94.783, 94.39, 94.714],
       [94.51, 94.641, 94.22, 94.495],
       ...,
       [118.685, 118.758, 115.619, 118.685],
       [116.55, 116.959, 115.735, 116.793],
       [116.211, 116.95, 115.949, 116.878]], dtype=object)

In [63]:
# create label
label = df[1:,-1] - df[:-1,-1]
label[label > 0] = 1
label[label <= 0] = 0
label = np.append([-1], label) # the first value is assigned to -1, it's removed anyway
label

array([-1, 1, 0, ..., 1, 0, 1], dtype=object)

In [66]:
np.hstack([df, label.reshape(-1,1)])[:15]

array([[94.464, 94.745, 94.323, 94.476, -1],
       [94.745, 94.783, 94.39, 94.714, 1],
       [94.51, 94.641, 94.22, 94.495, 0],
       [94.619, 94.656, 94.12, 94.623, 1],
       [94.01, 94.547, 93.993, 94.03, 0],
       [94.343, 94.401, 94.18, 94.346, 1],
       [94.425, 94.56, 94.39, 94.43, 1],
       [94.466, 94.64, 94.425, 94.48, 1],
       [94.579, 94.886, 94.427, 94.584, 1],
       [94.697, 94.752, 94.126, 94.703, 1],
       [94.11, 94.296, 94.07, 94.093, 0],
       [94.287, 94.381, 94.05, 94.26, 1],
       [94.047, 94.348, 93.985, 94.054, 0],
       [94.25, 94.47, 94.125, 94.254, 1],
       [94.222, 94.307, 93.81, 94.194, 0]], dtype=object)

In [71]:
# normalize data
scaler = StandardScaler()
transformed_df = scaler.fit_transform(df)
transformed_df[:15]

array([[0.35381126, 0.34433509, 0.37924773, 0.35499991],
       [0.38153736, 0.34807352, 0.38587976, 0.37848016],
       [0.35835005, 0.33410359, 0.36905221, 0.35687438],
       [0.36910501, 0.33557929, 0.35915365, 0.36950242],
       [0.30901536, 0.32485589, 0.34658248, 0.31099911],
       [0.34187227, 0.31049244, 0.36509279, 0.34217457],
       [0.34996315, 0.32613483, 0.38587976, 0.35046171],
       [0.3540086 , 0.33400521, 0.38934426, 0.35539454],
       [0.36515824, 0.35820664, 0.38954223, 0.36565481],
       [0.37680123, 0.34502375, 0.35974757, 0.37739494],
       [0.3188823 , 0.30016256, 0.35420437, 0.31721447],
       [0.33634678, 0.30852485, 0.35222466, 0.33369011],
       [0.31266613, 0.30527831, 0.3457906 , 0.31336687],
       [0.33269601, 0.31728065, 0.35964858, 0.33309817],
       [0.32993327, 0.30124474, 0.32846812, 0.32717878]])

In [73]:
look_back = 3

size = transformed_df.shape[0] - look_back
X = np.array([transformed_df[i : i + look_back] for i in range(size)])
y = label[look_back:]

X.shape, y.shape

((2605, 3, 4), (2605,))

In [74]:
support_X, query_X, support_y, query_y = train_test_split(X, y, test_size=0.8, shuffle=False)
support_X.shape, query_X.shape, support_y.shape, query_y.shape

((521, 3, 4), (2084, 3, 4), (521,), (2084,))

In [77]:
import sys
sys.path.append('../')

from data.pre_process import get_data

data_dict = get_data(look_back=3)

In [85]:
data_dict[0]['support_X'][:4]

array([[[-1.32713027, -1.3220764 , -1.29169358, -1.28314868],
        [-1.28457718, -1.32489435, -1.39372517, -1.38089986],
        [-1.38457694, -1.40802392, -1.48005959, -1.4779427 ]],

       [[-1.28457718, -1.32489435, -1.39372517, -1.38089986],
        [-1.38457694, -1.40802392, -1.48005959, -1.4779427 ],
        [-1.48741357, -1.50031182, -1.47292451, -1.48573446]],

       [[-1.38457694, -1.40802392, -1.48005959, -1.4779427 ],
        [-1.48741357, -1.50031182, -1.47292451, -1.48573446],
        [-1.48670435, -1.48833553, -1.46008137, -1.49352622]],

       [[-1.48741357, -1.50031182, -1.47292451, -1.48573446],
        [-1.48670435, -1.48833553, -1.46008137, -1.49352622],
        [-1.49521497, -1.51792402, -1.48576765, -1.49069286]]])

In [84]:
data_dict[0]['support_y'][:3]

array([0., 0., 1.])