In [30]:
from kryptoflow.services.transforms import dataset
import pandas
import numpy

ds = dataset.Dataset()
df = ds.load_df(ds.gdax_path, keep_keys=['ts', 'price', 'volume_24h',
                                           'spread', 'side'])
df.index = pandas.to_datetime(df['ts'])
df_side = pandas.get_dummies(df['side'])
df = pandas.concat([df, df_side], 1).drop('side', 1)
df = df.drop('ts', 1)
df.head()

Unnamed: 0_level_0,price,spread,volume_24h,time_diff,buy,sell
ts,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2018-03-14 06:15:15,9153.889648,0.01,16492.330078,0.0,0,1
2018-03-14 06:15:19,9153.900391,0.01,16492.332031,4.0,1,0
2018-03-14 06:15:19,9153.900391,0.01,16492.609375,0.0,1,0
2018-03-14 06:15:26,9153.889648,0.01,16492.710938,7.0,0,1
2018-03-14 06:15:26,9153.889648,0.01,16492.736328,0.0,0,1


In [64]:
from itertools import chain

def make_forecasting(df, n_price_features=8):
    top_columns = [['t_' + str(n_price_features - y) for x in range(n_price_features) for y in [x]*len(df.columns)] + ['t_0'], 
                   list(df.columns)*(n_price_features) + ['target']]
    tuples = list(zip(*top_columns))
    headers = pandas.MultiIndex.from_tuples(tuples)
    
    data = []
    index = []
    for i, d in df.groupby(numpy.arange(len(df))//n_price_features):
        
        target = d['price'].values[-1]
        index.append(d.index[-1])
        row_data = [row for row in d.values] 
        new_row = list(chain.from_iterable(d.values.tolist())) + [target]
        
        data.append(new_row)
        
        if len(new_row) != len(tuples):
            continue

    nw = pandas.DataFrame(data, columns=headers, index=index)
    nw['t_0']['target'] = nw['t_0']['target'].shift(-1).fillna(0)
    nw = nw.dropna()
    return nw
            
d_ = make_forecasting(df)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [65]:
d_.head()

Unnamed: 0_level_0,t_8,t_8,t_8,t_8,t_8,t_8,t_7,t_7,t_7,t_7,...,t_2,t_2,t_2,t_1,t_1,t_1,t_1,t_1,t_1,t_0
Unnamed: 0_level_1,price,spread,volume_24h,time_diff,buy,sell,price,spread,volume_24h,time_diff,...,time_diff,buy,sell,price,spread,volume_24h,time_diff,buy,sell,target
2018-03-14 06:15:39,9153.889648,0.01,16492.330078,0.0,0.0,1.0,9153.900391,0.01,16492.332031,4.0,...,4.0,1.0,0.0,9153.889648,0.01,16492.892578,0.0,0.0,1.0,9153.889648
2018-03-14 06:16:15,9153.889648,0.01,16493.712891,10.0,0.0,1.0,9153.889648,0.01,16493.822266,0.0,...,9.0,1.0,0.0,9153.889648,0.01,16484.835938,3.0,0.0,1.0,9153.889648
2018-03-14 06:16:50,9153.889648,0.01,16484.837891,23.0,0.0,1.0,9153.889648,0.01,16484.853516,0.0,...,7.0,0.0,1.0,9160.0,0.01,16485.166016,2.0,1.0,0.0,9160.0
2018-03-14 06:17:47,9160.0,0.01,16481.835938,2.0,1.0,0.0,9160.799805,0.82,16481.835938,0.0,...,22.0,0.0,1.0,9164.25,0.01,16484.8125,23.0,1.0,0.0,9164.25
2018-03-14 06:18:46,9164.25,0.01,16484.876953,0.0,1.0,0.0,9164.25,0.01,16484.888672,11.0,...,10.0,1.0,0.0,9164.240234,0.01,16484.921875,12.0,0.0,1.0,9164.240234


In [90]:
a = numpy.arange(48*5).reshape((5, 48))
reshaped = numpy.reshape(a, (5, 8, 6))

In [103]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error

min_max = MinMaxScaler()

min_max.fit(d_)
scaled = min_max.transform(d_)

def reshape(array, initial_df, transformed_df):
    out_shape = len(initial_df.columns)
    middle_shape = (len(transformed_df.columns)-1)//out_shape
    print(middle_shape, out_shape)
    return numpy.reshape(array, (array.shape[0], middle_shape, out_shape))
    

X_train, X_test, y_train, y_test = train_test_split(scaled[:, :-1], scaled[:, -1])
x_train = reshape(X_train, df, d_)
x_test = reshape(X_test, df, d_)

8 6
8 6


In [106]:
from keras.models import Sequential
from keras.layers import LSTM, Dense
from keras.callbacks import TensorBoard
from time import time

time_steps = 10
tensorboard = TensorBoard(log_dir="logs/{}".format(time()))

model = Sequential()
model.add(LSTM(4, input_shape=(8, 6)))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(x_train, y_train, epochs=15, batch_size=20, verbose=2, 
          validation_data=(x_test, y_test), callbacks=[tensorboard])

Train on 21786 samples, validate on 7263 samples
Epoch 1/15
 - 17s - loss: 0.0070 - val_loss: 2.4433e-04
Epoch 2/15
 - 17s - loss: 1.3821e-04 - val_loss: 7.7786e-05
Epoch 3/15
 - 17s - loss: 4.7263e-05 - val_loss: 3.0001e-05
Epoch 4/15
 - 17s - loss: 1.9396e-05 - val_loss: 1.4406e-05
Epoch 5/15
 - 17s - loss: 9.8945e-06 - val_loss: 1.2199e-05
Epoch 6/15
 - 17s - loss: 7.3679e-06 - val_loss: 4.7614e-06
Epoch 7/15
 - 17s - loss: 5.3809e-06 - val_loss: 4.0063e-06
Epoch 8/15
 - 17s - loss: 4.2720e-06 - val_loss: 3.2805e-06
Epoch 9/15
 - 17s - loss: 3.8222e-06 - val_loss: 2.9557e-06
Epoch 10/15
 - 17s - loss: 3.3399e-06 - val_loss: 2.1749e-06
Epoch 11/15
 - 17s - loss: 2.9973e-06 - val_loss: 1.8641e-06
Epoch 12/15
 - 17s - loss: 2.7447e-06 - val_loss: 5.7251e-06
Epoch 13/15
 - 17s - loss: 2.4559e-06 - val_loss: 2.0574e-06
Epoch 14/15
 - 17s - loss: 2.3728e-06 - val_loss: 5.8747e-06
Epoch 15/15
 - 17s - loss: 2.1328e-06 - val_loss: 5.0707e-06


<keras.callbacks.History at 0x7f295019fef0>