In [18]:
import numpy as np
import pandas as pd
import pandas_ta as ta
import tensorflow as tf
import matplotlib.pyplot as plt

from WindowGenerator import WindowGenerator

In [19]:
df = pd.read_csv('COINBASE-ETHGBP-20190101-20210101.csv')
df.set_index('time', inplace=True)

df.loc[:, 'ema12'] = df.ta.ema(12)
df.loc[:, 'ema24'] = df.ta.ema(24)
df.loc[:, 'ema48'] = df.ta.ema(48)
# df.loc[:, 'rsi'] = df.ta.rsi()
# df.loc[:, 'vwma'] = df.ta.vwma()
df.loc[:, 'log'] = np.log(df.close)

# df.loc[:, 'return'] = ((df.close - df.close.shift(1))/df.close)*100
# df.loc[:, 'direction'] = df['return'] / abs(df['return']) * 0.1

df.drop(['open', 'high', 'low', 'volume'], axis=1, inplace=True)
df = df[12:]
df

Unnamed: 0_level_0,close,ema12,ema24,ema48,log
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2019-01-01 13:00:00,107.77,107.163098,107.163098,107.163098,4.679999
2019-01-01 14:00:00,107.97,107.309808,107.309808,107.309808,4.681853
2019-01-01 15:00:00,107.10,107.271661,107.271661,107.271661,4.673763
2019-01-01 16:00:00,107.68,107.345904,107.345904,107.345904,4.679164
2019-01-01 17:00:00,107.89,107.444831,107.444831,107.444831,4.681112
...,...,...,...,...,...
2020-12-31 20:00:00,545.80,542.947438,542.947438,542.947438,6.302253
2020-12-31 21:00:00,541.76,542.731540,542.731540,542.731540,6.294823
2020-12-31 22:00:00,543.00,542.780351,542.780351,542.780351,6.297109
2020-12-31 23:00:00,539.71,542.222105,542.222105,542.222105,6.291032


In [20]:
# Split data
n = len(df)
print(f'Length of original df: {n}')

train_df = df[0:int(n*0.7)]
val_df = df[int(n*0.7):int(n*0.9)]
test_df = df[int(n*0.9):]

print(f'Length of train df: {len(train_df)}')
print(f'Length of val df: {len(val_df)}')
print(f'Length of test df: {len(test_df)}')

INPUT_WIDTH = 12
LABEL_SHIFT = 1
LABEL_WIDTH = 1
MAX_EPOCHS = 50
STD = train_df.close.std()
MEAN = train_df.close.mean()

Length of original df: 17529
Length of train df: 12270
Length of val df: 3506
Length of test df: 1753


In [21]:
def inf_to_zero(x):
    return 0 if x == -np.inf else x

# Standardise data
def standardise(df, column, mean, std):
    vals = df[col].values
    df.loc[:, column+'_normal'] = pd.Series((vals - mean) / std, index=df.index, dtype=np.float32)
    
cols = [x for x in train_df.columns if 'direction' not in x]

norms = {}

for col in cols:
    vals = train_df[col].values
    std = vals.std()
    mean = vals.mean()
    norms[col+'_std'] = std
    norms[col+'_mean'] = mean
    for df in [train_df, test_df, val_df]:
        standardise(df, col, mean, std)

train_df.drop(cols, axis=1, inplace=True)
val_df.drop(cols, axis=1, inplace=True)
test_df.drop(cols, axis=1, inplace=True)

train_df.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item_labels[indexer[info_axis]]] = value
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the docum

Unnamed: 0_level_0,close_normal,ema12_normal,ema24_normal,ema48_normal,log_normal
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2019-01-01 13:00:00,-0.919601,-0.93604,-0.93604,-0.93604,-0.947096
2019-01-01 14:00:00,-0.914471,-0.932271,-0.932271,-0.932271,-0.940117
2019-01-01 15:00:00,-0.936786,-0.933251,-0.933251,-0.933251,-0.970571
2019-01-01 16:00:00,-0.921909,-0.931344,-0.931344,-0.931344,-0.950241
2019-01-01 17:00:00,-0.916523,-0.928803,-0.928803,-0.928803,-0.942907


In [22]:
window = WindowGenerator(input_width=INPUT_WIDTH, label_width=LABEL_WIDTH, shift=LABEL_SHIFT, 
                         train_df=train_df, val_df=val_df, test_df=test_df,
                         label_columns=['close_normal'])
window

Total window size: 13
Input indices: [ 0  1  2  3  4  5  6  7  8  9 10 11]
Label indices: [12]
Label column name(s): ['close_normal']

In [23]:
LSTM_SIZE = INPUT_WIDTH*len(df.columns)

class myCallback(tf.keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs={}):
        if(logs.get('loss') < 6 and logs.get('val_loss') < 17):
            print("\nReached less than 6% loss so cancelling training!")
            self.model.stop_training = True
            
early_stop = myCallback()


model = tf.keras.Sequential([
    tf.keras.layers.Flatten(),
    tf.keras.layers.Lambda(lambda x: tf.expand_dims(x, axis=-1)),
    tf.keras.layers.LSTM(LSTM_SIZE, input_shape=[1, LSTM_SIZE]),
#     tf.keras.layers.Dense(int(LSTM_SIZE/2), activation="relu", input_shape=[1, int(LSTM_SIZE/2)]),
#     tf.keras.layers.Dense(int(LSTM_SIZE/4), activation="relu", input_shape=[1, int(LSTM_SIZE/4)]),
    tf.keras.layers.Dense(1),
])

model.compile(loss=tf.losses.MeanSquaredError(),
            optimizer=tf.optimizers.Adam(), metrics=['mape'])

history = model.fit(window.train, epochs=MAX_EPOCHS, batch_size=96,
                    validation_data=window.val, verbose=1) # , callbacks=[early_stop])

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50

KeyboardInterrupt: 

In [None]:
SPLIT = 0
min_loss = min(history.history['loss'])
min_val_loss = min(history.history['val_loss'])

print(f'Minimum training loss: {min_loss}')
print(f'Minimum validation loss: {min_val_loss}')

plt.plot([x for x in range(1, MAX_EPOCHS+1)][SPLIT:], history.history['loss'][SPLIT:], label='loss')
plt.plot([x for x in range(1, MAX_EPOCHS+1)][SPLIT:], history.history['val_loss'][SPLIT:], label='val_loss')
plt.legend()
plt.show()

In [None]:
SPLIT = 0
min_loss = min(history.history['loss'])
min_val_loss = min(history.history['val_loss'])

min_mape = min(history.history['mape'])
min_val_mape = min(history.history['val_mape'])

print(f'Minimum training loss: {min_loss}')
print(f'Minimum validation loss: {min_val_loss}')

print(f'Minimum training mape: {min_mape}')
print(f'Minimum validation mape: {min_val_mape}')

plt.plot([x for x in range(1, MAX_EPOCHS+1)][SPLIT:], history.history['loss'][SPLIT:], label='loss')
plt.plot([x for x in range(1, MAX_EPOCHS+1)][SPLIT:], history.history['val_loss'][SPLIT:], label='val_loss')
plt.legend()
plt.show()

plt.plot([x for x in range(1, MAX_EPOCHS+1)][SPLIT:], history.history['mape'][SPLIT:], label='mape')
plt.plot([x for x in range(1, MAX_EPOCHS+1)][SPLIT:], history.history['val_mape'][SPLIT:], label='val_mape')
plt.legend()
plt.show()

In [None]:
validation_history = model.fit(window.val, epochs=int(MAX_EPOCHS/3), batch_size=1, verbose=1)

In [None]:
SPLIT = 0
min_loss = min(validation_history.history['loss'])

print(f'Minimum training loss: {min_loss}')

plt.plot([x for x in range(1, int(MAX_EPOCHS/3)+1)][SPLIT:], validation_history.history['loss'][SPLIT:], label='loss')
plt.legend()
plt.show()

In [None]:
model.evaluate(window.test)

In [None]:
model = tf.keras.models.load_model('BTC_USD_122448EMAs_VWAP_LOG_4HR')

In [None]:
i = 0

for i in range(20, 40):
    new_df = test_df.iloc[(INPUT_WIDTH*i):(INPUT_WIDTH*(i+1))+LABEL_SHIFT]
    input = tf.stack([row.values for _, row in new_df.iloc[:INPUT_WIDTH].iterrows()])
    input = tf.expand_dims(input, axis=0)

    actual = (model.predict(input)[0][0] * norms['close_std']) + norms['close_mean']
    
    expected = (new_df.iloc[-1]['close_normal'] * norms['close_std']) + norms['close_mean']
    new_df.loc[:, 'close'] = pd.Series((new_df['close_normal'].values*norms['close_std'])+norms['close_mean'], index=new_df.index)

    if LABEL_SHIFT > 1:
        plt.plot([x for x in range(-INPUT_WIDTH, 1)], new_df['close'][:INPUT_WIDTH+1])
        plt.plot([x for x in range(0, LABEL_SHIFT)], new_df['close'][INPUT_WIDTH:], linestyle='dotted')
        plt.plot([0, LABEL_SHIFT-1], 
                [new_df['close'][INPUT_WIDTH], actual],
                marker='o', linestyle='dotted')
    else:
        plt.plot(new_df.index, new_df['close'])
        plt.plot(new_df.index[-1], actual, marker='o')
    print(f'Diff is {actual-expected}')
    plt.show()

In [None]:
BIAS = 0

fig, axes = plt.subplots(nrows=5, ncols=2, figsize=(40, 60))

for i in range(3, 13):
    new_df = test_df.iloc[(INPUT_WIDTH*i):(INPUT_WIDTH*(i+1))+LABEL_SHIFT]
    input = tf.stack([row.values for _, row in new_df.iterrows()])[:-1]
    input = tf.expand_dims(input, axis=0)
    
    actual = ((model.predict(input)*STD)+MEAN)[0][0]
    expected = (tf.stack([row.values for _, row in new_df.iterrows()])[-1].numpy()[0]*STD)+MEAN
    
    new_df.loc[:, 'close'] = pd.Series((new_df['close_normal'].values*STD)+MEAN, index=new_df.index)
    
    
    if i%2 == 0:
        j = 1
    else:
        j = 0
    
    i = i-3
    map = {0: 0, 1: 0, 2:1, 3:1, 4:2, 5:2, 6:3, 7:3, 8:4, 9:4}
    i = map[i]
    if LABEL_SHIFT > 1:
        # axes[i].plot(new_df.index[:INPUT_WIDTH+1], new_df['close'][:INPUT_WIDTH+1])
        axes[i][j].plot([x for x in range(-INPUT_WIDTH, 1)], new_df['close'][:INPUT_WIDTH+1])
        # axes[i].plot(new_df.index[INPUT_WIDTH:], new_df['close'][INPUT_WIDTH:], linestyle='dotted')
        axes[i][j].plot([x for x in range(0, LABEL_SHIFT)], new_df['close'][INPUT_WIDTH:], linestyle='dotted')
#         axes[i].plot([new_df.index[INPUT_WIDTH], new_df.index[-1]], 
#                      [new_df['close'][INPUT_WIDTH], actual],
#                      marker='o', linestyle='dotted')
        axes[i][j].plot([0, LABEL_SHIFT-1], 
                     [new_df['close'][INPUT_WIDTH], actual],
                     marker='o', linestyle='dotted')
        axes[i][j].set_title(f'{new_df.index[0]} to {new_df.index[-1]}')
        axes[i][j].set_xlabel('Tn (hour)')
        axes[i][j].set_ylabel('Price (USD)')
    else:
        axes[i].plot(new_df.index, new_df['close'])
        axes[i].plot(new_df.index[-1], actual, marker='o')
    print(f'Diff is {actual-expected}')

plt.show()

In [None]:
model.save('BTC_USD_122448EMAs_VWAP_LOG')