In [2]:
import numpy as np
import pandas as pd
import pandas_ta as ta
import tensorflow as tf
import matplotlib.pyplot as plt

from WindowGenerator import WindowGenerator

In [3]:
df = pd.read_csv('BTC_USD_2015-10-08_2020-05-27_Gemini_Hourly.csv')
df.set_index('Date', inplace=True)

df.loc[:, 'ema12'] = df.ta.ema(12)
df.loc[:, 'ema24'] = df.ta.ema(24)
df.loc[:, 'ema48'] = df.ta.ema(48)
df.loc[:, 'vwma'] = df.ta.vwma()
df.loc[:, 'log'] = np.log(df.close)

df.drop(['currency', 'open', 'high', 'low', 'volume'], axis=1, inplace=True)
df = df[48:]
df

Unnamed: 0_level_0,close,ema12,ema24,ema48,vwma,log
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2015-10-11 00:00:00,246.30,245.733371,245.733371,245.733371,245.632542,5.506550
2015-10-11 01:00:00,246.30,245.836394,245.836394,245.836394,245.725661,5.506550
2015-10-11 02:00:00,246.30,245.920686,245.920686,245.920686,245.818708,5.506550
2015-10-11 03:00:00,246.30,245.989652,245.989652,245.989652,245.911686,5.506550
2015-10-11 04:00:00,246.30,246.046079,246.046079,246.046079,246.004593,5.506550
...,...,...,...,...,...,...
2020-05-23 19:00:00,9186.01,9179.465780,9179.465780,9179.465780,9169.374777,9.125437
2020-05-23 20:00:00,9238.75,9190.244729,9190.244729,9190.244729,9179.574617,9.131162
2020-05-23 21:00:00,9235.01,9198.383869,9198.383869,9198.383869,9184.846033,9.130757
2020-05-23 22:00:00,9228.26,9203.815893,9203.815893,9203.815893,9195.036730,9.130026


In [4]:
# Split data
n = len(df)
print(f'Length of original df: {n}')

train_df = df[0:int(n*0.7)]
val_df = df[int(n*0.7):int(n*0.9)]
test_df = df[int(n*0.9):]

print(f'Length of train df: {len(train_df)}')
print(f'Length of val df: {len(val_df)}')
print(f'Length of test df: {len(test_df)}')

INPUT_WIDTH = 20
LABEL_SHIFT = 4
LABEL_WIDTH = 4
MAX_EPOCHS = 100
STD = train_df.close.std()
MEAN = train_df.close.mean()

Length of original df: 40488
Length of train df: 28341
Length of val df: 8098
Length of test df: 4049


In [5]:
def inf_to_zero(x):
    return 0 if x == -np.inf else x

# Standardise data
def standardise(df, column, mean, std):
    vals = df[col].values
    df.loc[:, column+'_normal'] = pd.Series((vals - mean) / std, index=df.index, dtype=np.float64)
    # df.loc[:, column+'_normal'] = pd.Series(np.vectorize(inf_to_zero)(np.log(vals)).astype(np.float64), index=df.index)

cols = [x for x in train_df.columns]

norms = {}

for col in cols:
    vals = train_df[col].values
    std = vals.std()
    mean = vals.mean()
    norms[col+'_std'] = std
    norms[col+'_mean'] = mean
    for df in [train_df, test_df, val_df]:
        standardise(df, col, mean, std)

train_df.drop(cols, axis=1, inplace=True)
val_df.drop(cols, axis=1, inplace=True)
test_df.drop(cols, axis=1, inplace=True)

train_df.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item_labels[indexer[info_axis]]] = value
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


Unnamed: 0_level_0,close_normal,ema12_normal,ema24_normal,ema48_normal,vwma_normal,log_normal
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2015-10-11 00:00:00,-0.906402,-0.906648,-0.906648,-0.906648,-0.906504,-1.670365
2015-10-11 01:00:00,-0.906402,-0.906622,-0.906622,-0.906622,-0.90648,-1.670365
2015-10-11 02:00:00,-0.906402,-0.9066,-0.9066,-0.9066,-0.906456,-1.670365
2015-10-11 03:00:00,-0.906402,-0.906582,-0.906582,-0.906582,-0.906432,-1.670365
2015-10-11 04:00:00,-0.906402,-0.906568,-0.906568,-0.906568,-0.906408,-1.670365


In [6]:
window = WindowGenerator(input_width=INPUT_WIDTH, label_width=LABEL_WIDTH, shift=LABEL_SHIFT, 
                         train_df=train_df, val_df=val_df, test_df=test_df,
                         label_columns=['close_normal'])
window

Total window size: 24
Input indices: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19]
Label indices: [20 21 22 23]
Label column name(s): ['close_normal']

In [None]:
LSTM_SIZE = INPUT_WIDTH*len(df.columns)

class myCallback(tf.keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs={}):
        if(logs.get('loss') < 6 and logs.get('val_loss') < 17):
            print("\nReached less than 6% loss so cancelling training!")
            self.model.stop_training = True
            
early_stop = myCallback()


model = tf.keras.Sequential([
    tf.keras.layers.Flatten(),
    tf.keras.layers.Lambda(lambda x: tf.expand_dims(x, axis=-1)),
    tf.keras.layers.LSTM(LSTM_SIZE, input_shape=[1, LSTM_SIZE]),
#     tf.keras.layers.Dense(int(LSTM_SIZE/2), activation="relu", input_shape=[1, int(LSTM_SIZE/2)]),
#     tf.keras.layers.Dense(int(LSTM_SIZE/4), activation="relu", input_shape=[1, int(LSTM_SIZE/4)]),
    tf.keras.layers.Dense(1),
])

model.compile(loss=tf.losses.MeanAbsolutePercentageError(),
            optimizer=tf.optimizers.Adam())

# model.compile(loss=tf.losses.Huber(),
#               optimizer=tf.optimizers.Adam(),
#              metrics=['mape'])

history = model.fit(window.train, epochs=MAX_EPOCHS, batch_size=96,
                    validation_data=window.val, verbose=1) # , callbacks=[early_stop])

In [None]:
SPLIT = 0
min_loss = min(history.history['loss'])
min_val_loss = min(history.history['val_loss'])

print(f'Minimum training loss: {min_loss}')
print(f'Minimum validation loss: {min_val_loss}')

plt.plot([x for x in range(1, MAX_EPOCHS+1)][SPLIT:], history.history['loss'][SPLIT:], label='loss')
plt.plot([x for x in range(1, MAX_EPOCHS+1)][SPLIT:], history.history['val_loss'][SPLIT:], label='val_loss')
plt.legend()
plt.show()

In [None]:
SPLIT = 0
min_loss = min(history.history['loss'])
min_val_loss = min(history.history['val_loss'])

min_mape = min(history.history['mape'])
min_val_mape = min(history.history['val_mape'])

print(f'Minimum training loss: {min_loss}')
print(f'Minimum validation loss: {min_val_loss}')

print(f'Minimum training mape: {min_mape}')
print(f'Minimum validation mape: {min_val_mape}')

plt.plot([x for x in range(1, MAX_EPOCHS+1)][SPLIT:], history.history['loss'][SPLIT:], label='loss')
plt.plot([x for x in range(1, MAX_EPOCHS+1)][SPLIT:], history.history['val_loss'][SPLIT:], label='val_loss')
plt.legend()
plt.show()

plt.plot([x for x in range(1, MAX_EPOCHS+1)][SPLIT:], history.history['mape'][SPLIT:], label='mape')
plt.plot([x for x in range(1, MAX_EPOCHS+1)][SPLIT:], history.history['val_mape'][SPLIT:], label='val_mape')
plt.legend()
plt.show()

In [None]:
validation_history = model.fit(window.val, epochs=int(MAX_EPOCHS/3), batch_size=1, verbose=1)

In [None]:
SPLIT = 0
min_loss = min(validation_history.history['loss'])

print(f'Minimum training loss: {min_loss}')

plt.plot([x for x in range(1, int(MAX_EPOCHS/3)+1)][SPLIT:], validation_history.history['loss'][SPLIT:], label='loss')
plt.legend()
plt.show()

In [8]:
model.evaluate(window.test)



1.8699779510498047

In [None]:
i = 0

for i in range(5, 20):
    new_df = test_df.iloc[(INPUT_WIDTH*i):(INPUT_WIDTH*(i+1))+LABEL_SHIFT]
    input = tf.stack([row.values for _, row in new_df.iloc[:INPUT_WIDTH].iterrows()])
    input = tf.expand_dims(input, axis=0)

#     actual = np.exp(model.predict(input)[0][0])
#     expected = np.exp(new_df.iloc[-1]['close_normal'])
#     new_df.loc[:, 'close'] = pd.Series(np.exp(new_df['close_normal'].values), index=new_df.index)

    actual = (model.predict(input)[0][0] * norms['close_std']) + norms['close_mean']
    expected = (new_df.iloc[-1]['close_normal'] * norms['close_std']) + norms['close_mean']
    new_df.loc[:, 'close'] = pd.Series((new_df['close_normal'].values*norms['close_std'])+norms['close_mean'], index=new_df.index)

    if LABEL_SHIFT > 1:
        plt.plot([x for x in range(-INPUT_WIDTH, 1)], new_df['close'][:INPUT_WIDTH+1])
        plt.plot([x for x in range(0, LABEL_SHIFT)], new_df['close'][INPUT_WIDTH:], linestyle='dotted')
        plt.plot([0, LABEL_SHIFT-1], 
                     [new_df['close'][INPUT_WIDTH], actual],
                     marker='o', linestyle='dotted')
    else:
        plt.plot(new_df.index, new_df['close'])
        plt.plot(new_df.index[-1], actual, marker='o')
    print(f'Diff is {actual-expected}')
    plt.show()

In [None]:
BIAS = 0

fig, axes = plt.subplots(nrows=5, ncols=2, figsize=(40, 60))

for i in range(3, 13):
    new_df = test_df.iloc[(INPUT_WIDTH*i):(INPUT_WIDTH*(i+1))+LABEL_SHIFT]
    input = tf.stack([row.values for _, row in new_df.iterrows()])[:-1]
    input = tf.expand_dims(input, axis=0)
    
    actual = ((model.predict(input)*STD)+MEAN)[0][0]
    expected = (tf.stack([row.values for _, row in new_df.iterrows()])[-1].numpy()[0]*STD)+MEAN
    
    new_df.loc[:, 'close'] = pd.Series((new_df['close_normal'].values*STD)+MEAN, index=new_df.index)
    
    
    if i%2 == 0:
        j = 1
    else:
        j = 0
    
    i = i-3
    map = {0: 0, 1: 0, 2:1, 3:1, 4:2, 5:2, 6:3, 7:3, 8:4, 9:4}
    i = map[i]
    if LABEL_SHIFT > 1:
        # axes[i].plot(new_df.index[:INPUT_WIDTH+1], new_df['close'][:INPUT_WIDTH+1])
        axes[i][j].plot([x for x in range(-INPUT_WIDTH, 1)], new_df['close'][:INPUT_WIDTH+1])
        # axes[i].plot(new_df.index[INPUT_WIDTH:], new_df['close'][INPUT_WIDTH:], linestyle='dotted')
        axes[i][j].plot([x for x in range(0, LABEL_SHIFT)], new_df['close'][INPUT_WIDTH:], linestyle='dotted')
#         axes[i].plot([new_df.index[INPUT_WIDTH], new_df.index[-1]], 
#                      [new_df['close'][INPUT_WIDTH], actual],
#                      marker='o', linestyle='dotted')
        axes[i][j].plot([0, LABEL_SHIFT-1], 
                     [new_df['close'][INPUT_WIDTH], actual],
                     marker='o', linestyle='dotted')
        axes[i][j].set_title(f'{new_df.index[0]} to {new_df.index[-1]}')
        axes[i][j].set_xlabel('Tn (hour)')
        axes[i][j].set_ylabel('Price (USD)')
    else:
        axes[i].plot(new_df.index, new_df['close'])
        axes[i].plot(new_df.index[-1], actual, marker='o')
    print(f'Diff is {actual-expected}')

plt.show()

In [None]:
model.save('BTC_USD_122448EMAs_VWAP_LOG')