In [2]:
import pandas as pd
import numpy as np
from pandas import DataFrame
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import TimeSeriesSplit
from sklearn.model_selection import train_test_split
import keras
from sklearn.model_selection import TimeSeriesSplit
from keras.layers import Input, LSTM, Dense, TimeDistributed, Activation, BatchNormalization, Dropout, Bidirectional
from keras.models import Sequential
from keras.utils import Sequence
#from keras.layers import CuDNNLSTM
import matplotlib.pyplot as plt
from matplotlib import rc

In [3]:
data_main = pd.read_csv(
    './datasets/tradingview/bitcoin/BTCUSD_1D_to_2023-03-11.csv',
    delimiter=',',
    low_memory=False
)

In [4]:
data_main

Unnamed: 0,time,open,high,low,close,Histogram,MACD,Signal,RSI,RSI-based MA,Upper Bollinger Band,Lower Bollinger Band,OnBalanceVolume,Smoothing Line,ADX,NV
0,2009-10-05T04:00:00+04:00,0.000764,0.000764,0.000764,0.000764,,,,,,,,,,,
1,2009-10-06T04:00:00+04:00,0.000885,0.000885,0.000885,0.000885,,,,,,,,0.000000e+00,,,0.000000
2,2009-10-07T04:00:00+04:00,0.001050,0.001050,0.001050,0.001050,,,,,,,,0.000000e+00,,,0.000000
3,2009-10-08T04:00:00+04:00,0.001084,0.001084,0.001084,0.001084,,,,,,,,0.000000e+00,,,0.000000
4,2009-10-09T04:00:00+04:00,0.001200,0.001200,0.001200,0.001200,,,,,,,,0.000000e+00,,,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4803,2023-03-07T03:00:00+03:00,22415.570000,22552.120000,21927.510000,22201.830000,-238.134300,-109.924026,128.210274,41.678396,49.566643,,,7.408373e+06,7.426450e+06,22.036393,-15118.130417
4804,2023-03-08T03:00:00+03:00,22203.580000,22282.920000,21588.560000,21701.470000,-258.385641,-194.771777,63.613864,37.572144,48.014107,,,7.390680e+06,7.417858e+06,21.677303,-17693.066945
4805,2023-03-09T03:00:00+03:00,21699.330000,21842.130000,19858.830000,20346.900000,-344.559235,-367.085180,-22.525945,29.188244,46.027901,,,7.358888e+06,7.403712e+06,23.090552,-31792.011157
4806,2023-03-10T03:00:00+03:00,20349.670000,20350.450000,19559.360000,20222.680000,-388.230876,-507.814539,-119.583664,28.558888,44.464712,,,7.313238e+06,7.378934e+06,24.620602,-45649.645680


In [5]:
#DATE = 'Date'
TIME = 'time'
OPEN = 'open'
HIGH = 'high'
LOW = 'low'
CLOSE = 'close'
VOLUME = 'volume'
#MARKET_CAP = 'Market cap'

#INDICATORS
MOVING_AVERAGE = 'MA'
SMOOTHING_LINE = 'Smoothing Line'
HISTOGRAM = 'Histogram'
MACD = 'MACD'
SIGNAL = 'Signal'
RSI = 'RSI'
RSI_MA = 'RSI-based MA'
UP_BOL_BAND = 'Upper Bollinger Band'
LOW_BOL_BAND = 'Lower Bollinger Band'
OBV = 'OnBalanceVolume'
ADX = 'ADX'
NV = 'NV'

ALL_COLUMNS = {
    TIME, OPEN, HIGH, LOW, CLOSE,
    MOVING_AVERAGE, SMOOTHING_LINE, HISTOGRAM, MACD, SIGNAL, RSI, RSI_MA, UP_BOL_BAND, LOW_BOL_BAND, OBV, ADX
}


In [6]:


class DatasetPreparator:
    def __init__(self, dataset):
        self.dataset: DataFrame = dataset.copy(deep=True)


    def prepare(self):
        self.remove_columns()
        self.transform_types()
        self.remove_nan()
        return self.dataset


    def preprocess(self):
        self.optimize_values([OPEN, HIGH, LOW, CLOSE, HISTOGRAM, MACD, SIGNAL, RSI, RSI_MA, OBV, SMOOTHING_LINE, ADX, NV])
        return self.dataset

    def get_data_for_prediction(self, test_size=0.1):
        self.prepare()
        self.preprocess()
        self.remove_columns_for_prediction()
        x_train, x_test, y_train, y_test = train_test_split(
            self.dataset,
            self.dataset[CLOSE],
            test_size=test_size,
            shuffle=False
        )
        return x_train.to_numpy(), x_test.to_numpy(), y_train.to_numpy(), y_test.to_numpy()



    def transform_types(self):
        try:
            self.dataset[TIME] = pd.to_datetime(self.dataset[TIME],unit='s')
        except ValueError:
            self.dataset[TIME] = pd.to_datetime(self.dataset[TIME], utc=True)

    def remove_columns(self):
        unused_columns = [
            UP_BOL_BAND, LOW_BOL_BAND
        ]
        self.dataset= self.dataset.drop(unused_columns, axis=1)

    def remove_columns_for_prediction(self):
        self.dataset= self.dataset.drop([TIME], axis=1)

    @staticmethod
    def plot(dataset):
        # data_plot = self.dataset[[TIME, CLOSE]]
        # data_plot.plot()
        ax = dataset.plot(x=TIME, y=CLOSE)
        ax.set_xlabel("Date")
        ax.set_ylabel("Close Price (USD)")

    def remove_nan(self):
        self.dataset = self.dataset.fillna(0.0)


    def optimize_values(self, columns):
        scaler = MinMaxScaler(feature_range=(0,1))
        self.dataset[columns] = scaler.fit_transform(self.dataset[columns])

    def reverse_data(self, arr):
        scaler = MinMaxScaler(feature_range=(0,1))
        close_price = self.dataset[CLOSE].values.reshape(-1, 1)
        print(close_price)
        scaler.fit_transform(close_price)
        return scaler.inverse_transform(arr)


preparator = DatasetPreparator(dataset=data_main)
#dataset = preparator.prepare()
#dataset
#DatasetPreparator.plot(dataset)
x_train, x_test, y_train, y_test = preparator.get_data_for_prediction()

In [14]:
np.save( './datasets/prepared/btc/1d_with_indicators/v1/x_train.npy', x_train)
np.save( './datasets/prepared/btc/1d_with_indicators/v1/x_test.npy', x_test)
np.save( './datasets/prepared/btc/1d_with_indicators/v1/y_train.npy', y_train)
np.save( './datasets/prepared/btc/1d_with_indicators/v1/y_test.npy', y_test)

In [7]:
def to_sequences(data, seq_len):
    seq_len -= 1
    d = []

    for index in range(len(data) - seq_len):
        d.append(data[index: index + seq_len])

    return np.array(d)


In [8]:
DROPOUT = 0.2
SEQ_LEN = 100
WINDOW_SIZE = SEQ_LEN - 1

model = keras.Sequential()

model.add(Bidirectional(LSTM(WINDOW_SIZE, return_sequences=True),
                        input_shape=(WINDOW_SIZE, x_train.shape[-1])))
model.add(Dropout(rate=DROPOUT))

model.add(Bidirectional(LSTM((WINDOW_SIZE * 2), return_sequences=True)))
model.add(Dropout(rate=DROPOUT))

model.add(Bidirectional(LSTM(WINDOW_SIZE, return_sequences=False)))

model.add(Dense(units=1))

model.add(Activation('linear'))

model.compile(
    loss='mean_squared_error',
    optimizer='adam'
)

2023-03-29 21:18:41.134986: W tensorflow/core/common_runtime/pluggable_device/pluggable_device_bfc_allocator.cc:28] Overriding allow_growth setting because force_memory_growth was requested by the device.


In [9]:
x_train1 = to_sequences(x_train, SEQ_LEN)
y_train1 = to_sequences(y_train, SEQ_LEN)
x_test1 = to_sequences(x_test, SEQ_LEN)
y_test1 = to_sequences(y_test, SEQ_LEN)

print(x_train1.shape)
print(y_train1.shape)
print(x_test1.shape)
print(y_test1.shape)

(4228, 99, 13)
(4228, 99)
(382, 99, 13)
(382, 99)


In [None]:
BATCH_SIZE = 64

# history = model.fit_generator(
#     data_generator(tscv.split(x_train, y_train)),
#     epochs=50,
#     batch_size=BATCH_SIZE,
# )

history = model.fit(
    x_train1,
    y_train1,
    epochs=50,
    batch_size=BATCH_SIZE,
    shuffle=False,
    validation_split=0.1
)