In [138]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Activation, Dense, Dropout, CuDNNLSTM, BatchNormalization
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint
import pandas as pd
import numpy as np
# from Collections import sort
from sklearn import preprocessing
from collections import deque 
import random
import time

In [139]:
main_df = pd.DataFrame()
ratios = ["BTC-USD","LTC-USD","ETH-USD","BCH-USD"]
for ratio in ratios:
    dataset = f"Data/crypto_data/{ratio}.csv"
    df = pd.read_csv(dataset, names=["time","low","high","open","close","volume"])
    
    df.rename(columns = {"close":f"{ratio}_close","volume":f"{ratio}_volume"}, inplace = True)
    
    df.set_index("time",inplace=True)
    df = df[[f"{ratio}_close",f"{ratio}_volume"]]

    if len(main_df) == 0:
        main_df = df
    else:
        main_df = main_df.join(df)


In [140]:
SEQ_LEN = 60  # how long of a preceeding sequence to collect for RNN
FUTURE_PERIOD_PREDICT = 3  # how far into the future are we trying to predict?
RATIO_TO_PREDICT = "LTC-USD"
EPOCHS = 2
BATCH_SIZE = 64
NAME = f"{SEQ_LEN}-SEQ-{FUTURE_PERIOD_PREDICT}-PRED-{int(time.time())}"


In [141]:
def predict(present,future):
    if float(future) > float(present):
        return 1
    else :
        return 0

In [143]:
main_df['Future'] = main_df[f"{RATIO_TO_PREDICT}_close"].shift(-FUTURE_PERIOD_PREDICT)
main_df['target'] = list(map(predict,main_df[f'{RATIO_TO_PREDICT}_close'],main_df['Future']))

In [144]:
times = sorted(main_df.index.values)  # get the times
last_5pct = times[-int(0.05*len(times))]  #

In [149]:
def preprocess(df):
    df = df.drop("Future", 1)
    #preprocessing
    for column in df.columns:
        if column != "target":
            df[column] = df[column].pct_change()
            df = df.dropna()
            df[column] = preprocessing.scale(df[column].values)
    df.dropna(inplace=True)
    
    sequential_data = []
    prev_days = deque(maxlen=SEQ_LEN)
    
    #making a sequences
    for i in df.values:
        prev_days.append([n for n in i[:-1]])
        if len(prev_days) == SEQ_LEN:
            sequential_data.append([np.array(prev_days),i[-1]])
    
    random.shuffle(sequential_data)
    
    #balancing the class
    print(len(sequential_data))
    buy = []
    sell = []
    
    for value,target in sequential_data:
        if target == 1:
            buy.append([value,target])
        else:
            sell.append([value,target])
    
    lower = min(len(buy), len(sell))
    
    buy = buy[:lower]
    sell = sell[:lower]
    
    sequential_data = buy+sell
    random.shuffle(sequential_data)
    
    xtrain = []
    ytrain = []
    
    print(sequential_data)
    for value,target in sequential_data:
        xtrain.append(value)
        ytrain.append(target)
    
    return np.array(xtrain), ytrain

In [146]:
validation_df = main_df[(main_df.index >= last_5pct)]
main_df = main_df[(main_df.index < last_5pct)]

In [147]:
main_df.head()

Unnamed: 0_level_0,BTC-USD_close,BTC-USD_volume,LTC-USD_close,LTC-USD_volume,ETH-USD_close,ETH-USD_volume,BCH-USD_close,BCH-USD_volume,Future,target
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1528968660,6489.549805,0.5871,96.580002,9.6472,,,871.719971,5.675361,96.5,0
1528968720,6487.379883,7.706374,96.660004,314.387024,486.01001,26.019083,870.859985,26.856577,96.389999,0
1528968780,6479.410156,3.088252,96.57,77.129799,486.0,8.4494,870.099976,1.1243,96.519997,0
1528968840,6479.410156,1.4041,96.5,7.216067,485.75,26.994646,870.789978,1.749862,96.440002,0
1528968900,6479.97998,0.753,96.389999,524.539978,486.0,77.355759,870.0,1.6805,96.470001,1


In [150]:
xtrain, ytrain = preprocess(main_df)

82178


IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [151]:
xtest, ytest = preprocess(validation_df)

3806


IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [159]:
len(ytrain) ,xtrain.shape

(69188, (69188, 60, 8))

In [None]:
model = Sequential

model.add(CuDNNLSTM(128,input_shape=(), return_sequence=True))
model.add(Dropout(0.2))
model.add(BatchNormalization())


model.add(CuDNNLSTM(128, return_sequence=True))
model.add(Dropout(0.2))
model.add(BatchNormalization())


model.add(CuDNNLSTM(128))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(Dense(32,activation='relu'))
model.add(Dropout(0.2))

model.add(Dense(2),activation='softmax')

opt = tf.keras.optimizers.Adam(lr=0.001, decay = 1e-6)

model.compile(loss = 'sparse_categorical_crossentropy',
             optimizers=opt,
              metrics=['accuracy']
             )
tensorboard = TensorBoard(log_dir=f'/logs/{NAME}')

filepath = "RNN_final-{epoch:02d}"
