In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
import os

from sklearn import preprocessing

# Number of consecutive rows that make up one input sequence for the model.
SEQLEN = 60
# How many rows ahead of the current row the price to be predicted lies.
FUTURE_PREDICT = 3
# Pair whose closing price is used to derive the prediction target.
RATIO = "LTC-USD"

# Accumulates the close/volume columns of every pair, aligned on "time".
main_df = pd.DataFrame()

ratios = ["BTC-USD", "LTC-USD", "ETH-USD", "BCH-USD"]
for ratio in ratios:
    dataset = f"crypto_data/{ratio}.csv"
    close_col = f"{ratio}_close"
    vol_col = f"vol_{ratio}"

    # Column names are supplied explicitly, so the file is read as header-less.
    df = (
        pd.read_csv(dataset, names=["time", "low", "high", "open", "close", "volume"])
        .rename(columns={"close": close_col, "volume": vol_col})
        .set_index("time")
    )
    # Only the per-pair close and volume are kept; low/high/open are discarded.
    df = df[[close_col, vol_col]]

    # The first pair seeds the frame; later pairs are joined onto its index,
    # so timestamps missing from a later pair show up as NaN.
    main_df = df if len(main_df) == 0 else main_df.join(df)

print(main_df.head())

            BTC-USD_close  vol_BTC-USD  LTC-USD_close  vol_LTC-USD  \
time                                                                 
1528968660    6489.549805     0.587100      96.580002     9.647200   
1528968720    6487.379883     7.706374      96.660004   314.387024   
1528968780    6479.410156     3.088252      96.570000    77.129799   
1528968840    6479.410156     1.404100      96.500000     7.216067   
1528968900    6479.979980     0.753000      96.389999   524.539978   

            ETH-USD_close  vol_ETH-USD  BCH-USD_close  vol_BCH-USD  
time                                                                
1528968660            NaN          NaN     871.719971     5.675361  
1528968720      486.01001    26.019083     870.859985    26.856577  
1528968780      486.00000     8.449400     870.099976     1.124300  
1528968840      485.75000    26.994646     870.789978     1.749862  
1528968900      486.00000    77.355759     870.000000     1.680500  


In [2]:
def classify(currPrice,futPrice):
    """Label a price move: 1 when the future price is strictly higher, else 0.

    Both arguments are converted with ``float`` before comparing. An equal
    price, a lower price, or a NaN on either side all yield 0.
    """
    return 1 if float(futPrice) > float(currPrice) else 0
    
    

In [3]:
# "future" holds the target pair's close FUTURE_PREDICT rows further down the
# frame; the last FUTURE_PREDICT rows have nothing to look at and become NaN.
target_close = main_df[f"{RATIO}_close"]
main_df["future"] = target_close.shift(-FUTURE_PREDICT)
print(main_df.loc[:, [f"{RATIO}_close", "future"]].head())

            LTC-USD_close     future
time                                
1528968660      96.580002  96.500000
1528968720      96.660004  96.389999
1528968780      96.570000  96.519997
1528968840      96.500000  96.440002
1528968900      96.389999  96.470001


In [4]:
# Row-wise label: 1 if the future close is above the current close, else 0.
# NOTE(review): rows whose "future" is NaN are labelled 0 by classify().
main_df["target"] = [
    classify(current, ahead)
    for current, ahead in zip(main_df[f"{RATIO}_close"], main_df["future"])
]

In [5]:
# Preview the current close, the look-ahead close and the resulting label.
print(main_df.loc[:, [f"{RATIO}_close", "future", "target"]].head(10))

            LTC-USD_close     future  target
time                                        
1528968660      96.580002  96.500000       0
1528968720      96.660004  96.389999       0
1528968780      96.570000  96.519997       0
1528968840      96.500000  96.440002       0
1528968900      96.389999  96.470001       1
1528968960      96.519997  96.400002       0
1528969020      96.440002  96.400002       0
1528969080      96.470001  96.400002       0
1528969140      96.400002  96.400002       0
1528969200      96.400002  96.400002       0


In [6]:
# Same preview as before, skipping the first row (positional slice).
print(main_df.iloc[1:].head())

            BTC-USD_close  vol_BTC-USD  LTC-USD_close  vol_LTC-USD  \
time                                                                 
1528968720    6487.379883     7.706374      96.660004   314.387024   
1528968780    6479.410156     3.088252      96.570000    77.129799   
1528968840    6479.410156     1.404100      96.500000     7.216067   
1528968900    6479.979980     0.753000      96.389999   524.539978   
1528968960    6480.000000     1.490900      96.519997    16.991997   

            ETH-USD_close  vol_ETH-USD  BCH-USD_close  vol_BCH-USD     future  \
time                                                                            
1528968720      486.01001    26.019083     870.859985    26.856577  96.389999   
1528968780      486.00000     8.449400     870.099976     1.124300  96.519997   
1528968840      485.75000    26.994646     870.789978     1.749862  96.440002   
1528968900      486.00000    77.355759     870.000000     1.680500  96.470001   
1528968960      486.000

In [7]:
# All index values (the "time" column) in ascending order.
times = list(main_df.index.values)
times.sort()

In [8]:
# Timestamp at which the final ~5% of the sorted times begins; the split cell
# uses it as the boundary between training and validation rows.
# int(0.05 * len(times)) is 0 when there are fewer than 20 timestamps, and
# times[-0] is times[0], which would put every row in the validation split
# and leave the training split empty. Always hold back at least one timestamp.
holdout_count = max(1, int(0.05*len(times)))
last_5pct = times[-holdout_count]
print(last_5pct)

1534922100


In [9]:
# Chronological split: rows at or after the cut-off become the validation
# frame, everything earlier stays in main_df for training.
val_main_df = main_df.loc[main_df.index >= last_5pct]
main_df = main_df.loc[main_df.index < last_5pct]


In [23]:
from collections import deque
import random


def preprocessDf(df):
    """Turn a labelled price frame into balanced, shuffled sequence samples.

    Steps:
      1. Drop the ``future`` helper column (it was only needed to build
         ``target``).
      2. Replace every non-target column by its percent change and
         standardise it with ``preprocessing.scale``.
      3. Slide a window of ``SEQLEN`` consecutive rows over the frame; every
         full window becomes one sample, labelled with the ``target`` of the
         window's last row.
      4. Balance the two classes by truncating the larger one to the size of
         the smaller one, then shuffle.

    Args:
        df: DataFrame with feature columns plus ``future`` and ``target``
            columns. The caller's frame is not modified.

    Returns:
        ``(X, y)`` where ``X`` is an ndarray stacking the sample windows
        (``(n_samples, SEQLEN, n_features)`` when at least one sample exists)
        and ``y`` is a list with the matching 0/1 targets.
    """
    # Keyword form: the positional ``axis`` argument of drop() was removed in
    # pandas 2.0. drop() returns a new frame, so the in-place edits below
    # never touch the caller's data.
    df = df.drop(columns="future")

    for col in df.columns:
        if col != "target":
            change = df[col].pct_change()
            # A zero previous value makes pct_change emit +/-inf, which
            # preprocessing.scale rejects; treat those rows as missing.
            df[col] = change.replace([np.inf, -np.inf], np.nan)
            df.dropna(inplace=True)
            df[col] = preprocessing.scale(df[col].values)
    df.dropna(inplace=True)

    # Select features and labels by name rather than relying on "target"
    # happening to be the last column.
    feature_rows = df.drop(columns="target").values
    targets = df["target"].values

    buys = []
    sells = []
    window = deque(maxlen=SEQLEN)  # oldest row falls out once the window is full
    for row, target in zip(feature_rows, targets):
        window.append(row)
        if len(window) == SEQLEN:
            sample = [np.array(window), target]
            if target == 0:
                sells.append(sample)
            elif target == 1:
                buys.append(sample)

    # Shuffle each class before truncating so the kept samples are a random
    # subset rather than the chronologically earliest ones.
    random.shuffle(buys)
    random.shuffle(sells)

    lwr = min(len(buys), len(sells))
    seq_data = buys[:lwr] + sells[:lwr]
    # Mix the classes; otherwise all buys would precede all sells.
    random.shuffle(seq_data)

    X = [seq for seq, _ in seq_data]
    y = [target for _, target in seq_data]

    return np.array(X),y



# Build the model inputs: the training frame first, then the validation frame.
# Each frame is preprocessed (scaled and balanced) independently.
(xTrain, yTrain), (xTest, yTest) = map(preprocessDf, (main_df, val_main_df))





In [25]:
# Shapes of the training and validation inputs, one per line.
print(xTrain.shape, xTest.shape, sep="\n")

(69188, 60, 8)
(3062, 60, 8)


In [33]:
import uuid
import time
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dropout,CuDNNLSTM,Dense,BatchNormalization
from tensorflow.keras.callbacks import TensorBoard,ModelCheckpoint



EPOCHS = 10
BATCH = 64

# Run name used for the TensorBoard log directory; the timestamp keeps
# repeated runs from overwriting each other.
NAME = f"{SEQLEN}-Day-{RATIO}-Predictor-{int(time.time())}"

# Four stacked recurrent layers followed by a small dense head.
# Only the first layer needs input_shape; later layers infer their input
# from the layer before them.
model = Sequential()
model.add(CuDNNLSTM(128, input_shape=xTrain.shape[1:], return_sequences=True))
model.add(Dropout(0.1))
model.add(BatchNormalization())

model.add(CuDNNLSTM(128, return_sequences=True))
model.add(Dropout(0.1))
model.add(BatchNormalization())

model.add(CuDNNLSTM(128, return_sequences=True))
model.add(Dropout(0.1))
model.add(BatchNormalization())

# Last recurrent layer returns only its final output, which feeds the
# dense layers below.
model.add(CuDNNLSTM(128))
model.add(Dropout(0.1))
model.add(BatchNormalization())

model.add(Dense(32, activation="relu"))
model.add(Dropout(0.2))

# Two output units, one per target class (0 and 1).
model.add(Dense(2, activation="softmax"))

# Sparse loss: the targets are class indices, not one-hot vectors.
model.compile(optimizer=tf.keras.optimizers.Adam(lr=1e-3, decay=1e-6),
              loss="sparse_categorical_crossentropy",
              metrics=["accuracy"])

tensorboard = TensorBoard(log_dir=f"logs/{NAME}")

# Make sure the checkpoint directory exists before the first save.
os.makedirs("models", exist_ok=True)

# {epoch} and {val_acc} are left as placeholders; ModelCheckpoint fills them
# in when it saves.
# NOTE(review): "val_acc" is the metric key in the tf.keras version that
# ships CuDNNLSTM; newer releases name it "val_accuracy" - confirm on upgrade.
FILEPATH = "RNN_Final-{epoch:02d}-{val_acc:.3f}"
# The monitoring options belong to ModelCheckpoint. They were previously
# passed to str.format() by a misplaced parenthesis and silently ignored,
# so the callback ran with its defaults instead of keeping only the best model.
chkpt = ModelCheckpoint("models/{}.model".format(FILEPATH),
                        monitor="val_acc",
                        verbose=1,
                        save_best_only=True,
                        mode="max")

# preprocessDf returns the labels as a plain Python list; hand Keras arrays
# for both inputs and labels.
history = model.fit(xTrain, np.asarray(yTrain),
                    batch_size=BATCH,
                    epochs=EPOCHS,
                    validation_data=(xTest, np.asarray(yTest)),
                    callbacks=[tensorboard, chkpt])




Train on 69188 samples, validate on 3062 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
