In [30]:
import pandas as pd
import numpy as np
import random
import os
from sklearn import preprocessing
from collections import deque
import time
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, BatchNormalization
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint

In [34]:
# --- Training hyper-parameters ---
epochs = 10
batch_size = 64

# --- Data / labelling parameters ---
ratio_to_predict = "LTC-USD"   # pair whose close price is used to build the label
seq_len = 60                   # number of consecutive rows in one input window
future_period_predict = 3      # how many rows ahead the label looks

# Run identifier; the trailing integer timestamp keeps repeated runs distinct.
name = f"{seq_len}-SEQ-{future_period_predict}-PRED-{int(time.time())}"

In [52]:
def classify(current, future):
    """Label a price move: 1 when ``future`` is strictly above ``current``, else 0.

    Both arguments are passed through ``float`` before comparing, so numeric
    strings are accepted. A NaN on either side compares as False and therefore
    yields 0.
    """
    return 1 if float(future) > float(current) else 0

def preprocess_df(df):
    """Convert a labelled price/volume frame into balanced, shuffled sequence arrays.

    Processing steps:
      1. Drop the helper ``future`` column.
      2. Replace every feature column by its percentage change and then
         standardise it with ``preprocessing.scale``.
      3. Slide a window of ``seq_len`` consecutive rows over the frame; each
         full window becomes one sample labelled with the ``target`` of its
         last row.
      4. Balance the classes by truncating the larger one, then shuffle.

    Args:
        df: DataFrame holding a ``future`` column, a ``target`` column and the
            feature columns. Once ``future`` is removed, ``target`` must be
            the last column, because the last value of each row is taken as
            the label. The caller's frame is not modified.

    Returns:
        ``(X, y)`` as NumPy arrays: ``X`` stacks the windows (each ``seq_len``
        rows by number-of-feature-columns), ``y`` holds the matching targets.
    """
    # Keyword form: passing ``axis`` positionally is rejected by pandas >= 2.0.
    # ``drop`` returns a new frame, so the in-place edits below stay local.
    df = df.drop(columns='future')

    for col in df.columns:
        if col != "target":
            # A change measured from a zero value is +/-inf, which
            # ``preprocessing.scale`` refuses; treat it as missing so the row
            # is dropped together with the other NaN rows.
            df[col] = df[col].pct_change().replace([np.inf, -np.inf], np.nan)
            df.dropna(inplace=True)
            df[col] = preprocessing.scale(df[col].values)
    df.dropna(inplace=True)

    sequential_data = []
    # The deque discards its oldest row automatically once it holds seq_len rows.
    prev_days = deque(maxlen=seq_len)

    for row in df.values:
        prev_days.append(list(row[:-1]))  # features only; the last value is the target
        if len(prev_days) == seq_len:
            sequential_data.append([np.array(prev_days), row[-1]])

    random.shuffle(sequential_data)

    # Split by class so both classes can be cut to the same size.
    buys = [[seq, target] for seq, target in sequential_data if target == 1]
    sells = [[seq, target] for seq, target in sequential_data if target == 0]

    random.shuffle(buys)
    random.shuffle(sells)

    lower = min(len(buys), len(sells))

    sequential_data = buys[:lower] + sells[:lower]
    # Shuffle again so the classes are interleaved rather than in two runs.
    random.shuffle(sequential_data)

    X = [seq for seq, _ in sequential_data]
    y = [target for _, target in sequential_data]

    return np.array(X), np.array(y)
    

In [44]:
# Build one frame indexed by "time" with a close and a volume column per pair.
ratios = ["BTC-USD","LTC-USD","ETH-USD","BCH-USD"]
main_df = pd.DataFrame()
for ratio in ratios:
    dataset = f"crypto_data/{ratio}.csv"
    close_name, volume_name = f"{ratio}_close", f"{ratio}_volume"

    # The column names are supplied here via ``names``.
    df = (
        pd.read_csv(dataset, names = ["time","low","high", "open", "close", "volume"])
        .rename(columns={"close": close_name, "volume": volume_name})
        .set_index("time")
    )[[close_name, volume_name]]

    # The first pair seeds the frame; later pairs are joined on the time index.
    main_df = df if len(main_df) == 0 else main_df.join(df)

In [53]:
# "future" is the predicted pair's close price future_period_predict rows later.
target_close = f"{ratio_to_predict}_close"
main_df['future'] = main_df[target_close].shift(-future_period_predict)

# 1 when the future close is higher than the current close, otherwise 0.
# NOTE(review): the shift leaves NaN in the last rows' "future", and classify
# returns 0 for NaN, so those rows are labelled 0 rather than excluded.
main_df["target"] = [
    classify(now, later)
    for now, later in zip(main_df[target_close], main_df["future"])
]
print(main_df[[target_close,"future","target"]].head(10))

            LTC-USD_close     future  target
time                                        
1528968660      96.580002  96.500000       0
1528968720      96.660004  96.389999       0
1528968780      96.570000  96.519997       0
1528968840      96.500000  96.440002       0
1528968900      96.389999  96.470001       1
1528968960      96.519997  96.400002       0
1528969020      96.440002  96.400002       0
1528969080      96.470001  96.400002       0
1528969140      96.400002  96.400002       0
1528969200      96.400002  96.400002       0


In [54]:
# Timestamp marking the start of the final 5% of rows (ascending index order).
times = sorted(main_df.index.values)
holdout_size = int(0.05*len(times))
last_5pct = times[-holdout_size]

In [55]:
# Split by index value: rows at or after the cut-off are held out for
# validation, everything earlier stays in main_df for training.
held_out_rows = main_df.index >= last_5pct
training_rows = main_df.index < last_5pct
validation_main_df = main_df.loc[held_out_rows]
main_df = main_df.loc[training_rows]

In [56]:
# Scale, window and balance each split independently (training first, then
# validation, so the random module is consumed in the same order as before).
train_x, train_y = preprocess_df(df=main_df)
validation_x, validation_y = preprocess_df(df=validation_main_df)

In [20]:
# Report split sizes and class balance.
# preprocess_df returns the labels as NumPy arrays, which have no ``.count``
# method, so matches are counted with a boolean mask. ``np.asarray`` keeps the
# cell working if the labels are plain lists.
print(f"train data: {len(train_x)}, validation: {len(validation_x)}")
print(f"Dont buys: {np.count_nonzero(np.asarray(train_y) == 0)}, buys:{np.count_nonzero(np.asarray(train_y) == 1)}")
print(f"Validation Dont buys: {np.count_nonzero(np.asarray(validation_y) == 0)}, buys:{np.count_nonzero(np.asarray(validation_y) == 1)}")

train data: 69188, validation: 3062
Dont buys: 34594, buys:34594
Validation Dont buys: 1531, buys:1531


In [49]:
# Three stacked 128-unit LSTM layers, each followed by dropout and batch
# normalisation, then a small dense head ending in a 2-way softmax.
model = Sequential()

# Only the first layer of a Sequential model needs the input shape; it is
# taken from train_x with the leading sample axis stripped.
model.add(LSTM(128, input_shape=(train_x.shape[1:]), return_sequences=True))
model.add(Dropout(0.2))
model.add(BatchNormalization())

# return_sequences=True hands the full sequence on to the next LSTM layer.
model.add(LSTM(128, return_sequences=True))
model.add(Dropout(0.2))
model.add(BatchNormalization())

# Last recurrent layer: emits only its final output for the dense head.
model.add(LSTM(128))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(Dense(32,activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(2,activation='softmax'))

# The ``lr`` alias is deprecated and the ``decay`` argument is rejected by
# current Keras optimizers. InverseTimeDecay with decay_steps=1 reproduces the
# same rule: lr = 0.001 / (1 + 1e-6 * step).
lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
    initial_learning_rate=0.001,
    decay_steps=1,
    decay_rate=1e-6,
)
opt = tf.keras.optimizers.Adam(learning_rate=lr_schedule)

# Targets are integer class ids (0 or 1), hence the sparse loss variant.
model.compile(loss='sparse_categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])

In [76]:
# TensorBoard callback writing to logs/<run name>, so each run gets its own
# directory.
# NOTE(review): the model.fit call in this notebook does not pass a
# ``callbacks`` argument, so this callback is currently unused.
tboard_log_dir = os.path.join("logs",name)
tensorboard = TensorBoard(log_dir=tboard_log_dir)

#filepath = "RNN_Final-{epoch:02d}-{val_acc:.3f}"
#checkpoint = ModelCheckpoint("models\{}.model".format(filepath,monitor='val_acc',verbose=1, save_best_only=True,mode='max'))

In [79]:
# Train on the balanced training windows and evaluate on the held-out split
# after every epoch; the returned History object is kept in ``history``.
history = model.fit(
    x=train_x,
    y=train_y,
    epochs=epochs,
    batch_size=batch_size,
    validation_data=(validation_x, validation_y),
)

Train on 65962 samples, validate on 3174 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [81]:
# Save the trained model under models/<run name>. os.path.join avoids the
# invalid "\{" escape sequence and the hard-coded Windows separator of the
# old literal, and matches how the log directory is built.
model.save(os.path.join("models", name))