In [1]:
import tensorflow as tf
import keras
from keras.models import Sequential
from keras.layers import Dense,Dropout,LSTM,CuDNNLSTM,BatchNormalization
from keras.callbacks import TensorBoard, ModelCheckpoint
from keras.utils import np_utils
import pandas as pd
import os
from sklearn import preprocessing
from collections import deque
import numpy as np
import random
import time
# Display the TensorFlow version the kernel imported, so the recorded outputs
# can be tied to a specific library version.
tf.__version__

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


'1.12.0'

In [2]:
# Discover the per-pair CSV files in the data directory.
# os.listdir() returns entries in arbitrary order; sorting makes the result
# reproducible, which matters because the first name becomes the left side of
# the join in the loading cell. Non-CSV entries (e.g. hidden files) are skipped
# so that every name maps to an existing "<name>.csv".
fnames = sorted(f for f in os.listdir("crypto_data") if f.endswith(".csv"))
# splitext strips only the final extension, so names containing extra dots
# survive intact.
cnames = [os.path.splitext(f)[0] for f in fnames]
print(cnames)

['BCH-USD', 'BTC-USD', 'ETH-USD', 'LTC-USD']


In [3]:
# Build one wide frame indexed by "time" with a close and a volume column for
# each currency pair. The first pair's frame is taken as-is; each later pair is
# joined onto it (DataFrame.join default, i.e. aligned to the existing index).
maindf = pd.DataFrame()
for cname in cnames:
    close_col = f"{cname}_close"
    volume_col = f"{cname}_volume"
    pair_df = (
        pd.read_csv(
            f"crypto_data/{cname}.csv",
            names=["time", "low", "high", "open", "close", "volume"],
        )
        .rename(columns={"close": close_col, "volume": volume_col})
        .set_index("time")
    )
    # Keep only the two columns this notebook uses.
    pair_df = pair_df[[close_col, volume_col]]

    maindf = pair_df if len(maindf) == 0 else maindf.join(pair_df)

maindf.head()

Unnamed: 0_level_0,BCH-USD_close,BCH-USD_volume,BTC-USD_close,BTC-USD_volume,ETH-USD_close,ETH-USD_volume,LTC-USD_close,LTC-USD_volume
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1528968660,871.719971,5.675361,6489.549805,0.5871,,,96.580002,9.6472
1528968720,870.859985,26.856577,6487.379883,7.706374,486.01001,26.019083,96.660004,314.387024
1528968780,870.099976,1.1243,6479.410156,3.088252,486.0,8.4494,96.57,77.129799
1528968840,870.789978,1.749862,6479.410156,1.4041,485.75,26.994646,96.5,7.216067
1528968900,870.0,1.6805,6479.97998,0.753,486.0,77.355759,96.389999,524.539978


In [4]:
# Number of consecutive rows that make up one input sequence.
SEQ_LEN = 60
# How many rows ahead the "future" close price is taken from.
FUTURE_PERIOD_PREDICT = 3
# Currency pair whose close price is being predicted.
RATIO_TO_PREDICT = "LTC-USD"

def classify(current, future):
    """Return 1 when the future price is strictly higher than the current one, else 0.

    Both arguments are converted with float() before comparing, so numeric
    strings are accepted. A NaN on either side compares as False and therefore
    yields 0.
    """
    return int(float(future) > float(current))

# 'future' is the predicted pair's close price FUTURE_PERIOD_PREDICT rows later;
# 'target' is 1 when that future price is higher than the current one, else 0.
target_close = maindf[f"{RATIO_TO_PREDICT}_close"]
maindf['future'] = target_close.shift(-FUTURE_PERIOD_PREDICT)
maindf['target'] = [
    classify(now, later) for now, later in zip(target_close, maindf['future'])
]
maindf.head(10)

Unnamed: 0_level_0,BCH-USD_close,BCH-USD_volume,BTC-USD_close,BTC-USD_volume,ETH-USD_close,ETH-USD_volume,LTC-USD_close,LTC-USD_volume,future,target
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1528968660,871.719971,5.675361,6489.549805,0.5871,,,96.580002,9.6472,96.5,0
1528968720,870.859985,26.856577,6487.379883,7.706374,486.01001,26.019083,96.660004,314.387024,96.389999,0
1528968780,870.099976,1.1243,6479.410156,3.088252,486.0,8.4494,96.57,77.129799,96.519997,0
1528968840,870.789978,1.749862,6479.410156,1.4041,485.75,26.994646,96.5,7.216067,96.440002,0
1528968900,870.0,1.6805,6479.97998,0.753,486.0,77.355759,96.389999,524.539978,96.470001,1
1528968960,869.98999,1.669014,6480.0,1.4909,486.0,7.5033,96.519997,16.991997,96.400002,0
1528969020,869.450012,0.8652,6477.220215,2.73195,485.98999,85.877251,96.440002,95.524078,96.400002,0
1528969080,869.98999,23.534929,6480.0,2.17424,485.98999,160.915192,96.470001,175.205307,96.400002,0
1528969140,870.0,2.3,6479.990234,0.9031,485.98999,61.371887,96.400002,43.652802,96.400002,0
1528969200,870.320007,9.255514,6478.660156,3.258786,485.98999,42.687656,96.400002,8.16,96.400002,0


In [5]:
# Out-of-time split: the most recent `oot_per` fraction of timestamps becomes
# the validation set, everything earlier stays as training data.
oot_per = 0.05
times = sorted(maindf.index.values)
# Use the configured fraction (the original hard-coded 0.05 here, so changing
# `oot_per` had no effect).
cutoff = times[-int(oot_per * len(times))]
print(cutoff)
# .copy() gives each split its own data, so later in-place preprocessing does
# not operate on a flagged slice of the full frame.
# NOTE: `maindf` is overwritten below; re-running this cell without re-running
# the cells above would cut the already-reduced training frame again.
validation_df = maindf[maindf.index >= cutoff].copy()
maindf = maindf[maindf.index < cutoff].copy()
print(validation_df.shape)
print(maindf.shape)

1534879920
(4611, 10)
(87614, 10)


In [6]:
def preprocess_df(df):
    """Convert a price/volume frame into balanced, shuffled sequence data.

    Steps:
      1. Remove the look-ahead 'future' column so it cannot be used as a feature.
      2. Replace every feature column by its percent change, then standardise it
         with `preprocessing.scale`.
      3. Slide a window of SEQ_LEN consecutive rows over the frame; each full
         window becomes one sample labelled with the 'target' of its last row.
      4. Balance the two classes by truncating the larger one, then shuffle.

    Args:
        df: DataFrame with a 'future' column, a 'target' column holding 0/1
            labels, and numeric feature columns. The caller's frame is not
            modified.

    Returns:
        (X, y) where X is a numpy array of the sampled windows and y is a plain
        Python list with the matching labels.
    """
    # Bug fix: the original `df.drop('future', 1)` threw its result away, so the
    # future price stayed among the features and leaked the label. Rows whose
    # future price is unknown have no meaningful label, so they are removed
    # before the column is dropped. Reassigning `df` also means every step
    # below works on a private frame rather than the caller's.
    df = df.dropna(subset=['future']).drop(columns=['future'])

    feature_cols = [col for col in df.columns if col != 'target']
    for col in feature_cols:
        # A zero previous value makes pct_change produce +/-inf, which
        # preprocessing.scale rejects; treat those like missing values.
        df[col] = df[col].pct_change().replace([np.inf, -np.inf], np.nan)
        df = df.dropna()
        df[col] = preprocessing.scale(df[col].values)
    df = df.dropna()

    # Put 'target' last explicitly so the label is always the final value of a
    # row, regardless of the incoming column order.
    rows = df[feature_cols + ['target']].values

    sequential_data = []
    prev_days = deque(maxlen=SEQ_LEN)  # rolling window over the feature rows
    for row in rows:
        prev_days.append(list(row[:-1]))
        if len(prev_days) == SEQ_LEN:
            sequential_data.append([np.array(prev_days), row[-1]])

    # Shuffle before balancing so the truncation below keeps a random subset.
    random.shuffle(sequential_data)

    # target == 1 means the price rose (buy); target == 0 means it did not.
    # (The original had these two list names swapped.)
    buys = [[seq, target] for seq, target in sequential_data if target == 1]
    sells = [[seq, target] for seq, target in sequential_data if target == 0]

    lower = min(len(buys), len(sells))
    sequential_data = buys[:lower] + sells[:lower]
    # Shuffle again so the classes are interleaved rather than grouped.
    random.shuffle(sequential_data)

    X = [seq for seq, _ in sequential_data]
    y = [target for _, target in sequential_data]

    return np.array(X), y

In [7]:
# preprocess_df rewrites columns and drops rows on the frame it is given.
# Passing copies keeps `maindf` and `validation_df` intact, so this cell can be
# re-run without feeding already-transformed data back in.
train_x, train_y = preprocess_df(maindf.copy())
validation_x, validation_y = preprocess_df(validation_df.copy())

In [11]:
# Spot-check a single label. preprocess_df shuffles the samples with the
# `random` module, so the value shown here can differ between runs.
train_y[1]

1.0

In [22]:
# Report sample counts and class balance for both splits.
# `.count()` is the list method: preprocess_df returns the labels as a plain
# Python list, and label 0 is reported as "Dont buys", label 1 as "buys".
print(f"train data: {len(train_x)} validation: {len(validation_x)}")
print(f"Dont buys: {train_y.count(0)}, buys: {train_y.count(1)}")
print(f"VALIDATION Dont buys: {validation_y.count(0)}, buys: {validation_y.count(1)}")

train data: 69354 validation: 3688
Dont buys: 34677, buys: 34677
VALIDATION Dont buys: 1844, buys: 1844


In [None]:
# Training settings; presumably consumed by the model-fitting code that follows
# (not visible in this part of the notebook).
EPOCHS = 10
BATCH_SIZE = 64
# Run identifier built from the sequence length, the prediction horizon and the
# current Unix time, so each run gets a distinct name.
NAME = f"{SEQ_LEN}-SEQ-{FUTURE_PERIOD_PREDICT}-PRED-{int(time.time())}"