In [79]:
import pandas as pd
import os
from sklearn import preprocessing
from collections import deque
import numpy as np
import random
import time
import tensorflow as tf
keras = tf.keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, LSTM, CuDNNLSTM, BatchNormalization 
from keras.callbacks import TensorBoard, ModelCheckpoint

In [80]:
SEQ_LEN = 60 				# how long of a preceeding sequence to collect for RNN
FUTURE_PERIOD_PREDICT = 3	# how far into the future are we trying to predict?
RATIO_TO_PREDICT = 'LTC-USD'
EPOCHS = 10
BATCH_SIZE = 64
NAME = f"{SEQ_LEN}-SEQ-{FUTURE_PERIOD_PREDICT}-PRED-{int(time.time())}"

In [81]:
def classify(current, future):
	if float(future) > float(current):
		return 1
	else:
		return 0

def preprocess_df(df):
	df = df.drop('future', 1) 	# we don't need future
	
	for col in df.columns: 
		if col != 'target':		# normalize all ... except for the target itself!
			# pct change "normalizes" the different currencies (each crypto coin has vastly diff values, we're really more interested in the other coin's movements)
			df[col] = df[col].pct_change()
			# remove the nas (null) created by pct_change
			df.dropna(inplace=True)
			 # scale between 0 and 1.
			df[col] = preprocessing.scale(df[col].values)
	
	df.dropna(inplace=True) # cleanup again... jic. Those nasty NaNs love to creep in.

	sequential_data = []	# this is a list that will CONTAIN the sequences
	prev_days = deque(maxlen=SEQ_LEN) # These will be our actual sequences. They are made with deque, which keeps the maximum length by popping out older values as new ones come in

	for i in df.values:
		prev_days.append([n for n in i[:-1]])	 # store all but the target
		if len(prev_days) == SEQ_LEN:	# make sure we have 60 sequences!
			sequential_data.append([np.array(prev_days), i[-1]])	# append those bad boys!
	
	random.shuffle(sequential_data) 	# shuffle for good measure.

	buys = []
	sells = []

	for seq, target in sequential_data:
		if target == 0:
			sells.append([seq, target])
		elif target == 1:
			buys.append([seq, target])
	
	random.shuffle(sells)
	random.shuffle(buys)

	lower = min(len(buys), len(sells))

	buys = buys[:lower]
	sells = sells[:lower]

	sequential_data = buys + sells
	random.shuffle(sequential_data)

	X = []
	y = []

	for seq, target in sequential_data:
		X.append(seq)
		y.append(target)
	
	return np.array(X), np.array(y)


In [82]:
#load data
df = pd.read_csv('crypto_data/LTC-USD.csv', names=['time', 'low', 'high','open','close', 'volume'])
print(df.head())

         time        low       high       open      close      volume
0  1528968660  96.580002  96.589996  96.589996  96.580002    9.647200
1  1528968720  96.449997  96.669998  96.589996  96.660004  314.387024
2  1528968780  96.470001  96.570000  96.570000  96.570000   77.129799
3  1528968840  96.449997  96.570000  96.570000  96.500000    7.216067
4  1528968900  96.279999  96.540001  96.500000  96.389999  524.539978


In [83]:
#begin with empty data frame
main_df = pd.DataFrame()

# the 4 ratios we want to consider
ratios = ['BTC-USD', 'LTC-USD', 'ETH-USD', 'BCH-USD']

for ratio in ratios:
	dataset = f'crypto_data/{ratio}.csv' 	# get the full path to the file
	df = pd.read_csv(dataset, names=['time', 'low', 'high','open','close', 'volume']) 	# read in specific file
	# rename volume and close to include the ticker so we can still which close/volume is which:
	df.rename(columns={'close': f'{ratio}_close', 'volume': f'{ratio}_volume'}, inplace=True)
	# set time as index so we can join them on this shared time
	df.set_index('time', inplace=True)
	# ignore the other columns besides price and volume
	df = df [[f'{ratio}_close', f'{ratio}_volume']]

	if len(main_df) == 0:  		# if the dataframe is empty
		main_df = df
	else:						# otherwise, join this data to the main one
		main_df = main_df.join(df)

main_df.fillna(method="ffill", inplace=True)  # if there are gaps in data, use previously known values
main_df.dropna(inplace=True)
print(main_df.head())

            BTC-USD_close  BTC-USD_volume  LTC-USD_close  LTC-USD_volume  \
time                                                                       
1528968720    6487.379883        7.706374      96.660004      314.387024   
1528968780    6479.410156        3.088252      96.570000       77.129799   
1528968840    6479.410156        1.404100      96.500000        7.216067   
1528968900    6479.979980        0.753000      96.389999      524.539978   
1528968960    6480.000000        1.490900      96.519997       16.991997   

            ETH-USD_close  ETH-USD_volume  BCH-USD_close  BCH-USD_volume  
time                                                                      
1528968720      486.01001       26.019083     870.859985       26.856577  
1528968780      486.00000        8.449400     870.099976        1.124300  
1528968840      485.75000       26.994646     870.789978        1.749862  
1528968900      486.00000       77.355759     870.000000        1.680500  
1528968960      4

In [84]:
main_df['future'] = main_df[f'{RATIO_TO_PREDICT}_close'].shift(-FUTURE_PERIOD_PREDICT)

print(main_df[[f'{RATIO_TO_PREDICT}_close', 'future']].head())

            LTC-USD_close     future
time                                
1528968720      96.660004  96.389999
1528968780      96.570000  96.519997
1528968840      96.500000  96.440002
1528968900      96.389999  96.470001
1528968960      96.519997  96.400002


In [85]:
main_df['target'] = list(map(classify, main_df[f'{RATIO_TO_PREDICT}_close'], main_df['future']))
print(main_df[[f'{RATIO_TO_PREDICT}_close', 'future', 'target']].head(10))


            LTC-USD_close     future  target
time                                        
1528968720      96.660004  96.389999       0
1528968780      96.570000  96.519997       0
1528968840      96.500000  96.440002       0
1528968900      96.389999  96.470001       1
1528968960      96.519997  96.400002       0
1528969020      96.440002  96.400002       0
1528969080      96.470001  96.400002       0
1528969140      96.400002  96.400002       0
1528969200      96.400002  96.400002       0
1528969260      96.400002  96.449997       1


In [86]:
times = sorted(main_df.index.values)  # get the times
last_5pct = times[-int(0.05*len(times))]  # get the last 5% of the times

print(last_5pct)

1534922100


In [87]:
validation_main_df = main_df[(main_df.index >= last_5pct)] 	 	# make the validation data where the index is in the last 5%
main_df = main_df[(main_df.index < last_5pct)]	 				# now the main_df is all the data up to the last 5%

train_x, train_y = preprocess_df(main_df)
validation_x, validation_y = preprocess_df(validation_main_df)

print(f"train data: {len(train_x)} validation: {len(validation_x)}")
print(f"Dont buys: {len(train_y[train_y==0])}, buys: {len(train_y[train_y==1])}")
print(f"VALIDATION Dont buys: {len(validation_y[validation_y==0])}, buys: {len(validation_y[validation_y==1])}")


  df = df.drop('future', 1) 	# we don't need future


train data: 77922 validation: 3860
Dont buys: 38961, buys: 38961
VALIDATION Dont buys: 1930, buys: 1930


  df = df.drop('future', 1) 	# we don't need future


In [89]:
model = Sequential()
model.add(LSTM(128, input_shape=(train_x.shape[1:]), activation='relu', return_sequences=True))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(LSTM(128, input_shape=(train_x.shape[1:]), activation='relu', return_sequences=True))
model.add(Dropout(0.1))
model.add(BatchNormalization())

model.add(LSTM(128, input_shape=(train_x.shape[1:]), activation='relu'))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(Dense(32, activation='relu'))
model.add(Dropout(0.2))

model.add(Dense(2, activation='softmax'))

opt = keras.optimizers.Adam(lr = 0.001, decay = 1e-6)

model.compile(loss='sparse_categorical_crossentropy', optimizer= opt, metrics= ['accuracy'])
model.summary()

tensorboard = TensorBoard(log_dir=f'logs/{NAME}')

filepath = "RNN_Final-{epoch:02d}-{val_accuracy:.3f}"  # unique file name that will include the epoch and the validation acc for that epoch
checkpoint = ModelCheckpoint("models/{}.model".format(filepath, monitor='val_accuracy', verbose=1, save_best_only=True, mode='max')) 	# saves only the best ones

history = model.fit(train_x, train_y, batch_size= BATCH_SIZE, epochs= EPOCHS, validation_data=(validation_x, validation_y), callbacks=[tensorboard, checkpoint])

model.save('')


Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_12 (LSTM)              (None, 60, 128)           70144     
                                                                 
 dropout_16 (Dropout)        (None, 60, 128)           0         
                                                                 
 batch_normalization_12 (Bat  (None, 60, 128)          512       
 chNormalization)                                                
                                                                 
 lstm_13 (LSTM)              (None, 60, 128)           131584    
                                                                 
 dropout_17 (Dropout)        (None, 60, 128)           0         
                                                                 
 batch_normalization_13 (Bat  (None, 60, 128)          512       
 chNormalization)                                     



Epoch 2/10


INFO:tensorflow:Assets written to: models\RNN_Final-02-0.523.model\assets


Epoch 3/10


INFO:tensorflow:Assets written to: models\RNN_Final-03-0.515.model\assets


Epoch 4/10


INFO:tensorflow:Assets written to: models\RNN_Final-04-0.505.model\assets


Epoch 5/10


INFO:tensorflow:Assets written to: models\RNN_Final-05-0.504.model\assets


Epoch 6/10


INFO:tensorflow:Assets written to: models\RNN_Final-06-0.520.model\assets


Epoch 7/10


INFO:tensorflow:Assets written to: models\RNN_Final-07-0.501.model\assets


Epoch 8/10


INFO:tensorflow:Assets written to: models\RNN_Final-08-0.504.model\assets


Epoch 9/10


INFO:tensorflow:Assets written to: models\RNN_Final-09-0.506.model\assets


Epoch 10/10


INFO:tensorflow:Assets written to: models\RNN_Final-10-0.500.model\assets


INFO:tensorflow:Assets written to: assets


INFO:tensorflow:Assets written to: assets
