## Recurrent Neural Networks

...

In [2]:
import pandas as pd

# Read the BTC-USD price file; `names` supplies the column labels.
df = pd.read_csv(
    'crypto_data/BTC-USD.csv',
    names=['Time', 'Low', 'High', 'Open', 'Close', 'Volume'],
)
# Preview the first five rows.
print(df.head(5))

         Time          Low         High         Open        Close    Volume
0  1528968660  6489.549805  6489.560059  6489.560059  6489.549805  0.587100
1  1528968720  6487.370117  6489.560059  6489.549805  6487.379883  7.706374
2  1528968780  6479.410156  6487.370117  6487.370117  6479.410156  3.088252
3  1528968840  6479.410156  6479.419922  6479.419922  6479.410156  1.404100
4  1528968900  6475.930176  6479.979980  6479.410156  6479.979980  0.753000


...

In [4]:
# Number of consecutive rows that make up one input sequence (used as the
# rolling-window size when sequences are built). The preview above shows Time
# increasing by 60 per row, so this is presumably 60 minutes -- TODO confirm units.
SEQ_LENGTH = 60
# Number of rows ahead from which the 'Future' price is taken for labelling.
PERIOD_TO_PREDICT = 3

def classify(price1, price2):
    """Label a price move between two prices.

    Both arguments are converted with ``float()`` before being compared.

    Returns:
        1 if ``price2`` is strictly greater than ``price1``, otherwise 0
        (equal prices, a lower ``price2`` or a NaN all give 0).
    """
    went_up = float(price2) > float(price1)
    return int(went_up)

## Look ahead: 'Future' holds the Close price PERIOD_TO_PREDICT rows later
## (the final PERIOD_TO_PREDICT rows have no later price and become NaN)
df['Future'] = df['Close'].shift(periods=-PERIOD_TO_PREDICT)
print(df.head(5))

         Time          Low         High         Open        Close    Volume  \
0  1528968660  6489.549805  6489.560059  6489.560059  6489.549805  0.587100   
1  1528968720  6487.370117  6489.560059  6489.549805  6487.379883  7.706374   
2  1528968780  6479.410156  6487.370117  6487.370117  6479.410156  3.088252   
3  1528968840  6479.410156  6479.419922  6479.419922  6479.410156  1.404100   
4  1528968900  6475.930176  6479.979980  6479.410156  6479.979980  0.753000   

        Future  
0  6479.410156  
1  6479.979980  
2  6480.000000  
3  6477.220215  
4  6480.000000  


In [6]:
# Label every row by comparing its Close with its Future price:
# 1 when Future is strictly higher, otherwise 0.
df['Target'] = [
    classify(close, future)
    for close, future in zip(df['Close'], df['Future'])
]
print(df.head(n=10))

         Time          Low         High         Open        Close    Volume  \
0  1528968660  6489.549805  6489.560059  6489.560059  6489.549805  0.587100   
1  1528968720  6487.370117  6489.560059  6489.549805  6487.379883  7.706374   
2  1528968780  6479.410156  6487.370117  6487.370117  6479.410156  3.088252   
3  1528968840  6479.410156  6479.419922  6479.419922  6479.410156  1.404100   
4  1528968900  6475.930176  6479.979980  6479.410156  6479.979980  0.753000   
5  1528968960  6477.959961  6480.000000  6477.959961  6480.000000  1.490900   
6  1528969020  6477.220215  6480.000000  6479.990234  6477.220215  2.731950   
7  1528969080  6477.220215  6480.000000  6477.220215  6480.000000  2.174240   
8  1528969140  6479.990234  6479.990234  6479.990234  6479.990234  0.903100   
9  1528969200  6477.259766  6479.990234  6479.990234  6478.660156  3.258786   

        Future  Target  
0  6479.410156       0  
1  6479.979980       0  
2  6480.000000       1  
3  6477.220215       0  
4  64

...

In [41]:
import random
import numpy as np
from sklearn import preprocessing
from collections import deque

## This helper function is to normalize columns and remove NaNs
def preprocess(df):
    """Normalize the feature columns of ``df`` in place and return it.

    Steps, in order:

    1. Drop every row containing a NaN while 'Future' is still present, so
       rows without a known future price (whose 'Target' was therefore not
       derived from a real price) are discarded.
    2. Drop the 'Future' column so it can never be used as a model input.
    3. For every column except 'Target': replace the values by their
       row-to-row percent change, drop the row ``pct_change`` leaves as NaN,
       then standardize the column with ``sklearn.preprocessing.scale``.
    4. Drop any remaining rows that contain NaN.

    The frame is modified in place because callers invoke this function as a
    plain statement (``preprocess(frame)``); it is also returned so that
    ``frame = preprocess(frame)`` works. Pass an independent frame (for
    example a ``.copy()`` of a selection) to avoid pandas copy warnings.

    Args:
        df: DataFrame with a 'Future' column, a 'Target' column and numeric
            feature columns.

    Returns:
        The same DataFrame object that was passed in, after modification.

    Raises:
        KeyError: if ``df`` has no 'Future' column.
    """
    # Must happen before 'Future' is removed: afterwards the rows with an
    # unknown future price can no longer be told apart.
    df.dropna(inplace=True)
    # Keyword form: a positional axis argument is rejected by pandas >= 2.0.
    df.drop(columns='Future', inplace=True)
    for col in df.columns:
        if col == 'Target':
            continue
        df[col] = df[col].pct_change()
        # pct_change leaves NaN in the first remaining row; remove it before
        # scaling so the scaler only ever sees finite-or-valid numbers.
        df.dropna(inplace=True)
        df[col] = preprocessing.scale(df[col].values)
    df.dropna(inplace=True)
    return df

## This helper function is to balance number of 0s and 1s
def balance(data):
    """Return a shuffled list with equally many label-0 and non-0 samples.

    ``data`` is a list of ``(x, y)`` pairs. It is shuffled in place first.
    Pairs whose ``y == 0`` are "sells", all others are "buys"; each group is
    shuffled, both are cut down to the size of the smaller group, and the
    combined list is shuffled once more before being returned.

    Returns:
        A new list of ``[x, y]`` lists (buys and sells in equal numbers).
    """
    random.shuffle(data)

    buys, sells = [], []
    for features, label in data:
        bucket = sells if label == 0 else buys
        bucket.append([features, label])

    random.shuffle(buys)
    random.shuffle(sells)

    # Keep only as many samples per class as the rarer class provides.
    keep = min(len(buys), len(sells))
    balanced = buys[:keep] + sells[:keep]

    random.shuffle(balanced)
    return balanced

## This function builds balanced SEQ_LENGTH-row sequences and splits them into x and y arrays
def split(df):
    """Turn ``df`` into balanced sequence/label arrays.

    A rolling window of the last ``SEQ_LENGTH`` rows is kept while iterating
    over ``df.values``. The last value of each row is its label; everything
    before it is the feature vector. Once the window is full, every row
    yields one sample: the current window plus that row's label. The samples
    are passed through ``balance`` and then separated.

    Returns:
        ``(x, y)`` as NumPy arrays: the sequences and their labels.
    """
    window = deque(maxlen=SEQ_LENGTH)
    samples = []

    for row in df.values:
        window.append(list(row[:-1]))
        if len(window) != SEQ_LENGTH:
            # Not enough history yet for a full sequence.
            continue
        samples.append([np.array(window), row[-1]])

    balanced = balance(samples)
    x = [sequence for sequence, _ in balanced]
    y = [label for _, label in balanced]

    return np.array(x), np.array(y)
    

## We can create our training and test data sets
# Hold out the final 5% of rows (by index order) for validation.
times = sorted(df.index.values)
# max(1, ...) keeps the negative index meaningful: with fewer than 20 rows
# the count would be 0 and times[-0] would select the very first row,
# leaving the training set empty.
holdout_rows = max(1, int(0.05 * len(times)))
cutoff = times[-holdout_rows]

# Take explicit copies: preprocess() relies on in-place operations, so both
# sets should be independent frames rather than selections of `df`.
training_set = df[df.index < cutoff].copy()
validation_set = df[df.index >= cutoff].copy()

preprocess(training_set)
preprocess(validation_set)

x_train, y_train = split(training_set)
x_test, y_test = split(validation_set)

...

In [46]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, BatchNormalization

# Three stacked LSTM blocks followed by a small dense head with a two-way
# softmax output (one probability per Target class).
model = Sequential()
# Only the first layer needs the input shape (the shape of one x_train sample).
model.add(LSTM(128, input_shape = x_train.shape[1:], return_sequences = True))
model.add(Dropout(0.2))
model.add(BatchNormalization())

# return_sequences = True so the next LSTM still receives a full sequence.
model.add(LSTM(128, return_sequences = True))
model.add(Dropout(0.2))
model.add(BatchNormalization())

# The last LSTM returns only its final output, which feeds the dense layers.
model.add(LSTM(128))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(Dense(32, activation = tf.nn.relu))
model.add(Dropout(0.2))

model.add(Dense(2, activation = tf.nn.softmax))

# Same schedule the legacy `decay = 1e-6` optimizer argument produced,
# lr = 0.001 / (1 + 1e-6 * step), expressed as a schedule object because
# current Keras optimizers no longer accept `decay`.
lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
    initial_learning_rate = 0.001,
    decay_steps = 1,
    decay_rate = 1e-6
)

model.compile(
    optimizer = tf.keras.optimizers.Adam(learning_rate = lr_schedule),
    loss = 'sparse_categorical_crossentropy',
    metrics = ['accuracy']
)

# model.fit(x_train, y_train, batch_size = 64, epochs = 3)

...