# Neural Network

In [71]:
import pandas as pd
import numpy as np
from sklearn import preprocessing
from collections import deque
import random
import time
from keras import models
from keras import layers
from keras import callbacks
import warnings
warnings.filterwarnings('ignore')

In [72]:
SEQ_LEN = 72  # how long of a preceeding sequence to collect for RNN
FUTURE_PERIOD_PREDICT = 3  # how far into the future are we trying to predict?
RATIO_TO_PREDICT = "ETHTUSD"

In [73]:
def buy_sell_hold(future):
    
    '''DOC STRING'''
    change_perc = 0.01
    
    if future > change_perc:
        return 1
    if future < -change_perc:
        return 0
    return 2

In [74]:
def preprocess_df(df):
    df = df.drop("pct_change", 1)  # don't need this anymore.
    for col in df.columns:  # go through all of the columns
        if col != "target":  # normalize all ... except for the target itself!
            df[col] = df[col].pct_change()  # pct change "normalizes" the different currencies (each crypto coin has vastly diff values, we're really more interested in the other coin's movements)
            df.fillna(method='ffill')  # remove the nas created by pct_change
            df = df.replace([np.inf, -np.inf], 0)
            df.fillna(0, inplace=True)
            df[col] = preprocessing.scale(df[col].values)  # scale between 0 and 1.

    df.dropna(inplace=True)  # cleanup again... 


    sequential_data = []  # this is a list that will CONTAIN the sequences
    prev_days = deque(maxlen=SEQ_LEN)  # These will be our actual sequences. 
    
    for i in df.values:  # iterate over the values
        prev_days.append([n for n in i[:-1]])  # store all but the target
        if len(prev_days) == SEQ_LEN:  # make sure we have 60 sequences!
            sequential_data.append([np.array(prev_days), i[-1]])  # append those bad boys!
    
    random.shuffle(sequential_data)  # shuffle for good measure.

    buys = []  # list that will store our buy sequences and targets
    sells = []  # list that will store our sell sequences and targets
    holds = []  # list that will store our sell sequences and targets
    
    for seq, target in sequential_data:  # iterate over the sequential data
        if target == 0:  # if it's a "not buy"
            sells.append([seq, target])  # append to sells list
        elif target == 1:  # otherwise if the target is a 1...
            buys.append([seq, target])  # it's a buy!
        else:
            holds.append([seq, target])
    
    random.shuffle(buys)  # shuffle the buys
    random.shuffle(sells)  # shuffle the sells!
    random.shuffle(holds)  # shuffle the holds!
    
    lower = min(len(buys), len(sells), len(holds))  # what's the shorter length?
    
    buys = buys[:lower]  # make sure both lists are only up to the shortest length.
    sells = sells[:lower]  # make sure both lists are only up to the shortest length.
    holds = holds[:lower]  # make sure both lists are only up to the shortest length.
    
    sequential_data = buys+sells+holds  # add them together
    random.shuffle(sequential_data)  # another shuffle, so the model doesn't get confused with all 1 class then the other.

    X = []
    y = []

    for seq, target in sequential_data:  # going over our new sequential data
        X.append(seq)  # X is the sequences
        y.append(target)  # y is the targets/labels (buys vs sell/notbuy vs holds)

    return np.array(X), y  # return X and y...and make X a numpy array!

In [75]:
ratios = ['BTCTUSD', "ETHTUSD" , "XRPTUSD", "LTCTUSD", "EOSTUSD"]

In [76]:
crp_df = pd.DataFrame() # begin empty

for ratio in ratios:  # begin iteration
    print(ratio)
    dataset = f'crypto_dfs/{ratio}.csv'  # get the full path to the file.
    
    df = pd.read_csv(dataset, index_col=0)  # read in specific file

    # rename volume and close to include the ticker so we can still which close/volume is which:
    df.rename(columns={"Close": f"{ratio}_close", "Volume": f"{ratio}_volume"}, inplace=True)

    df.set_index("Time", inplace=True)  # set time as index so we can join them on this shared time
    
    df = df[[f"{ratio}_close", f"{ratio}_volume"]]  # ignore the other columns besides price and volume

    if len(crp_df)==0:  # if the dataframe is empty
        crp_df = df  # then it's just the current df
    else:  # otherwise, join this data to the main one
        crp_df = crp_df.join(df)

print(crp_df.head())

BTCTUSD
ETHTUSD
XRPTUSD
LTCTUSD
EOSTUSD
               BTCTUSD_close  BTCTUSD_volume  ETHTUSD_close  ETHTUSD_volume  \
Time                                                                          
1581346800000        9914.98       42.847840         224.48       437.23191   
1581343200000        9867.88       10.544541         223.24       921.30562   
1581339600000        9811.44       12.148842         219.41       672.55340   
1581336000000        9838.86       10.340284         219.73       279.35173   
1581332400000        9807.88       22.957431         217.91       203.97147   

               XRPTUSD_close  XRPTUSD_volume  LTCTUSD_close  LTCTUSD_volume  \
Time                                                                          
1581346800000        0.27595        150520.2          74.87      1236.85021   
1581343200000        0.27370         20433.0          74.44       300.63211   
1581339600000        0.27156          9748.0          73.41       227.99520   
15813360000

In [77]:
crp_df['pct_change'] = crp_df[[f'{RATIO_TO_PREDICT}_close']].pct_change(24)
crp_df[['pct_change', f'{RATIO_TO_PREDICT}_close']].head()

Unnamed: 0_level_0,pct_change,ETHTUSD_close
Time,Unnamed: 1_level_1,Unnamed: 2_level_1
1581346800000,,224.48
1581343200000,,223.24
1581339600000,,219.41
1581336000000,,219.73
1581332400000,,217.91


In [78]:
crp_df['target'] = crp_df['pct_change'].map(buy_sell_hold)

In [79]:
crp_df['target'].value_counts(normalize=True)

0    0.350962
1    0.326122
2    0.322917
Name: target, dtype: float64

In [80]:
times = sorted(crp_df.index.values)  # get the times
last_20pct = sorted(crp_df.index.values)[-int(0.2*len(times))]  # get the last 20% of the times

In [81]:
validation_main_df = crp_df[(crp_df.index >= last_20pct)]  # make the validation data where the index is in the last 5%
main_df = crp_df[(crp_df.index < last_20pct)]  # now the main_df is all the data up to the last 5%

In [82]:
train_x, train_y = preprocess_df(main_df)
validation_x, validation_y = preprocess_df(validation_main_df)

print(f"Train data: {len(train_x)} Validation: {len(validation_x)}")
print(f"Sells: {train_y.count(0)}, Holds: {train_y.count(2)}, Buys: {train_y.count(1)}")
print(f"VALIDATION Sells: {validation_y.count(0)}, Holds: {validation_y.count(2)}, buys: {validation_y.count(1)}")

Train data: 1779 Validation: 282
Sells: 593, Holds: 593, Buys: 593
VALIDATION Sells: 94, Holds: 94, buys: 94


Let's make a few more constants:

In [14]:
EPOCHS = 1  # how many passes through our data
BATCH_SIZE = 64  # how many batches? Try smaller batch if you're getting OOM (out of memory) errors.
NAME = f"{SEQ_LEN}-SEQ-{FUTURE_PERIOD_PREDICT}-PRED-{int(time.time())}"  # a unique name for the model

Alright, we've normalized and scaled the data! Next up, we need to create our actual sequences. To do this:

In [15]:
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, Dropout, LSTM, CuDNNLSTM, BatchNormalization
from keras.callbacks import TensorBoard
from keras.callbacks import ModelCheckpoint

In [16]:
model = Sequential()
model.add(LSTM(128, input_shape=(train_x.shape[1:]), return_sequences=True))
model.add(Dropout(0.2))
model.add(BatchNormalization())  #normalizes activation outputs, same reason you want to normalize your input data.

model.add(LSTM(128, return_sequences=True))
model.add(Dropout(0.1))
model.add(BatchNormalization())

model.add(LSTM(128))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(Dense(32, activation='relu'))
model.add(Dropout(0.2))

model.add(Dense(3, activation='tanh'))

In [17]:
# Compile model
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'])

In [18]:
tensorboard = TensorBoard(log_dir=f"logs/{NAME}")

filepath = "LSTM_Final-{epoch:02d}-{val_acc:.3f}"
# unique file name that will include the epoch and the validation acc for that epoch
checkpoint = ModelCheckpoint("models/{}.model".format(filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max')) 
# saves only the best ones

# CHANGE EPOSCHS

In [19]:
# Train model
history = model.fit(
    train_x, train_y,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(validation_x, validation_y),
    callbacks=[tensorboard, checkpoint],
)

Train on 1779 samples, validate on 282 samples
Epoch 1/1


In [20]:
# Score model
score = model.evaluate(validation_x, validation_y, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

Test loss: 1.515015776275743
Test accuracy: 0.5460992928937818


In [21]:
# Save model
model.save("models/{}".format(NAME))

Not a bad start. Better than random, validation accuracy rises over time, validation loss drops.

Changing the name constant to include the ratio we're predicting:

NAME = f"{RATIO_TO_PREDICT}-{SEQ_LEN}-SEQ-{FUTURE_PERIOD_PREDICT}-PRED-{int(time.time())}"
And then testing against all of the ratios:

In [22]:
crp_df_500 = pd.DataFrame() # begin empty

for ratio in ratios:  # begin iteration
    dataset = f'crypto_dfs/{ratio}.csv'  # get the full path to the file.
    
    df = pd.read_csv(dataset, index_col=0)  # read in specific file

    # rename volume and close to include the ticker so we can still which close/volume is which:
    df.rename(columns={"Close": f"{ratio}_close", "Volume": f"{ratio}_volume"}, inplace=True)

    df.set_index("Time", inplace=True)  # set time as index so we can join them on this shared time
    
    df = df[[f"{ratio}_close", f"{ratio}_volume"]]  # ignore the other columns besides price and volume

    
    if len(crp_df_500)==0:  # if the dataframe is empty
        crp_df_500 = df  # then it's just the current df
    else:  # otherwise, join this data to the main one
        crp_df_500 = crp_df_500.merge(df, left_index=True, right_index=True)


In [23]:
crp_df_500.isnull().sum().any()

False

In [24]:
crp_df_500.head()

Unnamed: 0_level_0,BTCTUSD_close,BTCTUSD_volume,ETHTUSD_close,ETHTUSD_volume,XRPTUSD_close,XRPTUSD_volume,LTCTUSD_close,LTCTUSD_volume,EOSTUSD_close,EOSTUSD_volume
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1581346800000,9914.98,42.84784,224.48,437.23191,0.27595,150520.2,74.87,1236.85021,4.9057,138.6
1581343200000,9867.88,10.544541,223.24,921.30562,0.2737,20433.0,74.44,300.63211,4.91,20.0
1581339600000,9811.44,12.148842,219.41,672.5534,0.27156,9748.0,73.41,227.9952,4.8321,760.33
1581336000000,9838.86,10.340284,219.73,279.35173,0.27188,13476.4,73.49,91.53713,4.815,176.16
1581332400000,9807.88,22.957431,217.91,203.97147,0.27007,9632.8,73.05,267.5823,4.7682,523.45


In [58]:
crp_df_500['pct_change'] = crp_df_500[[f'{RATIO_TO_PREDICT}_close']].pct_change(24)
crp_df_500[['pct_change', f'{RATIO_TO_PREDICT}_close']].head()

Unnamed: 0_level_0,pct_change,ETHTUSD_close
Time,Unnamed: 1_level_1,Unnamed: 2_level_1
1581346800000,,224.48
1581343200000,,223.24
1581339600000,,219.41
1581336000000,,219.73
1581332400000,,217.91


In [26]:
crp_df_500['target'] = crp_df_500['pct_change'].map(buy_sell_hold)

In [27]:
crp_df_500['target'].value_counts(normalize=True)

0    0.350962
1    0.326122
2    0.322917
Name: target, dtype: float64

In [28]:
times = sorted(crp_df_500.index.values)  # get the times
last_20pct = sorted(crp_df_500.index.values)[-int(0.2*len(times))]  # get the last 20% of the times

In [29]:
validation_main_df = crp_df_500[(crp_df_500.index >= last_20pct)]  # make the validation data where the index is in the last 5%
main_df = crp_df_500[(crp_df_500.index < last_20pct)]  # now the main_df is all the data up to the last 5%

In [31]:
train_x, train_y = preprocess_df(main_df)
validation_x, validation_y = preprocess_df(validation_main_df)

print(f"Train data: {len(train_x)} Validation: {len(validation_x)}")
print(f"Sells: {train_y.count(0)}, Holds: {train_y.count(2)}, Buys: {train_y.count(1)}")
print(f"VALIDATION Sells: {validation_y.count(0)}, Holds: {validation_y.count(2)}, buys: {validation_y.count(1)}")

Train data: 1779 Validation: 282
Sells: 593, Holds: 593, Buys: 593
VALIDATION Sells: 94, Holds: 94, buys: 94


Let's make a few more constants:

In [32]:
EPOCHS = 1  # how many passes through our data
BATCH_SIZE = 64  # how many batches? Try smaller batch if you're getting OOM (out of memory) errors.
NAME = f"{SEQ_LEN}-SEQ-{FUTURE_PERIOD_PREDICT}-PRED-{int(time.time())}"  # a unique name for the model

Alright, we've normalized and scaled the data! Next up, we need to create our actual sequences. To do this:

In [33]:
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, Dropout, LSTM, CuDNNLSTM, BatchNormalization
from keras.callbacks import TensorBoard
from keras.callbacks import ModelCheckpoint

In [34]:
model = Sequential()
model.add(LSTM(128, input_shape=(train_x.shape[1:]), return_sequences=True))
model.add(Dropout(0.2))
model.add(BatchNormalization())  #normalizes activation outputs, same reason you want to normalize your input data.

model.add(LSTM(128, return_sequences=True))
model.add(Dropout(0.1))
model.add(BatchNormalization())

model.add(LSTM(128))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(Dense(32, activation='relu'))
model.add(Dropout(0.2))

model.add(Dense(3, activation='tanh'))

In [35]:
# Compile model
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'])

In [36]:
tensorboard = TensorBoard(log_dir=f"logs/{NAME}")

filepath = "LSTM_Final-{epoch:02d}-{val_acc:.3f}"
# unique file name that will include the epoch and the validation acc for that epoch
checkpoint = ModelCheckpoint("models/{}.model".format(filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max')) 
# saves only the best ones

In [37]:
# Train model
history = model.fit(
    train_x, train_y,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(validation_x, validation_y),
    callbacks=[tensorboard, checkpoint],
)

Train on 1779 samples, validate on 282 samples
Epoch 1/1


In [38]:
# Score model
score = model.evaluate(validation_x, validation_y, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

Test loss: 5.622697087889867
Test accuracy: 0.5780141835517072


In [39]:
# Save model
model.save("models/{}".format(NAME))

Not a bad start. Better than random, validation accuracy rises over time, validation loss drops.

Changing the name constant to include the ratio we're predicting:

NAME = f"{RATIO_TO_PREDICT}-{SEQ_LEN}-SEQ-{FUTURE_PERIOD_PREDICT}-PRED-{int(time.time())}"
And then testing against all of the ratios:

# Use 5 ratios and target

In [40]:
crp_df = pd.DataFrame() # begin empty
ratios = ['BTCTUSD', "ETHTUSD" , "XRPTUSD", "LTCTUSD", "EOSTUSD"]

for ratio in ratios:  # begin iteration
    dataset = f'crypto_dfs/{ratio}.csv'  # get the full path to the file.
    
    df = pd.read_csv(dataset, index_col=0)  # read in specific file

    # rename volume and close to include the ticker so we can still which close/volume is which:
    df.rename(columns={"Close": f"{ratio}_close", "Volume": f"{ratio}_volume"}, inplace=True)

    df.set_index("Time", inplace=True)  # set time as index so we can join them on this shared time
    
    df[f'{ratio}_pct_change'] = df[[f'{ratio}_close']].pct_change(1)
    
    df[f'{ratio}_target'] = df[f'{ratio}_pct_change'].map(buy_sell_hold)
    
    df = df[[f"{ratio}_close", f"{ratio}_volume", f'{ratio}_pct_change', f'{ratio}_target']]  # ignore the other columns besides price and volume

    
    if len(crp_df)==0:  # if the dataframe is empty
        crp_df = df  # then it's just the current df
    else:  # otherwise, join this data to the main one
        crp_df = crp_df.merge(df, left_index=True, right_index=True)


In [41]:
crp_df.isnull().sum().any()

True

In [42]:
crp_df.head()

Unnamed: 0_level_0,BTCTUSD_close,BTCTUSD_volume,BTCTUSD_pct_change,BTCTUSD_target,ETHTUSD_close,ETHTUSD_volume,ETHTUSD_pct_change,ETHTUSD_target,XRPTUSD_close,XRPTUSD_volume,XRPTUSD_pct_change,XRPTUSD_target,LTCTUSD_close,LTCTUSD_volume,LTCTUSD_pct_change,LTCTUSD_target,EOSTUSD_close,EOSTUSD_volume,EOSTUSD_pct_change,EOSTUSD_target
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
1581346800000,9914.98,42.84784,,2,224.48,437.23191,,2,0.27595,150520.2,,2,74.87,1236.85021,,2,4.9057,138.6,,2
1581343200000,9867.88,10.544541,-0.00475,2,223.24,921.30562,-0.005524,2,0.2737,20433.0,-0.008154,2,74.44,300.63211,-0.005743,2,4.91,20.0,0.000877,2
1581339600000,9811.44,12.148842,-0.00572,2,219.41,672.5534,-0.017156,0,0.27156,9748.0,-0.007819,2,73.41,227.9952,-0.013837,0,4.8321,760.33,-0.015866,0
1581336000000,9838.86,10.340284,0.002795,2,219.73,279.35173,0.001458,2,0.27188,13476.4,0.001178,2,73.49,91.53713,0.00109,2,4.815,176.16,-0.003539,2
1581332400000,9807.88,22.957431,-0.003149,2,217.91,203.97147,-0.008283,2,0.27007,9632.8,-0.006657,2,73.05,267.5823,-0.005987,2,4.7682,523.45,-0.00972,2


In [43]:
df = crp_df[crp_df.columns.drop(list(crp_df.filter(regex='pct')))]
for col in df.columns:  # go through all of the columns
    if not 'target' in col:  # normalize all ... except for the target itself!
        df[col] = df[col].pct_change()  # pct change "normalizes" the different currencies (each crypto coin has vastly diff values, we're really more interested in the other coin's movements)
        df.fillna(method='ffill')  # remove the nas created by pct_change
        df = df.replace([np.inf, -np.inf], 0)
        df.fillna(0, inplace=True)
        df[col] = preprocessing.scale(df[col].values)  # scale between 0 and 1.
    
df.columns

Index(['BTCTUSD_close', 'BTCTUSD_volume', 'BTCTUSD_target', 'ETHTUSD_close',
       'ETHTUSD_volume', 'ETHTUSD_target', 'XRPTUSD_close', 'XRPTUSD_volume',
       'XRPTUSD_target', 'LTCTUSD_close', 'LTCTUSD_volume', 'LTCTUSD_target',
       'EOSTUSD_close', 'EOSTUSD_volume', 'EOSTUSD_target'],
      dtype='object')

In [44]:
df.head()

Unnamed: 0_level_0,BTCTUSD_close,BTCTUSD_volume,BTCTUSD_target,ETHTUSD_close,ETHTUSD_volume,ETHTUSD_target,XRPTUSD_close,XRPTUSD_volume,XRPTUSD_target,LTCTUSD_close,LTCTUSD_volume,LTCTUSD_target,EOSTUSD_close,EOSTUSD_volume,EOSTUSD_target
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
1581346800000,0.000834,-0.285455,2,0.007351,-0.135991,2,-0.008669,-0.111854,2,0.006044,-0.118777,2,0.010796,-0.09496,2
1581343200000,-0.869334,-0.604839,2,-0.817064,-0.082808,2,-1.203838,-0.12713,2,-0.666561,-0.134589,2,0.103169,-0.109624,2
1581339600000,-1.046867,-0.221,2,-2.553171,-0.14896,0,-1.154752,-0.121097,2,-1.614386,-0.123824,0,-1.661187,0.539394,0
1581336000000,0.512761,-0.348521,2,0.225019,-0.164074,2,0.164059,-0.105093,2,0.133669,-0.131279,2,-0.362141,-0.108127,2
1581332400000,-0.575947,0.231466,2,-1.228835,-0.148953,2,-0.984509,-0.116895,2,-0.695127,-0.078603,2,-1.0135,-0.061175,2


In [45]:
df['XRPTUSD_target'].head(15)

Time
1581346800000    2
1581343200000    2
1581339600000    2
1581336000000    2
1581332400000    2
1581328800000    2
1581325200000    2
1581321600000    2
1581318000000    2
1581314400000    2
1581310800000    0
1581307200000    1
1581303600000    2
1581300000000    2
1581296400000    2
Name: XRPTUSD_target, dtype: int64

In [46]:
df['XRPTUSD_target'].shift(24).head(25)

Time
1581346800000    NaN
1581343200000    NaN
1581339600000    NaN
1581336000000    NaN
1581332400000    NaN
1581328800000    NaN
1581325200000    NaN
1581321600000    NaN
1581318000000    NaN
1581314400000    NaN
1581310800000    NaN
1581307200000    NaN
1581303600000    NaN
1581300000000    NaN
1581296400000    NaN
1581292800000    NaN
1581289200000    NaN
1581285600000    NaN
1581282000000    NaN
1581278400000    NaN
1581274800000    NaN
1581271200000    NaN
1581267600000    NaN
1581264000000    NaN
1581260400000    2.0
Name: XRPTUSD_target, dtype: float64

In [47]:
def neural_neur_ntw_target(symbol):
    df_nn = df.copy()
    symbol = symbol + '_target'
    df_nn[symbol] = df_nn[symbol].shift(24)
    df_nn.dropna(inplace=True)
    
    return df_nn

In [48]:
df_target = neural_neur_ntw_target('XRPTUSD')

In [49]:
df_target.isnull().any().any()

False

In [50]:
df_target.head()

Unnamed: 0_level_0,BTCTUSD_close,BTCTUSD_volume,BTCTUSD_target,ETHTUSD_close,ETHTUSD_volume,ETHTUSD_target,XRPTUSD_close,XRPTUSD_volume,XRPTUSD_target,LTCTUSD_close,LTCTUSD_volume,LTCTUSD_target,EOSTUSD_close,EOSTUSD_volume,EOSTUSD_target
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
1581260400000,-0.069258,-0.080171,2,-0.575461,-0.121436,2,-0.711333,-0.072111,2.0,-0.742516,-0.132462,2,-0.521244,-0.044424,2
1581256800000,-0.4139,-0.555723,2,-0.597468,-0.164268,2,-0.447988,-0.112671,2.0,-0.147706,-0.096997,2,-0.352827,-0.105049,2
1581253200000,0.295202,-0.468765,2,0.990881,-0.130898,2,1.182107,-0.120454,2.0,0.775805,-0.135802,2,0.734121,-0.108657,2
1581249600000,0.31975,0.450077,2,0.768039,-0.139576,2,0.199469,-0.112095,2.0,0.770779,-0.133405,2,0.149359,0.002295,2
1581246000000,-0.313909,-0.310217,2,-0.514601,-0.106181,2,0.012115,-0.108727,2.0,0.127608,-0.08594,2,0.347168,-0.105859,2


In [51]:
df_target[[c for c in df_target if c not in ['XRPTUSD_target']] 
       + ['XRPTUSD_target']]


Unnamed: 0_level_0,BTCTUSD_close,BTCTUSD_volume,BTCTUSD_target,ETHTUSD_close,ETHTUSD_volume,ETHTUSD_target,XRPTUSD_close,XRPTUSD_volume,LTCTUSD_close,LTCTUSD_volume,LTCTUSD_target,EOSTUSD_close,EOSTUSD_volume,EOSTUSD_target,XRPTUSD_target
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
1581260400000,-0.069258,-0.080171,2,-0.575461,-0.121436,2,-0.711333,-0.072111,-0.742516,-0.132462,2,-0.521244,-0.044424,2,2.0
1581256800000,-0.413900,-0.555723,2,-0.597468,-0.164268,2,-0.447988,-0.112671,-0.147706,-0.096997,2,-0.352827,-0.105049,2,2.0
1581253200000,0.295202,-0.468765,2,0.990881,-0.130898,2,1.182107,-0.120454,0.775805,-0.135802,2,0.734121,-0.108657,2,2.0
1581249600000,0.319750,0.450077,2,0.768039,-0.139576,2,0.199469,-0.112095,0.770779,-0.133405,2,0.149359,0.002295,2,2.0
1581246000000,-0.313909,-0.310217,2,-0.514601,-0.106181,2,0.012115,-0.108727,0.127608,-0.085940,2,0.347168,-0.105859,2,2.0
1581242400000,0.456626,0.015834,2,0.524587,-0.138985,2,0.760239,-0.103401,0.173021,-0.117412,2,-0.207786,-0.107784,2,2.0
1581238800000,-1.049554,-0.302237,2,-0.442853,-0.160825,2,-1.223199,-0.118270,-0.160695,-0.081634,2,-0.118925,-0.030229,2,2.0
1581235200000,0.690397,-0.463575,2,0.413105,-0.053158,2,0.814731,-0.103813,0.415896,-0.100115,2,0.257782,-0.092592,2,2.0
1581231600000,0.291680,0.264277,2,0.725286,-0.144130,2,1.421641,-0.111498,0.898524,-0.121876,2,1.697421,0.039795,1,2.0
1581228000000,-0.298055,-0.509066,2,-0.752615,-0.127899,2,0.088843,-0.082225,-0.864673,-0.125986,2,-2.050684,-0.110167,0,2.0


In [56]:
sequential_data = []  # this is a list that will CONTAIN the sequences
prev_days = deque(maxlen=SEQ_LEN)  # These will be our actual sequences. 
df_target = df_target[[c for c in df_target if c not in ['XRPTUSD_target']] + ['XRPTUSD_target']]
df_target

df_target = df_target.drop(['BTCTUSD_target','ETHTUSD_target','LTCTUSD_target', 'EOSTUSD_target'], axis=1)
df_target.head()
    

Unnamed: 0_level_0,BTCTUSD_close,BTCTUSD_volume,ETHTUSD_close,ETHTUSD_volume,XRPTUSD_close,XRPTUSD_volume,LTCTUSD_close,LTCTUSD_volume,EOSTUSD_close,EOSTUSD_volume,XRPTUSD_target
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1581260400000,-0.069258,-0.080171,-0.575461,-0.121436,-0.711333,-0.072111,-0.742516,-0.132462,-0.521244,-0.044424,2.0
1581256800000,-0.4139,-0.555723,-0.597468,-0.164268,-0.447988,-0.112671,-0.147706,-0.096997,-0.352827,-0.105049,2.0
1581253200000,0.295202,-0.468765,0.990881,-0.130898,1.182107,-0.120454,0.775805,-0.135802,0.734121,-0.108657,2.0
1581249600000,0.31975,0.450077,0.768039,-0.139576,0.199469,-0.112095,0.770779,-0.133405,0.149359,0.002295,2.0
1581246000000,-0.313909,-0.310217,-0.514601,-0.106181,0.012115,-0.108727,0.127608,-0.08594,0.347168,-0.105859,2.0


In [70]:
for i in df_target.values:  # iterate over the values
    prev_days.append([n for n in i[:-1]])  # store all but the target
    if len(prev_days) == SEQ_LEN:  # make sure we have 60 sequences!
        sequential_data.append([np.array(prev_days), i[-1]])  # append those bad boys!

random.shuffle(sequential_data)  # shuffle for good measure.

random.shuffle(sequential_data)  # shuffle for good measure.

buys = []  # list that will store our buy sequences and targets
sells = []  # list that will store our sell sequences and targets
holds = []  # list that will store our sell sequences and targets

for seq, target in sequential_data:  # iterate over the sequential data
    if target == 0:  # if it's a "not buy"
        sells.append([seq, target])  # append to sells list
    elif target == 1:  # otherwise if the target is a 1...
        buys.append([seq, target])  # it's a buy!
    else:
        holds.append([seq, target])

random.shuffle(buys)  # shuffle the buys
random.shuffle(sells)  # shuffle the sells!
random.shuffle(holds)  # shuffle the holds!

lower = min(len(buys), len(sells), len(holds))  # what's the shorter length?

buys = buys[:lower]  # make sure both lists are only up to the shortest length.
sells = sells[:lower]  # make sure both lists are only up to the shortest length.
holds = holds[:lower]  # make sure both lists are only up to the shortest length.

sequential_data = buys+sells+holds  # add them together
random.shuffle(sequential_data)  # another shuffle, so the model doesn't get confused with all 1 class then the other.

X = []
y = []

for seq, target in sequential_data:  # going over our new sequential data
    X.append(seq)  # X is the sequences
    y.append(target)  # y is the targets/labels (buys vs sell/notbuy vs holds)

return np.array(X), y  # return X and y...and make X a numpy array!

ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

In [82]:
sequential_data = []  # this is a list that will CONTAIN the sequences
prev_days = deque(maxlen=SEQ_LEN)  # These will be our actual sequences. 
df_target = df_target[[c for c in df_target if c not in ['XRPTUSD_target']] + ['XRPTUSD_target']]
for i in df_target.drop(['BTCTUSD_target','ETHTUSD_target','LTCTUSD_target', 'EOSTUSD_target'], axis=1).values:
    print(df_target)
    

    #prev_days.append([n for n in i[:-1]])  # store all but the target
    #if len(prev_days) == SEQ_LEN:  # make sure we have 60 sequences!
    #    sequential_data.append([np.array(prev_days), i[-1]])  # append those bad boys!


KeyError: "['BTCTUSD_target' 'ETHTUSD_target' 'LTCTUSD_target' 'EOSTUSD_target'] not found in axis"

In [77]:
df_target.head()

Unnamed: 0_level_0,BTCTUSD_close,BTCTUSD_volume,BTCTUSD_target,ETHTUSD_close,ETHTUSD_volume,ETHTUSD_target,XRPTUSD_close,XRPTUSD_volume,LTCTUSD_close,LTCTUSD_volume,LTCTUSD_target,EOSTUSD_close,EOSTUSD_volume,EOSTUSD_target,XRPTUSD_target
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
1581260400000,-0.069258,-0.080171,2,-0.575461,-0.121436,2,-0.711333,-0.072111,-0.742516,-0.132462,2,-0.521244,-0.044424,2,2.0
1581256800000,-0.4139,-0.555723,2,-0.597468,-0.164268,2,-0.447988,-0.112671,-0.147706,-0.096997,2,-0.352827,-0.105049,2,2.0
1581253200000,0.295202,-0.468765,2,0.990881,-0.130898,2,1.182107,-0.120454,0.775805,-0.135802,2,0.734121,-0.108657,2,2.0
1581249600000,0.31975,0.450077,2,0.768039,-0.139576,2,0.199469,-0.112095,0.770779,-0.133405,2,0.149359,0.002295,2,2.0
1581246000000,-0.313909,-0.310217,2,-0.514601,-0.106181,2,0.012115,-0.108727,0.127608,-0.08594,2,0.347168,-0.105859,2,2.0


In [65]:
def preprocess_neur_net_df(df):

    sequential_data = []  # this is a list that will CONTAIN the sequences
    prev_days = deque(maxlen=SEQ_LEN)  # These will be our actual sequences. 
    df = df[[c for c in df if c not in ['XRPTUSD_target']] + ['XRPTUSD_target']]
    for i in df.drop(['BTCTUSD_target','ETHTUSD_target','LTCTUSD_target', 'EOSTUSD_target'], axis=1).values:
        prev_days.append([n for n in i[:-1]])  # store all but the target
        if len(prev_days) == SEQ_LEN:  # make sure we have 60 sequences!
            sequential_data.append([np.array(prev_days), i[:-1]])  # append those bad boys!
    
    random.shuffle(sequential_data)  # shuffle for good measure.

    buys = []  # list that will store our buy sequences and targets
    sells = []  # list that will store our sell sequences and targets
    holds = []  # list that will store our sell sequences and targets
    
    for seq, target in sequential_data:  # iterate over the sequential data
        if target == 0:  # if it's a "not buy"
            sells.append([seq, target])  # append to sells list
        elif target == 1:  # otherwise if the target is a 1...
            buys.append([seq, target])  # it's a buy!
        else:
            holds.append([seq, target])
    
    random.shuffle(buys)  # shuffle the buys
    random.shuffle(sells)  # shuffle the sells!
    random.shuffle(holds)  # shuffle the holds!
    
    lower = min(len(buys), len(sells), len(holds))  # what's the shorter length?
    
    buys = buys[:lower]  # make sure both lists are only up to the shortest length.
    sells = sells[:lower]  # make sure both lists are only up to the shortest length.
    holds = holds[:lower]  # make sure both lists are only up to the shortest length.
    
    sequential_data = buys+sells+holds  # add them together
    random.shuffle(sequential_data)  # another shuffle, so the model doesn't get confused with all 1 class then the other.

    X = []
    y = []

    for seq, target in sequential_data:  # going over our new sequential data
        X.append(seq)  # X is the sequences
        y.append(target)  # y is the targets/labels (buys vs sell/notbuy vs holds)

    return np.array(X), y  # return X and y...and make X a numpy array!

In [53]:
def preprocess_neur_net_df(df):

    sequential_data = []  # this is a list that will CONTAIN the sequences
    prev_days = deque(maxlen=SEQ_LEN)  # These will be our actual sequences. 

    df = df[[c for c in df if c not in ['XRPTUSD_target']] + ['XRPTUSD_target']]
    for i in df.drop(['BTCTUSD_target','ETHTUSD_target','LTCTUSD_target', 'EOSTUSD_target'], axis=1).values:
        prev_days.append([n for n in i[:-1]])  # store all but the target
        if len(prev_days) == SEQ_LEN:  # make sure we have 60 sequences!
            sequential_data.append([np.array(prev_days), i[:-1]])  # append those bad boys!
    
    random.shuffle(sequential_data)  # shuffle for good measure.

    buys = []  # list that will store our buy sequences and targets
    sells = []  # list that will store our sell sequences and targets
    holds = []  # list that will store our sell sequences and targets
    
    for seq, target in sequential_data:  # iterate over the sequential data
        if target == 0:  # if it's a "not buy"
            sells.append([seq, target])  # append to sells list
        elif target == 1:  # otherwise if the target is a 1...
            buys.append([seq, target])  # it's a buy!
        else:
            holds.append([seq, target])
    
    random.shuffle(buys)  # shuffle the buys
    random.shuffle(sells)  # shuffle the sells!
    random.shuffle(holds)  # shuffle the holds!
    
    lower = min(len(buys), len(sells), len(holds))  # what's the shorter length?
    
    buys = buys[:lower]  # make sure both lists are only up to the shortest length.
    sells = sells[:lower]  # make sure both lists are only up to the shortest length.
    holds = holds[:lower]  # make sure both lists are only up to the shortest length.
    
    sequential_data = buys+sells+holds  # add them together
    random.shuffle(sequential_data)  # another shuffle, so the model doesn't get confused with all 1 class then the other.

    X = []
    y = []

    for seq, target in sequential_data:  # going over our new sequential data
        X.append(seq)  # X is the sequences
        y.append(target)  # y is the targets/labels (buys vs sell/notbuy vs holds)

    return np.array(X), y  # return X and y...and make X a numpy array!

In [80]:
sequential_data = []  # this is a list that will CONTAIN the sequences
prev_days = deque(maxlen=SEQ_LEN)  # These will be our actual sequences. 
buys = []  # list that will store our buy sequences and targets
sells = []  # list that will store our sell sequences and targets
holds = []  # list that will store our sell sequences and targets

for seq, target in sequential_data:  # iterate over the sequential data
    if target == 0:  # if it's a "not buy"
        sells.append([seq, target])  # append to sells list
    elif target == 1:  # otherwise if the target is a 1...
        buys.append([seq, target])  # it's a buy!
    else:
        holds.append([seq, target])

random.shuffle(buys)  # shuffle the buys
random.shuffle(sells)  # shuffle the sells!
random.shuffle(holds)  # shuffle the holds!

lower = min(len(buys), len(sells), len(holds))  # what's the shorter length?

buys = buys[:lower]  # make sure both lists are only up to the shortest length.
sells = sells[:lower]  # make sure both lists are only up to the shortest length.
holds = holds[:lower]  # make sure both lists are only up to the shortest length.

sequential_data = buys+sells+holds  # add them together
random.shuffle(sequential_data)  # another shuffle, so the model doesn't get confused with all 1 class then the other.

X = []
y = []

for seq, target in sequential_data:  # going over our new sequential data
    X.append(seq)  # X is the sequences
    y.append(target)  # y is the targets/labels (buys vs sell/notbuy vs holds)

return np.array(X), y  # return X and y...and make X a numpy array!

AttributeError: 'NoneType' object has no attribute 'values'

In [81]:
df_target.head()

Unnamed: 0_level_0,BTCTUSD_close,BTCTUSD_volume,ETHTUSD_close,ETHTUSD_volume,XRPTUSD_close,XRPTUSD_volume,LTCTUSD_close,LTCTUSD_volume,EOSTUSD_close,EOSTUSD_volume,XRPTUSD_target
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1581260400000,-0.069258,-0.080171,-0.575461,-0.121436,-0.711333,-0.072111,-0.742516,-0.132462,-0.521244,-0.044424,2.0
1581256800000,-0.4139,-0.555723,-0.597468,-0.164268,-0.447988,-0.112671,-0.147706,-0.096997,-0.352827,-0.105049,2.0
1581253200000,0.295202,-0.468765,0.990881,-0.130898,1.182107,-0.120454,0.775805,-0.135802,0.734121,-0.108657,2.0
1581249600000,0.31975,0.450077,0.768039,-0.139576,0.199469,-0.112095,0.770779,-0.133405,0.149359,0.002295,2.0
1581246000000,-0.313909,-0.310217,-0.514601,-0.106181,0.012115,-0.108727,0.127608,-0.08594,0.347168,-0.105859,2.0


In [None]:
buys = []  # list that will store our buy sequences and targets
sells = []  # list that will store our sell sequences and targets
holds = []  # list that will store our sell sequences and targets

for seq, target in sequential_data:  # iterate over the sequential data
    if target == 0:  # if it's a "not buy"
        sells.append([seq, target])  # append to sells list
    elif target == 1:  # otherwise if the target is a 1...
        buys.append([seq, target])  # it's a buy!
    else:
        holds.append([seq, target])

random.shuffle(buys)  # shuffle the buys
random.shuffle(sells)  # shuffle the sells!
random.shuffle(holds)  # shuffle the holds!

lower = min(len(buys), len(sells), len(holds))  # what's the shorter length?

buys = buys[:lower]  # make sure both lists are only up to the shortest length.
sells = sells[:lower]  # make sure both lists are only up to the shortest length.
holds = holds[:lower]  # make sure both lists are only up to the shortest length.

sequential_data = buys+sells+holds  # add them together
random.shuffle(sequential_data)  # another shuffle, so the model doesn't get confused with all 1 class then the other.

X = []
y = []

for seq, target in sequential_data:  # going over our new sequential data
    X.append(seq)  # X is the sequences
    y.append(target)  # y is the targets/labels (buys vs sell/notbuy vs holds)

return np.array(X), y  # return X and y...and make X a numpy array!

In [54]:
times = sorted(df_target.index.values)  # get the times
last_20pct = sorted(df_target.index.values)[-int(0.2*len(times))]  # get the last 20% of the times

In [55]:
validation_main_df = df_target[(df_target.index >= last_20pct)]  # make the validation data where the index is in the last 5%
main_df = df_target[(df_target.index < last_20pct)]  # now the main_df is all the data up to the last 5%

In [66]:
train_x, train_y = preprocess_neur_net_df(main_df)

ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

In [56]:
train_x, train_y = preprocess_neur_net_df(main_df)
validation_x, validation_y = preprocess_neur_net_df(validation_main_df)

print(f"Train data: {len(train_x)} Validation: {len(validation_x)}")
print(f"Sells: {train_y.count(0)}, Holds: {train_y.count(2)}, Buys: {train_y.count(1)}")
print(f"VALIDATION Sells: {validation_y.count(0)}, Holds: {validation_y.count(2)}, buys: {validation_y.count(1)}")

KeyError: ''

In [None]:
EPOCHS = 20  # how many passes through our data
BATCH_SIZE = 64  # how many batches? Try smaller batch if you're getting OOM (out of memory) errors.
NAME = f"{SEQ_LEN}-SEQ-{FUTURE_PERIOD_PREDICT}-PRED-{int(time.time())}"  # a unique name for the model

Alright, we've normalized and scaled the data! Next up, we need to create our actual sequences. To do this:

In [None]:
model = Sequential()
model.add(LSTM(128, input_shape=(train_x.shape[1:]), return_sequences=True))
model.add(Dropout(0.2))
model.add(BatchNormalization())  #normalizes activation outputs, same reason you want to normalize your input data.

model.add(LSTM(128, return_sequences=True))
model.add(Dropout(0.1))
model.add(BatchNormalization())

model.add(LSTM(128))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(Dense(32, activation='relu'))
model.add(Dropout(0.2))

model.add(Dense(3, activation='tanh'))

In [None]:
# Compile model
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'])

In [None]:
tensorboard = TensorBoard(log_dir=f"logs/{NAME}")

filepath = "LSTM_Final-{epoch:02d}-{val_acc:.3f}"
# unique file name that will include the epoch and the validation acc for that epoch
checkpoint = ModelCheckpoint("models/{}.model".format(filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max')) 
# saves only the best ones

In [None]:
# Train model
history = model.fit(
    train_x, train_y,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(validation_x, validation_y),
    callbacks=[tensorboard, checkpoint],
)

In [None]:
# Score model
score = model.evaluate(validation_x, validation_y, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])