In [69]:
import pandas as pd
import os
from sklearn import preprocessing
from collections import deque
import random
import numpy as np
import time
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, BatchNormalization
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint


In [65]:
SEQ_LEN_IN_MINS = 60 #Will use last 60 minutes of data to predict 
FUTURE_PERIOD_PREDICT_IN_MINS = 3 #how many minutes forward we will be predicting
CRYPTO_TO_PREDICT = "LTC-USD"
EPOCHS = 10
BATCH_SIZE = 64
NAME = f"{SEQ_LEN_IN_MINS}-SEQ-{FUTURE_PERIOD_PREDICT_IN_MINS}-PRED-{int(time.time())}"

In [28]:
#trying to make the model learn the relation-when prices go up and when prices go down

def classify(current_price, future_price):
    if float(future_price)>float(current_price):
        return 1
    else:
        return 0

In [63]:
def preprocess_data(df):
    df = df.drop("future", axis = 1) 
    
    for col in df.columns:
        if col != "target":
            df[col] = df[col].pct_change()  #this will normalize all the data
            df.dropna(inplace=True)
            df[col] = preprocessing.scale(df[col].values)
    df.dropna(inplace=True)
    
    sequential_data = []
    prev_days = deque(maxlen=SEQ_LEN_IN_MINS) #as the list reaches 60, it removes the prev items and renews
    for i in df.values:
        prev_days.append([n for n in i[:-1]])   #to avoid taking in target column
        if len(prev_days) == SEQ_LEN_IN_MINS:
            sequential_data.append([np.array(prev_days), i[-1]])
        
    random.shuffle(sequential_data)
    
    buys = []
    sells = []

    for seq, target in sequential_data:
        if target == 0:
            sells.append([seq, target])
        elif target == 1:
            buys.append([seq, target])
            
    #to balance out the data
    lower = min(len(buys), len(sells))
    buys = buys[:lower]
    sells = sells[:lower]
    
    sequential_data = buys+sells
    
    random.shuffle(sequential_data)
    
    X = []
    y = []
    
    for seq, target in sequential_data:
        X.append(seq)
        y.append(target)
        
    return np.array(X), y
    
        

In [29]:
df1 = pd.read_csv("crypto_data/LTC-USD.csv", names = ["time", "low", "high", "open", "close", "volume"])

In [30]:
df1.head()

Unnamed: 0,time,low,high,open,close,volume
0,1528968660,96.580002,96.589996,96.589996,96.580002,9.6472
1,1528968720,96.449997,96.669998,96.589996,96.660004,314.387024
2,1528968780,96.470001,96.57,96.57,96.57,77.129799
3,1528968840,96.449997,96.57,96.57,96.5,7.216067
4,1528968900,96.279999,96.540001,96.5,96.389999,524.539978


In [31]:
main_df = pd.DataFrame()

In [32]:
crypto_names = ["BTC-USD", "LTC-USD", "ETH-USD", "BCH-USD"]

In [33]:
for name in crypto_names:
    dataset = f"crypto_data/{name}.csv"
    df = pd.read_csv(dataset, names = ["time", "low", "high", "open", f"{name}_close", f"{name}_volume"])
#     print(df.head())
    df.set_index("time", inplace=True)
    df = df[[f"{name}_close", f"{name}_volume"]]
    if len(main_df) == 0:
        main_df = df
    else:
        main_df = main_df.join(df)


In [34]:
# for c in main_df.columns:
#     print(c)

In [35]:
#the future price(after 3 mins) of the crypto at a given time will be the closing time for the crypto at a time 3 mins in the future
main_df['future'] = main_df[f"{CRYPTO_TO_PREDICT}_close"].shift(-FUTURE_PERIOD_PREDICT_IN_MINS)

In [36]:
print(main_df[[f"{CRYPTO_TO_PREDICT}_close", "future"]].head())

            LTC-USD_close     future
time                                
1528968660      96.580002  96.500000
1528968720      96.660004  96.389999
1528968780      96.570000  96.519997
1528968840      96.500000  96.440002
1528968900      96.389999  96.470001


In [37]:
main_df['target'] = list(map(classify, main_df[f"{CRYPTO_TO_PREDICT}_close"], main_df["future"]))

In [39]:
print(main_df[[f"{CRYPTO_TO_PREDICT}_close", "future", "target"]].head())

            LTC-USD_close     future  target
time                                        
1528968660      96.580002  96.500000       0
1528968720      96.660004  96.389999       0
1528968780      96.570000  96.519997       0
1528968840      96.500000  96.440002       0
1528968900      96.389999  96.470001       1


we'll separate out the last 5% data as the validation data

In [40]:
times = sorted(main_df.index.values)

In [42]:
last_5per = times[-int(0.05*len(times))]
print(last_5per)

1534922100


In [43]:
validation_data = main_df[(main_df.index >= last_5per)]
main_df = main_df[(main_df.index < last_5per)]

In [57]:
#scale the data
preprocess_data(main_df)

In [112]:
train_x, train_y = preprocess_data(main_df)
validation_x, validation_y = preprocess_data(validation_data)

# train_x = np.asarray(train_x)
# train_y = np.asarray(train_y)
# validation_x = np.asarray(validation_x)
# validation_y = np.asarray(validation_y)

train_x = np.asarray(train_x)
train_y = np.asarray(train_y)
validation_x = np.asarray(validation_x)
validation_y = np.asarray(validation_y)


In [116]:
# print(f"train data: {len(train_x)} validation: {len(validation_x)}")
# print(f"dont buys: {train_y.count(0)} buys: {train_y.count(1)}")
# print(f"VALIDATION dont buys: {validation_y.count(0)} buys: {validation_y.count(1)}")




In [115]:
model = Sequential()
model.add(LSTM(128, input_shape=train_x.shape[1:], return_sequences=True))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(LSTM(128, input_shape=train_x.shape[1:], return_sequences=True))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(LSTM(128, input_shape=train_x.shape[1:]))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(Dense(32, activation="relu"))
model.add(Dropout(0.2))

model.add(Dense(2, activation="softmax"))

opt = tf.keras.optimizers.Adam(lr=0.001, decay=1e-06)

model.compile(loss="sparse_categorical_crossentropy",
             optimizer = opt,
             metrics=["accuracy"])

tensorboard = TensorBoard(log_dir=f"logs/{NAME}")

filepath = "RNN_Final-{epoch:02d}-{val_accuracy:.3f}"
checkpoint = ModelCheckpoint("models/{}.model".format(filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max'))

history = model.fit(
train_x, train_y,
batch_size = BATCH_SIZE,
epochs = EPOCHS,
validation_data = (validation_x, validation_y),
callbacks=[(tensorboard, checkpoint)])



Epoch 1/10


2022-07-25 20:48:35.051433: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-07-25 20:48:35.385079: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-07-25 20:48:35.509104: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-07-25 20:48:35.619781: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-07-25 20:48:35.794653: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-07-25 20:48:35.981813: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-07-25 20:48:36.162249: I tensorflow/core/grappler/optimizers/cust



2022-07-25 20:49:31.489342: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-07-25 20:49:31.614721: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-07-25 20:49:31.720317: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-07-25 20:49:31.819791: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


INFO:tensorflow:Assets written to: models/RNN_Final-01-0.525.model/assets


INFO:tensorflow:Assets written to: models/RNN_Final-01-0.525.model/assets


Epoch 2/10



INFO:tensorflow:Assets written to: models/RNN_Final-02-0.519.model/assets


INFO:tensorflow:Assets written to: models/RNN_Final-02-0.519.model/assets


Epoch 3/10



INFO:tensorflow:Assets written to: models/RNN_Final-03-0.508.model/assets


INFO:tensorflow:Assets written to: models/RNN_Final-03-0.508.model/assets


Epoch 4/10



INFO:tensorflow:Assets written to: models/RNN_Final-04-0.551.model/assets


INFO:tensorflow:Assets written to: models/RNN_Final-04-0.551.model/assets


Epoch 5/10



INFO:tensorflow:Assets written to: models/RNN_Final-05-0.550.model/assets


INFO:tensorflow:Assets written to: models/RNN_Final-05-0.550.model/assets


Epoch 6/10



INFO:tensorflow:Assets written to: models/RNN_Final-06-0.533.model/assets


INFO:tensorflow:Assets written to: models/RNN_Final-06-0.533.model/assets


Epoch 7/10



INFO:tensorflow:Assets written to: models/RNN_Final-07-0.539.model/assets


INFO:tensorflow:Assets written to: models/RNN_Final-07-0.539.model/assets


Epoch 8/10



INFO:tensorflow:Assets written to: models/RNN_Final-08-0.560.model/assets


INFO:tensorflow:Assets written to: models/RNN_Final-08-0.560.model/assets


Epoch 9/10



INFO:tensorflow:Assets written to: models/RNN_Final-09-0.559.model/assets


INFO:tensorflow:Assets written to: models/RNN_Final-09-0.559.model/assets


Epoch 10/10



INFO:tensorflow:Assets written to: models/RNN_Final-10-0.542.model/assets


INFO:tensorflow:Assets written to: models/RNN_Final-10-0.542.model/assets


