# Cryptocurrency Price Prediction

## This project uses a recurrent neural network called Long Short Term Memory (LSTM) to predict cryptocurrency prices using previous closing prices and volume

## Don't use this for real trading operations as this is for study purposes only

### Importing libraries

In [4]:
from binance.client import Client
import pandas as pd
import time
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, BatchNormalization
import sklearn
from sklearn import preprocessing
from collections import deque
import random
import time

### Get public and private keys from my account on Binance to check cryptocurrency prices

In [5]:
# Binance API credentials.
# Prefer the BINANCE_API_KEY / BINANCE_API_SECRET environment variables so
# that real keys never have to be saved inside the notebook. The blank
# placeholders are used only when a variable is not set, which matches the
# previous hard-coded behaviour.
import os

Pkey = os.environ.get("BINANCE_API_KEY", "   ") #Enter with public key
Skey = os.environ.get("BINANCE_API_SECRET", "   ") #Enter with private key

# Client used by the later cells to download historical candles.
client = Client(Pkey, Skey)

### Creating the classify function, which labels a row 1 when the future price is higher than the current price (buy signal) and 0 otherwise (do nothing)

In [6]:
def classify(current, future):
    """Return 1 if the future price is strictly higher than the current one, else 0.

    Both arguments are converted with ``float()`` before the comparison, so
    numeric strings are accepted. Equal prices yield 0, and so does any
    comparison involving NaN.
    """
    price_rises = float(future) > float(current)
    return int(price_rises)

### Creating the preprocess function, which standardizes the features (zero mean, unit variance), splits the data into sequences of 60 values each (1 value per minute) so the neural network can learn how prices behave over 1-hour periods, and balances the number of samples in the two classes

In [7]:

def preprocess_df(df, window=None):
    """Turn a labelled candle dataframe into balanced, shuffled training sequences.

    Steps:
      1. Drop the helper "Prediction" column.
      2. Standardize every column except "Target".
      3. Build sliding windows of ``window`` consecutive rows; each window is
         labelled with the "Target" value of its last row.
      4. Balance the two classes by truncating the larger one, then shuffle.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a "Prediction" column, and "Target" must be the last
        column once "Prediction" is removed (the label is read from the last
        position of each row).
    window : int, optional
        Number of consecutive rows per sequence. Defaults to the module-level
        ``seq_len``.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        ``X`` holding the sequences and ``y`` holding the matching labels.
    """
    if window is None:
        window = seq_len  # notebook-level default

    # Keyword form: the positional ``axis`` argument of DataFrame.drop was
    # deprecated and later removed from pandas.
    df = df.drop(columns="Prediction")

    for col in df.columns:  # go through all of the columns
        if col != 'Target':  # normalize all ... except for the target itself!
            # preprocessing.scale standardizes the column (zero mean, unit
            # variance); it does NOT squash values into the [0, 1] range.
            df[col] = preprocessing.scale(df[col].values)

    df.dropna(inplace=True)  # cleanup again

    sequential_data = []
    prev_rows = deque(maxlen=window)  # rolling window; oldest row falls out automatically

    for row in df.values:
        prev_rows.append(list(row[:-1]))  # features only, label excluded
        if len(prev_rows) == window:
            # Pair the full window with the label of its most recent row.
            sequential_data.append([np.array(prev_rows), row[-1]])

    random.shuffle(sequential_data)

    # Split by class so both can be cut down to the same size.
    sells = [[seq, target] for seq, target in sequential_data if target == 0]
    buys = [[seq, target] for seq, target in sequential_data if target == 1]

    random.shuffle(buys)
    random.shuffle(sells)

    lower = min(len(buys), len(sells))  # size of the smaller class

    # Balance the classes, then mix them so the model does not see all of
    # one class followed by all of the other.
    sequential_data = buys[:lower] + sells[:lower]
    random.shuffle(sequential_data)

    X = [seq for seq, _ in sequential_data]
    y = [target for _, target in sequential_data]

    return np.array(X), np.array(y)  # make X and y numpy arrays


### Getting the close price and volume of every minute for the past month and add them to a dataframe

In [8]:
# Hyper-parameters shared by the preprocessing and training cells.
seq_len = 60 #defining the size of the sequences
predict = 1 #predict one minute in the future
epochs = 10
batch_size = 64

candles = client.get_historical_klines("BNBBTC", Client.KLINE_INTERVAL_1MINUTE, "1 month ago UTC") # get prices per minute from last month till now
candles_clean = pd.DataFrame(candles, columns=["Open_time", "Open", "High", "Low", "Close", "Volume", "Close Time", "Quote_volume", "Num of Trades", "Taker Buy Base", "Taker Buy Quote", "Ignore"]) #Create Pandas dataframe

# Keep just Close price and Volume; .copy() so the new columns below are
# added to an independent frame rather than a view of candles_clean.
candles_last = candles_clean[["Close", "Volume"]].copy()

# Close price `predict` rows ahead, aligned with the current row.
candles_last["Prediction"] = candles_last["Close"].shift(-predict)

# Target is 1 when the future close is higher than the current close, else 0.
candles_last["Target"] = list(map(classify, candles_last["Close"], candles_last["Prediction"]))

# The last `predict` rows have no future price (Prediction is NaN), and
# classify() labels any comparison with NaN as 0. Drop those rows so the
# model is not trained on labels that were never actually observed.
candles_last = candles_last.dropna(subset=["Prediction"])


### Splitting the data into 95% training data and 5% validation data, and preprocessing both

In [9]:
# Chronological hold-out split: the final 5% of rows become the validation
# set, everything before them is used for training.
times = sorted(candles_last.index.values)
holdout_rows = int(0.05 * len(times))
last_5pct = times[-holdout_rows]  # first index label that belongs to the validation part

in_validation = candles_last.index >= last_5pct
candles_validation = candles_last[in_validation]  # 5% of data for validation
candles_train = candles_last[~in_validation]  # 95% of data for training

# Validation is preprocessed first, then training (same order as before, so
# the shuffling inside preprocess_df consumes random numbers identically).
validation_x, validation_y = preprocess_df(candles_validation)
train_x, train_y = preprocess_df(candles_train)

print("Train data: " + str(len(train_x)) + "  " + "Test data: " + str(len(validation_x)))

Train data: 40184  Test data: 2130


### Building and training the model

In [10]:
# Building the model (RNN - LSTM)
#
# Three stacked LSTM layers, each followed by dropout and batch
# normalization, then a small dense head with a 2-way softmax
# (class 0 = price does not rise, class 1 = price rises).

model = Sequential()

# Only the first layer needs input_shape; later layers infer their input
# from the previous layer's output. return_sequences=True keeps the full
# sequence so the next LSTM layer receives one vector per time step.
model.add(LSTM(128, input_shape=train_x.shape[1:], return_sequences=True))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(LSTM(128, return_sequences=True))
model.add(Dropout(0.1))
model.add(BatchNormalization())

# Last recurrent layer returns only its final output, feeding the dense head.
model.add(LSTM(128))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(Dense(32, activation="relu"))
model.add(Dropout(0.2))

model.add(Dense(2, activation="softmax"))

# `learning_rate` is the supported argument name; `lr` is a deprecated alias
# that newer Keras versions reject.
# NOTE(review): `decay` is also dropped by newer Keras optimizers - if this
# line fails after an upgrade, use a learning-rate schedule instead.
opt = tf.keras.optimizers.Adam(learning_rate=0.001, decay=1e-6)

# Labels are integer class ids (0/1), hence the sparse variant of the loss.
model.compile(loss='sparse_categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])

model.fit(train_x, train_y, batch_size=batch_size, epochs=epochs, validation_data=(validation_x, validation_y))

Train on 40184 samples, validate on 2130 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x292765ac188>

### My overall accuracy was around 50% most of the time. I'm aware that this is not a good accuracy (it is about what random guessing would achieve on two balanced classes) and that the project can be improved much further. As mentioned before, this was made for study purposes only.