In [99]:
import lstm
import continuous_functions as cf
import functions
import time
import sys
import os
import random
from datetime import datetime
import numpy as np
import pandas as pd
from tensorflow import set_random_seed
from keras.callbacks import Callback, LearningRateScheduler
from sklearn.externals import joblib
from sklearn.preprocessing import StandardScaler

# Seed every random number generator used here (Python's `random`, NumPy and
# TensorFlow) with the same value so that repeated runs are reproducible.
SEED = 1102
random.seed(SEED)
np.random.seed(SEED)
set_random_seed(SEED)

# Number of training epochs. Hard-coded here for model testing purposes; to read
# it from the command line instead, use: epochs = int(sys.argv[1])
epochs = 2

# Platform switch: leave True on unix, change to False if using Windows.
unix = False

# Model to run. Options are: 1, 2, "2reg", 3, 4, "chinese"
which_model = "2reg"

# Hyper-parameters -- these need to be adjusted to optimize the model.
seq_len = 300           # amount of data (in minutes) used for the forecast
forecast_len = 60       # forecast time in minutes
dense_units = 16        # needs to be played with (last best = 128)
lstm_units = 16         # same (last best = 64)
dropout = 0.75          # same (last best = 0.2)
learning_rate = 1e-2    # (last best = 0.0001)
batch_size = 32 * 256
interval = 1            # sampling step in minutes; explained in the text that follows

"""
The way training data is generated is the following: we take all historical
data and store it in df. This is then a dataframe with dimensions
number_of_coins * 2 x minutes (the times 2 comes from storing price and volume)
For each trading index that we compute we add another number_of_coins rows, so,
if we are using two indices, we have:

                [number_of_coins * 4 x minutes]

We then pick "training_number" of minutes and use it for prediction, so that df
has number_of_coins * 4 x training_number shape. From this, we create training
data the following way: we pick every "interval" number of minutes and create
a training observation of shape number_of_coins * 4 x seq_len together with a target
observation of shape number_of_coins * 4 x 1 consisting of the observation
"forecast_len" away from the last minute of the training data. We store all
of this training data in a numpy array (X_train) with shape:

        [minutes/interval x seq_len x number_of_coins * 4]

together with the target data with shape:

        [minutes/interval x number_of_coins * 4 x 1]

Test data is treated the same but with interval = 1.

Note: the minutes/interval sizes are not exact, because we have to account for
observations that lack enough historical data (and similar edge cases); the
functions apply minor corrections for this.
"""
# --- trading indices ---

# rsi
rsi_length, rsi_gap = 14, 24    # length in days, gap in hours

# stoch
stoch_length = 14               # in days

# --- other parameters ---

# train_days: per the original note, either this value or the one below is used.
# pred_days: number of trailing days held out for prediction.
train_days, pred_days = 18, 3

amount = 100        # initial trading amount
fee = 2 * 0.0014    # simulated fee from trader

# Share of `amount` invested in each prediction: the investment on coin 1 is
# amount * p1, and so on. Not technically required, but the shares should add
# up to one. More shares can be added as long as they are also added to the
# `percentages` dictionary below.
p1, p2, p3, p4, p5 = 0.4, 0.3, 0.2, 0.1, 0.

      ###################################
###### Do not touch beyond this point!!! ######
      ###################################

# Bundle the settings into dictionaries so that functions taking them stay legible.

# Investment share per prediction, keyed "p1" ... "p5".
percentages = dict(p1 = p1, p2 = p2, p3 = p3, p4 = p4, p5 = p5)


# Model, data and trading settings, keyed by the name of the variable they come from.
parameters = dict(
    seq_len = seq_len,
    forecast_len = forecast_len,
    dense_units = dense_units,
    lstm_units = lstm_units,
    dropout = dropout,
    learning_rate = learning_rate,
    batch_size = batch_size,
    pred_days = pred_days,
    train_days = train_days,
    amount = amount,
    fee = fee,
    interval = interval,
    unix = unix,
)

def _scheduled_lr(epoch):
    """Return the learning rate to use for `epoch`.

    The divisor is the constant (1. + 0), so the result is `learning_rate`
    unchanged for every epoch. `learning_rate` is looked up each time this
    function is called, not when the callback is created.
    """
    return learning_rate / (1. + 0)


# Keras callback that applies the schedule above during training; verbose = 1
# makes it log the rate it sets.
# NOTE(review): as written the schedule does not lower the learning rate; the
# "+ 0" term looks like a disabled decay -- confirm whether decay is intended.
lrate = LearningRateScheduler(_scheduled_lr, verbose = 1)


In [75]:
import importlib
# Reload the local `functions` and `lstm` modules so that edits to their source
# files are picked up without restarting the kernel. `continuous_functions`
# (imported as `cf`) is not reloaded here.
importlib.reload(functions)
# Last expression of the cell: its return value (the reloaded module) is what
# the notebook displays as the cell output.
importlib.reload(lstm)

<module 'lstm' from 'C:\\Users\\eudald\\Desktop\\Crypto-master\\Crypto\\src\\AI\\lstm.py'>

In [83]:
# Load the data file and keep only the coins selected by filter_coins.
# Per the original note, a range can be included when loading in case you don't
# want to load the whole file (for testing and memory saving purposes) --
# TODO confirm against functions.data_load.
pre_df = functions.data_load("new_data_btc.csv")
df = functions.filter_coins(pre_df)

# Half the number of rows of df is taken as the number of coins.
number_of_coins = int(df.shape[0]/2)

# We predict only the last `pred_days` days, so those minutes are excluded from
# the training range.
training_number = df.shape[1] - 60 * 24 * pred_days
parameters["number"] = training_number  # add it to the dictionary

training_days_shown = round(training_number/60/24, 1)
print("> Training model on " + str(number_of_coins) + " coins during " + str(training_days_shown) + " days...")

# cf.train reshapes the input dataframe into usable training data.
# There are two options: train on the data as it is, or also shuffle it.
# TO-DO: check which is best.


> Training model on 120 coins during 57.9 days...


In [84]:
# Both indices are computed from every second row of df, starting at row 0.
# NOTE(review): presumably these are the price rows -- confirm against the data layout.
every_second_row = slice(None, None, 2)
rsi = functions.compute_rsi(df.iloc[every_second_row])
ema = functions.compute_ema(df.iloc[every_second_row])

In [85]:
# `period` is a number of days; `leftover` is the same span expressed in minutes.
# It is used as the starting column when the frames are sliced afterwards.
MINUTES_PER_DAY = 60 * 24
period = 7
leftover = MINUTES_PER_DAY * period

In [86]:
# Restrict the data and both index frames to the same column range: skip the
# first `leftover` columns and stop before column `training_number`.
training_columns = slice(leftover, training_number)
new_df = df.iloc[:, training_columns]
new_rsi = rsi.iloc[:, training_columns]
new_ema = ema.iloc[:, training_columns]

In [87]:
# Turn the sliced dataframe into training inputs and targets (plus the scaler
# returned by cf.train), then cast both arrays to 32-bit floats.
X_train, y_train, scaler = cf.train(new_df, parameters)
X_train, y_train = (array.astype(np.float32) for array in (X_train, y_train))

In [88]:
# Pick the index columns from `leftover` up to (but excluding)
# training_number - seq_len - forecast_len, taking every `interval`-th column,
# and transpose so that each selected column becomes a row.
#
# This selects exactly the columns obtained by first slicing
# [leftover:training_number] and then taking the first
# (training_number - seq_len - forecast_len - leftover) columns with step
# `interval`. Reading from the full `rsi` / `ema` frames, instead of
# reassigning `new_rsi` / `new_ema` from themselves, keeps this cell safe to
# re-run: a second run no longer slices and transposes an already-transformed
# frame.
sampled_columns = slice(leftover, training_number - seq_len - forecast_len, interval)
new_rsi = rsi.iloc[:, sampled_columns].transpose()
new_ema = ema.iloc[:, sampled_columns].transpose()

In [89]:
# Put the rsi and ema frames side by side (rsi columns first, then ema columns).
indices = pd.concat([new_rsi, new_ema], axis = "columns")

In [90]:
# Define the scaler and fit it on the indices ...
indices_scaler = StandardScaler()
indices_scaler.fit(indices)
# ... then rescale the same data with it.
norm_indices = indices_scaler.transform(indices)

In [91]:
# Wrap the rescaled values in a DataFrame that carries the row and column
# labels of `indices`.
norm_indices = pd.DataFrame(
    data = norm_indices,
    columns = indices.columns,
    index = indices.index,
)

In [92]:
# Sizes handed to the model builder: size of X_train's third axis, sequence
# length, dense units and LSTM units -- in that order.
input_width = X_train.shape[2]
dims = [input_width, seq_len, dense_units, lstm_units]

In [93]:
# Build the model; the last argument is the number of columns of the
# normalized indices frame.
n_index_columns = norm_indices.shape[1]
model = lstm.build_model2_reg_inputed(dims, dropout, learning_rate, n_index_columns)

> Compilation Time :  0.020089387893676758


In [100]:
# Fit the model on two inputs (the sequence data and the normalized indices),
# with validation_split = 0.25 and the learning-rate callback attached.
# NOTE(review): the number of epochs is hard-coded to 10 here; the `epochs`
# value set in the configuration cell is not used by this call.
model_inputs = [X_train, norm_indices]
history = model.fit(
    model_inputs,
    y_train,
    epochs = 10,
    batch_size = batch_size,
    callbacks = [lrate],
    validation_split = 0.25)

Train on 10948 samples, validate on 3650 samples
Epoch 1/10

Epoch 00001: LearningRateScheduler reducing learning rate to 0.1.
Epoch 2/10

Epoch 00002: LearningRateScheduler reducing learning rate to 0.1.
Epoch 3/10

Epoch 00003: LearningRateScheduler reducing learning rate to 0.1.
Epoch 4/10

Epoch 00004: LearningRateScheduler reducing learning rate to 0.1.
Epoch 5/10

Epoch 00005: LearningRateScheduler reducing learning rate to 0.1.
Epoch 6/10

Epoch 00006: LearningRateScheduler reducing learning rate to 0.1.
Epoch 7/10

Epoch 00007: LearningRateScheduler reducing learning rate to 0.1.
Epoch 8/10

Epoch 00008: LearningRateScheduler reducing learning rate to 0.1.
Epoch 9/10

Epoch 00009: LearningRateScheduler reducing learning rate to 0.1.
Epoch 10/10

Epoch 00010: LearningRateScheduler reducing learning rate to 0.1.
