In [14]:
from alpaca.data.historical import CryptoHistoricalDataClient
from alpaca.data.requests import CryptoBarsRequest
from alpaca.data.timeframe import TimeFrame, TimeFrameUnit
from alpaca.trading.client import TradingClient
from alpaca.trading.requests import MarketOrderRequest
from alpaca.trading.enums import OrderSide, TimeInForce
from dateutil.relativedelta import relativedelta
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Activation, Dense, Dropout, LSTM
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import mean_absolute_error
import logging
import asyncio
import os
import datetime
from dotenv import load_dotenv

api_key = os.environ['ALPACA_PAPER_TRADING_KEY_ID']
secret_key = os.environ['ALPACA_PAPER_TRADING_SECRET_KEY']
trading_client = TradingClient(api_key = api_key, secret_key=secret_key)
crypto_historical_data_client = CryptoHistoricalDataClient()

In [21]:
crypto_historical_data_client = CryptoHistoricalDataClient()
product = 'BTC/USD'
start_time = datetime.datetime.utcnow() - datetime.timedelta(minutes = 15000)
bars_request = CryptoBarsRequest(
    symbol_or_symbols=product, 
    start = start_time, 
    timeframe=TimeFrame(amount = 15, unit = TimeFrameUnit.Minute)
)

df = crypto_historical_data_client.get_crypto_bars(bars_request).df
df.drop_duplicates(inplace=True)


In [22]:
df['log_close'] = df['close'].apply(np.log)
df['log_return'] = df['log_close'].diff()
df.dropna(inplace = True)
return_series = df['log_return'].values

We have our return series. Next, we are going to split our data into training set and test set. The dataset we have here is the past 100 period returns and then the target variable is the return in the next period


In [37]:
# After we have this, we need to create an LSTM model with this
num_neurons = 20
dropout = 0.3

model = Sequential() 
model.add(LSTM(
    num_neurons, 
    input_shape = (100, 1), 
    return_sequences = True
))

model.add(Dropout(dropout))
model.add(LSTM(num_neurons, return_sequences=True))
model.add(Dropout(dropout))
model.add(LSTM(num_neurons))
model.add(Dropout(dropout))

model.add(Dense(units = 1))
model.add(Activation('linear'))
model.compile(loss = 'mse', optimizer = 'adam')

In [40]:
scaler = MinMaxScaler() 
scaled_data = scaler.fit_transform(X = return_series.reshape(-1, 1))

In [41]:
look_back = 100
x = [] 
y = []

for i in range(look_back, return_series.shape[0]): 
    x_data = scaled_data[i- look_back:i]
    y_data = scaled_data[i]

    x.append(x_data)
    y.append(y_data)

x_train = np.array(x) 
y_train = np.array(y)

In [76]:
model.fit(x_train, y_train, epochs = 20, batch_size = 32, verbose = 1, shuffle = False)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x2266c297af0>

Now that we have the model fitted, we can take the data and make some predictions. We have to supply the same input format. 

In [58]:
x_pred = scaled_data[-look_back]

In [63]:
x_pred.shape

(1,)

In [60]:
x_pred_reshaped = np.reshape(x_pred, (1, x_pred.shape[0]))

In [62]:
x_pred_reshaped.shape

(1, 1)

In [65]:
prediction = model.predict(x_pred_reshaped).squeeze()



In [70]:
pred = np.array([float(prediction)])
pred = np.reshape(pred, (pred.shape[0], 1))

In [73]:
true_prediction = scaler.inverse_transform(pred)

In [74]:
true_prediction

array([[-0.02213778]])

In [75]:
# Then we turn this into a simple return
np.exp(true_prediction) - 1

array([[-0.02189454]])

We can see here that the neural net is predicting that in the next 15 minutes, there should be a negative 2.2% return on bitcoin. Maybe we would go short here and see what happens over the next 15 minutes. 
One thing that I don't fully understand is how the inputs work into the neural net. We trained it by giving it the 100 returns and having the next period return. Why are we giving it only one data point here, should we not be giving it the full 100 from the past. 