In [10]:
# import initial libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
from pandas_datareader import data as wb
import datetime
import math
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

In [16]:
# import market returns
apple = wb.DataReader('AAPL', data_source="yahoo", start= '2010/01/01', end='2020/04/10')
apple = apple[['Volume', 'Adj Close']].resample('M').mean()
apple = apple.pct_change()

In [3]:
# import macro data from fed
start = datetime.datetime(2010, 1, 1)
end = datetime.datetime(2020, 4, 1)
gdp = wb.DataReader("GDP", "fred", start, end).resample('M').mean()
industrial_production = wb.DataReader("INDPRO", "fred", start, end).resample('M').mean()
unemployment = wb.DataReader("UNRATE", "fred", start, end).resample('M').mean()
balance_sheet = wb.DataReader("WALCL", "fred", start, end).resample('M').mean()
consumption = wb.DataReader("PCE", "fred", start, end).resample('M').mean()
m1 = wb.DataReader("M1", "fred", start, end).resample('M').mean()
m2 = wb.DataReader("M2", "fred", start, end).resample('M').mean() 

# concat macro data export to excel
macro = pd.concat([gdp, industrial_production, unemployment, balance_sheet, consumption, m1, m2], axis = 'columns')

In [4]:
# import above macro data 
path = Path('macro_data.CSV')
macro = pd.read_csv(path, index_col ='Date', infer_datetime_format=True)
macro.index = pd.DatetimeIndex(macro.index)

In [5]:
# import ffm 5 factor
path = Path('F-F_Research_Data_5_Factors_2x3_daily.CSV')
ffm_5 = pd.read_csv(path, index_col ='Date', infer_datetime_format=True)
ffm_5.index = pd.DatetimeIndex(ffm_5.index)
ffm_5 = ffm_5.resample('M').mean()

In [15]:
# cleaning data 
data = pd.concat([macro,ffm_5,apple], axis = 'columns')
data.dropna(inplace = True)

In [7]:
data

Unnamed: 0_level_0,GDP,INDPRO,UNRATE,WALCL,PCE,M1,M2,Mkt-RF,SMB,HML,RMW,CMA,RF,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2010-02-28,1.472135e+13,91.99,0.10,2.267425e+12,1.003060e+13,1.694700e+12,8.489650e+12,0.183158,0.073158,0.165263,-0.016316,0.071579,0.000,-0.070219,-0.030636
2010-03-31,1.472135e+13,92.60,0.10,2.298183e+12,1.008910e+13,1.710500e+12,8.492000e+12,0.266522,0.074348,0.087826,-0.023913,0.070435,0.000,0.012893,0.057741
2010-04-30,1.492610e+13,92.94,0.10,2.328737e+12,1.011290e+13,1.693850e+12,8.517325e+12,0.099048,0.235714,0.133810,0.046667,0.079524,0.001,0.233177,0.039293
2010-05-31,1.492610e+13,94.30,0.10,2.336800e+12,1.013100e+13,1.712160e+12,8.574680e+12,-0.390000,0.016000,-0.111000,0.064500,-0.004500,0.001,0.148224,-0.060347
2010-06-30,1.492610e+13,94.44,0.09,2.337482e+12,1.015140e+13,1.732450e+12,8.598225e+12,-0.247727,-0.112273,-0.208636,-0.011818,-0.070000,0.001,-0.215926,-0.037066
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2019-08-31,2.154254e+13,109.85,0.04,3.773093e+12,1.468240e+13,3.850975e+12,1.492572e+13,-0.107273,-0.154091,-0.235455,0.017727,-0.046818,0.007,0.131486,-0.032930
2019-09-30,2.154254e+13,109.47,0.04,3.808398e+12,1.470780e+13,3.899480e+12,1.501772e+13,0.072500,0.018500,0.332000,0.098000,0.175500,0.009,0.022514,0.029235
2019-10-31,2.172912e+13,109.01,0.04,3.970156e+12,1.474540e+13,3.920325e+12,1.514782e+13,0.092174,0.010000,-0.086957,0.019565,-0.039565,0.007,-0.088454,-0.001503
2019-11-30,2.172912e+13,110.03,0.04,4.042612e+12,1.479250e+13,3.948325e+12,1.524925e+13,0.191000,0.022500,-0.090500,-0.072000,-0.060500,0.006,0.070166,0.042728


In [86]:
# import libraries
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

In [87]:
# create (X) and (y) features
X = data.iloc[:,0:14].values
y = data['Adj Close'].values

In [88]:
# scaling info
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler().fit(X)
X = scaler.transform(X)

In [91]:
def window_data(data, window, feature_col_number, target_col_number):
    """
    This function accepts the column number for the features (X) and the target (y).
    It chunks the data up with a rolling window of Xt - window to predict Xt.
    It returns two numpy arrays of X and y.
    """
    X = []
    y = []
    for i in range(len(data) - window - 1):
        features = data.iloc[i : (i + window), feature_col_number]
        target = data.iloc[(i + window), target_col_number]
        X.append(features)
        y.append(target)
    return np.array(X), np.array(y).reshape(-1, 1)

In [92]:
window_size = 14

feature_column = 14
target_column = 1
X, y = window_data(data, window_size, feature_column, target_column)


In [93]:
from sklearn.model_selection import train_test_split
# Use 70% of the data for training and the remainder for testing
split = int(0.7 * len(X))
X_train = X[: split - 1]
X_test = X[split:]
y_train = y[: split - 1]
y_test = y[split:]


In [94]:
# Reshape the features for the model
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

In [95]:
# Define the LSTM RNN model.
model = Sequential()

layer_1 = 10
layer_2 = 5
number_units = 14

# Layer 1
model.add(LSTM(
    units=number_units,
    return_sequences=True,
    input_shape=(X_train.shape[1], 1))
    )

# Layer 2
model.add(LSTM(units=number_units, return_sequences=True))

# Layer 3
model.add(LSTM(units=number_units))

# Output layer
model.add(Dense(1))

In [96]:
# Compile the model
model.compile(optimizer="adam", loss="mean_squared_error")

In [97]:
# Summarize the model
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 14, 14)            896       
_________________________________________________________________
lstm_1 (LSTM)                (None, 14, 14)            1624      
_________________________________________________________________
lstm_2 (LSTM)                (None, 14)                1624      
_________________________________________________________________
dense (Dense)                (None, 1)                 15        
Total params: 4,159
Trainable params: 4,159
Non-trainable params: 0
_________________________________________________________________


In [99]:
# Train the model
model.fit(X_train, y_train, epochs=10, batch_size=1, verbose=1)

Train on 71 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x1a3fa5f1d0>

In [100]:
model.evaluate(X_test, y_test)



6807.40380859375

In [101]:
# Make some predictions
predicted = model.predict(X_test)