In [1]:
import numpy as np
import pandas as pd
import sklearn as sk
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout, InputLayer

In [3]:
prices = pd.read_csv('../Functions/csv_files/prices/train.csv')
technical_30 = pd.read_csv('../Functions/csv_files/technical_data/train_period_30.csv')

In [4]:
print(len(prices))
print(len(technical_30))

3054769
1202688


Here, we would like to implement a multivariate time series LSTM model that utilizes historical prices along with technical indicators to predict prices.

In [5]:
t = pd.merge(technical_30, prices, how='inner', on=['date', 'ticker'])
t['date'] = pd.to_datetime(t['date'], format='%Y-%m-%d')

In [6]:
t.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1202688 entries, 0 to 1202687
Data columns (total 15 columns):
 #   Column          Non-Null Count    Dtype         
---  ------          --------------    -----         
 0   date            1202688 non-null  datetime64[ns]
 1   sma             1202688 non-null  float64       
 2   ema             1202688 non-null  float64       
 3   wma             1202688 non-null  float64       
 4   volatility      1202688 non-null  float64       
 5   k_values        1202688 non-null  float64       
 6   d_values        1202688 non-null  float64       
 7   slope           1202688 non-null  float64       
 8   ticker          1202688 non-null  object        
 9   open            1202688 non-null  float64       
 10  high            1202688 non-null  float64       
 11  low             1202688 non-null  float64       
 12  close           1202688 non-null  float64       
 13  adjusted_close  1202688 non-null  float64       
 14  volume          12

In [7]:
"""
We need to reshape the data to create a windowed dataframe
"""
from sklearn import preprocessing

class Pipeline:
	def __init__(self, dataframe):
		self.dataframe = dataframe
		self.X = []
		self.Y = []
		self.n = 100

		if len(dataframe) <= self.n:
			raise Exception("Dataframe is too small for the given n!")
		else:
			self.transform()

	def get_x(self):
		return np.array(self.X)

	def get_y(self):
		return np.array(self.Y)

	def transform(self):
		for price in range(self.n + 1, len(self.dataframe)):
			self.split(self.dataframe.iloc[price - self.n - 1: price])

	def split(self, subset):
		# all items have to be the same ticker
		if len(subset['ticker'].unique().tolist()) > 1:
			return
		subset = subset.drop(columns=['ticker'])
		x = subset.head(self.n)
		y = subset.tail(1)['adjusted_close']
		x.index = x.date
		x = x.drop(columns=['date'])
		self.X.append(x.to_numpy())
		self.Y.append(y.values[0])

In [8]:
temp = Pipeline(t.iloc[10000:70000])

In [12]:
np.savetxt('x_training.txt', temp.get_x())
np.savetxt('y.testing.txt', temp.get_y())

ValueError: Expected 1D or 2D array, got 3D array instead

In [13]:
model = Sequential()
model.add(InputLayer((100, 13)))
model.add(LSTM(64))
model.add(Dense(8, 'relu'))
model.add(Dense(1, 'linear'))
model.compile(optimizer='Adam', loss='mean_squared_error')
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 64)                19968     
                                                                 
 dense (Dense)               (None, 8)                 520       
                                                                 
 dense_1 (Dense)             (None, 1)                 9         
                                                                 
Total params: 20,497
Trainable params: 20,497
Non-trainable params: 0
_________________________________________________________________


In [14]:
model.fit(temp.get_x(), temp.get_y(), epochs = 15)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
 418/1838 [=====>........................] - ETA: 1:35 - loss: 3991.6074

KeyboardInterrupt: 

In [15]:
temp.get_x().size

76438700