In [5]:
import yfinance as yf
import numpy as np
import pandas as pd
import tensorflow as tf

In [6]:
# Download daily ('1d') GOOGL price history starting 2018-01-01.
# No `end` is passed, so the date range is not pinned by this call —
# NOTE(review): presumably it runs to the latest available day, which would
# make the row counts shown further down depend on when the cell is executed.
data = yf.download('GOOGL', start="2018-01-01", interval='1d')

[*********************100%***********************]  1 of 1 completed


In [7]:
# Preview the first rows of the downloaded frame.
data.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2018-01-02,1053.02002,1075.97998,1053.02002,1073.209961,1073.209961,1588300
2018-01-03,1073.930054,1096.099976,1073.430054,1091.52002,1091.52002,1565900
2018-01-04,1097.089966,1104.079956,1094.26001,1095.76001,1095.76001,1302600
2018-01-05,1103.449951,1113.579956,1101.800049,1110.290039,1110.290039,1512500
2018-01-08,1111.0,1119.160034,1110.0,1114.209961,1114.209961,1232200


In [8]:
# Order rows by their index; rebinding the name (rather than sorting in
# place) keeps this cell safe to re-run.
data = data.sort_index()

In [9]:
# Count missing values per column.
data.isnull().sum()

Open         0
High         0
Low          0
Close        0
Adj Close    0
Volume       0
dtype: int64

In [10]:
# Summary statistics for every column.
data.describe()

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume
count,895.0,895.0,895.0,895.0,895.0,895.0
mean,1408.476982,1423.3864,1394.170837,1409.444049,1409.444049,1810584.0
std,392.273724,395.116438,390.097796,392.994833,392.994833,843275.5
min,984.320007,1012.119995,977.659973,984.669983,984.669983,465600.0
25%,1132.235046,1143.960022,1117.349976,1131.125,1131.125,1272950.0
50%,1241.609985,1248.72998,1227.079956,1240.030029,1240.030029,1584200.0
75%,1518.015015,1538.130005,1506.055054,1518.695007,1518.695007,2078300.0
max,2572.97998,2586.530029,2558.290039,2564.73999,2564.73999,6658900.0


In [11]:
import plotly.graph_objects as go

# Line chart of the closing price against the frame's index.
fig = go.Figure()
fig.add_trace(go.Scatter(x=data.index, y=data['Close'], mode='lines'))
# x carries the index (dates) and y the closing price, so the axis titles
# must read time on x and price on y (they were swapped before).
fig.update_layout(height=500, width=900, xaxis_title="Time", yaxis_title="Price")
fig.show()

In [13]:
from sklearn.preprocessing import MinMaxScaler

In [14]:
import pickle
from tqdm.notebook import tnrange

In [15]:
# Keep only the two columns used as model features.
data = data.loc[:, ['Close', 'Volume']]

In [25]:
# Number of rows whose index is on or after 2021-01-01.
test_length = int((data.index >= '2021-01-01').sum())

In [21]:
def creates_features_and_targets(data, feature_length, target_column='Close'):
  """Build sliding-window features and next-row targets from a DataFrame.

  Window ``i`` holds rows ``i .. i+feature_length-1`` of every column of
  ``data``; its target is the ``target_column`` value of row
  ``i+feature_length``, i.e. the row immediately after the window.

  Parameters
  ----------
  data : pandas.DataFrame
      Source rows, already in the order they should be windowed.
  feature_length : int
      Number of consecutive rows per window (expected to be >= 0).
  target_column : str, default 'Close'
      Column whose next-row value becomes the target.

  Returns
  -------
  X : numpy.ndarray, shape (n_windows, feature_length, n_columns)
  Y : numpy.ndarray, shape (n_windows,)
      ``n_windows`` is ``len(data) - feature_length``, or 0 when ``data`` has
      too few rows; the empty result keeps the same number of dimensions.
  """
  # Convert to arrays once up front instead of re-slicing the DataFrame and
  # re-reading the target column on every iteration.
  values = data.values
  targets = data[target_column].values

  n_windows = max(len(data) - feature_length, 0)
  X = np.empty((n_windows, feature_length, values.shape[1]), dtype=values.dtype)
  Y = np.empty(n_windows, dtype=targets.dtype)
  for i in tnrange(n_windows):
    X[i] = values[i:i+feature_length]
    Y[i] = targets[i+feature_length]
  return X,Y

In [22]:
# Build 32-row windows and their next-row Close targets. The window length
# here has to agree with input_shape=(32, 2) used when the model is defined.
X,Y = creates_features_and_targets(data,32)

HBox(children=(FloatProgress(value=0.0, max=863.0), HTML(value='')))




In [24]:
# Sanity-check the array shapes: X is (windows, window length, columns), Y is (windows,).
X.shape, Y.shape

((863, 32, 2), (863,))

In [26]:
# Hold out the last `test_length` windows as the test set. An explicit split
# index is used because negative slicing misbehaves when test_length == 0:
# X[:-0] is empty and X[-0:] is the whole array. Clamping at 0 preserves the
# previous result when test_length exceeds the number of windows.
split_at = max(len(X) - test_length, 0)
xtrain, xtest = X[:split_at], X[split_at:]
ytrain, ytest = Y[:split_at], Y[split_at:]

In [32]:
class MultiDimensionScalar():
  """Scale a 3-D array one last-axis slice at a time.

  For every index ``i`` of the last axis, the 2-D slice ``x[:, :, i]`` is
  handed to its own scaler object, so each slice keeps independent scaling
  parameters. The fitted scalers are stored in ``self.scalers`` in slice
  order.

  Parameters
  ----------
  scaler_factory : callable, optional
      Zero-argument callable returning an object with ``fit_transform`` and
      ``transform`` methods. Defaults to ``MinMaxScaler``.
  """
  def __init__(self, scaler_factory=None):
    self.scaler_factory = scaler_factory
    self.scalers = []

  @staticmethod
  def _as_float_copy(x):
    """Return a floating-point copy of ``x`` so the caller's array is untouched."""
    x = np.asarray(x)
    # Keep an existing float dtype; promote anything else to float64 so the
    # scaled values are not truncated when written back.
    dtype = x.dtype if np.issubdtype(x.dtype, np.floating) else np.float64
    return np.array(x, dtype=dtype, copy=True)

  def fit_transform(self,x):
    """Fit one scaler per last-axis slice of ``x`` and return the scaled copy.

    Any scalers from a previous fit are discarded, so calling this twice does
    not leave stale scalers at the front of ``self.scalers``.
    """
    scaled = self._as_float_copy(x)
    # getattr keeps instances created before `scaler_factory` existed usable.
    factory = getattr(self, 'scaler_factory', None) or MinMaxScaler
    fitted = []
    for i in range(scaled.shape[2]):
      scaler = factory()
      scaled[:,:,i] = scaler.fit_transform(scaled[:,:,i])
      fitted.append(scaler)
    self.scalers = fitted
    return scaled

  def transform(self,x):
    """Scale ``x`` with the already-fitted scalers and return the scaled copy.

    Raises
    ------
    IndexError
        If ``x`` has more last-axis slices than there are fitted scalers
        (including the case where ``fit_transform`` was never called).
    """
    scaled = self._as_float_copy(x)
    total_dims = scaled.shape[2]
    if total_dims > len(self.scalers):
      raise IndexError(
        f"input has {total_dims} feature slices but only "
        f"{len(self.scalers)} fitted scalers; call fit_transform first")
    for i in range(total_dims):
      scaled[:,:,i] = self.scalers[i].transform(scaled[:,:,i])
    return scaled


In [33]:
# Fit the feature scalers on the training windows only, then apply those same
# fitted scalers to the test windows.
Feature_Scaler = MultiDimensionScalar()
xtrain = Feature_Scaler.fit_transform(xtrain)
xtest = Feature_Scaler.transform(xtest)

In [34]:
# Scale the targets with a scaler fitted on the training targets only.
# reshape(-1,1) turns each 1-D target vector into a single-column 2-D array
# before it is passed to the scaler.
target_Scaler = MinMaxScaler()
ytrain = target_Scaler.fit_transform(ytrain.reshape(-1,1))
ytest = target_Scaler.transform(ytest.reshape(-1,1))

In [35]:
def save_object(obj, name: str, str=None):
  """Pickle ``obj`` to the file ``<name>.pck``.

  Parameters
  ----------
  obj : object
      Any picklable object.
  name : str
      Output path without the ``.pck`` extension.
  str : ignored
      Unused. The original signature declared it by accident (``name, str``
      where ``name: str`` was meant), which forced callers to pass a third
      argument. It is now optional so two-argument calls work while existing
      three-argument calls keep working.
  """
  # The context manager closes the file even if pickling raises.
  with open(f"{name}.pck","wb") as pickle_out:
    pickle.dump(obj,pickle_out)

def load_object(name:str):
  """Load and return the object pickled in ``<name>.pck``.

  Parameters
  ----------
  name : str
      Path without the ``.pck`` extension.

  Returns
  -------
  object
      Whatever object was stored in the file.
  """
  # NOTE: unpickling can execute arbitrary code; only load files that this
  # notebook (or another trusted source) produced.
  # The context manager closes the handle, which was previously left open.
  with open(f"{name}.pck","rb") as pickle_in:
    return pickle.load(pickle_in)
  

In [36]:
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
# Write weights only to 'best_weights.h5', and only when val_loss improves.
save_best = ModelCheckpoint('best_weights.h5',monitor='val_loss',save_best_only=True,save_weights_only=True)
# Multiply the learning rate by 0.25 after 5 epochs without val_loss
# improvement, never going below 1e-5.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.25,patience=5,min_lr=0.00001,verbose=1)


In [37]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Bidirectional, LSTM

# Recurrent regressor: a bidirectional LSTM that returns the full sequence,
# a second LSTM that collapses it, then a small dense head ending in a single
# linear output.
# NOTE(review): input_shape is given to the inner LSTM rather than to the
# Bidirectional wrapper — confirm Keras picks it up for this layer.
model = Sequential([
  Bidirectional(LSTM(512, return_sequences=True, recurrent_dropout=0.1, input_shape=(32, 2))),
  LSTM(256, recurrent_dropout=0.1),
  Dropout(0.3),
  Dense(64, activation='elu'),
  Dropout(0.3),
  Dense(32, activation='elu'),
  Dense(1, activation='linear'),
])




In [38]:
# Plain SGD with a fixed starting learning rate of 0.002; the model is
# trained on mean squared error.
optimizer = tf.keras.optimizers.SGD(learning_rate = 0.002)
model.compile(loss='mse', optimizer=optimizer)

In [None]:
# Train for 10 epochs, one sample per batch, with shuffling disabled so
# samples are fed in their stored order.
# NOTE(review): the test split is passed as validation_data, so both the
# checkpoint selection and the learning-rate schedule are driven by test
# loss — consider a separate validation split.
history = model.fit(xtrain, ytrain, epochs = 10, verbose=1, batch_size=1, shuffle=False, validation_data=(xtest, ytest),callbacks=[reduce_lr, save_best])

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10