In [None]:
!pip install yfinance

Collecting yfinance
  Downloading yfinance-0.1.63.tar.gz (26 kB)
Collecting lxml>=4.5.1
  Downloading lxml-4.6.3-cp37-cp37m-manylinux2014_x86_64.whl (6.3 MB)
[K     |████████████████████████████████| 6.3 MB 6.0 MB/s 
Building wheels for collected packages: yfinance
  Building wheel for yfinance (setup.py) ... [?25l[?25hdone
  Created wheel for yfinance: filename=yfinance-0.1.63-py2.py3-none-any.whl size=23918 sha256=050d355eab13df2aac01961622962c01470463cc5d7743fa43540d01cd3f8fe8
  Stored in directory: /root/.cache/pip/wheels/fe/87/8b/7ec24486e001d3926537f5f7801f57a74d181be25b11157983
Successfully built yfinance
Installing collected packages: lxml, yfinance
  Attempting uninstall: lxml
    Found existing installation: lxml 4.2.6
    Uninstalling lxml-4.2.6:
      Successfully uninstalled lxml-4.2.6
Successfully installed lxml-4.6.3 yfinance-0.1.63


In [None]:
## Importing Libraries 
import yfinance as yf
import numpy as np
import pandas as pd
import tensorflow as tf

In [None]:
data = yf.download("AAPL", start="2018-01-01", interval = "1d")

[*********************100%***********************]  1 of 1 completed


In [None]:
data.shape

(928, 6)

In [None]:
data.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2018-01-02,42.540001,43.075001,42.314999,43.064999,41.248272,102223600
2018-01-03,43.1325,43.637501,42.990002,43.057499,41.241089,118071600
2018-01-04,43.134998,43.3675,43.02,43.2575,41.432659,89738400
2018-01-05,43.360001,43.842499,43.262501,43.75,41.904385,94640000
2018-01-08,43.587502,43.9025,43.482498,43.587502,41.748737,82271200


**Understanding Trends within the Data**

In [None]:
## Sort the data based on the indexes
data.sort_index(inplace=True)

In [None]:
# Remove all duplicate indexes
data = data.loc[~data.index.duplicated(keep="first")]

In [None]:
data.tail()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-09-01,152.830002,154.979996,152.339996,152.509995,152.509995,80313700
2021-09-02,153.869995,154.720001,152.399994,153.649994,153.649994,71115500
2021-09-03,153.759995,154.630005,153.089996,154.300003,154.300003,57808700
2021-09-07,154.970001,157.259995,154.389999,156.690002,156.690002,82278300
2021-09-08,156.979996,157.039993,153.975006,155.110001,155.110001,71558232


In [None]:
# Checking missing values
data.isna().sum()

Open         0
High         0
Low          0
Close        0
Adj Close    0
Volume       0
dtype: int64

In [None]:
data.describe()

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume
count,928.0,928.0,928.0,928.0,928.0,928.0
mean,77.722279,78.616051,76.861993,77.778898,76.809941,127102000.0
std,35.781229,36.1812,35.32376,35.773955,36.165123,58952230.0
min,35.994999,36.43,35.5,35.547501,34.559078,45448000.0
25%,47.6125,47.987501,47.31875,47.6675,46.142632,87318900.0
50%,61.158749,62.23625,59.952499,61.045,60.221018,111882200.0
75%,116.285,117.645002,114.589996,115.992502,115.442596,150380200.0
max,156.979996,157.259995,154.389999,156.690002,156.690002,426510000.0


In [None]:
import plotly.graph_objects as go

In [None]:
# Check the trend in closing values
fig = go.Figure()

fig.add_trace(go.Scatter(x=data.index, y = data["Close"], mode="lines"))
fig.update_layout(height=500, width=900, xaxis_title="Date", yaxis_title="Close")
fig.show()

In [None]:
# Let's visualize the volume values
fig = go.Figure()

fig.add_trace(go.Scatter(x=data.index, y = data["Volume"], mode="lines"))
fig.update_layout(height=500, width=900, xaxis_title="Date", yaxis_title="Volume")
fig.show()

**Data preparation**

In [None]:
from sklearn.preprocessing import MinMaxScaler
import pickle
from tqdm.notebook import tnrange

In [None]:
# Filter only required data
df_data = data[["Close", "Volume"]]

In [None]:
df_data.head()

Unnamed: 0_level_0,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2018-01-02,43.064999,102223600
2018-01-03,43.057499,118071600
2018-01-04,43.2575,89738400
2018-01-05,43.75,94640000
2018-01-08,43.587502,82271200


In [None]:
## Setting the test-set length
test_length = df_data[(df_data.index >="2021-01-01")].shape[0]

In [None]:
test_length

172

In [None]:
def createFeaturesTargets(data, features_length):
  """Build sliding-window samples for supervised sequence learning.

  Window ``i`` holds rows ``i .. i + features_length - 1`` of ``data`` (all
  columns) and is paired with the ``"Close"`` value of row
  ``i + features_length``, i.e. the row immediately after the window.

  Args:
    data: DataFrame that contains a ``"Close"`` column; rows are consumed in
      their existing order.
    features_length: number of consecutive rows in each window.

  Returns:
    Tuple ``(X, y)`` where ``X`` has shape
    ``(n_windows, features_length, n_columns)`` and ``y`` has shape
    ``(n_windows,)``, with ``n_windows = max(len(data) - features_length, 0)``.
    When ``data`` is too short for a single window the arrays are empty but
    still carry the trailing dimensions, so ``X.shape[2]`` stays valid.
  """
  # Materialise the underlying arrays once instead of on every iteration.
  values = data.values
  close = data["Close"].values
  n_windows = max(len(data) - features_length, 0)

  X = np.empty((n_windows, features_length, values.shape[1]), dtype=values.dtype)
  y = np.empty((n_windows,), dtype=close.dtype)

  for i in tnrange(n_windows):
    X[i] = values[i : i + features_length]
    y[i] = close[i + features_length]

  return X, y

In [None]:
X, y = createFeaturesTargets(df_data, 32)

  0%|          | 0/896 [00:00<?, ?it/s]

In [None]:
# let's see the shape of the features and target
X.shape, y.shape

((896, 32, 2), (896,))

In [None]:
X_train, X_test, y_train, y_test = X[:-test_length], X[-test_length:], y[:-test_length], y[-test_length:]

In [None]:
## Training set shape
X_train.shape, y_train.shape

((724, 32, 2), (724,))

In [None]:
# Test set shape
X_test.shape, y_test.shape

((172, 32, 2), (172,))

In [None]:
## Create a scaler to scale vectors with multiple dimensions
## We are writing our own scaler because the sklearn Scaler just supports 
## 2 dimensions whereas our data is 3 dimensions

class MultiDimensionScaler():
  """Min-max scaler for 3-D arrays, using one ``MinMaxScaler`` per feature.

  For an input of shape ``(n_samples, window_length, n_features)`` a separate
  ``MinMaxScaler`` is fitted on every 2-D slice ``X[:, :, i]``. Because each
  slice is handed to the scaler as ``(n_samples, window_length)``, every
  position inside the window is scaled as its own column.

  Both methods return a new floating-point array and leave the argument
  untouched, so re-running a scaling cell does not scale the data twice and
  integer inputs are not truncated.
  """

  def __init__(self):
    # One fitted MinMaxScaler per index along the last axis of X.
    self.scalers = []

  @staticmethod
  def _as_float_copy(X):
    """Return a floating-point copy of ``X``; raise ValueError unless 3-D."""
    X = np.asarray(X)
    if X.ndim != 3:
      raise ValueError(f"expected a 3-D array, got shape {X.shape}")
    # Keep an existing float dtype; promote everything else to float64.
    dtype = X.dtype if np.issubdtype(X.dtype, np.floating) else np.float64
    return X.astype(dtype, copy=True)

  def fit_transform(self, X):
    """Fit one scaler per feature on ``X`` and return the scaled copy.

    Any scalers from a previous call are discarded, so the instance always
    reflects the most recent fit.
    """
    scaled = self._as_float_copy(X)
    fitted = []
    for i in range(scaled.shape[2]):
      scaler = MinMaxScaler()
      scaled[:, :, i] = scaler.fit_transform(scaled[:, :, i])
      fitted.append(scaler)

    # Swap in the new scalers only once every feature was fitted successfully.
    self.scalers = fitted
    return scaled

  def transform(self, X):
    """Scale ``X`` with the previously fitted scalers and return the copy.

    Raises:
      ValueError: if ``X`` is not 3-D or its last axis does not match the
        number of fitted scalers (including the not-yet-fitted case).
    """
    scaled = self._as_float_copy(X)
    if scaled.shape[2] != len(self.scalers):
      raise ValueError(
          f"X has {scaled.shape[2]} features but {len(self.scalers)} scalers are fitted"
      )

    for i, scaler in enumerate(self.scalers):
      scaled[:, :, i] = scaler.transform(scaled[:, :, i])

    return scaled


In [None]:
feature_scaler = MultiDimensionScaler()
X_train = feature_scaler.fit_transform(X_train)
X_test = feature_scaler.transform(X_test)

In [None]:
target_scaler = MinMaxScaler()
y_train = target_scaler.fit_transform(y_train.reshape(-1,1))
y_test = target_scaler.transform(y_test.reshape(-1,1))

In [None]:
def save_object(obj, name:str):
  """Pickle ``obj`` into the file ``<name>.pkl``, replacing any existing file."""
  target_path = f"{name}.pkl"
  with open(target_path, "wb") as sink:
    pickle.Pickler(sink).dump(obj)

def load_object(name:str):
  """Load and return the object pickled in ``<name>.pkl``.

  Args:
    name: file path without the ``.pkl`` suffix.

  Returns:
    The unpickled object.
  """
  # NOTE: unpickling can execute arbitrary code; only load files that were
  # written by a trusted source.
  with open(f"{name}.pkl", "rb") as f:
    return pickle.load(f)

**Forecasting with LSTM**

In [None]:
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau

In [None]:
## Defining callbacks
save_best_model = tf.keras.callbacks.ModelCheckpoint("best_weights.h5", monitor="val_loss", save_best_only=True, save_weights_only=True)
reduce_lr = ReduceLROnPlateau(monitor="val_loss", factor=0.25, patience=5, min_lr=1e-05, verbose=1)

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, Bidirectional

In [None]:
model = Sequential()

# Input windows are 32 time steps x 2 features (Close, Volume).
# The bidirectional LSTM returns the full sequence so that the second LSTM
# can consume it; the second LSTM feeds the dense head.
model.add(Bidirectional(LSTM(512, return_sequences=True, recurrent_dropout=0.1), input_shape=(32,2)))
model.add(LSTM(256, recurrent_dropout=0.1))
model.add(Dropout(0.3))
model.add(Dense(64, activation="elu")) ## ELU: identity for x > 0, alpha * (exp(x) - 1) for x <= 0
model.add(Dropout(0.3))
model.add(Dense(64, activation="elu"))
# Single linear unit: the (scaled) close-price regression output.
model.add(Dense(1, activation="linear"))



In [None]:
## Optimizer
optimizer = tf.keras.optimizers.SGD(learning_rate=2e-03)
model.compile(loss="mse", optimizer = optimizer)

In [None]:
# Train the model
# NOTE(review): validation_data is the same (X_test, y_test) split that is
# later passed to model.predict for the reported plots, and both callbacks
# monitor "val_loss". The checkpointed "best" weights and the learning-rate
# schedule are therefore selected on the test set; consider holding out a
# separate validation slice of the training data instead.
history = model.fit(
    X_train, y_train,
    epochs = 10,
    batch_size = 1,
    verbose=1,
    # Windows are in chronological order; shuffle=False keeps that order.
    shuffle=False,
    validation_data = (X_test, y_test),
    callbacks = [reduce_lr, save_best_model]
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


**Visualizing the Predictions**

In [None]:
## load the best weights
model.load_weights("best_weights.h5")

In [None]:
predictions = model.predict(X_test)

In [None]:
predictions = target_scaler.inverse_transform(predictions)
actual = target_scaler.inverse_transform(y_test)

In [None]:
predictions.shape

(172, 1)

In [None]:
predictions = np.squeeze(predictions, axis=1)
actual = np.squeeze(actual, axis=1)

In [None]:
# Plotting the actual data and the prediction

fig = go.Figure()

fig.add_trace(go.Scatter(x=data.index[-test_length:], y=actual, mode="lines", name="Real data"))
fig.add_trace(go.Scatter(x=data.index[-test_length:], y=predictions, mode="lines", name="Predicted data"))
fig.show()

**Visualizing predictions on the whole dataset**

In [None]:
total_features = np.concatenate((X_train, X_test), axis=0)

In [None]:
total_targets = np.concatenate((y_train, y_test), axis=0)

In [None]:
predictions_2 = model.predict(total_features)

In [None]:
predictions_2 = target_scaler.inverse_transform(predictions_2)
actual_2 = target_scaler.inverse_transform(total_targets)

In [None]:
predictions_2 = np.squeeze(predictions_2, axis=1)
actual_2 = np.squeeze(actual_2, axis=1)

In [None]:
# Plotting the actual data and the prediction
# The first rows of `data` only ever serve as input history for the first
# window, so there are fewer targets than dates. Align the x-axis with the
# trailing len(actual_2) dates instead of the full index.
plot_dates = data.index[-len(actual_2):]

fig = go.Figure()

fig.add_trace(go.Scatter(x=plot_dates, y=actual_2, mode="lines", name="Real data"))
fig.add_trace(go.Scatter(x=plot_dates, y=predictions_2, mode="lines", name="Predicted data"))
fig.show()

**Scraping Extra features for modeling**

In [None]:
import requests

In [None]:
#response = requests.get("https://www.alphavantage.co/query?function=RSI&symbol=AAPL&interval=daily&time_period=5&series_type=close&apikey=43T9T17VCV2ME4SM")
#response = response.json()

In [None]:
#response_2 = requests.get("https://www.alphavantage.co/query?function=BBANDS&symbol=AAPL&interval=daily&time_period=5&series_type=close&apikey=43T9T17VCV2ME4SM")
#response_2 = response_2.json()

We can also fetch more technical indicators from Alpha Vantage, such as MACD, BBANDS, etc.

In [None]:
#response_2.keys()

In [None]:
#response.keys()

In [None]:
#df_rsi = pd.DataFrame.from_dict(response["Technical Analysis: RSI"], orient="index")

In [None]:
#df_rsi

In [None]:
#df_ema = pd.DataFrame.from_dict(response_2["Technical Analysis: EMA"], orient="index")

In [None]:
#df_bbands = pd.DataFrame.from_dict(response_2["Technical Analysis: BBANDS"], orient="index")

In [None]:
#df_bbands

In [None]:
#df_bbands.astype(np.float64)

In [None]:
#df_rsi.head()

In [None]:
#df_ema.head()

In [None]:
#df_rsi = df_rsi[df_rsi.index >= "2018-01-01"]

In [None]:
#df_ema = df_ema[df_ema.index >= "2018-01-01"]

In [None]:
#df_rsi.head()

In [None]:
#df_ema.head()

In [None]:
#df_rsi["RSI"] = df_rsi["RSI"].astype(np.float64)

In [None]:
#df_ema["EMA"] = df_ema["EMA"].astype(np.float64)

In [None]:
#df_rsi.head()

In [None]:
#df_ema.head()

In [None]:
#df_data.head()

In [None]:
#final_data = df_data.merge(df_rsi, left_index=True, right_index=True, how="inner")

In [None]:
#final_data = final_data.merge(df_ema, left_index=True, right_index=True, how="inner")

In [None]:
#final_data.head()

In [None]:
def createFinalDataframe(dataframe, list_indicator, symbol="AAPL", start="2018-01-01", api_key=None):
  """Append Alpha Vantage technical-indicator columns to ``dataframe``.

  For every indicator name the daily series is requested from Alpha Vantage,
  rows before ``start`` are dropped, all returned columns are cast to
  float64, and the result is inner-joined onto ``dataframe`` by index. Only
  dates present in every source survive the join.

  Args:
    dataframe: frame indexed by date (e.g. the Close/Volume frame).
    list_indicator: indicator function names, e.g. ``["RSI", "MACD"]``.
    symbol: ticker to query; defaults to the previously hard-coded "AAPL".
    start: ISO date string; indicator rows before it are discarded.
    api_key: Alpha Vantage key. When omitted, the ``ALPHAVANTAGE_API_KEY``
      environment variable is used.

  Returns:
    A new DataFrame with one or more extra columns per indicator.

  Raises:
    KeyError: if a response does not contain the expected
      ``"Technical Analysis: <indicator>"`` entry.
    requests.HTTPError: if the HTTP request itself fails.
  """
  import os  # local import keeps this cell self-contained

  if api_key is None:
    # NOTE(review): the literal fallback is the key that used to be embedded
    # in the URL; it is kept only so existing calls keep working. Rotate it
    # and supply the key via ALPHAVANTAGE_API_KEY instead of source control.
    api_key = os.environ.get("ALPHAVANTAGE_API_KEY", "43T9T17VCV2ME4SM")

  # The old `indicator == "BBANDS" or "MACD"` test was always truthy, so every
  # indicator already went through the "cast all columns" path. That single
  # path is kept here; it also covers single-column indicators.
  for indicator in list_indicator:
    response = requests.get(
        "https://www.alphavantage.co/query",
        params={
            "function": indicator,
            "symbol": symbol,
            "interval": "daily",
            "time_period": 5,
            "series_type": "close",
            "apikey": api_key,
        },
        timeout=30,
    )
    response.raise_for_status()
    payload = response.json()

    series_key = "Technical Analysis: " + indicator
    if series_key not in payload:
      # E.g. an error or rate-limit payload: surface what the API sent back.
      raise KeyError(
          f"{series_key!r} missing from Alpha Vantage response; got keys {list(payload)}"
      )

    df = pd.DataFrame.from_dict(payload[series_key], orient="index")
    # ISO date strings compare correctly as plain strings.
    df = df[df.index >= start]
    df = df.astype(np.float64)

    # The API keys rows by date strings; convert explicitly when joining onto
    # a DatetimeIndex instead of relying on implicit coercion during the join.
    if isinstance(dataframe.index, pd.DatetimeIndex):
      df.index = pd.to_datetime(df.index)

    dataframe = dataframe.merge(df, left_index=True, right_index=True, how="inner")

  return dataframe

In [None]:
indicators = ["RSI", "EMA", "MACD", "BBANDS"]
final_data = createFinalDataframe(df_data, indicators)

In [None]:
final_data

Unnamed: 0,Close,Volume,RSI,EMA,MACD,MACD_Hist,MACD_Signal,Real Lower Band,Real Upper Band,Real Middle Band
2018-01-02,43.064999,102223600,52.9563,41.0517,0.1019,-0.1214,0.2233,40.4172,41.3511,40.8841
2018-01-03,43.057499,118071600,52.7116,41.1141,0.0984,-0.0999,0.1983,40.4232,41.5041,40.9636
2018-01-04,43.257500,89738400,59.0236,41.2196,0.1098,-0.0708,0.1806,40.4462,41.7138,41.0800
2018-01-05,43.750000,94640000,70.9567,41.4471,0.1552,-0.0204,0.1755,40.3785,42.1570,41.2677
2018-01-08,43.587502,82271200,63.3480,41.5470,0.1765,0.0008,0.1757,40.9769,42.0489,41.5129
...,...,...,...,...,...,...,...,...,...,...
2021-08-31,151.830002,86453100,65.0791,150.5975,1.7153,0.0834,1.6319,145.5335,154.2465,149.8900
2021-09-01,152.509995,80313700,68.2842,151.2350,1.8367,0.1639,1.6729,146.2655,155.1745,150.7200
2021-09-02,153.649994,71115500,73.4003,152.0400,2.0019,0.2632,1.7387,148.3857,155.4983,151.9420
2021-09-03,154.300003,57808700,76.1431,152.7933,2.1603,0.3373,1.8230,151.3610,154.8030,153.0820


In [None]:
X, y = createFeaturesTargets(final_data, 32)

  0%|          | 0/895 [00:00<?, ?it/s]

In [None]:
# let's see the shape of the features and target
X.shape, y.shape

((895, 32, 10), (895,))

In [None]:
X_train, X_test, y_train, y_test = X[:-test_length], X[-test_length:], y[:-test_length], y[-test_length:]

In [None]:
## Training set shape
X_train.shape, y_train.shape

((723, 32, 10), (723,))

In [None]:
X_train.shape[2]

10

In [None]:
type(X_train.shape[2])

int

In [None]:
# Test set shape
X_test.shape, y_test.shape

((172, 32, 10), (172,))

In [None]:
feature_scaler = MultiDimensionScaler()
X_train = feature_scaler.fit_transform(X_train)
X_test = feature_scaler.transform(X_test)

In [None]:
target_scaler = MinMaxScaler()
y_train = target_scaler.fit_transform(y_train.reshape(-1,1))
y_test = target_scaler.transform(y_test.reshape(-1,1))

In [None]:
## Defining callbacks
save_best_model = tf.keras.callbacks.ModelCheckpoint("best_weights.h5", monitor="val_loss", save_best_only=True, save_weights_only=True)
reduce_lr = ReduceLROnPlateau(monitor="val_loss", factor=0.25, patience=5, min_lr=1e-05, verbose=1)

In [None]:
model = Sequential()

# Input windows are 32 time steps x X_train.shape[2] features.
# The bidirectional LSTM returns the full sequence so that the second LSTM
# can consume it; the second LSTM feeds the dense head.
model.add(Bidirectional(LSTM(512, return_sequences=True, recurrent_dropout=0.1), input_shape=(32,X_train.shape[2])))
model.add(LSTM(256, recurrent_dropout=0.1))
model.add(Dropout(0.3))
model.add(Dense(64, activation="elu")) ## ELU: identity for x > 0, alpha * (exp(x) - 1) for x <= 0
model.add(Dropout(0.3))
model.add(Dense(64, activation="elu"))
# Single linear unit: the (scaled) close-price regression output.
model.add(Dense(1, activation="linear"))



In [None]:
## Optimizer
optimizer = tf.keras.optimizers.SGD(learning_rate=2e-03)
model.compile(loss="mse", optimizer = optimizer)

In [None]:
# Train the model
# NOTE(review): validation_data is the same (X_test, y_test) split that is
# later passed to model.predict for the reported plots, and both callbacks
# monitor "val_loss". The checkpointed "best" weights and the learning-rate
# schedule are therefore selected on the test set; consider holding out a
# separate validation slice of the training data instead.
history = model.fit(
    X_train, y_train,
    epochs = 10,
    batch_size = 1,
    verbose=1,
    # Windows are in chronological order; shuffle=False keeps that order.
    shuffle=False,
    validation_data = (X_test, y_test),
    callbacks = [reduce_lr, save_best_model]
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10

Epoch 00008: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 9/10
Epoch 10/10


**Visualizing the Predictions**

In [None]:
## load the best weights
model.load_weights("best_weights.h5")

In [None]:
predictions = model.predict(X_test)

In [None]:
predictions = target_scaler.inverse_transform(predictions)
actual = target_scaler.inverse_transform(y_test)

In [None]:
predictions.shape

(172, 1)

In [None]:
predictions = np.squeeze(predictions, axis=1)
actual = np.squeeze(actual, axis=1)

In [None]:
# Plotting the actual data and the prediction
# These samples were built from `final_data`, whose rows differ from `data`
# after the inner joins with the indicator series, so take the dates from
# `final_data` to keep the x-axis aligned with the plotted values.
plot_dates = final_data.index[-test_length:]

fig = go.Figure()

fig.add_trace(go.Scatter(x=plot_dates, y=actual, mode="lines", name="Real data"))
fig.add_trace(go.Scatter(x=plot_dates, y=predictions, mode="lines", name="Predicted data"))
fig.show()

**Visualizing predictions on the whole dataset**

In [None]:
total_features = np.concatenate((X_train, X_test), axis=0)

In [None]:
total_targets = np.concatenate((y_train, y_test), axis=0)

In [None]:
predictions_2 = model.predict(total_features)

In [None]:
predictions_2 = target_scaler.inverse_transform(predictions_2)
actual_2 = target_scaler.inverse_transform(total_targets)

In [None]:
predictions_2 = np.squeeze(predictions_2, axis=1)
actual_2 = np.squeeze(actual_2, axis=1)

In [None]:
# Plotting the actual data and the prediction
# Targets exist only for rows that follow a full input window, and they come
# from `final_data` (not `data`). Use its trailing len(actual_2) dates so the
# x-axis lines up with the plotted values.
plot_dates = final_data.index[-len(actual_2):]

fig = go.Figure()

fig.add_trace(go.Scatter(x=plot_dates, y=actual_2, mode="lines", name="Real data"))
fig.add_trace(go.Scatter(x=plot_dates, y=predictions_2, mode="lines", name="Predicted data"))
fig.show()

**Saving the model**

In [None]:
model.save("model.h5")
loaded_model= tf.keras.models.load_model("model.h5")



**Real Time Prediction**

In [None]:
def predictStockPrice(model, dataframe, previousDate, feature_length=32):
  """Predict the close price for ``previousDate`` from the rows preceding it.

  The input window is the ``feature_length`` rows immediately before
  ``previousDate``; the row for ``previousDate`` itself is excluded. This
  matches how ``createFeaturesTargets`` pairs each window with the ``Close``
  of the row that follows it.

  The window is scaled with the module-level ``feature_scaler`` and the model
  output is mapped back to price units with the module-level
  ``target_scaler``; both must already be fitted.

  Args:
    model: object exposing ``predict`` on a ``(1, feature_length, n_columns)`` array.
    dataframe: feature frame with the same columns the scalers were fitted on.
    previousDate: index label locating the row to predict.
    feature_length: number of preceding rows fed to the model.

  Returns:
    The predicted value as a scalar.

  Raises:
    KeyError: if ``previousDate`` is not in the index.
    ValueError: if fewer than ``feature_length`` rows precede ``previousDate``.
  """
  idx_location = dataframe.index.get_loc(previousDate)

  # A negative start would silently wrap around to the end of the frame.
  window_start = idx_location - feature_length
  if window_start < 0:
    raise ValueError(
        f"need {feature_length} rows before {previousDate!r}, only {idx_location} available"
    )

  features = dataframe.iloc[window_start : idx_location, :].values
  features = np.expand_dims(features, axis=0)  # add the batch dimension
  features = feature_scaler.transform(features)
  prediction = model.predict(features)
  prediction = target_scaler.inverse_transform(prediction)

  return prediction[0][0]

In [None]:
predictStockPrice(loaded_model, final_data, "2021-04-27")

135.26556

In [None]:
final_data[final_data.index == "2021-04-27"]

Unnamed: 0,Close,Volume,RSI,EMA,MACD,MACD_Hist,MACD_Signal,Real Lower Band,Real Upper Band,Real Middle Band
2021-04-27,134.389999,66015800,60.4676,133.6056,2.5325,0.2024,2.3301,131.3518,135.3435,133.3476
