In [4]:
# Initial imports
import numpy as np
import pandas as pd
from pathlib import Path
from sklearn import preprocessing
import hvplot.pandas
import datetime

%matplotlib inline

In [5]:
# Pin the random seeds so that repeated runs are reproducible.
# Note: seeding is meant for model prototyping; when evaluating the model it is
# good practice to comment these calls out and run multiple experiments.
from numpy.random import seed
from tensorflow import random

seed(1)  # seed passed to numpy.random
random.set_seed(2)  # seed passed to tensorflow.random

In [6]:
# Reading the .csv files


def load_stock_data(csv_path):
    """
    Read one ticker's CSV file and return it indexed by calendar date.

    The "Date" column is parsed with `pd.to_datetime` and reduced to plain
    `datetime.date` values (via `.dt.date`) before it becomes the index.

    Parameters
    ----------
    csv_path : str or pathlib.Path
        Location of the CSV file; it must contain a "Date" column.

    Returns
    -------
    pandas.DataFrame
        The file's contents with "Date" as the index.
    """
    frame = pd.read_csv(csv_path)
    frame["Date"] = pd.to_datetime(frame["Date"]).dt.date
    # Return a new frame rather than mutating in place, so re-running is safe.
    return frame.set_index("Date")


fb_csv = Path("../Resources/fb_data_Jan2020.csv")
aapl_csv = Path("../Resources/aapl_data_Jan2020.csv")
amzn_csv = Path("../Resources/amzn_data_Jan2020.csv")
nflx_csv = Path("../Resources/nflx_data_Jan2020.csv")
googl_csv = Path("../Resources/googl_data_Jan2020.csv")

fb_df = load_stock_data(fb_csv)
aapl_df = load_stock_data(aapl_csv)
amzn_df = load_stock_data(amzn_csv)
nflx_df = load_stock_data(nflx_csv)
googl_df = load_stock_data(googl_csv)

In [7]:
# Display the GOOGL frame, which is indexed by Date at this point.
googl_df

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Volatility,sentiment,RSI
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2019-01-23,0.119401,0.118666,0.02582,0.157435,0.10975,0.01281,0.942184,0.379153
2019-01-24,0.056748,0.047207,0.066633,0.175509,0.121857,0.005687,0.773885,0.398631
2019-01-25,0.225551,0.266782,0.42947,0.360976,0.049075,0.013523,0.863883,0.596044
2019-01-28,0.165634,0.124657,0.143095,0.123476,0.108721,0.031967,0.206057,0.246337
2019-01-29,0.035575,0.0,0.0,0.0,0.036966,0.028076,0.094979,0.078047
2019-01-30,0.0,0.213512,0.176912,0.348067,0.117574,0.064828,0.821937,0.465527
2019-01-31,0.484949,0.619893,0.653507,0.67265,0.2982,0.061234,1.0,0.763302
2019-02-01,0.629699,0.717049,0.803432,0.610746,0.189376,0.010041,0.729194,0.67237
2019-02-04,0.598156,0.831673,0.857738,0.863097,0.595976,0.15288,0.673729,0.890515
2019-02-05,0.735417,0.978346,0.959187,1.0,1.0,0.244317,0.870841,1.0


In [10]:
# Columns kept for modelling, declared once so every ticker gets the same
# selection in the same order. "Close" comes first, i.e. at position 0, and the
# other five columns occupy positions 1-5.
MODEL_COLUMNS = ["Close", "Volume", "Volatility", "sentiment", "RSI", "Open"]

fb_df = fb_df[MODEL_COLUMNS]
aapl_df = aapl_df[MODEL_COLUMNS]
amzn_df = amzn_df[MODEL_COLUMNS]
nflx_df = nflx_df[MODEL_COLUMNS]
googl_df = googl_df[MODEL_COLUMNS]

googl_df

Unnamed: 0_level_0,Close,Volume,Volatility,sentiment,RSI,Open
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2019-01-23,0.157435,0.10975,0.01281,0.942184,0.379153,0.119401
2019-01-24,0.175509,0.121857,0.005687,0.773885,0.398631,0.056748
2019-01-25,0.360976,0.049075,0.013523,0.863883,0.596044,0.225551
2019-01-28,0.123476,0.108721,0.031967,0.206057,0.246337,0.165634
2019-01-29,0.0,0.036966,0.028076,0.094979,0.078047,0.035575
2019-01-30,0.348067,0.117574,0.064828,0.821937,0.465527,0.0
2019-01-31,0.67265,0.2982,0.061234,1.0,0.763302,0.484949
2019-02-01,0.610746,0.189376,0.010041,0.729194,0.67237,0.629699
2019-02-04,0.863097,0.595976,0.15288,0.673729,0.890515,0.598156
2019-02-05,1.0,1.0,0.244317,0.870841,1.0,0.735417


In [12]:
def window_data(df, window, feature_col_number, target_col_number):
    """
    Build rolling-window samples from a DataFrame.

    Each sample pairs the `window` consecutive rows starting at position i
    (the features) with the target column's value on the row that follows
    them, at position i + window.

    Parameters
    ----------
    df : pandas.DataFrame
        Source rows, read positionally with `.iloc`.
    window : int
        Number of consecutive rows in each feature sample.
    feature_col_number : int
        Accepted for interface compatibility but NOT used: the features are
        always the columns at positions 1 through 5 (`iloc[:, 1:6]`).
    target_col_number : int
        Positional index of the column whose value is the target.

    Returns
    -------
    X : numpy.ndarray
        The stacked feature windows, one entry per sample.
    y : numpy.ndarray
        The targets as a column vector of shape (n_samples, 1).
    """
    # The last usable window starts at len(df) - window - 1 (its target is the
    # final row), so there are len(df) - window samples in total. The previous
    # bound of len(df) - window - 1 silently dropped that final sample.
    n_samples = len(df) - window
    X = [df.iloc[i : i + window, 1:6].values for i in range(n_samples)]
    y = [df.iloc[i + window, target_col_number] for i in range(n_samples)]
    return np.array(X), np.array(y).reshape(-1, 1)

In [14]:
# Build the features (X) and target (y) arrays for FB with window_data().
window_size = 5

# Column positions handed to window_data().
feature_column = 0
target_column = 0

X, y = window_data(
    df=fb_df,
    window=window_size,
    feature_col_number=feature_column,
    target_col_number=target_column,
)
print(f"X sample values:\n{X[:5]} \n")
print(f"y sample values:\n{y[:5]}")

X sample values:
[[[1.48929007e-01 1.32227225e-02 7.29042985e-01 3.33130830e-01
   1.37048193e-01]
  [1.57118753e-01 3.52725437e-03 7.82018387e-01 4.24138295e-01
   0.00000000e+00]
  [1.36433193e-01 4.84573723e-03 4.34822166e-01 5.90786963e-01
   1.06927711e-01]
  [4.97237429e-02 1.41791317e-03 7.68140831e-01 4.14164591e-01
   1.28388554e-01]
  [1.06619460e-01 1.07512045e-02 4.82273626e-01 1.08914231e-01
   1.29894578e-01]]

 [[1.57118753e-01 3.52725437e-03 7.82018387e-01 4.24138295e-01
   0.00000000e+00]
  [1.36433193e-01 4.84573723e-03 4.34822166e-01 5.90786963e-01
   1.06927711e-01]
  [4.97237429e-02 1.41791317e-03 7.68140831e-01 4.14164591e-01
   1.28388554e-01]
  [1.06619460e-01 1.07512045e-02 4.82273626e-01 1.08914231e-01
   1.29894578e-01]
  [2.89997733e-01 1.87405862e-02 1.95194329e-01 4.33311576e-01
   5.94879518e-02]]

 [[1.36433193e-01 4.84573723e-03 4.34822166e-01 5.90786963e-01
   1.06927711e-01]
  [4.97237429e-02 1.41791317e-03 7.68140831e-01 4.14164591e-01
   1.28388554e

In [17]:
# load model from single file
#from keras.models import load_model

#model_fb = load_model('fb_lstm_model.h5')
# make predictions
#y_fb_predicted = model_fb.predict(X, verbose=0)
#print(y_fb_predicted)