# Stock Market Analysis

## Setup of System

In [135]:
!nvidia-smi

Wed Mar 23 21:02:51 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla K80           Off  | 00000000:00:04.0 Off |                    0 |
| N/A   69C    P8    32W / 149W |      0MiB / 11441MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

## Fetch Stock Market Data

In [136]:
!pip install yfinance



In [137]:
import yfinance as yf
import numpy as np
import pandas as pd
import tensorflow as tf

In [138]:
data = yf.download("GOOGL" , start = "2018-01-01" , internal = '1d')

[*********************100%***********************]  1 of 1 completed


In [139]:
data.shape

(1064, 6)

## Understand Market Data

In [140]:
data.head(3)

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2018-01-02,1053.02002,1075.97998,1053.02002,1073.209961,1073.209961,1588300
2018-01-03,1073.930054,1096.099976,1073.430054,1091.52002,1091.52002,1565900
2018-01-04,1097.089966,1104.079956,1094.26001,1095.76001,1095.76001,1302600


### Description of the Columns
#### Date
##### Date of the stock price information.
#### Open
##### The price of the stock when trading begun.
#### High
##### The highest price of the stock throughout the day.
#### Low
##### The lowest price of the stock throughout the day.
#### Close
##### Price of the stock at the close of the day.
#### Adjusted CLose
##### The adjusted close price amends a closing price of a stock's value after accounting for corporate actions. This can include stock splits that increase the total number of stocks while not affecting total valuation.
#### Volume
###### Trading volume is measured in how muh of a givin financial asset has traded in a period of time. For stocks, volume is measured in the number of shares traded.

## Understand Trends Within Data

In [141]:
# Sort the data points based on indexes just for confirmation
data.sort_index(inplace = True)

In [142]:
# Remove any duplicate index
data = data.loc[~data.index.duplicated(keep='first')]

In [143]:
data.tail(3)

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2022-03-21,2723.27002,2741.0,2681.850098,2722.030029,2722.030029,1341600
2022-03-22,2722.030029,2821.0,2722.030029,2797.360107,2797.360107,1774800
2022-03-23,2774.050049,2791.77002,2757.01001,2765.51001,2765.51001,1219499


In [144]:
# Check for missing values
data.isnull().sum()

Open         0
High         0
Low          0
Close        0
Adj Close    0
Volume       0
dtype: int64

In [145]:
data.describe()

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume
count,1064.0,1064.0,1064.0,1064.0,1064.0,1064.0
mean,1628.861691,1645.687196,1611.765172,1629.285338,1629.285338,1782908.0
std,623.664312,628.589567,617.370938,622.978857,622.978857,830557.4
min,984.320007,1012.119995,977.659973,984.669983,984.669983,465600.0
25%,1149.532471,1159.464996,1137.915009,1150.789978,1150.789978,1255400.0
50%,1339.399963,1351.720032,1327.369995,1340.945007,1340.945007,1572450.0
75%,2058.554993,2083.815002,2034.590027,2059.189941,2059.189941,2039525.0
max,3025.0,3030.929932,2977.97998,2996.77002,2996.77002,6658900.0


In [146]:
import plotly.graph_objects as go

# Check the trend in Open, High, Low and Close Values
fig = go.Figure()

fig.add_trace(go.Scatter(x = data.index , y = data['Close'] , mode = "lines" , name='Close'))
fig.add_trace(go.Scatter(x = data.index , y = data['Open'] , mode = "lines" , name='Open'))
fig.add_trace(go.Scatter(x = data.index , y = data['High'] , mode = "lines" , name='High'))
fig.add_trace(go.Scatter(x = data.index , y = data['Low'] , mode = "lines" , name='Low'))
fig.update_layout(height = 500 , width = 900,
                  xaxis_title='Date' , yaxis_title='Price')
fig.show()

In [147]:
# Check the trend in Volume
fig = go.Figure()

fig.add_trace(go.Scatter(x = data.index , y = data['Volume'] , mode = "lines" , name='Volume'))
fig.update_layout(height = 500 , width = 900,
                  xaxis_title='Date' , yaxis_title='Price')
fig.show()

### Visualized Trends
#### Daily Price
##### The first graph is a depiction of the day to day prices variations. This graph includes all of the prices for the stock: opening price, closing price, highest price of the day, and lowest price of the day.
#### Daily Trading Volume
##### Lastly, there is a graph picturing changes in volume of stock traded throughout the day. 

## Process the Stock Market Data

In [148]:
from sklearn.preprocessing import MinMaxScaler
import pickle
from tqdm.notebook import tnrange

In [149]:
# Filter only required data
data = data[['Close' , 'Volume']]
data.head(3)

Unnamed: 0_level_0,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2018-01-02,1073.209961,1588300
2018-01-03,1091.52002,1565900
2018-01-04,1095.76001,1302600


In [150]:
#  Confirm the testing set length
test_length = data[(data.index >= '2021-10-01')].shape[0]

In [151]:
def CreateFeatures_and_Targets(data, feature_length):
    X = []
    Y = []

    for i in tnrange(len(data) - feature_length):
        X.append(data.iloc[i : i + feature_length,:].values)
        Y.append(data["Close"].values[i+feature_length])

    X = np.array(X)
    Y = np.array(Y)

    return X , Y

In [152]:
X , Y = CreateFeatures_and_Targets(data , 32)

  0%|          | 0/1032 [00:00<?, ?it/s]

In [153]:
# Check the shapes
X.shape , Y.shape

((1032, 32, 2), (1032,))

In [154]:
Xtrain , Xtest , Ytrain , Ytest = X[:-test_length] , X[-test_length:] , Y[:-test_length] , Y[-test_length:]

In [155]:
# Check training dataset shape
Xtrain.shape , Ytrain.shape

((912, 32, 2), (912,))

In [156]:
# Check testing dataset shape
Xtest.shape , Ytest.shape

((120, 32, 2), (120,))

In [157]:
# Create a scalar to scale vectors with multiple dimensions
class MultiDimensionScalar():
    def __init__(self):
        self.scalers = []

    def fit_transform(self, X):
        total_dims = X.shape[2]
        for i in range(total_dims):
            Scaler = MinMaxScaler()
            X[:, :, i] = Scaler.fit_transform(X[:, :, i])
            self.scalers.append(Scaler)
        return X

    def transform(self , X):
        for i in range(X.shape[2]):
            X[:, :, i] = self.scalers[i].transform(X[:,:,i])
        return X

In [158]:
Feature_Scaler = MultiDimensionScalar()
Xtrain = Feature_Scaler.fit_transform(Xtrain)
Xtest = Feature_Scaler.transform(Xtest)

In [159]:
Target_Scaler = MinMaxScaler()
Ytrain = Target_Scaler.fit_transform(Ytrain.reshape(-1,1))
Ytest = Target_Scaler.transform(Ytest.reshape(-1,1))

In [160]:
def save_object(obj , name : str):
    pickle_out = open(f"{name}.pck","wb")
    pickle.dump(obj , pickle_out)
    pickle_out.close()

def load_object(name : str):
    pickle_in = open(f"{name}.pck","rb")
    data = pickle.load(pickle_in)
    return data

In [161]:
# Save objects for future purposes
save_object(Feature_Scaler , "Feature_Scaler")
save_object(Target_Scaler , "Target_Scaler")

## Scrape RSI for Modeling

In [162]:
import requests

response = requests.get('https://www.alphavantage.co/query?function=RSI&symbol=GOOGL&interval=daily&time_period=5&series_type=close&apikey=43T9T17VCV2ME4SM')
response = response.json()

In [163]:
response.keys()

dict_keys(['Meta Data', 'Technical Analysis: RSI'])

In [164]:
rsi_data = pd.DataFrame.from_dict(response['Technical Analysis: RSI'] , orient='index')

In [166]:
rsi_data = rsi_data[rsi_data.index >= '2018-01-01']

In [202]:
rsi_data['RSI'] = rsi_data['RSI'].astype(np.float64)

In [203]:
rsi_data.head()


Unnamed: 0,RSI
2018-01-02,65.431
2018-01-03,78.3686
2018-01-04,80.483
2018-01-05,86.2429
2018-01-08,87.4882


In [199]:
data.head()

Unnamed: 0_level_0,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2018-01-02,1073.209961,1588300
2018-01-03,1091.52002,1565900
2018-01-04,1095.76001,1302600
2018-01-05,1110.290039,1512500
2018-01-08,1114.209961,1232200


In [204]:
# The Problem lies in the "Data" header, need to find out what that is and fix it
  # This means either means removing it from "data" or adding it to "rsi_data"


# data3 = data.drop('Data')
# data3.head()

In [200]:
# Perform Merge

# data2 = data.merge(rsi_data, on='Date', how='inner')
data2 = data.merge(rsi_data['RSI'], left_index=True, right_index=True)

In [201]:
# Check for success of merge

data2.tail()
data2.shape

(0, 3)

## Forecast with LSTM's

In [None]:
from tensorflow.keras.callbacks import ModelCheckpoint , ReduceLROnPlateau

save_best = ModelCheckpoint("best_weights.h5", monitor='val_loss', save_best_only=True, save_weights_only=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.25,patience=5, min_lr=0.00001,verbose = 1)

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense , Dropout , LSTM , Bidirectional

model = Sequential()

model.add(Bidirectional(LSTM(512 , return_sequences=True, recurrent_dropout=0.1, input_shape=(32, 2))))
model.add(LSTM(256, recurrent_dropout=0.1))
model.add(Dropout(0.3))
model.add(Dense(64 , activation='elu'))
model.add(Dropout(0.3))
model.add(Dense(32 , activation='elu'))
model.add(Dense(1 , activation='linear')) # Final Layer



In [None]:
# optimizer = tf.keras.optimizers.Adam(learning_rate=0.002)
optimizer = tf.keras.optimizers.SGD(learning_rate = 0.002)
model.compile(loss='mse', optimizer=optimizer)

In [None]:
history = model.fit(Xtrain, Ytrain,
                    epochs = 10,
                    batch_size = 1,
                    verbose = 1,
                    shuffle = False,
                    validation_data = (Xtest, Ytest),
                    callbacks = [reduce_lr, save_best])

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


## Visualize Predictions

In [None]:
# load the best weights
model.load_weights("best_weights.h5")

In [None]:
Predictions = model.predict(Xtest)

In [None]:
Predictions = Target_Scaler.inverse_transform(Predictions)
Actual = Target_Scaler.inverse_transform(Ytest)

In [None]:
Predictions.shape

(116, 1)

In [None]:
Predictions = np.squeeze(Predictions , axis = 1)
Actual = np.squeeze(Actual , axis = 1)

In [None]:
# Check the Predictions vs Actual
fig = go.Figure()

fig.add_trace(go.Scatter(x = data.index[-test_length:] , y = Actual , mode = 'lines' , name='Actual'))
fig.add_trace(go.Scatter(x = data.index[-test_length:] , y = Predictions , mode = 'lines' , name='Predicted'))
fig.show()

In [None]:
Total_features = np.concatenate((Xtrain , Xtest) , axis = 0)

In [None]:
Total_Targets = np.concatenate((Ytrain , Ytest) , axis = 0)

In [None]:
Predictions = model.predict(Total_features)

In [None]:
Predictions = Target_Scaler.inverse_transform(Predictions)
Actual = Target_Scaler.inverse_transform(Total_Targets)

In [None]:
Predictions = np.squeeze(Predictions , axis = 1)
Actual = np.squeeze(Actual , axis = 1)

In [None]:
# Check the Predictions vs Actual
fig = go.Figure()

fig.add_trace(go.Scatter(x = data.index[-test_length:] , y = Actual , mode = 'lines' , name='Actual'))
fig.add_trace(go.Scatter(x = data.index[-test_length:] , y = Predictions , mode = 'lines' , name='Predicted'))
fig.show()

## Save Predictions

In [None]:
# Save and load the whole model
model.save("Model.h5")
loaded_model = tf.keras.load_model("Model.h5")

NameError: ignored

In [None]:
def PredictStockPrice(Model , DataFrame , PreviousDate , feature_length = 32):
    idx_location = DataFrame.index.get_loc(PreviousDate)
    Features = DataFrame.iloc[idx_location - feature_length : idx_location,:].values
    Features = np.expand_dims(features , axis = 0)
    Features = Feature_Scaler.transform(Features)
    Prediction = Model.predict(Features)
    Prediction = Target_Scaler.inverse_transform(Prediction)
    return Prediction[0][0]

In [None]:
PredictStockPrice(loaded_model , data , '2021-03-18')