In [1]:
from numpy.random import seed
import tensorflow as tf
# Seed both NumPy's and TensorFlow's global random generators with the same value.
SEED = 45
seed(SEED)
tf.random.set_seed(SEED)

In [2]:
!pip install yfinance



In [3]:
import warnings
warnings.filterwarnings('ignore')

import yfinance as yf
import pandas as pd
import numpy as np
import altair as alt
import project_functions2 as pf

from sklearn.linear_model import LinearRegression
from matplotlib import pyplot as plt
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense
from tensorflow.keras.layers import GRU, LSTM, SimpleRNN
from tensorflow.keras.layers import RepeatVector, TimeDistributed
from tensorflow.keras.layers import Dense, Bidirectional
from tensorflow.keras.activations import elu, relu
from keras.layers import Dropout
from tensorflow.keras.layers import LeakyReLU
from tensorflow.keras.callbacks import EarlyStopping

In [4]:
# Mount Google Drive inside the Colab VM; the CSV paths used below live under this mount point.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [5]:
# Ticker symbols used throughout the notebook, each mapped to its yfinance Ticker handle.
# NOTE(review): Facebook's symbol changed from 'FB' to 'META' in 2022 — confirm that
# yfinance still resolves 'FB' before relying on a fresh download.
stock_list = ['AMZN', 'AAPL', 'FB', 'GOOG', 'MSFT', 'TSLA']
stock_objects = {symbol: yf.Ticker(symbol) for symbol in stock_list}

In [6]:
# Per-ticker sentiment tables read from Drive, one dict per source file prefix
# ('investing_' and 'stocks_'). Each frame is indexed by its 'date' column.
SENTIMENT_DIR = '/content/drive/MyDrive/SENG474_Project/data/sentiment/'

stock_investing = {
    key: pd.read_csv(SENTIMENT_DIR + 'investing_' + key + '_sentiment.csv').set_index('date')
    for key in stock_objects
}
stock_stocks = {
    key: pd.read_csv(SENTIMENT_DIR + 'stocks_' + key + '_sentiment.csv').set_index('date')
    for key in stock_objects
}

In [7]:
# --- Feature preparation -----------------------------------------------------
# Builds one feature frame per ticker, combines them, splits into train/test,
# scales the features and reshapes them to 3-D for the recurrent models.

# Columns removed from every per-ticker frame right after pf.rolling_aves runs.
drop_list = ['Volume', 'Dividends', 'Stock Splits',
             '5 Day Open Mean', '5 Day High Mean', '5 Day Low Mean',
             '5 Day Close Mean', '5 Day Volume Mean', '5 Day Open Var',
             '5 Day High Var', '5 Day Low Var', '5 Day Close Var',
             '5 Day Volume Var', '5 Day Dt',
             '10 Day Open Mean', '10 Day High Mean', '10 Day Low Mean',
             '10 Day Close Mean', '10 Day Volume Mean', '10 Day Open Var',
             '10 Day High Var', '10 Day Low Var', '10 Day Close Var',
             '10 Day Volume Var', '10 Day High', '10 Day Low', '10 Day Dt',
             '20 Day Open Mean', '20 Day High Mean', '20 Day Low Mean',
             '20 Day Close Mean', '20 Day Volume Mean', '20 Day Open Var',
             '20 Day High Var', '20 Day Low Var', '20 Day Close Var',
             '20 Day Volume Var', '20 Day Dt']

# Close-price column names. Not used by this cell; kept because other cells may
# reference the name.
clost_drop = ['Close', '- 1 Days Close', '- 2 Days Close', '- 3 Days Close',
              '- 4 Days Close', '- 5 Days Close']

LOOKBACK = 5   # passed to pf.lstm_prep
TIMESTEPS = 6  # second axis of the 3-D model input
# NOTE(review): TIMESTEPS is presumably LOOKBACK + 1 (current row plus the lagged
# rows), but it also equals the number of tickers — confirm against pf.lstm_prep.

# Full price history for every ticker.
stock_dfs = {key: ticker.history(period='max') for key, ticker in stock_objects.items()}

for key in stock_dfs:
    frame = pf.rolling_aves(stock_dfs[key])
    frame = frame.drop(columns=drop_list)
    # Left joins on the index keep every price row; rows without sentiment get NaN.
    # NOTE(review): the sentiment frames are indexed by the raw 'date' column as read
    # from CSV (no date parsing) — confirm its dtype matches the price frame's index,
    # otherwise nothing matches and the fillna(0) below silently hides it.
    frame = frame.merge(stock_investing[key], how='left', left_index=True, right_index=True)
    frame = frame.merge(stock_stocks[key], how='left', left_index=True, right_index=True)
    frame = frame.fillna(0)
    frame = pf.lstm_prep(frame, lookback=LOOKBACK)
    # presumably adds the close price 5 rows ahead as the target — TODO confirm
    stock_dfs[key] = pf.future_close_setup(frame, 5)

combine_df = pf.combiner(stock_dfs)

X_train, y_train, X_test, y_test = pf.multi_stock_train_test_split(combine_df, 365, stock_dfs)

# Fit the scaler on the training features only, then apply the same scaling to the
# test features. Targets are left unscaled.
X_scaler = MinMaxScaler()
X_train = X_scaler.fit_transform(X_train)
X_test = X_scaler.transform(X_test)

# Reshape the flat feature matrix to (samples, TIMESTEPS, features per step).
X_train_3d = np.reshape(X_train, (X_train.shape[0], TIMESTEPS, X_train.shape[1] // TIMESTEPS))
X_test_3d = np.reshape(X_test, (X_test.shape[0], TIMESTEPS, X_test.shape[1] // TIMESTEPS))

In [8]:
# SimpleRNN regressor: one recurrent layer followed by two dense layers, trained
# with Adam on mean squared error.

# Stop when the training loss has not improved for 2 epochs and restore the best
# weights seen. No validation data is passed to fit(), so 'loss' is the training loss.
early_stopping = EarlyStopping(monitor='loss',
                               patience=2, restore_best_weights=True)

# NOTE(review): with alpha=1 the negative slope equals the positive slope, so this
# activation is the identity function and every layer below is effectively linear.
# Left unchanged to keep the recorded scores reproducible — confirm this is intended.
leaky_relu = LeakyReLU(alpha=1)

# Re-seed so re-running this cell alone starts from the same random state.
np.random.seed(45)
tf.random.set_seed(45)

model = Sequential()
model.add(SimpleRNN(units=X_train_3d.shape[1], activation=leaky_relu,
                    input_shape=(X_train_3d.shape[1], X_train_3d.shape[2])))
model.add(Dense(units=X_train_3d.shape[2], activation=leaky_relu))
model.add(Dense(units=1, activation=leaky_relu))
model.compile(optimizer='adam', loss='mean_squared_error')
# `workers` only applies to generator / Sequence inputs and is not accepted by
# newer Keras releases, so it is not passed for these in-memory arrays.
model.fit(X_train_3d, y_train, epochs=60, batch_size=32, verbose=1,
          callbacks=[early_stopping])

Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60


<tensorflow.python.keras.callbacks.History at 0x7f7caad19650>

In [9]:
# R^2 of the SimpleRNN model on the test set, ignoring the last 5 rows per ticker.
# presumably those are the rows whose future-close target is not yet known — TODO confirm
n_trailing = len(stock_dfs) * 5
predictions = model.predict(X_test_3d)
model_score = r2_score(y_true=y_test[:-n_trailing], y_pred=predictions[:-n_trailing])
model_score
# previously noted: predictions 0.996109772994378

0.9908327426528523

In [10]:
# R^2 of the SimpleRNN model on the data it was trained on, for comparison with the test score.
train_preds = model.predict(X_train_3d)
model_train_score = r2_score(y_true=y_train, y_pred=train_preds)
model_train_score

0.9961158929600548

# GRU

In [21]:
# GRU regressor: one GRU layer followed by a 200-unit dense layer and a single-unit
# output, trained with Adam on mean squared error.

# Stop when the training loss has not improved for 3 epochs and restore the best
# weights seen. No validation data is passed to fit(), so 'loss' is the training loss.
early_stopping = EarlyStopping(monitor='loss',
                               patience=3, restore_best_weights=True)

# NOTE(review): with alpha=1 the negative slope equals the positive slope, so this
# activation is the identity function; only the GRU's sigmoid recurrent activation
# is non-linear. Left unchanged to keep the recorded scores reproducible — confirm
# this is intended.
leaky_relu = LeakyReLU(alpha=1)

# Re-seed so re-running this cell alone starts from the same random state.
np.random.seed(45)
tf.random.set_seed(45)

model2 = Sequential()
model2.add(GRU(units=X_train_3d.shape[1], activation=leaky_relu, recurrent_activation="sigmoid",
               input_shape=(X_train_3d.shape[1], X_train_3d.shape[2])))
model2.add(Dense(units=200, activation=leaky_relu))
model2.add(Dense(units=1, activation=leaky_relu))
model2.compile(optimizer='adam', loss='mean_squared_error')
# `workers` only applies to generator / Sequence inputs and is not accepted by
# newer Keras releases, so it is not passed for these in-memory arrays.
model2.fit(X_train_3d, y_train, epochs=60, batch_size=32, verbose=1,
           callbacks=[early_stopping])

Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60


<tensorflow.python.keras.callbacks.History at 0x7f16db9ad0d0>

In [22]:
# R^2 of the GRU model on the test rows that have a known target.
predictions2 = model2.predict(X_test_3d)
# Select targets and predictions with the same row mask. Pairing y_test.dropna()
# with the first k predictions is only aligned when every NaN target sits at the
# very end of y_test; the mask gives the same result in that case and stays
# correct otherwise.
has_target = pd.DataFrame(y_test).notna().all(axis=1).to_numpy()
model_score2 = r2_score(y_test[has_target], predictions2[has_target])
model_score2

0.9934388535738545

In [23]:
# R^2 of the GRU model on the data it was trained on, for comparison with the test score.
train_preds2 = model2.predict(X_train_3d)
model_train_score2 = r2_score(y_true=y_train, y_pred=train_preds2)
model_train_score2

0.9932070090647193