In [1]:
from numpy.random import seed
import tensorflow as tf
# Seed NumPy's global RNG and TensorFlow's global RNG with the same value
# before any other cell runs. The model functions below re-apply the same
# seeds right before they build their networks.
seed(45)
tf.random.set_seed(45)

In [2]:
# Install the yfinance package that the import cell below relies on.
!pip install yfinance

Collecting yfinance
  Downloading https://files.pythonhosted.org/packages/a7/ee/315752b9ef281ba83c62aa7ec2e2074f85223da6e7e74efb4d3e11c0f510/yfinance-0.1.59.tar.gz
Collecting lxml>=4.5.1
[?25l  Downloading https://files.pythonhosted.org/packages/cf/4d/6537313bf58fe22b508f08cf3eb86b29b6f9edf68e00454224539421073b/lxml-4.6.3-cp37-cp37m-manylinux1_x86_64.whl (5.5MB)
[K     |████████████████████████████████| 5.5MB 24.4MB/s 
Building wheels for collected packages: yfinance
  Building wheel for yfinance (setup.py) ... [?25l[?25hdone
  Created wheel for yfinance: filename=yfinance-0.1.59-py2.py3-none-any.whl size=23442 sha256=2fcf247f0e84f16d0a637def29b11db845064839e10c1f3a7d83ed372b19aa64
  Stored in directory: /root/.cache/pip/wheels/f8/2a/0f/4b5a86e1d52e451757eb6bc17fd899629f0925c777741b6d04
Successfully built yfinance
Installing collected packages: lxml, yfinance
  Found existing installation: lxml 4.2.6
    Uninstalling lxml-4.2.6:
      Successfully uninstalled lxml-4.2.6
Successfull

In [3]:
import warnings
# Suppress every Python warning for the rest of the session. This also hides
# deprecation and data-conversion warnings raised by the libraries used below.
warnings.filterwarnings('ignore')

import yfinance as yf
import pandas as pd
import numpy as np
import altair as alt
import tensorflow as tf
import project_functions2 as pf

from sklearn.linear_model import LinearRegression
from matplotlib import pyplot as plt
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import PolynomialFeatures
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, SimpleRNN, LSTM, GRU
from tensorflow.keras.layers import LeakyReLU
from tensorflow.keras.callbacks import EarlyStopping

In [4]:
# Show the name of the GPU device TensorFlow can see (last expression of the
# cell, so the value is displayed).
tf.test.gpu_device_name()

'/device:GPU:0'

In [5]:
from tensorflow.python.client import device_lib
# Display every compute device (CPU and GPU) visible to TensorFlow, together
# with its memory limit and hardware description.
device_lib.list_local_devices()

[name: "/device:CPU:0"
 device_type: "CPU"
 memory_limit: 268435456
 locality {
 }
 incarnation: 15785833377079135381, name: "/device:GPU:0"
 device_type: "GPU"
 memory_limit: 15703311680
 locality {
   bus_id: 1
   links {
   }
 }
 incarnation: 15275704609618735226
 physical_device_desc: "device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0"]

In [6]:
from google.colab import drive
# Mount Google Drive at /content/drive; the sentiment CSV files loaded further
# down are read from under this mount point.
drive.mount('/content/drive')

Mounted at /content/drive


In [7]:
# The top two lines are needed to produce altair plots on Google Colab.
# Comment these two lines out if you are running locally.
!pip install altair_data_server
alt.data_transformers.enable('data_server')

# The bottom two lines are needed to produce altair plots on a local machine.
# Keep these two lines commented out when running on Google Colab.
#alt.renderers.enable('default')
#alt.data_transformers.enable('json')

Collecting altair_data_server
  Downloading https://files.pythonhosted.org/packages/e7/a3/0e7651adce146c17eea516ffcb530f7ee769671e59395bc10838eca827db/altair_data_server-0.4.1-py3-none-any.whl
Installing collected packages: altair-data-server
Successfully installed altair-data-server-0.4.1


DataTransformerRegistry.enable('data_server')

In [8]:
# Ticker symbols covered by every experiment in this notebook.
# Remove 'MSFT' from the list to run the five-ticker variant.
stock_list = ['AMZN', 'AAPL', 'FB','GOOG', 'MSFT', 'TSLA']

# One yfinance Ticker handle per symbol, keyed by the symbol itself.
stock_objects = {symbol: yf.Ticker(symbol) for symbol in stock_list}

In [9]:
# Sentiment tables for each ticker, indexed by their 'date' column.
# First every "investing" file is read, then every "stocks" file.
stock_investing = {
    symbol: pd.read_csv(
        '/content/drive/MyDrive/SENG474_Project/data/sentiment/investing_'+symbol+'_sentiment.csv'
    ).set_index('date')
    for symbol in stock_objects
}
stock_stocks = {
    symbol: pd.read_csv(
        '/content/drive/MyDrive/SENG474_Project/data/sentiment/stocks_'+symbol+'_sentiment.csv'
    ).set_index('date')
    for symbol in stock_objects
}

In [None]:
def combiner(stock_dfs):
    """Stack every per-ticker frame into one frame ordered by date.

    Parameters
    ----------
    stock_dfs : dict
        Maps a ticker symbol to its DataFrame. Every frame must expose
        ``Date`` either as a column or as the name of its index, because the
        result is sorted on that label.

    Returns
    -------
    pandas.DataFrame
        A new frame holding the rows of every input frame, sorted by
        ``Date``. The input frames are never modified.

    Raises
    ------
    ValueError
        If ``stock_dfs`` is empty.
    """
    if not stock_dfs:
        raise ValueError("combiner() needs at least one stock DataFrame")

    # Concatenate once instead of growing the result pair by pair, which
    # re-copied the accumulated rows on every iteration.
    combine_df = pd.concat(list(stock_dfs.values()))

    # Return a sorted copy. Sorting in place used to reorder the caller's own
    # frame whenever the dict contained a single ticker.
    return combine_df.sort_values(by=['Date'])

In [10]:
def neural_net_sklearn(combine_df, split_time, stock_dfs):
    """Train a scikit-learn MLP on the pooled stocks and chart the hold-out period.

    In every frame the last column is used as the regression target and all
    preceding columns as features.

    Parameters
    ----------
    combine_df : pandas.DataFrame
        Rows of all stocks stacked together. The final
        ``split_time * len(stock_dfs)`` rows become the test set, so the
        frame is expected to be ordered oldest-first (presumably the output
        of ``combiner`` -- confirm with the caller).
    split_time : int
        Number of trailing rows per stock that are held out.
    stock_dfs : dict
        Ticker symbol -> DataFrame with the same column layout as
        ``combine_df`` and an index named ``Date``.

    Returns
    -------
    altair.Chart
        Line chart of actual and predicted target for each ticker over its
        last ``split_time`` rows.

    Notes
    -----
    Prints the training and test R2 scores as a side effect.
    """
    scaler = MinMaxScaler()
    X = combine_df.iloc[:, :-1]
    y = combine_df.iloc[:, -1:]

    # Hold out the last `split_time` rows of each stock: with the stocks
    # interleaved, that is the final split_time * len(stock_dfs) rows.
    split_mark = int(len(combine_df) - (split_time * len(stock_dfs)))
    X_train = X.head(split_mark)
    X_test = X.tail(len(combine_df) - split_mark)
    y_train = y.head(split_mark)
    y_test = y.tail(len(combine_df) - split_mark)

    # The scaler is fitted on the training rows only and reused everywhere else.
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    stock_nn = MLPRegressor(hidden_layer_sizes=[500, 500, 500], random_state=45)
    # Hand the regressor a 1-D target instead of an (n, 1) column vector.
    stock_nn.fit(X_train_scaled, y_train.values.ravel())

    stock_nn_pred = stock_nn.predict(X_test_scaled)
    stock_nn_train_pred = stock_nn.predict(X_train_scaled)

    train_score = r2_score(y_train, stock_nn_train_pred)
    print("Training R2 Score: " + str(train_score))
    print()

    # Two result columns per ticker: the actual target and the prediction.
    columns = []
    for key in stock_dfs:
        columns.append(key + ' Price')
        columns.append(key + ' Prediction')

    # The first ticker's hold-out dates define the index of the results table.
    first_df = stock_dfs[list(stock_dfs.keys())[0]]
    single_split_mark = int(len(first_df) - split_time)
    results_df = pd.DataFrame(columns=columns,
                              index=first_df.tail(len(first_df) - single_split_mark).index)

    i = 0
    for key in stock_dfs:
        single_split_mark = int(len(stock_dfs[key]) - split_time)
        hold_out = stock_dfs[key].tail(len(stock_dfs[key]) - single_split_mark)
        # Actual values are aligned on the index; predictions are assigned
        # positionally, so each hold-out must have as many rows as the table.
        results_df[columns[i]] = hold_out.iloc[:, -1:]
        curr_X = scaler.transform(hold_out.iloc[:, :-1])
        results_df[columns[i + 1]] = stock_nn.predict(curr_X)
        i += 2

    # Long format (Date, Company, Price) is what the altair encoding expects.
    results_df = results_df.reset_index()
    results_df = results_df.melt('Date', var_name='Company', value_name='Price')
    line_plot = alt.Chart(results_df).mark_line().encode(
        x='Date',
        y='Price',
        color='Company'
    )

    # Score only the rows that have a target. A boolean mask keeps predictions
    # and targets aligned wherever the missing targets sit, instead of
    # assuming they are all at the tail of the test set.
    has_target = y_test.notna().values.ravel()
    model_score = r2_score(y_test[has_target], stock_nn_pred[has_target])
    print("R2 Score: " + str(model_score))

    return line_plot

In [None]:
def neural_net_keras(combine_df, split_time, stock_dfs):
    """Train a small dense Keras network on the pooled stocks and chart the hold-out period.

    In every frame the last column is used as the regression target and all
    preceding columns as features. The network is Dense(150) -> Dense(1),
    trained with Adam on mean squared error for at most 60 epochs with early
    stopping on the training loss.

    Parameters
    ----------
    combine_df : pandas.DataFrame
        Rows of all stocks stacked together. The final
        ``split_time * len(stock_dfs)`` rows become the test set, so the
        frame is expected to be ordered oldest-first (presumably the output
        of ``combiner`` -- confirm with the caller).
    split_time : int
        Number of trailing rows per stock that are held out.
    stock_dfs : dict
        Ticker symbol -> DataFrame with the same column layout as
        ``combine_df`` and an index named ``Date``.

    Returns
    -------
    altair.Chart
        Line chart of actual and predicted target for each ticker over its
        last ``split_time`` rows.

    Notes
    -----
    Prints the training and test R2 scores as a side effect.
    """
    scaler = MinMaxScaler()
    X = combine_df.iloc[:, :-1]
    y = combine_df.iloc[:, -1:]

    # Hold out the last `split_time` rows of each stock: with the stocks
    # interleaved, that is the final split_time * len(stock_dfs) rows.
    split_mark = int(len(combine_df) - (split_time * len(stock_dfs)))
    X_train = X.head(split_mark)
    X_test = X.tail(len(combine_df) - split_mark)
    y_train = y.head(split_mark)
    y_test = y.tail(len(combine_df) - split_mark)

    # The scaler is fitted on the training rows only and reused everywhere else.
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    early_stopping = EarlyStopping(monitor='loss',
                                   patience=3, restore_best_weights=True)
    # NOTE(review): with alpha=1 the negative slope equals the positive one,
    # so this activation is the identity and the whole network is a linear
    # model. Kept as is to preserve the recorded results -- confirm intent.
    leaky_relu = LeakyReLU(alpha=1)

    # Re-seed right before the layers are created so weight initialisation is
    # repeatable regardless of what ran earlier in the session.
    seed(45)
    tf.random.set_seed(45)

    stock_nn = Sequential()
    stock_nn.add(Dense(units=150, input_dim=X_train_scaled.shape[1], activation=leaky_relu))
    stock_nn.add(Dense(units=1, activation=leaky_relu))
    stock_nn.compile(optimizer='adam', loss='mean_squared_error')
    # `workers` was dropped from this call: it only applies to generator /
    # Sequence input and newer Keras releases reject it.
    stock_nn.fit(X_train_scaled, y_train, epochs=60, batch_size=32, verbose=1,
                 callbacks=[early_stopping])

    stock_nn_pred = stock_nn.predict(X_test_scaled)
    stock_nn_train_pred = stock_nn.predict(X_train_scaled)

    train_score = r2_score(y_train, stock_nn_train_pred)
    print("Training R2 Score: " + str(train_score))
    print()

    # Two result columns per ticker: the actual target and the prediction.
    columns = []
    for key in stock_dfs:
        columns.append(key + ' Price')
        columns.append(key + ' Prediction')

    # The first ticker's hold-out dates define the index of the results table.
    first_df = stock_dfs[list(stock_dfs.keys())[0]]
    single_split_mark = int(len(first_df) - split_time)
    results_df = pd.DataFrame(columns=columns,
                              index=first_df.tail(len(first_df) - single_split_mark).index)

    i = 0
    for key in stock_dfs:
        single_split_mark = int(len(stock_dfs[key]) - split_time)
        hold_out = stock_dfs[key].tail(len(stock_dfs[key]) - single_split_mark)
        # Actual values are aligned on the index; predictions are assigned
        # positionally, so each hold-out must have as many rows as the table.
        results_df[columns[i]] = hold_out.iloc[:, -1:]
        curr_X = scaler.transform(hold_out.iloc[:, :-1])
        # The single-unit output layer yields an (n, 1) array; flatten it to
        # a plain column.
        results_df[columns[i + 1]] = stock_nn.predict(curr_X).ravel()
        i += 2

    # Long format (Date, Company, Price) is what the altair encoding expects.
    results_df = results_df.reset_index()
    results_df = results_df.melt('Date', var_name='Company', value_name='Price')
    line_plot = alt.Chart(results_df).mark_line().encode(
        x='Date',
        y='Price',
        color='Company'
    )

    # Score only the rows that have a target. A boolean mask keeps predictions
    # and targets aligned wherever the missing targets sit, instead of
    # assuming they are all at the tail of the test set.
    has_target = y_test.notna().values.ravel()
    model_score = r2_score(y_test[has_target], stock_nn_pred[has_target])
    print("R2 Score: " + str(model_score))

    return line_plot

In [11]:
def rnn(combine_df, split_time, stock_dfs, timesteps=6):
    """Train a SimpleRNN on the pooled stocks and chart the hold-out period.

    Each row's flat feature vector is reshaped to
    ``(timesteps, n_features / timesteps)`` before it is fed to the network
    (SimpleRNN -> Dense -> Dense(1), Adam on mean squared error, at most 60
    epochs with early stopping on the training loss).

    Parameters
    ----------
    combine_df : pandas.DataFrame
        Rows of all stocks stacked together; passed straight to
        ``pf.multi_stock_train_test_split``.
    split_time : int
        Number of trailing rows per stock that are held out.
    stock_dfs : dict
        Ticker symbol -> DataFrame whose last column is the target, whose
        other columns are the features, and whose index is named ``Date``.
    timesteps : int, optional
        Number of time steps packed into each row. Defaults to 6, the value
        that used to be hard-coded (presumably lookback=5 plus the current
        day from ``pf.lstm_prep`` -- confirm).

    Returns
    -------
    altair.Chart
        Line chart of actual and predicted target for each ticker over its
        last ``split_time`` rows.

    Raises
    ------
    ValueError
        If the number of feature columns is not a multiple of ``timesteps``.

    Notes
    -----
    Prints the training and test R2 scores as a side effect.
    """
    scaler = MinMaxScaler()

    X_train, y_train, X_test, y_test = pf.multi_stock_train_test_split(combine_df, split_time, stock_dfs)

    # The scaler is fitted on the training rows only and reused everywhere else.
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    n_columns = X_train_scaled.shape[1]
    if timesteps < 1 or n_columns % timesteps != 0:
        raise ValueError("Cannot split " + str(n_columns) + " feature columns into "
                         + str(timesteps) + " equal time steps")
    step_width = n_columns // timesteps

    X_train_3d = np.reshape(X_train_scaled, (X_train_scaled.shape[0], timesteps, step_width))
    X_test_3d = np.reshape(X_test_scaled, (X_test_scaled.shape[0], timesteps, step_width))

    early_stopping = EarlyStopping(monitor='loss',
                                   patience=2, restore_best_weights=True)
    # NOTE(review): with alpha=1 the negative slope equals the positive one,
    # so this activation is the identity. Kept as is to preserve the recorded
    # results -- confirm intent.
    leaky_relu = LeakyReLU(alpha=1)

    # Re-seed right before the layers are created so weight initialisation is
    # repeatable regardless of what ran earlier in the session.
    seed(45)
    tf.random.set_seed(45)

    stock_nn = Sequential()
    stock_nn.add(SimpleRNN(units=timesteps, activation=leaky_relu))
    # No input_shape here: it has no effect on a layer that is not the first
    # one, and the model infers its input shape from the data passed to fit().
    stock_nn.add(Dense(units=step_width, activation=leaky_relu))
    stock_nn.add(Dense(units=1, activation=leaky_relu))
    stock_nn.compile(optimizer='adam', loss='mean_squared_error')
    # `workers` was dropped from this call: it only applies to generator /
    # Sequence input and newer Keras releases reject it.
    stock_nn.fit(X_train_3d, y_train, epochs=60, batch_size=32, verbose=1,
                 callbacks=[early_stopping])

    stock_nn_pred = stock_nn.predict(X_test_3d)
    stock_nn_train_pred = stock_nn.predict(X_train_3d)

    train_score = r2_score(y_train, stock_nn_train_pred)
    print("Training R2 Score: " + str(train_score))
    print()

    # Two result columns per ticker: the actual target and the prediction.
    columns = []
    for key in stock_dfs:
        columns.append(key + ' Price')
        columns.append(key + ' Prediction')

    # The first ticker's hold-out dates define the index of the results table.
    first_df = stock_dfs[list(stock_dfs.keys())[0]]
    single_split_mark = int(len(first_df) - split_time)
    results_df = pd.DataFrame(columns=columns,
                              index=first_df.tail(len(first_df) - single_split_mark).index)

    i = 0
    for key in stock_dfs:
        single_split_mark = int(len(stock_dfs[key]) - split_time)
        hold_out = stock_dfs[key].tail(len(stock_dfs[key]) - single_split_mark)
        # Actual values are aligned on the index; predictions are assigned
        # positionally, so each hold-out must have as many rows as the table.
        results_df[columns[i]] = hold_out.iloc[:, -1:]
        curr_X = scaler.transform(hold_out.iloc[:, :-1])
        curr_X_3d = np.reshape(curr_X, (curr_X.shape[0], timesteps, step_width))
        # The single-unit output layer yields an (n, 1) array; flatten it to
        # a plain column.
        results_df[columns[i + 1]] = stock_nn.predict(curr_X_3d).ravel()
        i += 2

    # Long format (Date, Company, Price) is what the altair encoding expects.
    results_df = results_df.reset_index()
    results_df = results_df.melt('Date', var_name='Company', value_name='Price')
    line_plot = alt.Chart(results_df).mark_line().encode(
        x='Date',
        y='Price',
        color='Company'
    )

    # Score only the rows that have a target. A boolean mask keeps predictions
    # and targets aligned wherever the missing targets sit, instead of
    # assuming they are all at the tail of the test set.
    has_target = y_test.notna().values.ravel()
    model_score = r2_score(y_test[has_target], stock_nn_pred[has_target])
    print("R2 Score: " + str(model_score))

    return line_plot

In [12]:
def lstm(combine_df, split_time, stock_dfs, timesteps=6):
    """Train an LSTM on the pooled stocks and chart the hold-out period.

    Each row's flat feature vector is reshaped to
    ``(timesteps, n_features / timesteps)`` before it is fed to the network
    (LSTM -> Dense(200) -> Dense(1), Adam on mean squared error, batch size
    128, at most 60 epochs with early stopping on the training loss).

    Parameters
    ----------
    combine_df : pandas.DataFrame
        Rows of all stocks stacked together; passed straight to
        ``pf.multi_stock_train_test_split``.
    split_time : int
        Number of trailing rows per stock that are held out.
    stock_dfs : dict
        Ticker symbol -> DataFrame whose last column is the target, whose
        other columns are the features, and whose index is named ``Date``.
    timesteps : int, optional
        Number of time steps packed into each row. Defaults to 6, the value
        that used to be hard-coded (presumably lookback=5 plus the current
        day from ``pf.lstm_prep`` -- confirm).

    Returns
    -------
    altair.Chart
        Line chart of actual and predicted target for each ticker over its
        last ``split_time`` rows.

    Raises
    ------
    ValueError
        If the number of feature columns is not a multiple of ``timesteps``.

    Notes
    -----
    Prints the training and test R2 scores as a side effect.
    """
    scaler = MinMaxScaler()

    X_train, y_train, X_test, y_test = pf.multi_stock_train_test_split(combine_df, split_time, stock_dfs)

    # The scaler is fitted on the training rows only and reused everywhere else.
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    n_columns = X_train_scaled.shape[1]
    if timesteps < 1 or n_columns % timesteps != 0:
        raise ValueError("Cannot split " + str(n_columns) + " feature columns into "
                         + str(timesteps) + " equal time steps")
    step_width = n_columns // timesteps

    X_train_3d = np.reshape(X_train_scaled, (X_train_scaled.shape[0], timesteps, step_width))
    X_test_3d = np.reshape(X_test_scaled, (X_test_scaled.shape[0], timesteps, step_width))

    early_stopping = EarlyStopping(monitor='loss',
                                   patience=2, restore_best_weights=True)
    # NOTE(review): with alpha=1 the negative slope equals the positive one,
    # so this activation is the identity. Kept as is to preserve the recorded
    # results -- confirm intent.
    leaky_relu = LeakyReLU(alpha=1)

    # Re-seed right before the layers are created so weight initialisation is
    # repeatable regardless of what ran earlier in the session.
    seed(45)
    tf.random.set_seed(45)

    stock_nn = Sequential()
    stock_nn.add(LSTM(units=timesteps, activation=leaky_relu))
    # No input_shape here: it has no effect on a layer that is not the first
    # one, and the model infers its input shape from the data passed to fit().
    stock_nn.add(Dense(units=200, activation=leaky_relu))
    stock_nn.add(Dense(units=1, activation=leaky_relu))
    stock_nn.compile(optimizer='adam', loss='mean_squared_error')
    # `workers` was dropped from this call: it only applies to generator /
    # Sequence input and newer Keras releases reject it.
    stock_nn.fit(X_train_3d, y_train, epochs=60, batch_size=128, verbose=1,
                 callbacks=[early_stopping])

    stock_nn_pred = stock_nn.predict(X_test_3d)
    stock_nn_train_pred = stock_nn.predict(X_train_3d)

    train_score = r2_score(y_train, stock_nn_train_pred)
    print("Training R2 Score: " + str(train_score))
    print()

    # Two result columns per ticker: the actual target and the prediction.
    columns = []
    for key in stock_dfs:
        columns.append(key + ' Price')
        columns.append(key + ' Prediction')

    # The first ticker's hold-out dates define the index of the results table.
    first_df = stock_dfs[list(stock_dfs.keys())[0]]
    single_split_mark = int(len(first_df) - split_time)
    results_df = pd.DataFrame(columns=columns,
                              index=first_df.tail(len(first_df) - single_split_mark).index)

    i = 0
    for key in stock_dfs:
        single_split_mark = int(len(stock_dfs[key]) - split_time)
        hold_out = stock_dfs[key].tail(len(stock_dfs[key]) - single_split_mark)
        # Actual values are aligned on the index; predictions are assigned
        # positionally, so each hold-out must have as many rows as the table.
        results_df[columns[i]] = hold_out.iloc[:, -1:]
        curr_X = scaler.transform(hold_out.iloc[:, :-1])
        curr_X_3d = np.reshape(curr_X, (curr_X.shape[0], timesteps, step_width))
        # The single-unit output layer yields an (n, 1) array; flatten it to
        # a plain column.
        results_df[columns[i + 1]] = stock_nn.predict(curr_X_3d).ravel()
        i += 2

    # Long format (Date, Company, Price) is what the altair encoding expects.
    results_df = results_df.reset_index()
    results_df = results_df.melt('Date', var_name='Company', value_name='Price')
    line_plot = alt.Chart(results_df).mark_line().encode(
        x='Date',
        y='Price',
        color='Company'
    )

    # Score only the rows that have a target. A boolean mask keeps predictions
    # and targets aligned wherever the missing targets sit, instead of
    # assuming they are all at the tail of the test set.
    has_target = y_test.notna().values.ravel()
    model_score = r2_score(y_test[has_target], stock_nn_pred[has_target])
    print("R2 Score: " + str(model_score))

    return line_plot

In [13]:
def gru(combine_df, split_time, stock_dfs, timesteps=6):
    """Train a GRU on the pooled stocks and chart the hold-out period.

    Each row's flat feature vector is reshaped to
    ``(timesteps, n_features / timesteps)`` before it is fed to the network
    (GRU -> Dense(200) -> Dense(1), Adam on mean squared error, batch size
    32, at most 60 epochs with early stopping on the training loss).

    Parameters
    ----------
    combine_df : pandas.DataFrame
        Rows of all stocks stacked together; passed straight to
        ``pf.multi_stock_train_test_split``.
    split_time : int
        Number of trailing rows per stock that are held out.
    stock_dfs : dict
        Ticker symbol -> DataFrame whose last column is the target, whose
        other columns are the features, and whose index is named ``Date``.
    timesteps : int, optional
        Number of time steps packed into each row. Defaults to 6, the value
        that used to be hard-coded (presumably lookback=5 plus the current
        day from ``pf.lstm_prep`` -- confirm).

    Returns
    -------
    altair.Chart
        Line chart of actual and predicted target for each ticker over its
        last ``split_time`` rows.

    Raises
    ------
    ValueError
        If the number of feature columns is not a multiple of ``timesteps``.

    Notes
    -----
    Prints the training and test R2 scores as a side effect.
    """
    scaler = MinMaxScaler()

    X_train, y_train, X_test, y_test = pf.multi_stock_train_test_split(combine_df, split_time, stock_dfs)

    # The scaler is fitted on the training rows only and reused everywhere else.
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    n_columns = X_train_scaled.shape[1]
    if timesteps < 1 or n_columns % timesteps != 0:
        raise ValueError("Cannot split " + str(n_columns) + " feature columns into "
                         + str(timesteps) + " equal time steps")
    step_width = n_columns // timesteps

    X_train_3d = np.reshape(X_train_scaled, (X_train_scaled.shape[0], timesteps, step_width))
    X_test_3d = np.reshape(X_test_scaled, (X_test_scaled.shape[0], timesteps, step_width))

    early_stopping = EarlyStopping(monitor='loss',
                                   patience=2, restore_best_weights=True)
    # NOTE(review): with alpha=1 the negative slope equals the positive one,
    # so this activation is the identity. Kept as is to preserve the recorded
    # results -- confirm intent.
    leaky_relu = LeakyReLU(alpha=1)

    # Re-seed right before the layers are created so weight initialisation is
    # repeatable regardless of what ran earlier in the session.
    seed(45)
    tf.random.set_seed(45)

    stock_nn = Sequential()
    stock_nn.add(GRU(units=timesteps, activation=leaky_relu))
    # No input_shape here: it has no effect on a layer that is not the first
    # one, and the model infers its input shape from the data passed to fit().
    stock_nn.add(Dense(units=200, activation=leaky_relu))
    stock_nn.add(Dense(units=1, activation=leaky_relu))
    stock_nn.compile(optimizer='adam', loss='mean_squared_error')
    # `workers` was dropped from this call: it only applies to generator /
    # Sequence input and newer Keras releases reject it.
    stock_nn.fit(X_train_3d, y_train, epochs=60, batch_size=32, verbose=1,
                 callbacks=[early_stopping])

    stock_nn_pred = stock_nn.predict(X_test_3d)
    stock_nn_train_pred = stock_nn.predict(X_train_3d)

    train_score = r2_score(y_train, stock_nn_train_pred)
    print("Training R2 Score: " + str(train_score))
    print()

    # Two result columns per ticker: the actual target and the prediction.
    columns = []
    for key in stock_dfs:
        columns.append(key + ' Price')
        columns.append(key + ' Prediction')

    # The first ticker's hold-out dates define the index of the results table.
    first_df = stock_dfs[list(stock_dfs.keys())[0]]
    single_split_mark = int(len(first_df) - split_time)
    results_df = pd.DataFrame(columns=columns,
                              index=first_df.tail(len(first_df) - single_split_mark).index)

    i = 0
    for key in stock_dfs:
        single_split_mark = int(len(stock_dfs[key]) - split_time)
        hold_out = stock_dfs[key].tail(len(stock_dfs[key]) - single_split_mark)
        # Actual values are aligned on the index; predictions are assigned
        # positionally, so each hold-out must have as many rows as the table.
        results_df[columns[i]] = hold_out.iloc[:, -1:]
        curr_X = scaler.transform(hold_out.iloc[:, :-1])
        curr_X_3d = np.reshape(curr_X, (curr_X.shape[0], timesteps, step_width))
        # The single-unit output layer yields an (n, 1) array; flatten it to
        # a plain column.
        results_df[columns[i + 1]] = stock_nn.predict(curr_X_3d).ravel()
        i += 2

    # Long format (Date, Company, Price) is what the altair encoding expects.
    results_df = results_df.reset_index()
    results_df = results_df.melt('Date', var_name='Company', value_name='Price')
    line_plot = alt.Chart(results_df).mark_line().encode(
        x='Date',
        y='Price',
        color='Company'
    )

    # Score only the rows that have a target. A boolean mask keeps predictions
    # and targets aligned wherever the missing targets sit, instead of
    # assuming they are all at the tail of the test set.
    has_target = y_test.notna().values.ravel()
    model_score = r2_score(y_test[has_target], stock_nn_pred[has_target])
    print("R2 Score: " + str(model_score))

    return line_plot

In [None]:
# Engineered columns removed before the scikit-learn MLP experiment.
drop_list = [
    'Volume', 'Dividends', 'Stock Splits',
    '5 Day Open Mean', '5 Day High Mean', '5 Day Low Mean', '5 Day Close Mean',
    '5 Day Volume Mean', '5 Day Open Var', '5 Day High Var', '5 Day Low Var',
    '5 Day Close Var', '5 Day Volume Var',
    '10 Day Open Mean', '10 Day High Mean', '10 Day Low Mean', '10 Day Close Mean',
    '10 Day Volume Mean', '10 Day Open Var', '10 Day High Var', '10 Day Low Var',
    '10 Day Close Var', '10 Day Volume Var', '10 Day High', '10 Day Low',
    '20 Day Open Mean', '20 Day High Mean', '20 Day Low Mean', '20 Day Close Mean',
    '20 Day Volume Mean', '20 Day Open Var', '20 Day High Var', '20 Day Low Var',
    '20 Day Close Var', '20 Day Volume Var',
]

# Download the full price history of every ticker first.
stock_dfs = {key: ticker.history(period='max') for key, ticker in stock_objects.items()}

# Then build each ticker's modelling frame: rolling features, column pruning,
# both sentiment tables joined on the index, gaps filled with 0, and finally
# the target column produced by pf.future_close_setup(..., 5).
for key in stock_dfs:
    stock_dfs[key] = (
        stock_dfs[key]
        .pipe(pf.rolling_aves)
        .drop(columns=drop_list)
        .merge(stock_investing[key], how='left', left_index=True, right_index=True)
        .merge(stock_stocks[key], how='left', left_index=True, right_index=True)
        .fillna(0)
        .pipe(pf.future_close_setup, 5)
    )

combine_df = combiner(stock_dfs)
neural_net_sklearn(combine_df, 365, stock_dfs)

Training R2 Score: 0.9980748624653476

R2 Score: 0.9958104878572668


In [None]:
# Engineered columns removed before the dense Keras experiment (this variant
# also drops the 'Dt' columns and 'Golden Cross').
drop_list = [
    'Volume', 'Dividends', 'Stock Splits',
    '5 Day Open Mean', '5 Day High Mean', '5 Day Low Mean', '5 Day Close Mean',
    '5 Day Volume Mean', '5 Day Open Var', '5 Day High Var', '5 Day Low Var',
    '5 Day Close Var', '5 Day Volume Var', '5 Day Dt',
    '10 Day Open Mean', '10 Day High Mean', '10 Day Low Mean', '10 Day Close Mean',
    '10 Day Volume Mean', '10 Day Open Var', '10 Day High Var', '10 Day Low Var',
    '10 Day Close Var', '10 Day Volume Var', '10 Day High', '10 Day Low', '10 Day Dt',
    '20 Day Open Mean', '20 Day High Mean', '20 Day Low Mean', '20 Day Close Mean',
    '20 Day Volume Mean', '20 Day Open Var', '20 Day High Var', '20 Day Low Var',
    '20 Day Close Var', '20 Day Volume Var', '20 Day Dt', 'Golden Cross',
]

# Download the full price history of every ticker first.
stock_dfs = {key: ticker.history(period='max') for key, ticker in stock_objects.items()}

# Then build each ticker's modelling frame: rolling features, column pruning,
# both sentiment tables joined on the index, gaps filled with 0, and finally
# the target column produced by pf.future_close_setup(..., 5).
for key in stock_dfs:
    stock_dfs[key] = (
        stock_dfs[key]
        .pipe(pf.rolling_aves)
        .drop(columns=drop_list)
        .merge(stock_investing[key], how='left', left_index=True, right_index=True)
        .merge(stock_stocks[key], how='left', left_index=True, right_index=True)
        .fillna(0)
        .pipe(pf.future_close_setup, 5)
    )

combine_df = combiner(stock_dfs)
neural_net_keras(combine_df, 365, stock_dfs)

Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Training R2 Score: 0.9982811447658426

R2 Score: 0.9965866469517576


In [None]:
# Engineered columns removed before the SimpleRNN experiment.
drop_list = [
    'Volume', 'Dividends', 'Stock Splits',
    '5 Day Open Mean', '5 Day High Mean', '5 Day Low Mean', '5 Day Close Mean',
    '5 Day Volume Mean', '5 Day Open Var', '5 Day High Var', '5 Day Low Var',
    '5 Day Close Var', '5 Day Volume Var', '5 Day Dt',
    '10 Day Open Mean', '10 Day High Mean', '10 Day Low Mean', '10 Day Close Mean',
    '10 Day Volume Mean', '10 Day Open Var', '10 Day High Var', '10 Day Low Var',
    '10 Day Close Var', '10 Day Volume Var', '10 Day High', '10 Day Low', '10 Day Dt',
    '20 Day Open Mean', '20 Day High Mean', '20 Day Low Mean', '20 Day Close Mean',
    '20 Day Volume Mean', '20 Day Open Var', '20 Day High Var', '20 Day Low Var',
    '20 Day Close Var', '20 Day Volume Var', '20 Day Dt',
]

# Download the full price history of every ticker first.
stock_dfs = {key: ticker.history(period='max') for key, ticker in stock_objects.items()}

# Then build each ticker's modelling frame: rolling features, column pruning,
# both sentiment tables joined on the index, gaps filled with 0, the lookback
# layout from pf.lstm_prep, and finally the target column produced by
# pf.future_close_setup(..., 5).
for key in stock_dfs:
    stock_dfs[key] = (
        stock_dfs[key]
        .pipe(pf.rolling_aves)
        .drop(columns=drop_list)
        .merge(stock_investing[key], how='left', left_index=True, right_index=True)
        .merge(stock_stocks[key], how='left', left_index=True, right_index=True)
        .fillna(0)
        .pipe(pf.lstm_prep, lookback=5)
        .pipe(pf.future_close_setup, 5)
    )

combine_df = pf.combiner(stock_dfs)
rnn(combine_df, 365, stock_dfs)

Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Training R2 Score: 0.9968741650421248

R2 Score: 0.9945149756726508


In [14]:
# Engineered columns removed before the LSTM experiment.
drop_list = [
    'Volume', 'Dividends', 'Stock Splits',
    '5 Day Open Mean', '5 Day High Mean', '5 Day Low Mean', '5 Day Close Mean',
    '5 Day Volume Mean', '5 Day Open Var', '5 Day High Var', '5 Day Low Var',
    '5 Day Close Var', '5 Day Volume Var', '5 Day Dt',
    '10 Day Open Mean', '10 Day High Mean', '10 Day Low Mean', '10 Day Close Mean',
    '10 Day Volume Mean', '10 Day Open Var', '10 Day High Var', '10 Day Low Var',
    '10 Day Close Var', '10 Day Volume Var', '10 Day High', '10 Day Low', '10 Day Dt',
    '20 Day Open Mean', '20 Day High Mean', '20 Day Low Mean', '20 Day Close Mean',
    '20 Day Volume Mean', '20 Day Open Var', '20 Day High Var', '20 Day Low Var',
    '20 Day Close Var', '20 Day Volume Var', '20 Day Dt',
]

# Download the full price history of every ticker first.
stock_dfs = {key: ticker.history(period='max') for key, ticker in stock_objects.items()}

# Then build each ticker's modelling frame: rolling features, column pruning,
# both sentiment tables joined on the index, gaps filled with 0, the lookback
# layout from pf.lstm_prep, and finally the target column produced by
# pf.future_close_setup(..., 5).
for key in stock_dfs:
    stock_dfs[key] = (
        stock_dfs[key]
        .pipe(pf.rolling_aves)
        .drop(columns=drop_list)
        .merge(stock_investing[key], how='left', left_index=True, right_index=True)
        .merge(stock_stocks[key], how='left', left_index=True, right_index=True)
        .fillna(0)
        .pipe(pf.lstm_prep, lookback=5)
        .pipe(pf.future_close_setup, 5)
    )

combine_df = pf.combiner(stock_dfs)
lstm(combine_df, 365, stock_dfs)

Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Training R2 Score: 0.9973016005266081

R2 Score: 0.9951648565322748


# Trying % Change with LSTM

In [None]:
# Engineered columns removed before the LSTM percent-change experiment.
drop_list = [
    'Volume', 'Dividends', 'Stock Splits',
    '5 Day Open Mean', '5 Day High Mean', '5 Day Low Mean', '5 Day Close Mean',
    '5 Day Volume Mean', '5 Day Open Var', '5 Day High Var', '5 Day Low Var',
    '5 Day Close Var', '5 Day Volume Var', '5 Day Dt',
    '10 Day Open Mean', '10 Day High Mean', '10 Day Low Mean', '10 Day Close Mean',
    '10 Day Volume Mean', '10 Day Open Var', '10 Day High Var', '10 Day Low Var',
    '10 Day Close Var', '10 Day Volume Var', '10 Day High', '10 Day Low', '10 Day Dt',
    '20 Day Open Mean', '20 Day High Mean', '20 Day Low Mean', '20 Day Close Mean',
    '20 Day Volume Mean', '20 Day Open Var', '20 Day High Var', '20 Day Low Var',
    '20 Day Close Var', '20 Day Volume Var', '20 Day Dt',
]

# Download the full price history of every ticker first.
stock_dfs = {key: ticker.history(period='max') for key, ticker in stock_objects.items()}

# Then build each ticker's modelling frame. The sentiment tables are not
# merged in this experiment. Infinite values are turned into NaN so that the
# following fillna(0) zeroes them together with the ordinary gaps; the target
# comes from pf.future_percent_change_setup(..., 5).
for key in stock_dfs:
    stock_dfs[key] = (
        stock_dfs[key]
        .pipe(pf.rolling_aves)
        .drop(columns=drop_list)
        .replace([np.inf, -np.inf], np.nan)
        .fillna(0)
        .pipe(pf.lstm_prep, lookback=5)
        .pipe(pf.future_percent_change_setup, 5)
    )

combine_df = pf.combiner(stock_dfs)
lstm(combine_df, 365, stock_dfs)

Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Training R2 Score: -0.014057474034675943

R2 Score: -0.07227999441539379


In [None]:
# Engineered columns removed before the GRU experiment.
drop_list = [
    'Volume', 'Dividends', 'Stock Splits',
    '5 Day Open Mean', '5 Day High Mean', '5 Day Low Mean', '5 Day Close Mean',
    '5 Day Volume Mean', '5 Day Open Var', '5 Day High Var', '5 Day Low Var',
    '5 Day Close Var', '5 Day Volume Var', '5 Day Dt',
    '10 Day Open Mean', '10 Day High Mean', '10 Day Low Mean', '10 Day Close Mean',
    '10 Day Volume Mean', '10 Day Open Var', '10 Day High Var', '10 Day Low Var',
    '10 Day Close Var', '10 Day Volume Var', '10 Day High', '10 Day Low', '10 Day Dt',
    '20 Day Open Mean', '20 Day High Mean', '20 Day Low Mean', '20 Day Close Mean',
    '20 Day Volume Mean', '20 Day Open Var', '20 Day High Var', '20 Day Low Var',
    '20 Day Close Var', '20 Day Volume Var', '20 Day Dt',
]

# Download the full price history of every ticker first.
stock_dfs = {key: ticker.history(period='max') for key, ticker in stock_objects.items()}

# Then build each ticker's modelling frame: rolling features, column pruning,
# both sentiment tables joined on the index, gaps filled with 0, the lookback
# layout from pf.lstm_prep, and finally the target column produced by
# pf.future_close_setup(..., 5).
for key in stock_dfs:
    stock_dfs[key] = (
        stock_dfs[key]
        .pipe(pf.rolling_aves)
        .drop(columns=drop_list)
        .merge(stock_investing[key], how='left', left_index=True, right_index=True)
        .merge(stock_stocks[key], how='left', left_index=True, right_index=True)
        .fillna(0)
        .pipe(pf.lstm_prep, lookback=5)
        .pipe(pf.future_close_setup, 5)
    )

combine_df = pf.combiner(stock_dfs)
gru(combine_df, 365, stock_dfs)

Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Training R2 Score: 0.9967892538653991

R2 Score: 0.9912124390702032


In [None]:
# Engineered columns removed before the GRU percent-change experiment.
drop_list = [
    'Volume', 'Dividends', 'Stock Splits',
    '5 Day Open Mean', '5 Day High Mean', '5 Day Low Mean', '5 Day Close Mean',
    '5 Day Volume Mean', '5 Day Open Var', '5 Day High Var', '5 Day Low Var',
    '5 Day Close Var', '5 Day Volume Var', '5 Day Dt',
    '10 Day Open Mean', '10 Day High Mean', '10 Day Low Mean', '10 Day Close Mean',
    '10 Day Volume Mean', '10 Day Open Var', '10 Day High Var', '10 Day Low Var',
    '10 Day Close Var', '10 Day Volume Var', '10 Day High', '10 Day Low', '10 Day Dt',
    '20 Day Open Mean', '20 Day High Mean', '20 Day Low Mean', '20 Day Close Mean',
    '20 Day Volume Mean', '20 Day Open Var', '20 Day High Var', '20 Day Low Var',
    '20 Day Close Var', '20 Day Volume Var', '20 Day Dt',
]

# Download the full price history of every ticker first.
stock_dfs = {key: ticker.history(period='max') for key, ticker in stock_objects.items()}

# Then build each ticker's modelling frame. The sentiment tables are not
# merged in this experiment and infinite values are left untouched; the
# target comes from pf.future_percent_change_setup(..., 1).
for key in stock_dfs:
    stock_dfs[key] = (
        stock_dfs[key]
        .pipe(pf.rolling_aves)
        .drop(columns=drop_list)
        .fillna(0)
        .pipe(pf.lstm_prep, lookback=5)
        .pipe(pf.future_percent_change_setup, 1)
    )

combine_df = pf.combiner(stock_dfs)
gru(combine_df, 365, stock_dfs)

Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Training R2 Score: 0.0003869875785629251

R2 Score: -0.0031884802579291716
