In [1]:
from numpy.random import seed
import tensorflow as tf
# Seed NumPy's and TensorFlow's global random generators with the same value.
RANDOM_SEED = 45
seed(RANDOM_SEED)
tf.random.set_seed(RANDOM_SEED)

In [2]:
!pip install yfinance



In [3]:
import warnings
warnings.filterwarnings('ignore')

import yfinance as yf
import pandas as pd
import numpy as np
import altair as alt
import datetime
import project_functions2 as pf

from matplotlib import pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense
from tensorflow.keras.layers import LeakyReLU
from tensorflow.keras.callbacks import EarlyStopping

In [4]:
# Mount Google Drive (Colab only); the sentiment CSVs read later in this
# notebook live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [5]:
# Ticker symbols to simulate, and one yfinance Ticker handle per symbol.
stock_list = ['AMZN', 'AAPL', 'FB','GOOG', 'MSFT', 'TSLA']
stock_objects = {symbol: yf.Ticker(symbol) for symbol in stock_list}

In [6]:
def _build_feature_frames(raw_history, stock_investing, stock_stocks, drop_list,
                          target_setup, horizon):
    """Return ``{ticker: frame}`` of model-ready frames built from raw price history.

    For every ticker a copy of its raw history is passed through
    ``pf.rolling_aves``, the columns in ``drop_list`` are removed, both
    sentiment tables are left-joined on the index, missing values are replaced
    with 0 and the result is handed to ``target_setup(frame, horizon)``.
    """
    frames = {}
    for key, history in raw_history.items():
        # Copy first so the close and low pipelines never share a frame.
        frame = pf.rolling_aves(history.copy())
        frame = frame.drop(drop_list, axis=1)
        frame = frame.merge(stock_investing[key], how='left', left_index=True, right_index=True)
        frame = frame.merge(stock_stocks[key], how='left', left_index=True, right_index=True)
        frame = frame.fillna(0)
        frames[key] = target_setup(frame, horizon)
    return frames


def _fit_regressor(features, targets, activation, callbacks):
    """Build, compile and fit the 150-unit single-hidden-layer regression network."""
    model = Sequential()
    model.add(Dense(units=150, input_dim=features.shape[1], activation=activation))
    model.add(Dense(units=1, activation=activation))
    model.compile(optimizer='adam', loss='mean_squared_error')
    # `workers` is deliberately not passed: the inputs are in-memory arrays.
    model.fit(features, targets, epochs=60, batch_size=32, verbose=0, callbacks=callbacks)
    return model


def trading_sim_NN(stock_objects, split_time, time_shift):
    """Simulate a neural-network-driven trading fund over the most recent rows.

    On every step two networks are retrained (one on the targets produced by
    ``pf.future_close_setup(..., 5)``, one on those produced by
    ``pf.future_low_setup(..., 1)``). The ticker with the largest predicted
    relative gain over its current ``Close`` is selected; if that gain is
    positive the fund buys it (liquidating any other holding first), otherwise
    everything is sold. The fund starts with 10000 in cash and every
    transaction step is charged a 6.95 commission.

    Side effects: downloads the full price history of every ticker and reads
    the ``investing_<ticker>_sentiment.csv`` / ``stocks_<ticker>_sentiment.csv``
    files from the mounted Google Drive folder.

    Parameters
    ----------
    stock_objects : dict
        Maps ticker symbol to an object exposing ``history(period='max')``
        (``yfinance.Ticker`` instances in this notebook).
    split_time : int
        Number of trailing rows per ticker that are held out and traded on.
    time_shift : int
        Number of rows the simulation advances per iteration; must be >= 1.

    Returns
    -------
    pandas.DataFrame
        One row for the starting state plus one per iteration, with columns
        ``Date``, ``Fund Value``, ``Cash`` and one share-count column per ticker.

    Raises
    ------
    ValueError
        If ``time_shift`` is smaller than 1 (the window would never shrink).
    """
    if time_shift < 1:
        raise ValueError('time_shift must be at least 1')

    commission = 6.95  # flat fee charged per transaction step
    sentiment_dir = '/content/drive/MyDrive/SENG474_Project/data/sentiment/'

    scaler = MinMaxScaler()
    columns = ['Date', 'Fund Value', 'Cash']
    stock_names = []
    curr_cash = 10000
    curr_shares = {}
    drop_list = ['Volume', 'Dividends', 'Stock Splits',
                 '5 Day Open Mean', '5 Day High Mean', '5 Day Low Mean',
                 '5 Day Close Mean', '5 Day Volume Mean', '5 Day Open Var',
                 '5 Day High Var', '5 Day Low Var', '5 Day Close Var',
                 '5 Day Volume Var', '10 Day Open Mean', '10 Day High Mean',
                 '10 Day Low Mean', '10 Day Close Mean', '10 Day Volume Mean',
                 '10 Day High Var', '10 Day Low Var', '10 Day Close Var',
                 '10 Day Volume Var', '10 Day High', '10 Day Low',
                 '20 Day Open Mean', '20 Day High Mean', '20 Day Low Mean',
                 '20 Day Close Mean', '20 Day Volume Mean', '20 Day Open Var',
                 '20 Day High Var', '20 Day Low Var', '20 Day Close Var',
                 '20 Day Volume Var', '10 Day Open Var']

    # Download each history once; both feature pipelines work on copies of it.
    raw_history = {key: stock_objects[key].history(period='max') for key in stock_objects}

    stock_investing = {}
    stock_stocks = {}
    for key in stock_objects:
        stock_investing[key] = pd.read_csv(
            sentiment_dir + 'investing_' + key + '_sentiment.csv').set_index('date')
        stock_stocks[key] = pd.read_csv(
            sentiment_dir + 'stocks_' + key + '_sentiment.csv').set_index('date')

    stock_dfs = _build_feature_frames(raw_history, stock_investing, stock_stocks,
                                      drop_list, pf.future_close_setup, 5)
    stock_low_dfs = _build_feature_frames(raw_history, stock_investing, stock_stocks,
                                          drop_list, pf.future_low_setup, 1)

    combine_df = pf.combiner(stock_dfs)
    combine_low_df = pf.combiner(stock_low_dfs)

    # NOTE(review): test_dfs is the SAME dict as stock_dfs, so the truncation
    # below is also seen by pf.multi_stock_train_test_split(…, stock_dfs).
    # Kept as-is because that helper's use of the argument is not visible here.
    test_dfs = stock_dfs

    for key in test_dfs:
        test_dfs[key] = test_dfs[key].tail(split_time)
        curr_shares[key] = 0
        stock_names.append(key)

    columns = columns + stock_names
    cash_df = pd.DataFrame(columns=columns)
    # Opening row: everything in cash, no shares held.
    curr_line = [combine_df.index[int(len(combine_df) - (split_time * len(stock_dfs)))], curr_cash, curr_cash] + len(stock_names)*[0]
    cash_df.loc[len(cash_df)] = curr_line

    while split_time > time_shift:
        max_stock = ''
        max_stock_gain = 0
        max_stock_low_pred = None
        X_train, y_train, X_test, _ = pf.multi_stock_train_test_split(combine_df, split_time, stock_dfs)
        # NOTE(review): X_train / X_test from the close split are overwritten
        # here, so BOTH models are trained on the low-split features — confirm
        # the two splits are row-aligned.
        X_train, low_train, X_test, _ = pf.multi_stock_train_test_split(combine_low_df, split_time, stock_low_dfs)
        X_train_scaled = scaler.fit_transform(X_train)

        early_stopping = EarlyStopping(monitor='loss', patience=3, restore_best_weights=True)
        # NOTE(review): with alpha=1 the negative slope equals the positive
        # one, which presumably makes this activation linear — confirm intent.
        leaky_relu = LeakyReLU(alpha=1)

        # Re-seed before building the models so every retraining starts from
        # the same random state.
        seed(45)
        tf.random.set_seed(45)

        stock_model = _fit_regressor(X_train_scaled, y_train, leaky_relu, [early_stopping])
        low_model = _fit_regressor(X_train_scaled, low_train, leaky_relu, [early_stopping])

        # Score the first remaining row of every ticker and keep the best one.
        for key in test_dfs:
            X = test_dfs[key].iloc[:, :-1]  # every column except the last (target)
            first_row_scaled = scaler.transform(X.iloc[:1])
            last_close = X['Close'].iloc[0]
            stock_pred = float(stock_model.predict(first_row_scaled)[0, 0])
            stock_pred = (stock_pred - last_close) / last_close  # relative gain
            low_pred = float(low_model.predict(first_row_scaled)[0, 0])
            if stock_pred > max_stock_gain:
                max_stock = key
                max_stock_gain = stock_pred
                max_stock_low_pred = low_pred

        # Step to the next row: trades below are priced on it.
        for key in test_dfs:
            test_dfs[key] = test_dfs[key].iloc[1:]

        if max_stock_gain > 0:
            open_price = test_dfs[max_stock]['Open'].iloc[0]
            # NOTE(review): this uses the Low of the very row being traded,
            # which would not be known at the open — confirm the look-ahead
            # is intended.
            proj_buy = float((open_price - max_stock_low_pred)*0.5 + test_dfs[max_stock]['Low'].iloc[0])
            buy_at_open = open_price <= max_stock_low_pred and open_price <= proj_buy

            if curr_shares[max_stock] == 0:
                # Switching position: sell everything at the open first.
                for key in curr_shares:
                    curr_cash += curr_shares[key]*test_dfs[key]['Open'].iloc[0]
                    curr_shares[key] = 0
                # NOTE(review): the sell commission is charged even when no
                # shares were held — confirm.
                curr_cash -= commission
                fill_price = open_price if buy_at_open else proj_buy
            else:
                # Already holding the pick: top up with the remaining cash.
                # The open price is only used when more than 70 is available.
                fill_price = open_price if (buy_at_open and curr_cash > 70) else proj_buy

            curr_cash -= commission
            shares_bought = curr_cash // fill_price
            curr_shares[max_stock] += shares_bought
            curr_cash -= shares_bought * fill_price
        else:
            # No positive prediction: move everything to cash.
            curr_cash -= commission
            for key in curr_shares:
                curr_cash += curr_shares[key]*test_dfs[key]['Open'].iloc[0]
                curr_shares[key] = 0

        # Record cash, share counts and the fund value marked at the open.
        curr_line = [X_test.index[len(stock_names)], curr_cash, curr_cash] + len(stock_names)*[0]
        cash_df.loc[len(cash_df)] = curr_line
        for key in curr_shares:
            cash_df.iloc[-1, cash_df.columns.get_loc(key)] = curr_shares[key]
            cash_df.iloc[-1, cash_df.columns.get_loc('Fund Value')] += curr_shares[key]*test_dfs[key]['Open'].iloc[0]
        # One row was already consumed above; skip the rest of this step.
        for key in test_dfs:
            test_dfs[key] = test_dfs[key].iloc[time_shift-1:]

        split_time -= time_shift

    return cash_df

In [None]:
# Trade over the last 292 rows of every stock, advancing one row per step,
# and index the resulting ledger by date.
fund_df = trading_sim_NN(stock_objects, split_time=292, time_shift=1).set_index('Date')
fund_df

In [None]:
# Price history of the index that the fund is compared against below
# (it is labelled 'NASDAQ Composite' in the results).
BENCHMARK_TICKER = '^IXIC'
BENCHMARK_START = '2019-01-01'
index = yf.Ticker(BENCHMARK_TICKER)
index_df = index.history(start=BENCHMARK_START)
index_df

In [None]:
# Put the fund value next to the index close for the same dates.
results_df = fund_df.merge(index_df, how='left', left_index=True, right_index=True)
# Keep only the two series to plot. `columns=` is used because DataFrame.drop
# no longer accepts the axis as a positional argument in pandas >= 2.0.
results_df = results_df.drop(columns=results_df.columns.difference(['Fund Value', 'Close']))
results_df = results_df.rename(columns={'Close': 'NASDAQ Composite'})
# Rescale the index so that it starts at 10000, the fund's starting cash.
# `.iloc[0]` selects the first row by position regardless of the date index.
nas_open = results_df['NASDAQ Composite'].iloc[0]
results_df['NASDAQ Composite'] = results_df['NASDAQ Composite']*10000 / nas_open
# Long format (Date, Company, Price) is what the Altair encoding below expects.
results_df = results_df.reset_index()
results_df = results_df.melt('Date', var_name='Company', value_name='Price')
# Plots Results
line_plot = alt.Chart(results_df).mark_line().encode(
    x = 'Date',
    y = 'Price',
    color = 'Company'
).properties(
    title='Big 6 Neural Net Simulation Feb 2020-April 2021'
)
line_plot