In [1]:
import warnings
warnings.filterwarnings('ignore')

import yfinance as yf
import pandas as pd
import numpy as np
import altair as alt
import datetime
import project_functions2 as pf

from matplotlib import pyplot as plt
from sklearn.linear_model import LinearRegression

In [2]:
#stock_list = ['AMZN', 'AAPL', 'FB','GOOG', 'MSFT', 'TSLA']
#stock_list = ['FISV', 'MNST']

In [3]:
# Load the NASDAQ listing and keep tickers with a positive market cap, an IPO
# year other than 2019-2021 (rows without an IPO year are kept) and a volume
# above 10,000.
stock_list_df = pd.read_csv('data/nasdaq_list.csv')
stock_list_df = stock_list_df[
    (stock_list_df['Market Cap'] > 0)
    & ~stock_list_df['IPO Year'].isin([2019, 2020, 2021])
    & (stock_list_df['Volume'] > 10000)
]
stock_list = stock_list_df['Symbol'].tolist()
stock_list_df

Unnamed: 0,Symbol,Name,Last Sale,Net Change,% Change,Market Cap,Country,IPO Year,Volume,Sector,Industry
0,AACG,ATA Creativity Global American Depositary Shares,$4.92,0.95,23.929%,1.542445e+08,China,,5805582,Consumer Services,Other Consumer Services
4,AAL,American Airlines Group Inc. Common Stock,$22.93,0.16,0.703%,1.466777e+10,United States,,36928471,Transportation,Air Freight/Delivery Services
5,AAME,Atlantic American Corporation Common Stock,$3.64,-0.31,-7.848%,7.430798e+07,United States,,264911,Finance,Life Insurance
6,AAOI,Applied Optoelectronics Inc. Common Stock,$8.29,0.45,5.74%,2.213401e+08,United States,2013.0,511119,Technology,Semiconductors
7,AAON,AAON Inc. Common Stock,$72.33,1.50,2.118%,3.781921e+09,United States,,140126,Capital Goods,Industrial Machinery/Components
...,...,...,...,...,...,...,...,...,...,...,...
3978,ZSAN,Zosano Pharma Corporation Common Stock,$1.16,-0.08,-6.452%,1.232963e+08,United States,2015.0,1815549,Health Care,Major Pharmaceuticals
3979,ZUMZ,Zumiez Inc. Common Stock,$43.85,0.44,1.014%,1.123427e+09,United States,2005.0,215400,Consumer Services,Clothing/Shoe/Accessory Stores
3980,ZVO,Zovio Inc. Common Stock,$4.69,0.08,1.735%,1.536001e+08,United States,,174308,Consumer Services,Other Consumer Services
3983,ZYNE,Zynerba Pharmaceuticals Inc. Common Stock,$4.62,-0.14,-2.941%,1.905821e+08,United States,2015.0,1196513,Health Care,Major Pharmaceuticals


In [4]:
# One yfinance Ticker handle per symbol in stock_list.
stock_objects = {symbol: yf.Ticker(symbol) for symbol in stock_list}

In [5]:
def trading_sim_LRM(stock_objects, split_time, time_shift):
    """Back-test a linear-regression stock-picking strategy.

    For every ticker the price history is downloaded, passed through
    ``pf.rolling_aves``, ``pf.future_low_setup`` and ``pf.future_close_setup``,
    and all tickers are merged with ``pf.combiner``. The last ``split_time``
    rows of each ticker form the simulation window. On every step two
    ``LinearRegression`` models are refit on the split returned by
    ``pf.multi_stock_train_test_split`` (one on ``y_train``, one on the
    'Low in 1 Days' column). The ticker with the highest predicted relative
    gain is bought with all available cash; if no ticker has a positive
    predicted gain, all positions are sold. Each step advances the window by
    ``time_shift`` rows.

    Parameters
    ----------
    stock_objects : dict
        Maps ticker symbol -> object exposing ``history(start=..., end=...)``
        (``yfinance.Ticker`` instances in this notebook).
    split_time : int
        Number of trailing rows per ticker used as the simulation window.
    time_shift : int
        Rows advanced per simulation step; also forwarded to
        ``pf.future_close_setup``.

    Returns
    -------
    pandas.DataFrame
        One row per step with the columns 'Date', 'Fund Value', 'Cash'
        followed by one share-count column per ticker.
    """
    commission = 6.95  # amount deducted from cash around every simulated order
    columns = ['Date', 'Fund Value', 'Cash']
    stock_names = []
    curr_cash = 10000
    curr_shares = {}
    drop_list = [ 'Volume', 'Dividends', 'Stock Splits',
                 '5 Day Open Mean', '5 Day High Mean', '5 Day Low Mean',
                 '5 Day Close Mean', '5 Day Volume Mean', '5 Day Open Var',
                 '5 Day High Var', '5 Day Low Var', '5 Day Close Var',
                 '5 Day Volume Var','10 Day Open Mean', '10 Day High Mean', 
                 '10 Day Low Mean','10 Day Close Mean', '10 Day Volume Mean', 
                 '10 Day High Var', '10 Day Low Var', '10 Day Close Var',
                 '10 Day Volume Var', '10 Day High', '10 Day Low', 
                 '20 Day Open Mean', '20 Day High Mean', '20 Day Low Mean',
                 '20 Day Close Mean', '20 Day Volume Mean', '20 Day Open Var',
                 '20 Day High Var', '20 Day Low Var', '20 Day Close Var',
                 '20 Day Volume Var', '10 Day Open Var']

    stock_dfs = {
        key: ticker.history(start='1990-01-01', end='2021-03-31')
        for key, ticker in stock_objects.items()
    }

    #stock_investing = {}
    #for key in stock_objects:
    #    stock_investing[key] = pd.read_csv('/content/drive/MyDrive/SENG474_Project/data/sentiment/investing_'+key+'_sentiment.csv')
    #    stock_investing[key].set_index('date', inplace=True)
    #stock_stocks = {}
    #for key in stock_objects:
    #    stock_stocks[key] = pd.read_csv('/content/drive/MyDrive/SENG474_Project/data/sentiment/stocks_'+key+'_sentiment.csv')
    #    stock_stocks[key].set_index('date', inplace=True)

    for key in stock_dfs:
        stock_dfs[key] = pf.rolling_aves(stock_dfs[key]).drop(drop_list, axis=1)
        #stock_dfs[key] = stock_dfs[key].merge(stock_investing[key], how='left', left_index=True, right_index=True)
        #stock_dfs[key] = stock_dfs[key].merge(stock_stocks[key], how='left', left_index=True, right_index=True)
        stock_dfs[key] = stock_dfs[key].fillna(0)
        # Drop every row that contained +/-inf. The previous code assigned the
        # result of dropna(..., inplace=True), which is always None, and used
        # how="all", which would have kept rows with a single non-finite value
        # and let them reach LinearRegression.fit.
        stock_dfs[key] = stock_dfs[key].replace([np.inf, -np.inf], np.nan).dropna()
        stock_dfs[key] = pf.future_low_setup(stock_dfs[key], 1)
        stock_dfs[key] = pf.future_close_setup(stock_dfs[key], time_shift)

    combine_df = pf.combiner(stock_dfs)

    # NOTE(review): test_dfs is the same dict object as stock_dfs, so the
    # truncation below also changes the frames later handed to
    # pf.multi_stock_train_test_split -- confirm this is intended.
    test_dfs = stock_dfs

    for key in test_dfs:
        test_dfs[key] = test_dfs[key].tail(split_time)
        curr_shares[key] = 0
        stock_names.append(key)

    columns = columns + stock_names
    cash_df = pd.DataFrame(columns=columns)
    # Opening row: all cash, no positions, dated at the start of the test window.
    curr_line = [combine_df.index[int(len(combine_df) - (split_time * len(stock_dfs)))], curr_cash, curr_cash] + len(stock_names)*[0]
    cash_df.loc[len(cash_df)] = curr_line

    while split_time > time_shift:
        max_stock = ''
        max_stock_gain = 0

        X_train, y_train, X_test, y_test = pf.multi_stock_train_test_split(combine_df, split_time, stock_dfs)
        # The last training column ('Low in 1 Days') is the target of the
        # second model, so it is removed from the feature matrix.
        low_train = X_train.iloc[:, -1]
        X_train = X_train.drop(['Low in 1 Days'], axis=1)
        X_test = X_test.drop(['Low in 1 Days'], axis=1)
        stock_model = LinearRegression().fit(X_train, y_train)
        low_model = LinearRegression().fit(X_train, low_train)

        # Pick the ticker with the highest predicted relative gain.
        for key in test_dfs:
            X = test_dfs[key].iloc[:, :-2]  # strip the two trailing target columns
            first_row = X.head(1)
            close_now = X['Close'].iloc[0]
            # np.ravel(...)[0] turns the one-row prediction into a plain scalar
            # whether predict() returns shape (1,) or (1, 1).
            pred_close = float(np.ravel(stock_model.predict(first_row))[0])
            stock_pred = (pred_close - close_now) / close_now
            if stock_pred > max_stock_gain:
                max_stock = key
                max_stock_gain = stock_pred
                max_stock_low_pred = float(np.ravel(low_model.predict(first_row))[0])

        # Step one row forward: orders are priced on the row after the one
        # the prediction was made from.
        for key in test_dfs:
            test_dfs[key] = test_dfs[key].iloc[1:]

        if max_stock_gain > 0:
            open_price = test_dfs[max_stock]['Open'].iloc[0]
            # NOTE(review): proj_buy uses the 'Low' of the same row as 'Open',
            # which looks like information from later in that row's period --
            # confirm this look-ahead is intended.
            proj_buy = float((open_price - max_stock_low_pred)*0.5 + test_dfs[max_stock]['Low'].iloc[0])
            #print(test_dfs[max_stock].index[0])
            #if max_stock_low_pred >= test_dfs[max_stock]['Open'][0]:
            #   print('Buy at open')
            #print(float(proj_buy - test_dfs[max_stock]['Low'][0]))
            buy_at_open = open_price <= max_stock_low_pred and open_price <= proj_buy
            if curr_shares[max_stock] == 0:
                # Switching ticker: liquidate everything at the open first.
                for key in curr_shares:
                    curr_cash += curr_shares[key]*test_dfs[key]['Open'].iloc[0]
                    curr_shares[key] = 0
                curr_cash -= commission  # deducted for the liquidation
                buy_price = open_price if buy_at_open else proj_buy
                curr_cash -= commission  # deducted for the purchase
                curr_shares[max_stock] = curr_cash // buy_price
                curr_cash -= curr_shares[max_stock]*buy_price
            else:
                # Already holding the pick: top up with the remaining cash.
                # NOTE(review): the `curr_cash > 70` guard only selects the
                # price; the commission is deducted in either case.
                buy_price = open_price if (buy_at_open and curr_cash > 70) else proj_buy
                curr_cash -= commission
                added_shares = curr_cash // buy_price
                curr_shares[max_stock] += added_shares
                curr_cash -= added_shares * buy_price
        else:
            # No ticker is predicted to gain: move everything to cash.
            curr_cash -= commission
            for key in curr_shares:
                curr_cash += curr_shares[key]*test_dfs[key]['Open'].iloc[0]
                curr_shares[key] = 0

        # Record cash, holdings and the fund value (cash + holdings at the open).
        curr_line = [X_test.index[len(stock_names)], curr_cash, curr_cash] + len(stock_names)*[0]
        cash_df.loc[len(cash_df)] = curr_line
        for key in curr_shares:
            cash_df.iloc[-1, cash_df.columns.get_loc(key)] = curr_shares[key]
            cash_df.iloc[-1, cash_df.columns.get_loc('Fund Value')] += curr_shares[key]*test_dfs[key]['Open'].iloc[0]
        # Together with the one-row step above this advances time_shift rows.
        for key in test_dfs:
            test_dfs[key] = test_dfs[key].iloc[time_shift-1:]

        split_time -= time_shift

    return cash_df

In [6]:
# Run the back-test over the last 292 rows per ticker, stepping 5 rows at a time.
fund_df = trading_sim_LRM(stock_objects, split_time=292, time_shift=5)
fund_df

ValueError: Input contains NaN, infinity or a value too large for dtype('float64').

In [None]:
#drop_list = [ 'Volume', 'Dividends', 'Stock Splits',
#             '5 Day Open Mean', '5 Day High Mean', '5 Day Low Mean',
#             '5 Day Close Mean', '5 Day Volume Mean', '5 Day Open Var',
#             '5 Day High Var', '5 Day Low Var', '5 Day Close Var',
#             '5 Day Volume Var','10 Day Open Mean', '10 Day High Mean', 
#             '10 Day Low Mean','10 Day Close Mean', '10 Day Volume Mean', 
#             '10 Day High Var', '10 Day Low Var', '10 Day Close Var',
#             '10 Day Volume Var', '10 Day High', '10 Day Low', 
#             '20 Day Open Mean', '20 Day High Mean', '20 Day Low Mean',
#             '20 Day Close Mean', '20 Day Volume Mean', '20 Day Open Var',
#             '20 Day High Var', '20 Day Low Var', '20 Day Close Var',
#             '20 Day Volume Var', '10 Day Open Var']
#
#stock_dfs = {}
#stock_low_sers = {}
#for key in stock_objects:
#    stock_dfs[key] = stock_objects[key].history(start='1990-01-01', end='2021-03-31')
#
#for key in stock_dfs:
#    stock_dfs[key] = pf.rolling_aves(stock_dfs[key])
#    stock_dfs[key].drop(drop_list, axis=1, inplace=True)
#    #stock_dfs[key] = stock_dfs[key].merge(stock_investing[key], how='left', left_index=True, right_index=True)
#    #stock_dfs[key] = stock_dfs[key].merge(stock_stocks[key], how='left', left_index=True, right_index=True)
#    stock_dfs[key].fillna(0, inplace=True)
#    stock_dfs[key] = stock_dfs[key][~stock_dfs[key].isin([np.nan, np.inf, -np.inf]).any(1)]
#    stock_dfs[key] = pf.future_close_setup(stock_dfs[key], 5)
#    stock_low_sers[key] = pd.Series(stock_dfs[key]['Close'])
#    stock_low_sers[key] = stock_low_sers[key].shift(-1)
#    
#combine_low_ser = pf.ser_combiner(stock_low_sers)
#combine_low_ser.tail(30)