#### Analysis of multiple stocks - for simulation over the course of n years at variable buy in frequency

Goal: This script simulates any number (typically 2) years of n-weekly prediction/closing_cost determinations and simulates for a series of chosen stocks if it is better to invest a consistent price or buy in higher/lower depending on the current performance of the stock.

Take any number of stocks and run a trendline through a series of overlapping cycles of variable length, creating a linear prediction to be applied. Assess the theoretical performance of adjusting contributions as compared to contributing a consistent amount over time.

This script only uses stock opening price and no other external variables. TBD introduction of other info.


Education:

This is an exercise in data transformation. Not statistics or predictive modeling, although the result would be similar to the intended results of predictive modeling. 

Limitations:

This script should be able to imitate any amount time using a series of strategies, given enough stock history available. The main misleading results this will give compared to paper trading is in choosing stocks and pretending you've been interested in them for n years.

In [68]:
import yfinance as yf
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import datetime
from datetime import date
from datetime import datetime
import math
from sys import exit
import time
from sklearn.linear_model import LinearRegression
import yahoo_fin.stock_info as si
from yahoofinancials import YahooFinancials
pd.options.mode.chained_assignment = None  # default='warn'


In [69]:
############################ operation turn stock recommender into a function

### to do
# make the result a df with stocks named
# add date of analysis to the results
# how to set a default if blank?
# figure out which packages are actually used (delete them 1 by one)


In [70]:
def pred_open (trade_type: str, stock_list: list, contrib_amt: list, roll_days: int, buyvalue: float, multiplier: float) -> list:
    
    ### duplicate contrib_amt for all stocks if only 1 listed
    if len(contrib_amt) == len(stock_list):
        pass
    elif len(contrib_amt) == 1: 
        contrib_amt = [contrib_amt[0] for x in enumerate(stock_list)]
    else:
        print('Incorrect length of contrib_amt. Make it match the length of the stock list or be 1 value')
        exit()

    ### pull most recent day
    if trade_type == 'crypto' or trade_type == 'index':
        pass
    else:
        x = 0
        while x < 1:
            df_now = yf.download(
            tickers = stock_list
            ,period = '1d' # set for 'today' instead
            ,interval = '1m'
            )

            # ensures a single stock can pass through, not just 2+ 
            if len(stock_list) == 1:
                df_now[stock_list[0]] = df_now['Open']
                df_now = df_now[[stock_list[0]]]
            else:
                df_now = df_now['Open']

            df_now = df_now.head(1) # open for today
            df_now = df_now.fillna(0)

            x = 1
            for i in stock_list:
                x = x * int(df_now[i])

            if x == 0: # wait 15 seconds if data aren't complete
                time.sleep(15)

    ### Overly complex way to pull data, but I have found that 'Open' prices are just a copy of the previous day for the first few minutes of the trading day
    ### This method pulls in the true Open prices for today much quicker (a couple minutes after 6:30am PST)

    if trade_type == 'crypto' or trade_type == 'index':
        df = yf.download(
            tickers = stock_list
            ,period = str(roll_days) + 'd'
        )

        # ensures a single crypto or index can pass through, not just 2+ 
        if len(stock_list) == 1:
            df[stock_list[0]] = df['Open']
            df = df[[stock_list[0]]]
        else:
            df = df['Open']
    else:
        # Pull all data except for today
        df_bulk = yf.download(
                tickers = stock_list
                ,period = str(roll_days) + 'd'
            )

        # ensures a single stock can pass through, not just 2+ 
        if len(stock_list) == 1:
            df_bulk[stock_list[0]] = df_bulk['Open']
            df_bulk = df_bulk[[stock_list[0]]]
        else:
            df_bulk = df_bulk['Open']

        df_good_index = df_bulk.copy() # used to grab the ideal index
        df_bulk.drop(df_bulk.tail(1).index,inplace=True) # bulk w/o the most recent day

        # join the data (index is still bad)
        df = pd.concat([df_bulk, df_now])

        # sub in a good index
        df = df.reindex_like(df_good_index)

        # sub in good open data for today
        for i in stock_list:
            df[i][len(df)-1] = df_now[i].copy()
        

    # add an index and useable date
    df['Index'] = np.arange(1,len(df)+1)
    df['date'] = df.index

    # error checking, if a stock doesn't have enough history based on the current needs
    nlist = []
    for i in stock_list:
        if pd.isna(df[i].iloc[0]) == True:
            nlist.append(i)

    if len(nlist) >0:
        print('Stocks with not enough history', nlist)
        for j in nlist:
            print(j, 'missing days:', df['Index'].count()-df[j].count())
        exit() # Maybe not the best to add this. I still want to see the data

    # create pred and pred/open list for each of the n dataframes
    pred_open_list = []
    for j in stock_list:
        x = range(1,roll_days+1) # range must be 1-roll_days, not the auto implied 0-(roll_days-1)
        y = df[j]
        m, b = np.polyfit(x, y, 1)
        d = m*roll_days+b

        pred_open_list.append(d / df[j][roll_days-1] * d / df[j][roll_days-1])

    multiplier_list = []
    for i, j in enumerate(stock_list):
        if pred_open_list[i] > buyvalue:
            multiplier_list.append(1)
        else:
            multiplier_list.append(0)

    final_buy_list = []
    for i, j in enumerate(stock_list):
        if multiplier_list[i] == 0:
            final_buy_list.append(contrib_amt[i])
        else:
            final_buy_list.append(round(contrib_amt[i]*pred_open_list[i]*multiplier, 2))

    final_df = pd.DataFrame()
    final_df['stock'] = stock_list
    final_df['buy_in_amt'] = final_buy_list

    return final_df


In [71]:
pred_open('stock', ['AAPL', 'AMD', 'AMZN', 'CRM', 'GOOG', 'INTC', 'MSFT', 'NVDA', 'QQQ', 'SBUX', 'SQ', 'TSLA', 'TSM'], 
    [5.00, 3.75, 2.50, 1.25, 2.50, 3.75, 2.50, 3.75, 2.50, 2.50, 2.50, 5.00, 3.75], 
    65, 1.2, 5)


[*********************100%***********************]  13 of 13 completed
[*********************100%***********************]  13 of 13 completed


Unnamed: 0,stock,buy_in_amt
0,AAPL,5.0
1,AMD,3.75
2,AMZN,2.5
3,CRM,1.25
4,GOOG,2.5
5,INTC,3.75
6,MSFT,2.5
7,NVDA,3.75
8,QQQ,2.5
9,SBUX,2.5


In [72]:
pred_open('stock', ['AAPL', 'AMD', 'AMZN', 'CRM', 'GOOG', 'INTC', 'MSFT', 'NVDA', 'QQQ', 'SBUX', 'SQ', 'TSLA', 'TSM'], 
    [5], 
    65, .9, 5)


[*********************100%***********************]  13 of 13 completed
[*********************100%***********************]  13 of 13 completed


Unnamed: 0,stock,buy_in_amt
0,AAPL,5.0
1,AMD,23.39
2,AMZN,5.0
3,CRM,5.0
4,GOOG,23.09
5,INTC,5.0
6,MSFT,5.0
7,NVDA,5.0
8,QQQ,5.0
9,SBUX,5.0


In [73]:
pred_open('stock', ['AAPL'], 
    [5], 
    65, .9, 5)


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Unnamed: 0,stock,buy_in_amt
0,AAPL,5


In [74]:
pred_open('crypto', ['BTC-USD', 'ETH-USD'], 
    [5.00, 3.75], 
    65, .9, 5)

[*********************100%***********************]  2 of 2 completed


TypeError: expected x and y to have same length

In [None]:
pred_open('crypto', ['BTC-USD'], 
    [5.00], 
    65, .9, 5)

[*********************100%***********************]  1 of 1 completed


TypeError: expected x and y to have same length

In [None]:
# trade_type = 'crypto'
# stock_list = ['BTC-USD', 'ETH-USD']
# contrib_amt = [5.00, 3.75]
# roll_days = 65
# buyvalue = .9
# multiplier = 5
    
# ### duplicate contrib_amt for all stocks if only 1 listed
# if len(contrib_amt) == len(stock_list):
#     pass
# elif len(contrib_amt) == 1: 
#     contrib_amt = [contrib_amt[0] for x in enumerate(stock_list)]
# else:
#     print('Incorrect length of contrib_amt. Make it match the length of the stock list or be 1 value')
#     exit()

# ### pull most recent day
# if trade_type == 'crypto' or trade_type == 'index':
#     pass
# else:
#     x = 0
#     while x < 1:
#         df_now = yf.download(
#         tickers = stock_list
#         ,period = '1d' # set for 'today' instead
#         ,interval = '1m'
#         )

#         # ensures a single stock can pass through, not just 2+ 
#         if len(stock_list) == 1:
#             df_now[stock_list[0]] = df_now['Open']
#             df_now = df_now[[stock_list[0]]]
#         else:
#             df_now = df_now['Open']

#         df_now = df_now.head(1) # open for today
#         df_now = df_now.fillna(0)

#         x = 1
#         for i in stock_list:
#             x = x * int(df_now[i])

#         if x == 0: # wait 15 seconds if data aren't complete
#             time.sleep(15)

# ### Overly complex way to pull data, but I have found that 'Open' prices are just a copy of the previous day for the first few minutes of the trading day
# ### This method pulls in the true Open prices for today much quicker (a couple minutes after 6:30am PST)

# if trade_type == 'crypto' or trade_type == 'index':
#     df = yf.download(
#         tickers = stock_list
#         ,period = str(roll_days) + 'd'
#     )

#     # ensures a single crypto or index can pass through, not just 2+ 
#     if len(stock_list) == 1:
#         df[stock_list[0]] = df['Open']
#         df = df[[stock_list[0]]]
#     else:
#         df = df['Open']
# else:
#     # Pull all data except for today
#     df_bulk = yf.download(
#             tickers = stock_list
#             ,period = str(roll_days) + 'd'
#         )

#     # ensures a single stock can pass through, not just 2+ 
#     if len(stock_list) == 1:
#         df_bulk[stock_list[0]] = df_bulk['Open']
#         df_bulk = df_bulk[[stock_list[0]]]
#     else:
#         df_bulk = df_bulk['Open']

#     df_good_index = df_bulk.copy() # used to grab the ideal index
#     df_bulk.drop(df_bulk.tail(1).index,inplace=True) # bulk w/o the most recent day

#     # join the data (index is still bad)
#     df = pd.concat([df_bulk, df_now])

#     # sub in a good index
#     df = df.reindex_like(df_good_index)

#     # sub in good open data for today
#     for i in stock_list:
#         df[i][len(df)-1] = df_now[i].copy()
    

# # add an index and useable date
# df['Index'] = np.arange(1,len(df)+1)
# df['date'] = df.index

# # error checking, if a stock doesn't have enough history based on the current needs
# nlist = []
# for i in stock_list:
#     if pd.isna(df[i].iloc[0]) == True:
#         nlist.append(i)

# if len(nlist) >0:
#     print('Stocks with not enough history', nlist)
#     for j in nlist:
#         print(j, 'missing days:', df['Index'].count()-df[j].count())
#     exit() # Maybe not the best to add this. I still want to see the data

# # create pred and pred/open list for each of the n dataframes
# pred_open_list = []
# for j in stock_list:
#     x = range(1,roll_days+1) # range must be 1-roll_days, not the auto implied 0-(roll_days-1)
#     y = df[j]

# y
# #     m, b = np.polyfit(x, y, 1)
# #     d = m*roll_days+b

# #     pred_open_list.append(d / df[j][roll_days-1] * d / df[j][roll_days-1])

# # multiplier_list = []
# # for i, j in enumerate(stock_list):
# #     if pred_open_list[i] > buyvalue:
# #         multiplier_list.append(1)
# #     else:
# #         multiplier_list.append(0)

# # final_buy_list = []
# # for i, j in enumerate(stock_list):
# #     if multiplier_list[i] == 0:
# #         final_buy_list.append(contrib_amt[i])
# #     else:
# #         final_buy_list.append(round(contrib_amt[i]*pred_open_list[i]*multiplier, 2))

# # final_df = pd.DataFrame()
# # final_df['stock'] = stock_list
# # final_df['buy_in_amt'] = final_buy_list

# # final_df


[*********************100%***********************]  2 of 2 completed


Date
2022-01-30    2598.564941
2022-01-31    2603.263428
2022-02-01    2687.898926
2022-02-02    2791.958984
2022-02-03    2682.226074
                 ...     
2022-03-30    3401.526123
2022-03-31    3385.289307
2022-04-01    3282.576172
2022-04-02    3449.788574
2022-04-04    3513.119873
Name: ETH-USD, Length: 64, dtype: float64