In [1]:
import warnings
# Silence every warning category for the rest of the session.
# NOTE(review): this also hides deprecation notices raised by imported libraries.
warnings.filterwarnings("ignore")

In [2]:
import pandas as pd
from pandas.tseries.offsets import MonthEnd
from pandas.tseries.offsets import Day
from pandas.tseries.offsets import BusinessDay as BDay
import numpy as np
import yfinance as yf
from datetime import datetime
from datetime import date
from datetime import timedelta
import concurrent.futures as cf

In [3]:
# Starting cash used for both the simulated portfolio and the S&P 500 benchmark.
INIT_VAL = 100_000
# Number of years subtracted from each date to find the start of its price window.
LOOKBACK_YEARS = 5

## Prep

In [4]:
# Load the constituent table from disk and index it by its 'Date' column.
monthly_constituents = pd.read_csv('./SP500_monthly_hist.csv').set_index('Date')

In [5]:
# Every distinct ticker that appears anywhere in the constituent table.
# Non-string cells (the NaN padding for missing entries) and empty strings are
# skipped explicitly, so this works whether or not the table contains NaN and
# never indexes into a float. Symbols starting with a digit are excluded, as before.
# Sorting makes the download order reproducible across kernel restarts.
ticker_list = sorted(
    {
        elem
        for elem in monthly_constituents.to_numpy().flatten()
        if isinstance(elem, str) and elem and not elem[0].isnumeric()
    }
)

In [6]:
# Download window for price history.
# Start: LOOKBACK_YEARS before the first date in the constituent table.
start_date_data = (
    pd.to_datetime(monthly_constituents.index[0])
    - pd.DateOffset(years=LOOKBACK_YEARS)
)
# End: roll the last table date forward to its month end, then add one more month end.
end_date_data = (
    pd.to_datetime(monthly_constituents.index[-1])
    + MonthEnd(0)
    + MonthEnd(1)
)

## Retrieving data from Yahoo Finance

In [7]:
def retreive_yahoo_data_thread(ticker):
    """Download the daily closing prices of one ticker from Yahoo.

    The request covers the module-level window ``start_date_data`` to
    ``end_date_data``.

    Returns:
        ``{ticker: close_series}`` with a timezone-naive index and missing
        closes dropped, or ``False`` when Yahoo returned no rows.
    """
    history = yf.Ticker(ticker).history(
        start=start_date_data, end=end_date_data, interval='1d'
    )

    # Nothing came back for this symbol.
    if history.index.empty:
        return False

    # Strip the timezone from the index.
    history.index = history.index.tz_localize(None)
    return {ticker: history['Close'].dropna()}

def retreive_yahoo_data(tickers):
    """Download closing prices for many tickers concurrently.

    Each ticker is submitted as one task to a default-sized thread pool and
    fetched with ``retreive_yahoo_data_thread``. The download is best-effort:
    a ticker whose fetch raises is reported and skipped, and tickers with no
    data (``False`` or an empty series) are skipped.

    Args:
        tickers: iterable of ticker symbols.

    Returns:
        dict mapping each successfully downloaded ticker to its non-empty
        series of closing prices. Insertion order follows task completion
        order, so it is not deterministic.
    """
    tickers_data = {}

    with cf.ThreadPoolExecutor() as executor:
        # Remember which ticker each future belongs to so failures can be named.
        future_to_ticker = {
            executor.submit(retreive_yahoo_data_thread, ticker): ticker
            for ticker in tickers
        }

        for future in cf.as_completed(future_to_ticker):
            ticker = future_to_ticker[future]
            try:
                # result() re-raises whatever the worker raised.
                result = future.result()
            except Exception as exc:
                # Keep going, but say which ticker failed instead of hiding it.
                print(f'- {ticker}: download failed ({exc!r})')
                continue

            # The worker returns False when there is nothing to use.
            if not isinstance(result, dict) or not result:
                continue

            closes = next(iter(result.values()))
            if closes.empty:
                continue

            tickers_data.update(result)

    return tickers_data

In [8]:
# Benchmark: daily S&P 500 ('^GSPC') closes over the same window as the stock data.
market_index = yf.Ticker('^GSPC')
market_hist = market_index.history(
    start=start_date_data,
    end=end_date_data,
    interval='1d',
)[['Close']]
# Strip the timezone from the index.
market_hist.index = market_hist.index.tz_localize(None)
daily_market_returns = market_hist.pct_change()

# Closing prices for every ticker that could be fetched.
# To try the notebook on a small subset instead:
# all_ticker_data = retreive_yahoo_data(random.sample(ticker_list, 30))
all_ticker_data = retreive_yahoo_data(ticker_list)

- FLIR: No data found, symbol may be delisted
- TWTR: No data found, symbol may be delisted
- NLSN: No data found, symbol may be delisted
- DISCA: No data found, symbol may be delisted
- ALXN: No data found, symbol may be delisted
- MYL: No data found, symbol may be delisted
- CERN: No data found, symbol may be delisted
- HFC: No data found, symbol may be delisted
- MXIM: No data found, symbol may be delisted
- NBL: No data found, symbol may be delisted
- INFO: No data found, symbol may be delisted
- PBCT: No data found, symbol may be delisted
- VAR: No data found, symbol may be delisted
- CTXS: No data found, symbol may be delisted
- DISCK: No data found, symbol may be delisted
- ETFC: No data found, symbol may be delisted
- DRE: No data found, symbol may be delisted
- KSU: No data found, symbol may be delisted
- XLNX: No data found, symbol may be delisted
- CXO: No data found, symbol may be delisted


## Creating portfolio

In [9]:
from utils import get_price_history_multi
from utils import get_price_history
from utils import populate_next_month

In [10]:
from portfolio_algos.risky_portfolio import produce_portfolio

In [11]:
# Walk through the constituent table one row (month) at a time. For each month a
# portfolio is produced from the lookback window, its value is tracked up to the
# first business day after month end, and the closing value is carried forward
# as the capital for the next month.
portfolio_frames = []
value_frames = []

current_value = INIT_VAL

for month_number, month in enumerate(monthly_constituents.index, start=1):
    print(month_number, end='\r')  # in-place progress counter

    end_date = pd.to_datetime(month)
    # Same calendar day LOOKBACK_YEARS earlier, still passed on as a datetime.date.
    # DateOffset clamps 29 Feb to 28 Feb, where date(year - N, 2, 29) would raise.
    start_date = (end_date - pd.DateOffset(years=LOOKBACK_YEARS)).date()
    # First business day after the end of this month.
    eom_date = end_date + MonthEnd(0) + BDay(1)

    portfolio = produce_portfolio(
        all_ticker_data,
        start_date,
        end_date,
        eom_date,
        monthly_constituents.loc[month, :].to_numpy(),
        current_value,
        daily_market_returns,
    )

    value_for_month_df, constituent_data_df = populate_next_month(
        all_ticker_data, portfolio, end_date, eom_date
    )

    # The last row's total becomes next month's starting capital.
    current_value = value_for_month_df['Total Value'].iloc[-1]

    # Drop the last row of the previous month's holdings before adding this month's.
    # NOTE(review): presumably that row overlaps the next month's first row - confirm
    # against populate_next_month.
    if portfolio_frames:
        portfolio_frames[-1] = portfolio_frames[-1].iloc[:-1, :]
    portfolio_frames.append(constituent_data_df)
    # The value series drops its own last row every month, including the final one.
    value_frames.append(value_for_month_df.iloc[:-1, :])

# Concatenate once instead of growing the frames inside the loop.
portfolio_history = pd.concat(portfolio_frames) if portfolio_frames else pd.DataFrame()
value_history = pd.concat(value_frames) if value_frames else pd.DataFrame()

# Benchmark: put INIT_VAL into the index at the first close available on or after
# the first constituent date. Slicing finds that trading day directly, so a weekend
# or holiday start date needs no exception-driven fallback.
first_day = pd.Timestamp(monthly_constituents.index[0])
first_close = market_hist.loc[first_day:, 'Close'].iloc[0]
shares_of_market = INIT_VAL / first_close
market_hist['SP500'] = market_hist['Close'] * shares_of_market
# Keep only the dates present in both the portfolio and the benchmark.
value_history = pd.concat([value_history, market_hist['SP500']], join='inner', axis=1)

36

In [12]:
# Show the daily portfolio value ('Total Value') next to the benchmark ('SP500').
value_history

Unnamed: 0,Total Value,SP500
2020-10-01,100000.000000,100000.000000
2020-10-02,103592.296032,99042.234782
2020-10-05,106477.656701,100822.292015
2020-10-06,102405.640063,99413.450135
2020-10-07,104160.949442,101142.921558
...,...,...
2023-09-25,93177.287745,128296.257654
2023-09-26,90930.972779,126405.872085
2023-09-27,91631.184952,126434.858728
2023-09-28,92684.642450,127179.961347


In [13]:
# Show the daily per-ticker values; NaN marks dates on which a ticker has no value recorded.
portfolio_history

Unnamed: 0,APA,MRO,DVN,FANG,OXY,OKE,NOV,HAL,HES,LNC,...,MPWR,FSLR,QCOM,MSCI,LRCX,GNRC,URI,NOW,INTU,NXPI
2020-10-01,35000.000000,25000.000000,5000.000000,5000.000000,5000.000000,5000.000000,5000.000000,5000.000000,5000.000000,5000.000000,...,,,,,,,,,,
2020-10-02,36616.528424,26075.950908,5274.725497,5237.194551,5098.140335,5249.307192,5023.094674,5062.668158,4837.095902,5117.590392,...,,,,,,,,,,
2020-10-05,36992.475872,26772.148221,5576.923259,5305.946670,5428.718515,5462.999273,5173.211463,5340.197305,5048.743484,5376.292639,...,,,,,,,,,,
2020-10-06,35413.528248,25569.621719,5346.154208,5020.625337,5232.437344,5354.174353,5005.774654,5304.386106,4870.446760,5288.491334,...,,,,,,,,,,
2020-10-07,35977.439998,26139.245014,5423.076804,5065.313506,5309.916793,5502.572142,5063.512184,5340.197305,4833.247745,5506.427950,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-09-26,,,,,,,,,,,...,,4505.238285,,,,4514.951160,,4795.340054,,4855.817150
2023-09-27,,,,,,,,,,,...,,4427.633991,,,,4762.962829,,4810.748396,,4898.400901
2023-09-28,,,,,,,,,,,...,,4508.308977,,,,4766.899697,,4873.614558,,4993.779064
2023-09-29,,,,,,,,,,,...,,4510.821245,,,,4766.024689,,4921.512510,,4978.588278


## Exporting

In [14]:
# Format the dates as MM/DD/YY strings on a copy, so value_history keeps its
# datetime index and this cell can be re-run without raising AttributeError.
value_history_export = value_history.copy()
value_history_export.index = value_history.index.strftime("%m/%d/%y")
# Transposed before writing, so the JSON is keyed by date first.
value_history_export.T.to_json('portfolio_vs_sp500_hist.json')

In [15]:
# Format the dates as MM/DD/YY strings on a copy, so portfolio_history keeps its
# datetime index and this cell can be re-run without raising AttributeError.
portfolio_history_export = portfolio_history.copy()
portfolio_history_export.index = portfolio_history.index.strftime("%m/%d/%y")
# Transposed before writing, so the JSON is keyed by date first.
portfolio_history_export.T.to_json('constituent_hist.json')