## S&P 400 Galaxy

Use this utility to update the returns and std_dev fields within investment-options.csv.

Globals

In [1]:
# refresh_timeseries=True downloads the timeseries; when False, /symbol-cache
# is used instead.
refresh_timeseries = True

# Throttling settings handed to the timeseries fetch further down.
# NOTE(review): presumably symbols per batch and seconds to pause - confirm
# against brownbear.
throttle_limit, wait_time = 100, 30

In [2]:
%%javascript
// Override the output-area scroll check so it always answers "no".
IPython.OutputArea.prototype._should_scroll = (lines) => false;

<IPython.core.display.Javascript object>

In [3]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import brownbear as bb

# Show floats with two decimal places when pandas renders them.
pd.set_option('display.float_format', '{:0.2f}'.format)

%matplotlib inline

In [4]:
# Set size of inline plots.
#
# note: rcParams can't be in same cell as import matplotlib
#       or %matplotlib inline
#
#   %matplotlib notebook: will lead to interactive plots embedded within
#   the notebook, you can zoom and resize the figure
#
#   %matplotlib inline: only draw static images in the notebook
plt.rc("figure", figsize=(10, 7))

In [5]:
# Load sp400.csv, drop the 'SEC filings' column, rename the columns used
# later in the notebook, and index the table by ticker symbol.
sp400 = (
    pd.read_csv('sp400.csv')
    .drop(columns=['SEC filings'])
    .rename(columns={
        'Security': 'Description',
        'GICS Sector': 'Asset Class',
        'GICS Sub-Industry': 'GICS Sub Industry',
    })
    .set_index("Symbol")
)
sp400

Unnamed: 0_level_0,Description,Asset Class,GICS Sub Industry,Headquarters Location
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AA,Alcoa,Materials,Aluminum,"Pittsburgh, Pennsylvania"
AAL,American Airlines Group,Industrials,Passenger Airlines,"Fort Worth, Texas"
AAON,AAON,Industrials,Building Products,"Tulsa, Oklahoma"
ACHC,Acadia Healthcare,Health Care,Health Care Facilities,"Franklin, Tennessee"
ACI,Albertsons,Consumer Staples,Food Retail,"Boise, Idaho"
...,...,...,...,...
XPO,"XPO, Inc.",Industrials,Cargo Ground Transportation,"Greenwich, Connecticut"
XRAY,Dentsply Sirona,Health Care,Health Care Supplies,"Charlotte, North Carolina"
YETI,Yeti Holdings,Consumer Discretionary,Leisure Products,"Austin, Texas"
ZI,ZoomInfo,Communication Services,Interactive Media & Services,"Vancouver, Washington"


In [6]:
# Load gics-2-asset-class.csv (blank lines and '#' comments are ignored) and
# turn it into a dict keyed by the GICS column with 'Asset Class' as the value.
gics_table = pd.read_csv('gics-2-asset-class.csv', skip_blank_lines=True, comment='#')
gics2asset_class = gics_table.set_index("GICS")['Asset Class'].to_dict()
gics2asset_class

{'Energy': 'US Stocks:Energy',
 'Materials': 'US Stocks:Materials',
 'Industrials': 'US Stocks:Industrials',
 'Consumer Discretionary': 'US Stocks:Consumer Discretionary',
 'Consumer Staples': 'US Stocks:Consumer Staples',
 'Health Care': 'US Stocks:Healthcare',
 'Financials': 'US Stocks:Financials',
 'Information Technology': 'US Stocks:Technology',
 'Communication Services': 'US Stocks:Communication Services',
 'Utilities': 'US Stocks:Utilities',
 'Real Estate': 'US Stocks:Real Estate'}

In [7]:
# Translate each row's GICS sector into the matching brownbear asset class.
def _asset_class(row):
    """Look up the asset class for the GICS sector held in row['Asset Class'].

    Raises KeyError when the sector has no entry in gics2asset_class.
    """
    gics_sector = row['Asset Class']
    return gics2asset_class[gics_sector]

sp400['Asset Class'] = sp400.apply(_asset_class, axis=1)

# Yahoo finance uses '-' where '.' is used in symbol names, so swap them
# literally (no regex) in the index.
yahoo_symbols = sp400.index.str.replace('.', '-', regex=False)
sp400.index = yahoo_symbols
sp400

Unnamed: 0_level_0,Description,Asset Class,GICS Sub Industry,Headquarters Location
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AA,Alcoa,US Stocks:Materials,Aluminum,"Pittsburgh, Pennsylvania"
AAL,American Airlines Group,US Stocks:Industrials,Passenger Airlines,"Fort Worth, Texas"
AAON,AAON,US Stocks:Industrials,Building Products,"Tulsa, Oklahoma"
ACHC,Acadia Healthcare,US Stocks:Healthcare,Health Care Facilities,"Franklin, Tennessee"
ACI,Albertsons,US Stocks:Consumer Staples,Food Retail,"Boise, Idaho"
...,...,...,...,...
XPO,"XPO, Inc.",US Stocks:Industrials,Cargo Ground Transportation,"Greenwich, Connecticut"
XRAY,Dentsply Sirona,US Stocks:Healthcare,Health Care Supplies,"Charlotte, North Carolina"
YETI,Yeti Holdings,US Stocks:Consumer Discretionary,Leisure Products,"Austin, Texas"
ZI,ZoomInfo,US Stocks:Communication Services,Interactive Media & Services,"Vancouver, Washington"


In [8]:
# Drop invalid symbols.
# sp400.drop(['NYCB', 'PNM'], inplace=True)


In [9]:
# Collect the index labels of sp400 (the ticker symbols) into a plain list.
symbols = sp400.index.tolist()
#symbols

In [10]:
# Fetch the timeseries for every symbol, using the refresh and throttling
# settings from the globals cell, then compile them into a single csv.
bb.fetch_timeseries(
    symbols,
    refresh=refresh_timeseries,
    throttle_limit=throttle_limit,
    wait_time=wait_time,
)
bb.compile_timeseries(symbols)

AA AAL AAON ACHC ACI ACM ADC AFG AGCO AIT ALE ALGM ALLY ALTR ALV AM AMED AMG AMH AMKR AN ANF APPF AR ARMK ARW ASB ASGN ASH ATR AVNT AVT AVTR AXTA AYI BC BCO BDC BERY BHF BILL BIO BJ BKH BLD BLKB BMRN BRBR BRKR BRX BURL BWXT BYD CACI CADE CAR CART CASY CBSH CBT CC CCK CDP CELH CFR CG CGNX CHDN CHE CHH CHRD CHWY CHX CIEN CIVI CLF CLH CMA CMC CNH CNM CNO CNX CNXC COHR COKE COLB COLM COTY CPRI CR CROX CRS CRUS CSL CUBE CUZ CVLT CW CXT 
Throttle limit reached. Waiting for 0 seconds...
CYTK DAR DBX DCI DINO DKS DLB DOCS DOCU DT DTM DUOL EEFT EGP EHC ELF ELS EME ENS ENSG ENTG EPR EQH ESAB ESNT EVR EWBC EXE EXEL EXLS EXP EXPO FAF FBIN FCFS FCN FFIN FHI FHN FIVE FIX FLEX FLG FLO FLR FLS FN FNB FND FNF FOUR FR FYBR G GAP GATX GBCI GEF GGG GHC GLPI GME GMED GNTX GPK GT GTLS GWRE GXO H HAE HALO HGV HLI HLNE HOG HOMB HQY HR HRB HWC HXL IBKR IBOC IDA ILMN INGR IPGP IRDM IRT ITT JAZZ JEF JHG JLL JWN KBH KBR KD KEX 
Throttle limit reached. Waiting for 0 seconds...
KMPR KNF KNSL KNX KRC KRG LAD LAMR LA

In [11]:
# Load symbols-timeseries.csv (skipping blank lines and '#' comments) into a
# dataframe indexed by the Date column.
df = pd.read_csv(
    'symbols-timeseries.csv', skip_blank_lines=True, comment='#'
).set_index("Date")
# Full-range slice: every row is kept.
df = df[:]
df

Unnamed: 0_level_0,AA,AAL,AAON,ACHC,ACI,ACM,ADC,AFG,AGCO,AIT,...,WTRG,WTS,WU,WWD,X,XPO,XRAY,YETI,ZI,ZION
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2019-01-02,25.30,31.96,22.61,25.86,,26.11,44.92,51.35,46.25,48.88,...,27.92,61.33,12.19,71.03,17.71,19.94,35.01,16.17,,33.72
2019-01-03,25.30,29.58,21.77,25.40,,25.62,45.93,51.14,45.26,47.58,...,28.23,60.01,12.00,68.23,17.68,19.14,35.03,15.04,,33.53
2019-01-04,27.33,31.53,22.96,26.14,,26.95,45.77,51.92,47.92,49.26,...,28.45,61.73,12.16,69.95,19.46,19.69,36.05,15.30,,34.56
2019-01-07,27.41,32.43,23.31,26.30,,27.46,46.00,51.50,49.53,49.97,...,28.20,62.97,12.12,70.15,19.57,20.47,36.70,16.26,,34.70
2019-01-08,27.24,31.90,23.42,26.99,,27.79,47.07,51.57,50.11,51.20,...,28.54,64.27,12.14,73.40,19.81,20.83,36.45,16.83,,35.02
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-03-10,30.98,12.50,79.79,29.87,21.15,96.01,76.83,126.68,102.62,227.75,...,40.57,214.03,11.61,173.24,35.14,106.26,16.05,35.01,11.16,47.22
2025-03-11,31.96,11.46,82.39,29.12,21.46,94.38,75.91,125.25,97.85,226.30,...,39.65,209.05,10.98,177.93,37.06,107.16,15.91,33.98,11.02,47.33
2025-03-12,33.25,10.93,81.55,28.82,20.85,94.61,75.25,123.72,94.38,225.58,...,38.55,209.06,10.64,178.83,38.05,109.04,15.33,33.19,10.94,47.94
2025-03-13,32.31,10.67,78.31,28.17,21.19,93.12,75.60,125.00,92.18,218.93,...,39.14,206.39,10.54,176.86,37.52,106.12,15.00,31.92,10.50,47.00


In [12]:
# Ticker used to spot-check the results displayed by the following cells.
symbol = "ACHC"

In [13]:
# Annualized returns from brownbear for `df`, treated as daily data, with
# years=1; the second line displays the value for the sample symbol.
annual_returns = bb.annualized_returns(df, timeperiod='daily', years=1)
annual_returns[symbol]

np.float64(-64.46270479001733)

In [14]:
# Annualized returns for 1 month, 3 months, 1 year, 3 years, and 5 years.
# The calls run in that order, one per entry in the tuple of year spans.
(annual_returns_1mo,
 annual_returns_3mo,
 annual_returns_1yr,
 annual_returns_3yr,
 annual_returns_5yr) = (
    bb.annualized_returns(df, timeperiod='daily', years=span)
    for span in (1/12, 3/12, 1, 3, 5)
)

In [15]:
# 20 day annualized volatility.
# The lookback is one trading month expressed as a fraction of a trading year.
years = bb.TRADING_DAYS_PER_MONTH / bb.TRADING_DAYS_PER_YEAR
daily_returns = df.pct_change()
vola = bb.annualized_standard_deviation(
    daily_returns,
    timeperiod='daily',
    years=years,
)
vola[symbol]

np.float64(0.9366411241476681)

In [16]:
# Calculate 20 day annualized downside volatility.
# Same inputs as the volatility calculation (`daily_returns`, `years`) but with
# downside=True; the last line displays the value for the sample symbol.
ds_vola = bb.annualized_standard_deviation(daily_returns, timeperiod='daily', years=years, downside=True)
ds_vola[symbol]

np.float64(0.8593477041971721)

In [17]:
# Convert the Date index to datetimes (in place on df), then resample to
# month-end rows, forward-filling each one.
df.index = pd.to_datetime(df.index)
month_end_resampler = df.resample('ME')
monthly = month_end_resampler.ffill()

In [18]:
# Calculate monthly returns.
# Fractional change between consecutive rows of `monthly`; the first row has no
# predecessor and is NaN. The last line displays the sample symbol's series.
monthly_returns = monthly.pct_change()
monthly_returns[symbol]

Date
2019-01-31     NaN
2019-02-28   -0.04
2019-03-31    0.11
2019-04-30    0.09
2019-05-31    0.01
              ... 
2024-11-30   -0.05
2024-12-31   -0.02
2025-01-31    0.14
2025-02-28   -0.34
2025-03-31   -0.05
Freq: ME, Name: ACHC, Length: 75, dtype: float64

In [19]:
# Annualized standard deviation of the monthly returns over 1, 3, and 5 years.
# The calls run in that order, one per entry in the tuple of year spans.
std_dev_1yr, std_dev_3yr, std_dev_5yr = (
    bb.annualized_standard_deviation(monthly_returns, timeperiod='monthly', years=span)
    for span in (1, 3, 5)
)

In [20]:
# Read investment-options-in.csv into a list of lines, stripping surrounding
# whitespace (including the newline) from each one.
lines = []  # defined up front so the name exists even if the read fails
with open('investment-options-in.csv', 'r') as header_file:
    lines = list(map(str.strip, header_file))
#lines

In [21]:
# For each symbol, append one CSV row to `out` (which starts as a copy of the
# header `lines`): symbol, description, asset class, the 1 mo / 3 mo / 1 Yr /
# 3 Yr / 5 Yr annualized returns, the 20 day volatility and downside
# volatility, and the 1 / 3 / 5 Yr standard deviations.

def _csv_field(value):
    """Return `value` as a double-quoted CSV field.

    Embedded double quotes are doubled so a value such as a company name that
    contains '"' cannot terminate the field early and corrupt the row.
    """
    return '"{}"'.format(str(value).replace('"', '""'))

out = lines.copy()

# Walk the needed columns directly instead of iterrows(), which builds a
# Series per row. The loop variable is named `ticker` so the sample `symbol`
# chosen earlier in the notebook is not overwritten.
for ticker, description, asset_class in zip(
        sp400.index, sp400['Description'], sp400['Asset Class']):

    ret_1mo = annual_returns_1mo[ticker]
    ret_3mo = annual_returns_3mo[ticker]
    ret_1yr = annual_returns_1yr[ticker]
    ret_3yr = annual_returns_3yr[ticker]
    ret_5yr = annual_returns_5yr[ticker]

    # A NaN longer-period return falls back to the next shorter period.
    if np.isnan(ret_3yr):
        ret_3yr = ret_1yr
    if np.isnan(ret_5yr):
        ret_5yr = ret_3yr

    # Volatility and standard deviation values are multiplied by 100 before
    # being written; the returns are written as-is.
    metrics = (
        ret_1mo, ret_3mo, ret_1yr, ret_3yr, ret_5yr,
        vola[ticker]*100,
        ds_vola[ticker]*100,
        std_dev_1yr[ticker]*100,
        std_dev_3yr[ticker]*100,
        std_dev_5yr[ticker]*100,
    )

    fields = [ticker, description, asset_class]
    fields += ['{:0.2f}'.format(metric) for metric in metrics]
    out.append(','.join(_csv_field(field) for field in fields))

In [22]:
# Write every entry of `out` to investment-options.csv, one per line.
with open('investment-options.csv', 'w') as options_file:
    options_file.writelines(row + '\n' for row in out)