## S&P 400 Galaxy

Use this utility to update the returns and std_dev fields within investment-options.csv.

Globals

In [10]:
# Set refresh_timeseries=True to download timeseries.  Otherwise /symbol-cache is used.
refresh_timeseries = True

# Download throttling settings handed to bb.fetch_timeseries below.
# NOTE(review): presumably "pause wait_time seconds after every
# throttle_limit symbols" -- confirm against the brownbear documentation.
throttle_limit = 100
wait_time = 30

In [11]:
%%javascript
// Replace the output area's _should_scroll hook with a function that always
// returns false, whatever line count it is given.
// NOTE(review): presumably this keeps long cell outputs from being collapsed
// into a scrollable box in the classic Notebook UI -- confirm.
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false;
}

<IPython.core.display.Javascript object>

In [12]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import brownbear as bb

# Format price data: render every float with two decimal places.
pd.set_option('display.float_format', '{:0.2f}'.format)

In [13]:
# Read in sp400.csv, discard the 'SEC filings' column, rename the columns
# to the names used in the rest of this notebook, and index by 'Symbol'.
sp400 = (
    pd.read_csv('sp400.csv')
    .drop(columns=['SEC filings'])
    .rename(columns={
        'Security': 'Description',
        'GICS Sector': 'Asset Class',
        'GICS Sub-Industry': 'GICS Sub Industry',
    })
    .set_index("Symbol")
)
sp400

Unnamed: 0_level_0,Description,Asset Class,GICS Sub Industry,Headquarters Location
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AA,Alcoa,Materials,Aluminum,"Pittsburgh, Pennsylvania"
AAL,American Airlines Group,Industrials,Passenger Airlines,"Fort Worth, Texas"
AAON,AAON,Industrials,Building Products,"Tulsa, Oklahoma"
ACHC,Acadia Healthcare,Health Care,Health Care Facilities,"Franklin, Tennessee"
ACI,Albertsons,Consumer Staples,Food Retail,"Boise, Idaho"
...,...,...,...,...
XPO,"XPO, Inc.",Industrials,Cargo Ground Transportation,"Greenwich, Connecticut"
XRAY,Dentsply Sirona,Health Care,Health Care Supplies,"Charlotte, North Carolina"
YETI,Yeti Holdings,Consumer Discretionary,Leisure Products,"Austin, Texas"
ZI,ZoomInfo,Communication Services,Interactive Media & Services,"Vancouver, Washington"


In [14]:
# Read in gics-2-asset-class.csv (blank lines and '#' comments are skipped)
# and turn it into a {GICS sector: asset class} dict.
gics2asset_class = (
    pd.read_csv('gics-2-asset-class.csv', skip_blank_lines=True, comment='#')
    .set_index("GICS")['Asset Class']
    .to_dict()
)
gics2asset_class

{'Energy': 'US Stocks:Energy',
 'Materials': 'US Stocks:Materials',
 'Industrials': 'US Stocks:Industrials',
 'Consumer Discretionary': 'US Stocks:Consumer Discretionary',
 'Consumer Staples': 'US Stocks:Consumer Staples',
 'Health Care': 'US Stocks:Healthcare',
 'Financials': 'US Stocks:Financials',
 'Information Technology': 'US Stocks:Technology',
 'Communication Services': 'US Stocks:Communication Services',
 'Utilities': 'US Stocks:Utilities',
 'Real Estate': 'US Stocks:Real Estate'}

In [15]:
# Map sp400 GICS sectors to brownbear defined asset classes.
def _asset_class(row, mapping=None):
    """
    Return the brownbear asset class for one sp400 row.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row)
        Must provide an 'Asset Class' entry holding either a GICS sector
        name or an asset class that has already been mapped.
    mapping : dict, optional
        GICS sector -> asset class lookup.  Defaults to the notebook-level
        `gics2asset_class` dict.

    Returns
    -------
    The asset class found in `mapping` for the row's sector, or the row's
    value unchanged when it is already one of the mapped asset classes.

    Raises
    ------
    KeyError
        If the value is neither a key nor a value of `mapping`.
    """
    if mapping is None:
        mapping = gics2asset_class

    sector = row['Asset Class']

    # Re-running the cell feeds already-mapped values back in.  Pass those
    # through so the cell is idempotent instead of failing with KeyError.
    if sector not in mapping and sector in mapping.values():
        return sector

    return mapping[sector]

# Overwrite the 'Asset Class' column with the value _asset_class returns
# for each row.
sp400['Asset Class'] = sp400.apply(_asset_class, axis='columns')

# Yahoo finance uses '-' where '.' is used in symbol names.
yahoo_symbols = sp400.index.str.replace('.', '-', regex=False)
sp400.index = yahoo_symbols
sp400

Unnamed: 0_level_0,Description,Asset Class,GICS Sub Industry,Headquarters Location
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AA,Alcoa,US Stocks:Materials,Aluminum,"Pittsburgh, Pennsylvania"
AAL,American Airlines Group,US Stocks:Industrials,Passenger Airlines,"Fort Worth, Texas"
AAON,AAON,US Stocks:Industrials,Building Products,"Tulsa, Oklahoma"
ACHC,Acadia Healthcare,US Stocks:Healthcare,Health Care Facilities,"Franklin, Tennessee"
ACI,Albertsons,US Stocks:Consumer Staples,Food Retail,"Boise, Idaho"
...,...,...,...,...
XPO,"XPO, Inc.",US Stocks:Industrials,Cargo Ground Transportation,"Greenwich, Connecticut"
XRAY,Dentsply Sirona,US Stocks:Healthcare,Health Care Supplies,"Charlotte, North Carolina"
YETI,Yeti Holdings,US Stocks:Consumer Discretionary,Leisure Products,"Austin, Texas"
ZI,ZoomInfo,US Stocks:Communication Services,Interactive Media & Services,"Vancouver, Washington"


In [16]:
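# Drop invalid symbols.
# Currently disabled, so no symbols are dropped.  To exclude symbols,
# uncomment the line below and edit the list.
# sp400.drop(['NYCB', 'PNM'], inplace=True)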


In [17]:
# Make symbols list from the sp400 index.
symbols = sp400.index.tolist()

In [18]:
# Get the timeseries for the symbols and compile into a single csv.
# refresh, throttle_limit and wait_time come from the Globals cell.
bb.fetch_timeseries(
    symbols,
    refresh=refresh_timeseries,
    throttle_limit=throttle_limit,
    wait_time=wait_time,
)
bb.compile_timeseries(symbols)

AA AAL AAON ACHC ACI ACM ADC AFG AGCO AIT ALE ALGM ALK ALLY ALV AM AMED AMG AMH AMKR AN ANF APPF AR ARMK ARW ASB ASGN ASH ATI ATR AVNT AVT AVTR AXTA AYI BBWI BC BCO BDC BHF BILL BIO BJ BKH BLD BLKB BMRN BRBR BRKR BRX BURL BWXT BYD CACI CADE CAR CART CASY CAVA CBSH CBT CCK CDP CELH CFR CG CGNX CHDN CHE CHH CHRD CHWY CHX CIEN CIVI CLF CLH CMA CMC CNH CNM CNO CNX CNXC COHR COKE COLB COLM COTY CPRI CR CROX CRS CRUS CSL CUBE CUZ CVLT CW CXT CYTK DAR DBX DCI DINO DKS DLB DOCS DOCU DT DTM DUOL EEFT EGP EHC ELF ELS EME ENS ENSG ENTG EPR EQH ESAB ESNT EVR EWBC EXEL EXLS EXP EXPO FAF FBIN FCFS FCN FFIN FHI FHN FIVE FIX FLEX FLG FLO FLR FLS FN FNB FND FNF FOUR FR FYBR G GAP GATX GBCI GEF GGG GHC GLPI GME GMED GNTX GPK GT GTLS GWRE GXO H HAE HALO HGV HIMS HLI HLNE HOG HOMB HQY HR HRB HWC HXL IBKR IBOC IDA ILMN INGR IPGP IRDM IRT ITT JAZZ JEF JHG JLL JWN KBH KBR KD KEX KMPR KNF KNSL KNX KRC KRG LAD LAMR LANC LEA LECO LFUS LITE LIVN LNTH LNW LOPE LPX LSCC LSTR M MAN MANH MASI MAT MEDP MIDD MKSI MLI 

In [19]:
# Read symbols timeseries into a dataframe indexed by 'Date'
# (blank lines and '#' comment lines in the csv are skipped).
df = pd.read_csv(
    'symbols-timeseries.csv', skip_blank_lines=True, comment='#'
).set_index("Date")
df

Unnamed: 0_level_0,AA,AAL,AAON,ACHC,ACI,ACM,ADC,AFG,AGCO,AIT,...,WTRG,WTS,WU,WWD,X,XPO,XRAY,YETI,ZI,ZION
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2019-01-02,25.30,31.96,22.58,25.86,,26.04,44.62,50.23,46.25,48.88,...,27.92,61.33,11.93,71.03,17.71,19.94,34.66,16.17,,33.72
2019-01-03,25.30,29.58,21.74,25.40,,25.55,45.62,50.02,45.26,47.58,...,28.23,60.01,11.74,68.23,17.68,19.14,34.67,15.04,,33.53
2019-01-04,27.33,31.53,22.93,26.14,,26.88,45.47,50.79,47.92,49.26,...,28.45,61.73,11.89,69.95,19.46,19.69,35.68,15.30,,34.56
2019-01-07,27.41,32.43,23.28,26.30,,27.39,45.69,50.38,49.53,49.97,...,28.20,62.97,11.86,70.15,19.57,20.47,36.32,16.26,,34.70
2019-01-08,27.24,31.90,23.39,26.99,,27.72,46.76,50.44,50.11,51.20,...,28.54,64.27,11.88,73.40,19.81,20.83,36.07,16.83,,35.02
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-04-28,25.56,9.85,88.79,22.18,21.69,97.44,76.18,127.31,85.23,238.38,...,40.28,206.26,10.16,181.45,43.25,97.53,13.87,28.64,8.71,44.94
2025-04-29,25.19,9.90,89.11,22.35,21.94,98.17,77.07,128.04,85.23,242.37,...,40.77,206.84,10.11,184.83,43.81,97.43,13.95,28.68,8.70,45.26
2025-04-30,24.53,9.95,91.27,23.40,21.98,98.65,77.61,126.66,84.83,243.28,...,41.13,207.75,9.91,187.57,43.71,106.12,13.90,28.55,8.56,44.97
2025-05-01,24.54,10.03,96.46,22.78,22.01,100.25,76.92,126.11,93.40,228.73,...,40.71,210.52,9.66,190.49,43.46,103.94,13.93,28.35,8.63,45.35


In [20]:
# Calculate Annual Returns.
# Same call as annual_returns_1yr in the next cell; this copy exists so
# the result is displayed.
annual_returns = bb.annualized_returns(
    df,
    timeperiod='daily',
    years=1,
)
annual_returns

AA     -26.52
AAL    -22.13
AAON     6.02
ACHC   -68.47
ACI     10.42
        ...  
XPO      2.12
XRAY   -51.60
YETI   -19.26
ZI     -43.25
ZION    18.16
Length: 401, dtype: float64

In [21]:
# Calculate 1 month, 3 months, 1 year, 3 year, and 5 year annualized returns.
def _annualized_daily_returns(years):
    """Call bb.annualized_returns on df (daily data) for a `years` horizon."""
    return bb.annualized_returns(df, timeperiod='daily', years=years)

annual_returns_1mo = _annualized_daily_returns(1/12)
annual_returns_3mo = _annualized_daily_returns(3/12)
annual_returns_1yr = _annualized_daily_returns(1)
annual_returns_3yr = _annualized_daily_returns(3)
annual_returns_5yr = _annualized_daily_returns(5)

In [22]:
# Calculate 20 day annualized volatility.
# `years` is one trading month expressed as a fraction of a trading year;
# it and `daily_returns` are reused by the downside volatility cell.
years = bb.TRADING_DAYS_PER_MONTH / bb.TRADING_DAYS_PER_YEAR
daily_returns = df.pct_change()
vola = bb.annualized_standard_deviation(
    daily_returns,
    timeperiod='daily',
    years=years,
)

In [23]:
# Calculate 20 day annualized downside volatility.
# Identical to the `vola` call except for downside=True.
ds_vola = bb.annualized_standard_deviation(
    daily_returns,
    timeperiod='daily',
    years=years,
    downside=True,
)

In [24]:
# Resample df on a monthly basis.
# The index is converted to datetimes first because resample() needs a
# datetime-like index.  This assignment changes df itself, so df keeps the
# datetime index in every cell that runs after this one.
df.index = pd.to_datetime(df.index)
# 'ME' is the pandas month-end frequency alias.
# NOTE(review): ffill() presumably carries the last available price forward
# onto each month-end label -- confirm against the pandas docs.
monthly = df.resample('ME').ffill()

In [25]:
# Calculate monthly returns.
# pct_change() gives the fractional change between consecutive rows of
# `monthly`; the first row has no predecessor and is therefore NaN.
monthly_returns = monthly.pct_change()

In [26]:
# Calculate 1 year, 3 year, and 5 year annualized standard deviation.
def _monthly_std_dev(years):
    """Call bb.annualized_standard_deviation on monthly_returns for `years`."""
    return bb.annualized_standard_deviation(
        monthly_returns, timeperiod='monthly', years=years
    )

std_dev_1yr = _monthly_std_dev(1)
std_dev_3yr = _monthly_std_dev(3)
std_dev_5yr = _monthly_std_dev(5)

In [27]:
# Read investment-options-in.csv into a list of lines, with leading and
# trailing whitespace (including the newline) stripped from each one.
# These lines become the start of investment-options.csv.
with open('investment-options-in.csv', 'r') as f:
    lines = [line.strip() for line in f]

In [28]:
# For each symbol, append one fully quoted CSV row after the lines read from
# investment-options-in.csv.  Field order: symbol, description, asset class,
# 1 mo / 3 mo / 1 Yr / 3 Yr / 5 Yr annualized returns, 20 day volatility,
# 20 day downside volatility, and 1 Yr / 3 Yr / 5 Yr std dev.
out = lines.copy()

row_format = (
    '"{}","{}","{}","{:0.2f}","{:0.2f}","{:0.2f}","{:0.2f}",'
    '"{:0.2f}","{:0.2f}","{:0.2f}","{:0.2f}","{:0.2f}","{:0.2f}"'
)

def _csv_text(value):
    """Double any embedded '"' so the value stays valid inside a quoted field."""
    return str(value).replace('"', '""')

# Walk only the columns that are needed; iterrows() would build a Series
# for every row.
for symbol, description, asset_class in zip(
        sp400.index, sp400['Description'], sp400['Asset Class']):

    ret_1mo = annual_returns_1mo[symbol]
    ret_3mo = annual_returns_3mo[symbol]
    ret_1yr = annual_returns_1yr[symbol]
    ret_3yr = annual_returns_3yr[symbol]
    ret_5yr = annual_returns_5yr[symbol]

    # When a longer horizon is NaN, fall back to the next shorter one.
    # The 3 Yr fallback runs first so the 5 Yr value can inherit it.
    if np.isnan(ret_3yr): ret_3yr = ret_1yr
    if np.isnan(ret_5yr): ret_5yr = ret_3yr

    # Volatility and std dev values are scaled by 100 before being written.
    _vola = vola[symbol]*100
    _ds_vola = ds_vola[symbol]*100
    sd_1yr = std_dev_1yr[symbol]*100
    sd_3yr = std_dev_3yr[symbol]*100
    sd_5yr = std_dev_5yr[symbol]*100

    out.append(row_format.format(
        _csv_text(symbol), _csv_text(description), _csv_text(asset_class),
        ret_1mo, ret_3mo, ret_1yr, ret_3yr,
        ret_5yr, _vola, _ds_vola, sd_1yr, sd_3yr, sd_5yr
    ))

In [29]:
# Write out investment-options.csv, one entry of `out` per line.
with open('investment-options.csv', 'w') as f:
    f.writelines(line + '\n' for line in out)