Use this utility to update the returns and std_dev fields within investment-options.csv

In [1]:
%%javascript
// Replace OutputArea's _should_scroll hook with a function that always
// returns false, regardless of the number of output lines.
// NOTE(review): presumably this stops long cell outputs from being collapsed
// into a scrolling box in the classic Notebook UI -- confirm.
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false;
}

<IPython.core.display.Javascript object>

In [2]:
# imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
import brownbear as bb

# format price data: pandas displays floats with two decimal places
pd.options.display.float_format = '{:0.2f}'.format

%matplotlib inline

In [3]:
# set size of inline plots
#
# note: rcParams can't be in same cell as import matplotlib
#       or %matplotlib inline
#
#   %matplotlib notebook: will lead to interactive plots embedded within
#   the notebook, you can zoom and resize the figure
#
#   %matplotlib inline: only draw static images in the notebook
plt.rcParams["figure.figsize"] = (10, 7)

Globals

In [4]:
# refresh_timeseries is passed as `refresh=` to bb.fetch_timeseries.
# Set refresh_timeseries=True to download timeseries.  Otherwise /symbol-cache is used.
refresh_timeseries = True

In [5]:
# load sp400.csv, discard the 'SEC filings' column, rename the remaining
# columns to the names used in this notebook, and index the table by 'Symbol'
sp400 = (
    pd.read_csv('sp400.csv')
    .drop(columns=['SEC filings'])
    .rename(columns={'Ticker symbol': 'Symbol',
                     'Security': 'Description',
                     'GICS Sector': 'Asset Class',
                     'GICS Sub-Industry': 'GICS Sub Industry'})
    .set_index("Symbol")
)
sp400

Unnamed: 0_level_0,Description,Asset Class,GICS Sub Industry
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ACHC,Acadia Healthcare,Health Care,Health Care Facilities
ACIW,ACI Worldwide,Information Technology,Application Software
ADNT,Adient plc,Consumer Discretionary,Auto Parts & Equipment
ATGE,Adtalem Global Education,Consumer Discretionary,Education Services
ACM,AECOM,Industrials,Construction & Engineering
...,...,...,...
WH,"Wyndham Hotels & Resorts, Inc.",Consumer Discretionary,"Hotels, Resorts & Cruise Lines"
X,United States Steel Corporation,Materials,Steel
Y,Alleghany Corporation,Financials,Reinsurance
XPO,XPO Logistics,Industrials,Air Freight & Logistics


In [6]:
# build a {GICS sector: asset class} dict from gics-2-asset-class.csv
# (blank lines and '#' comment lines in the file are skipped)
gics2asset_class = (
    pd.read_csv('gics-2-asset-class.csv', skip_blank_lines=True, comment='#')
    .set_index("GICS")['Asset Class']
    .to_dict()
)
gics2asset_class

{'Energy': 'US Stocks:Energy',
 'Materials': 'US Stocks:Materials',
 'Industrials': 'US Stocks:Industrials',
 'Consumer Discretionary': 'US Stocks:Consumer Discretionary',
 'Consumer Staples': 'US Stocks:Consumer Staples',
 'Health Care': 'US Stocks:Healthcare',
 'Financials': 'US Stocks:Financials',
 'Information Technology': 'US Stocks:Technology',
 'Communication Services': 'US Stocks:Communication Services',
 'Utilities': 'US Stocks:Utilities',
 'Real Estate': 'US Stocks:Real Estate'}

In [7]:
# map sp400 GICS sectors to brownbear defined asset classes
def _asset_class(row):
    """Return the brownbear asset class for the GICS sector in row['Asset Class'].

    A value that is not a GICS key but already is one of the mapped asset
    classes is returned unchanged, so re-running this cell on an already
    mapped table does not fail.  Any other unknown value raises KeyError.
    """
    sector = row['Asset Class']
    if sector not in gics2asset_class and sector in gics2asset_class.values():
        return sector
    return gics2asset_class[sector]

sp400['Asset Class'] = sp400.apply(_asset_class, axis=1)

# yahoo finance uses '-' where '.' is used in symbol names
# regex=False: '.' must be matched as a literal dot, never as the regex
# wildcard (the default value of `regex` differs between pandas versions)
sp400.index = sp400.index.str.replace('.', '-', regex=False)
sp400

Unnamed: 0_level_0,Description,Asset Class,GICS Sub Industry
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ACHC,Acadia Healthcare,US Stocks:Healthcare,Health Care Facilities
ACIW,ACI Worldwide,US Stocks:Technology,Application Software
ADNT,Adient plc,US Stocks:Consumer Discretionary,Auto Parts & Equipment
ATGE,Adtalem Global Education,US Stocks:Consumer Discretionary,Education Services
ACM,AECOM,US Stocks:Industrials,Construction & Engineering
...,...,...,...
WH,"Wyndham Hotels & Resorts, Inc.",US Stocks:Consumer Discretionary,"Hotels, Resorts & Cruise Lines"
X,United States Steel Corporation,US Stocks:Materials,Steel
Y,Alleghany Corporation,US Stocks:Financials,Reinsurance
XPO,XPO Logistics,US Stocks:Industrials,Air Freight & Logistics


In [8]:
# drop invalid symbols
# errors='ignore' keeps the cell re-runnable and tolerant of a symbol that
# is not (or no longer) present in the index
sp400.drop(['AAXN'], inplace=True, errors='ignore')

In [9]:
# collect the sp400 index (the ticker symbols) into a plain list
symbols = sp400.index.tolist()

In [10]:
# get the timeseries for the symbols and compile into a single csv
# `refresh` is controlled by the refresh_timeseries global.
# NOTE(review): presumably compile_timeseries writes symbols-timeseries.csv,
# the file read into `df` afterwards -- confirm against brownbear.
bb.fetch_timeseries(symbols, refresh=refresh_timeseries)
bb.compile_timeseries(symbols)

.......................................
('Date')
.
('Date')
......................................................................................................................................................................................................................................................................................................................................................................


In [11]:
# load symbols-timeseries.csv (skipping blank and '#' comment lines) and
# index the table by its "Date" column
df = (
    pd.read_csv('symbols-timeseries.csv', skip_blank_lines=True, comment='#')
    .set_index("Date")
)
# full-range slice: every row is kept
df = df[:]
df

Unnamed: 0_level_0,ACHC,ACIW,ADNT,ATGE,ACM,AMG,ADS,ACC,AEO,AFG,...,WTRG,WW,WWD,WWE,WYND,WH,X,Y,XPO,YELP
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2015-01-02,59.98,19.71,,43.01,30.35,203.96,268.24,32.45,11.66,46.96,...,23.17,21.53,46.48,10.87,31.56,,25.11,444.64,40.63,55.15
2015-01-05,59.12,19.26,,43.60,29.02,196.57,264.50,32.64,11.80,46.34,...,22.72,21.26,45.48,10.67,30.99,,23.93,435.03,39.37,52.53
2015-01-06,58.19,18.95,,41.75,28.73,192.33,261.14,32.98,11.84,46.08,...,22.75,19.74,45.15,10.70,30.62,,23.21,432.47,37.98,52.44
2015-01-07,60.63,19.03,,42.04,29.32,197.05,263.14,33.04,12.42,46.38,...,22.95,20.35,45.57,10.26,31.19,,23.26,434.10,38.14,52.21
2015-01-08,61.76,19.01,,42.91,30.23,199.06,270.64,33.23,11.83,47.22,...,23.10,20.32,45.34,10.14,32.01,,23.77,437.79,38.35,53.83
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-02-10,53.17,41.18,36.99,41.09,52.94,144.36,85.53,43.00,24.82,106.44,...,48.14,29.57,115.33,46.68,50.69,62.30,16.41,601.98,122.52,36.79
2021-02-11,53.18,41.21,36.92,40.80,54.61,145.16,83.54,43.85,24.87,108.03,...,47.90,29.75,116.00,47.06,51.57,60.92,16.52,610.00,117.60,37.91
2021-02-12,53.13,41.32,37.00,40.66,56.02,147.89,83.18,43.46,24.62,108.04,...,46.66,28.16,116.08,46.41,51.68,60.20,16.50,609.08,121.31,36.97
2021-02-16,52.06,41.35,36.96,38.95,57.70,146.00,87.11,43.23,25.30,108.17,...,46.00,28.71,114.68,46.50,51.75,60.70,18.48,601.56,122.70,36.16


In [12]:
# sample symbol: used to index the computed results (e.g. annual_returns[symbol])
# so a single value can be inspected after each calculation
symbol = 'ACHC'

In [13]:
# annualized returns computed from the daily table with years=1; display the
# value for the sample symbol
# NOTE(review): exact semantics are defined by bb.annualize_returns -- confirm
annual_returns = bb.annualize_returns(df, timeperiod='daily', years=1)
annual_returns[symbol]

52.18667137821531

In [14]:
# calculate annualized returns for each lookback: 1 month, 3 months,
# 1 year, 3 years and 5 years (lookbacks are expressed in years)
(annual_returns_1mo,
 annual_returns_3mo,
 annual_returns_1yr,
 annual_returns_3yr,
 annual_returns_5yr) = (
    bb.annualize_returns(df, timeperiod='daily', years=lookback)
    for lookback in (1/12, 3/12, 1, 3, 5)
)

In [15]:
# calculate volatility
# fractional change between consecutive rows, for every column of df
daily_returns = df.pct_change()
# lookback in years: trading days per month divided by trading days per year;
# this `years` global is reused by the downside volatility calculation
years = bb.TRADING_DAYS_PER_MONTH / bb.TRADING_DAYS_PER_YEAR
# NOTE(review): presumably returns a per-symbol fraction (it is multiplied
# by 100 when written to the output file) -- confirm against brownbear
vola = bb.annualized_standard_deviation(daily_returns, timeperiod='daily', years=years)
vola[symbol]

0.27303704915510346

In [16]:
# calculate downside volatility: same call and `years` lookback as `vola`,
# but with downside=True
# NOTE(review): how downside=True filters the returns is defined by
# brownbear -- confirm
ds_vola = bb.annualized_standard_deviation(daily_returns, timeperiod='daily', years=years, downside=True)
ds_vola[symbol]

0.12744647789588345

In [17]:
# resample df on a monthly basis
# note: this replaces df.index in place, so df carries a datetime index from
# here on
df.index = pd.to_datetime(df.index)
# 'M' resamples to month-end labels; ffill fills each label from the data
# at or before it
monthly = df.resample('M').ffill()
# print the complete monthly series for the sample symbol
bb.print_full(monthly[symbol])

Date
2015-01-31   57.75
2015-02-28   63.23
2015-03-31   71.60
2015-04-30   68.50
2015-05-31   74.14
2015-06-30   78.33
2015-07-31   79.78
2015-08-31   73.03
2015-09-30   66.27
2015-10-31   61.41
2015-11-30   69.01
2015-12-31   62.46
2016-01-31   61.03
2016-02-29   55.41
2016-03-31   55.11
2016-04-30   63.19
2016-05-31   58.87
2016-06-30   55.40
2016-07-31   56.50
2016-08-31   51.19
2016-09-30   49.55
2016-10-31   35.96
2016-11-30   38.01
2016-12-31   33.10
2017-01-31   38.37
2017-02-28   44.72
2017-03-31   43.60
2017-04-30   43.58
2017-05-31   41.34
2017-06-30   49.38
2017-07-31   52.93
2017-08-31   46.94
2017-09-30   47.76
2017-10-31   31.36
2017-11-30   31.83
2017-12-31   32.63
2018-01-31   34.08
2018-02-28   38.10
2018-03-31   39.18
2018-04-30   35.58
2018-05-31   40.19
2018-06-30   40.91
2018-07-31   39.48
2018-08-31   41.53
2018-09-30   35.20
2018-10-31   41.50
2018-11-30   33.97
2018-12-31   25.71
2019-01-31   27.36
2019-02-28   26.29
2019-03-31   29.31
2019-04-30   32.02
2019-05

In [18]:
# calculate monthly returns: fractional change between consecutive month-end
# rows (the first row has no predecessor, so it is NaN)
monthly_returns = monthly.pct_change()
monthly_returns[symbol]

Date
2015-01-31     nan
2015-02-28    0.09
2015-03-31    0.13
2015-04-30   -0.04
2015-05-31    0.08
              ... 
2020-10-31    0.21
2020-11-30    0.19
2020-12-31    0.18
2021-01-31    0.01
2021-02-28    0.02
Freq: M, Name: ACHC, Length: 74, dtype: float64

In [19]:
# calculate standard deviation from the monthly returns with years=3
# NOTE(review): presumably a per-symbol fraction (it is multiplied by 100
# when written to the output file) -- confirm against brownbear
std_dev = bb.annualized_standard_deviation(monthly_returns, timeperiod='monthly', years=3)
std_dev[symbol]

0.4916688479385516

In [20]:
# read the header template investment-options-in.csv; every line is kept,
# stripped of leading/trailing whitespace (including the newline).
# `lines` is only assigned once the file has been read, so a failed read
# cannot leave an empty header behind for the cells that build the output.
with open('investment-options-in.csv', 'r') as header_file:
    lines = [raw_line.strip() for raw_line in header_file]
lines

['# Description: S&P 400 investment options. 10/02/2020',
 '',
 '# Format',
 '"Investment Option","Description","Asset Class","1 mo","3 mo","1 Yr","3 Yr","5 Yr","Vola","DS Vola","Std Dev"',
 '# Note: "Description" field is optional']

In [21]:
# for each symbol, append one fully quoted CSV row after the header lines:
# symbol, description, asset class, annualized returns (1 mo, 3 mo, 1 Yr,
# 3 Yr, 5 Yr), Vola, DS Vola and Std Dev
out = lines.copy()

def _csv_quote(field):
    """Return field wrapped in double quotes, doubling any embedded quote."""
    return '"{}"'.format(str(field).replace('"', '""'))

# zip over the columns instead of iterrows(): no per-row Series is built.
# The loop variable is `ticker` so the sample `symbol` global is not clobbered.
for ticker, description, asset_class in zip(
        sp400.index, sp400['Description'], sp400['Asset Class']):

    metrics = (
        annual_returns_1mo[ticker],
        annual_returns_3mo[ticker],
        annual_returns_1yr[ticker],
        annual_returns_3yr[ticker],
        annual_returns_5yr[ticker],
        # the volatility and std dev figures are scaled by 100 before writing
        vola[ticker]*100,
        ds_vola[ticker]*100,
        std_dev[ticker]*100,
    )

    fields = [_csv_quote(ticker), _csv_quote(description), _csv_quote(asset_class)]
    fields.extend('"{:0.2f}"'.format(value) for value in metrics)
    out.append(','.join(fields))

In [22]:
# write every entry of `out` to investment-options.csv, one per line
with open('investment-options.csv', 'w') as out_file:
    out_file.writelines(row + '\n' for row in out)