In [1]:
import numpy
from collections import deque
import pandas
import math
import pandas_datareader.data as web
import datetime
import requests
import requests_cache
import xlrd
import tempfile
import itertools

  from pandas.util.testing import assert_frame_equal


In [2]:
def get_fred(fred_series):
    expire_after = datetime.timedelta(days=3)
    session = requests_cache.CachedSession(cache_name='data-cache', backend='sqlite', expire_after=expire_after)
    
    start = datetime.datetime(1800, 1, 1)
    df = web.DataReader(fred_series, "fred", start, session=session)
    return df

# All FRED data can be found at https://fred.stlouisfed.org/series/SERIES_NAME
FRED_SERIES = [
    'M1329AUSM193NNBR', # 1920-1934: Yields on Short-Term United States Securities, Three-Six Month Treasury Notes and Certificates, Three Month Treasury Bills for United States
    'M1329BUSM193NNBR', # 1931-1969: Yields on Short-Term United States Securities, Three-Six Month Treasury Notes and Certificates, Three Month Treasury Bills for United States
    'M13009USM156NNBR', # 1914-1969: Discount Rates, Federal Reserve Bank of New York for United States
    'M13044USM156NNBR', # 1951-1967: Yields on Corporate Bonds, New Issues, Aa Rating for United States
    'M13035USM156NNBR', # 1919-1968: Yields on Corporate Bonds, Highest Rating for United States
    'M1333AUSM156NNBR', # 1919-1944: Yield on Long-Term United States Bonds for United States
    'M13058USM156NNBR', # 1942-1962: Yields on Twenty Year United States Government Bonds for United States
    'BAA', # 1919-2016
    'AAA', # 1919-2016
    'CP1M', # 1971-1997
    'CP3M', # 1971-1997
    'CP6M', # 1970-1997
    'GS1M', # 2001-: 1-Month Treasury Constant Maturity Rate
    'GS3M', # 1982- : 3-Month Treasury Constant Maturity Rate
    'GS6M', # 1982- : 6-Month Treasury Constant Maturity Rate
    'GS1', # 1953-
    'GS2', # 1976-
    'GS3', # 1953-
    'GS5', # 1953-
    'GS7', # 1969-
    'GS10', # 1953-
    'GS20', # 1953-
    'GS30', # 1977-
    'CD1M', # 1965-2013
    'CD3M', # 1964-2013
    'CD6M', # 1964-2013
    'TB1YR', # 1959- (with gaps)
    'TB4WK', # 2001-
    'TB3MS', # 1934-
    'TB6MS', # 1958-
    'MSLB20', # 1953-216 State and Local Bonds - Bond Buyer Go 20-Bond Municipal Bond Index (DISCONTINUED)
    'LTGOVTBD', # 1925-2000 Long-Term U.S. Government Securities (DISCONTINUED)
]

fred = get_fred(FRED_SERIES)
fred.head()

Unnamed: 0_level_0,M1329AUSM193NNBR,M1329BUSM193NNBR,M13009USM156NNBR,M13044USM156NNBR,M13035USM156NNBR,M1333AUSM156NNBR,M13058USM156NNBR,BAA,AAA,CP1M,...,GS30,CD1M,CD3M,CD6M,TB1YR,TB4WK,TB3MS,TB6MS,MSLB20,LTGOVTBD
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1914-11-01,,,5.75,,,,,,,,...,,,,,,,,,,
1914-12-01,,,5.4,,,,,,,,...,,,,,,,,,,
1915-01-01,,,4.75,,,,,,,,...,,,,,,,,,,
1915-02-01,,,4.19,,,,,,,,...,,,,,,,,,,
1915-03-01,,,4.0,,,,,,,,...,,,,,,,,,,


In [3]:
def shiller_date_converter(x):
    year = int(x)
    month = int(x * 100) - (year * 100)
    dx = datetime.date(year, month, 1)
    return dx

def get_shiller(url="http://www.econ.yale.edu/~shiller/data/ie_data.xls"):
    expire_after = datetime.timedelta(days=3)
    session = requests_cache.CachedSession(cache_name='data-cache', backend='sqlite', expire_after=expire_after)

    r = session.get(url, stream=True)

    with tempfile.NamedTemporaryFile(suffix='.xls') as tmp:
        for chunk in r.iter_content(chunk_size=1024):
            tmp.write(chunk)
            
        tmp.flush()

        df = pandas.read_excel(tmp.name,
                               sheet_name='Data',
                               engine='xlrd',
                               skiprows=7,
                               skipfooter=11, # WARN: is this always 11?
                               index_col=0,
                               usecols="A:E,G:K")
        df.rename(columns={'Price' : 'Real Price',
                          'Dividend' : 'Real Dividend',
                          'Earnings' : 'Real Earnings'}, inplace=True)
        # Convert from Shiller's wonky date format to a real one
        df.index = pandas.DatetimeIndex([shiller_date_converter(n) for n in df.index])
    return df

shiller = get_shiller()
shiller.head()

Unnamed: 0,P,D,E,CPI,Rate GS10,Real Price,Real Dividend,Price.1,Real Earnings
1871-01-01,4.44,0.26,0.4,12.464061,5.32,91.477836,5.35681,91.477836,8.241246
1871-02-01,4.5,0.26,0.4,12.844641,5.323333,89.966954,5.198091,90.400128,7.997063
1871-03-01,4.61,0.26,0.4,13.034972,5.326667,90.820379,5.122191,91.686566,7.880293
1871-04-01,4.74,0.26,0.4,12.559226,5.33,96.918779,5.31622,98.290373,8.1788
1871-05-01,4.86,0.26,0.4,12.273812,5.333333,101.683222,5.439843,103.581978,8.368989


In [5]:
def get_shiller_ch26(url='http://www.econ.yale.edu/~shiller/data/chapt26.xlsx'):
    expire_after = datetime.timedelta(days=3)
    session = requests_cache.CachedSession(cache_name='data-cache', backend='sqlite', expire_after=expire_after)

    r = session.get(url, stream=True)

    with tempfile.NamedTemporaryFile(suffix='.xls') as tmp:
        for chunk in r.iter_content(chunk_size=1024):
            tmp.write(chunk)
            
        tmp.flush()

        df = pandas.read_excel(tmp.name,
                               sheet_name='Data',
                               engine='xlrd',
                               skiprows=7,
                               skipfooter=12, # WARN: is this always 12?
                               index_col=0,
                               usecols="A,E")
        df.rename(columns={'Unnamed: 1': 'One-Year Interest Rate'}, inplace=True)
        df.index = pandas.DatetimeIndex([datetime.date(n, 1, 1) for n in df.index])
    return df

SHILLER_CHAPTER_26 = get_shiller_ch26()
SHILLER_CHAPTER_26.head()

Unnamed: 0,Unnamed: 4
1871-01-01,6.35
1872-01-01,7.81
1873-01-01,8.35
1874-01-01,6.86
1875-01-01,4.96


In [6]:
# Mix all of our data sources together!
all_by_month = pandas.concat([shiller, fred, SHILLER_CHAPTER_26], axis=1, join='outer')

In [7]:
# We only have annual interest rates from Shiller. We need to interpolate monthly rates...
all_by_month['One-Year Interest Rate'].interpolate(inplace=True)

KeyError: 'One-Year Interest Rate'

In [8]:
def iterate_fund(ladder, yield_curve, max_maturity):
    ladder.reduce_maturities()
    ladder.generate_payments()
#    print(ladder.cash)
#    import pdb;pdb.set_trace()
    sold_bonds = ladder.sell_bonds(yield_curve)

    # Only buy a new bond if we actually sold one...
    if sold_bonds:
        ladder.buy_bond(yield_curve[max_maturity-1], max_maturity)
    
    # This happens *after* we sell the shortest bond and buy a new long one
    # (at least, that's what longinvest does...)
    nav = ladder.get_nav(yield_curve)

    return (ladder, nav)

In [None]:
def a2m(annual_rate):
    return pow(annual_rate + 1, 1/12) - 1

class Bond:
    def __init__(self, face_value, yield_pct, maturity, payments_per_year=12):
        self.face_value = face_value
        self.yield_pct = yield_pct
        self.maturity = maturity
        self.payments_per_year = payments_per_year
        
    def __repr__(self):
        return ('Maturity: %d | Yield: %.2f%% | Face Value: $%.2f' % (self.maturity, self.yield_pct * 100, self.face_value))

    def gen_payment(self):
        return self.face_value * self.yield_pct / self.payments_per_year
    
    def value(self, rates):
        value = numpy.pv(rates[self.maturity - 1], self.maturity / 12, (self.face_value * self.yield_pct), self.face_value)
        return -value
    
class BondLadder:
    def __init__(self, min_maturity, max_maturity):
        self.min_maturity = min_maturity
        self.max_maturity = max_maturity
        self.cash = 0
        
        self.ladder = set()
        
    def print_all(self):
        for bond in sorted(self.ladder, key=lambda b: b.maturity):
            print(bond)
            
    def print_all_values(self, rates):
        for bond in sorted(self.ladder, key=lambda b: b.maturity):
            print(bond.value(rates))
        
    def buy_bond(self, rate, maturity):
        b = Bond(self.cash, rate, maturity)
        self.add_bond(b)
        self.cash = 0
        return b
        
    def get_nav(self, rates):
        return self.cash + sum((b.value(rates) for b in self.ladder))

    def generate_payments(self):
        self.cash += sum((b.gen_payment() for b in self.ladder))        
        
    def __repr__(self):
        return ('%d-%d Ladder { Num Bonds: %d. }' % (self.max_maturity, self.min_maturity, len(self.ladder)))
        
    def add_bond(self, bond):
        #assert bond.maturity <= self.max_maturity
        #assert bond.maturity >= self.min_maturity
        self.ladder.add(bond)
    
    def reduce_maturities(self):
        for bond in self.ladder:
            bond.maturity -= 1

    def sell_bonds(self, rates):
        to_sell = filter(lambda bond: bond.maturity <= self.min_maturity, self.ladder)
        to_sell = list(to_sell)
        self.ladder = self.ladder.difference(to_sell)
        self.cash += sum((b.value(rates) for b in to_sell))
        return to_sell

In [None]:
def bootstrap(yield_curve, max_bonds, min_maturity):
    bond_yield = yield_curve[max_bonds - 1]

    # Why - 11?
    #min_maturity -= 11

    ladder = BondLadder(min_maturity, max_bonds)
    starting_face_value = 50 # chosen arbitrarily (to match longinvest)

    for i, j in zip(range(max_bonds), range(min_maturity, max_bonds+1)):
        face_value = pow(1 + a2m(bond_yield), i) * starting_face_value
        b = Bond(face_value, bond_yield, j)
        ladder.add_bond(b)
    return ladder
bootstrap([.0532]*120, 10*12, 5*12)

In [None]:
def splice_data(raw_rates, series):
    # Start by loading the data we get from Shiller.
    # This will always exist.

    def safe_add(series_index, rate_index):
        # Don't overwrite any data we already have.
        if math.isnan(series.iloc[series_index]):
            series.iloc[series_index] = raw_rates[rate_index]

    safe_add(1 * 12 - 1, 'GS1')
    safe_add(1 * 12 - 1, 'One-Year Interest Rate')
    safe_add(10 * 12 - 1, 'Rate GS10')

    safe_add(2 * 12 - 1, 'GS2')
    safe_add(3 * 12 - 1, 'GS3')
    safe_add(5 * 12 - 1, 'GS5')
    safe_add(7 * 12 - 1, 'GS7')
    safe_add(10 * 12 - 1, 'GS10')
    safe_add(20 * 12 - 1, 'GS20')
    safe_add(30 * 12 - 1, 'GS30')

    safe_add(0, 'TB4WK')
    safe_add(0, 'CD1M')

    safe_add(3 - 1, 'TB3MS')
    safe_add(3 - 1, 'M1329AUSM193NNBR')

    safe_add(6 - 1, 'TB6MS')

def build_yield_curve(raw_rates, yield_curve_size=30*12):
    s = pandas.Series(math.nan, index=numpy.arange(yield_curve_size))

    # We use NaN to indicate "the data needs to be interpolated"
    # We have a few different data series that we splice together.
    splice_data(raw_rates, s)
    
    # This will do linear interpolation where it can.
    s.interpolate(inplace=True)
    
    # But it can still leave us with NaNs at the low end of the range
    s.fillna(method='backfill', inplace=True)
    
    # all of the data is in the form 3.71 but we want it to be .0371,
    # since that's what a percent actually is
    return s.apply(lambda x: x / 100).tolist()

In [None]:
['%.3f' % (s*100) for s in build_yield_curve(all_by_month.iloc[0])]

In [None]:
bootstrap(build_yield_curve(all_by_month.iloc[-2]), 30 * 12, 4 * 12)

In [None]:
def loop(ladder, rates, max_maturity):
    df = pandas.DataFrame(columns=['NAV', 'Change'])

    # The first iterations have fake data with duplicate years
    # But that's okay because we overwrite them with later data
    # (since they all have the same year)
    for (year, current_rates) in rates:
        if year.year % 5 == 0 and year.month == 1:
            print('Calculating...', year.year)
        (ladder, nav) = iterate_fund(ladder, build_yield_curve(current_rates), max_maturity)
        df.loc[year] = {'NAV' : nav, 'Change' : None}

    calculate_returns(df)
    return df

def calculate_returns(df):
    # Longinvest calculates the return based on comparison's to
    # next year's NAV. So I'll do the same. Even though that seems
    # weird to me. Maybe it's because the rates are based on January?
    # Hmmm...that sounds plausible.
    max_row = df.shape[0]

    for i in range(max_row - 1):
        next_nav = df.iloc[i+1]['NAV']
        nav = df.iloc[i]['NAV']
        change = (next_nav - nav) / nav
        df.iloc[i]['Change'] = change
    return df

def make_annual_ladder(max_maturity, min_maturity, yields):
    rate = yields[max_maturity - 1]
    
    # We have to add the "- 12" in order to make things like up with how
    # longinvest runs things. His "10-4" ladder is really more of "10-3" ladder:
    # bonds get sold the moment they become a 3 year bond.
    ladder = BondLadder(min_maturity - 12, max_maturity)

    face_value = 50
    for i in range(min_maturity, max_maturity + 1, 12):
        ladder.add_bond(Bond(face_value, rate, i))
        face_value = face_value * (1 + rate)

    return ladder

def simulate_monthly_turnover(max_maturity, min_maturity, rates):
    min_maturity = min_maturity * 12
    max_maturity = max_maturity * 12

    initial_yields = build_yield_curve(rates.iloc[0])
    ladder = bootstrap(initial_yields, max_maturity, min_maturity)

    return loop(ladder, rates.iterrows(), max_maturity)

def simulate_annual_turnover(max_maturity, min_maturity, rates):
    min_maturity = min_maturity * 12
    max_maturity = max_maturity * 12

    initial_yields = build_yield_curve(rates.iloc[0])
    ladder = make_annual_ladder(max_maturity, min_maturity, initial_yields)

    # longinvest actually simulates 1870 and assumes 1871 rates. That's why,
    # when the simulation starts in January 1871, all the bonds have paid 1 year
    # of interest and one of the bonds is ready to be sold.
    # So we need to generate 11 months of fake data to do the same simulation.
    # Why 11 months? The 12th month is the real January 1871 data.
    first_index = rates.head(1).index
    bootstrap_rates = itertools.repeat(next(rates.iterrows()), 11)
    all_rates = itertools.chain(bootstrap_rates, rates.iterrows())

    return loop(ladder, all_rates, max_maturity)

In [None]:
%%time
sim_results = simulate_annual_turnover(30, 20, all_by_month)
#sim_results = simulate_monthly_turnover(3, 1, all_by_month)
print(sim_results.head())

In [None]:
sim_results.to_csv('bonds-monthly.csv')

In [None]:
def get_morningstar(secid):
    url = 'http://mschart.morningstar.com/chartweb/defaultChart?type=getcc&secids=%s&dataid=8225&startdate=1900-01-01&enddate=2016-11-18&currency=&format=1' % secid
    expire_after = datetime.timedelta(days=3)
    session = requests_cache.CachedSession(cache_name='data-cache', backend='sqlite', expire_after=expire_after)

    # TODO: why doesn't this work!?!
    r = session.get(url)
    #r = requests.get(url)
    j = r.json()
    
    # The Morningstar data is pretty deeply nested....
    m = j['data']['r'][0]
    assert m['i'] == secid
    
    actual_data = m['t'][0]['d']
    # convert from strings to real data types
    as_dict = dict([(datetime.datetime.strptime(n['i'], '%Y-%m-%d'), float(n['v'])) for n in m['t'][0]['d']])
    
    # Strip out data?
    # Do we only want start of month, end of month, start of year, end of year, etc?
    s = pandas.Series(as_dict, name=secid)

    return s

barclays_index = get_morningstar('XIUSA000MJ')
#barclays_index = get_morningstar('XIUSA000CT')

# Use only final value for each calendar year
def annual(series):
    return series.groupby(by=lambda x: x.year).last()
# Use only final value for each calendar month
def monthly(series):
    return series.groupby(by=lambda x: datetime.date(x.year, x.month, 1)).last()

In [None]:
monthly_results = pandas.concat([sim_results, monthly(barclays_index)], axis=1, join='outer')

def calculate_change_prev(df, column):
    max_row = df.shape[0]
    
    series = pandas.Series()

    for i in range(max_row - 1):
        val = df.iloc[i][column]
        prev_val = df.iloc[i-1][column]
        change = (val - prev_val) / prev_val
        series.loc[df.iloc[i].name] = change
    return series

def calculate_change_next(df, column):
    max_row = df.shape[0]
    
    series = pandas.Series()

    for i in range(max_row - 1):
        val = df.iloc[i][column]
        next_val = df.iloc[i+1][column]
        change = (next_val - val) / val
        series.loc[df.iloc[i].name] = change
    return series

index_change = calculate_change_prev(monthly_results, 'XIUSA000MJ')
monthly_results = monthly_results.assign(index_change=index_change)
print(monthly_results[["Change", "index_change"]].corr())

In [None]:
# This is correct when doing monthly calculations.
annual_results = monthly_results.groupby(by=lambda x: x.year).first()
s_nav = calculate_change_next(annual_results, 'NAV')
s_ind = calculate_change_next(annual_results, 'XIUSA000MJ')
annual_results = annual_results.assign(Change=s_nav, index_change=s_ind)
print(annual_results[["Change", "index_change"]].corr())
annual_results.to_csv('bonds-monthly-annual.csv')
print(annual_results.head())

In [None]:
BarclaysAggregateBondIndex = 'XIUSA000MC'
