Below, I install my own implementation of Professor Boonstra's "memoize DataFrame to disk" feature. The source code can be found at [github.com/ethho/memoize](https://github.com/ethho/memoize).

In [1]:
!python3 -m pip install --quiet git+https://github.com/ethho/memoize.git

In [2]:
import json
import re
import os
from glob import glob
from dataclasses import dataclass
from typing import List, Dict, Tuple, Optional
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from scipy.stats import norm, probplot
import quandl
import functools
import plotly.express as px
import plotly.graph_objects as go
from memoize.dataframe import memoize_df

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Display floats with thousands separators and four decimal places.
pd.options.display.float_format = '{:,.4f}'.format

# NOTE(review): presumably toggles dark styling for plots -- confirm against the plotting cells.
DARK_MODE = False
# TODO: state what remains to be done here (original note was a bare TODO).

# 20230217_hw6_ho_ethan_12350006

@mpcs
@finm33550

Ethan Ho 2/17/2023

----

## Configuration & Helper Functions

The following cell contains helper functions and configuration options that I will use in this notebook.

In [5]:
def get_secrets(fp='./secrets.json'):
    """
    Load secret values (for example API keys) from the JSON file at `fp`.

    Returns the parsed JSON content of that file.
    """
    with open(fp, 'r') as handle:
        raw_text = handle.read()
    return json.loads(raw_text)

def get_quandl_api_key() -> str:
    """
    Look up the Quandl API key in the secrets file read by `get_secrets`.

    Raises AssertionError when the field is missing or empty.
    """
    # NOTE(review): the field name is spelled "NASTAQ" (not "NASDAQ"); it must
    # match the key actually used in secrets.json, so it is left unchanged.
    api_key = get_secrets().get('NASTAQ_DATA_API_KEY')
    assert api_key, "NASTAQ_DATA_API_KEY field in secrets.json is empty or does not exist"
    return api_key

def strip_str_dtypes(df: pd.DataFrame) -> pd.DataFrame:
    """
    Strip leading/trailing whitespace from the string values of `df`.

    Motivated by ticker values such as "AAPL       " that carry trailing
    whitespace. Columns are rewritten on `df` itself (the input is modified
    in place) and the same frame is returned for convenience.

    Object-dtype columns are handled element by element so that non-string
    entries (numbers, None, NaN, ...) are left untouched; calling
    `.str.strip()` on such a column would turn them into NaN or raise.
    Dedicated string-dtype columns use the vectorised `.str.strip()`.
    """
    for col in df.columns:
        values = df[col]
        if pd.api.types.is_object_dtype(values):
            stripped = values.map(
                lambda item: item.strip() if isinstance(item, str) else item
            )
            # Keep the column as object so the dtype does not change under us.
            df[col] = stripped.astype(object)
        elif pd.api.types.is_string_dtype(values):
            df[col] = values.str.strip()
    return df

@memoize_df(cache_dir='data/memoize', cache_lifetime_days=None)
def fetch_quandl_table(
    name, start_date, end_date, **kw
) -> pd.DataFrame:
    """
    Download the Quandl table `name`, restricted to rows whose `date` lies
    between `start_date` and `end_date` (both bounds inclusive).

    Extra keyword arguments are forwarded to `quandl.get_table` as
    additional filters. The result is sorted by date.
    """
    date_filter = {'gte': start_date, 'lte': end_date}
    table = quandl.get_table(
        name,
        date=date_filter,
        api_key=get_quandl_api_key(),
        paginate=True,
        **kw
    )
    table['date'] = pd.to_datetime(table['date'])
    # reset_index() without drop=True keeps the pre-sort index as a column.
    return table.sort_values(by='date').reset_index()

@memoize_df(cache_dir='data/memoize', cache_lifetime_days=None)
def fetch_quandl_quotemedia_prices(
    start_date, end_date, ticker
) -> pd.DataFrame:
    """
    Fetch the 'QUOTEMEDIA/PRICES' table for one `ticker` between
    `start_date` and `end_date` by delegating to `fetch_quandl_table`.

    Both this wrapper and `fetch_quandl_table` are decorated with
    `memoize_df` using the same cache directory.
    """
    return fetch_quandl_table(
        name= 'QUOTEMEDIA/PRICES',
        start_date=start_date,
        end_date=end_date,
        # Forwarded through **kw as a table filter.
        ticker=ticker,
    )

def unique_index_keys(df, level=0) -> List[str]:
    """Return the distinct values of index level `level` of `df` as a list."""
    level_values = df.index.get_level_values(level)
    distinct = level_values.unique()
    return distinct.tolist()

def get_next_day_of_week(date, day_of_week: int) -> str:
    """
    Return the first date on or after `date` that falls on `day_of_week`,
    formatted as 'YYYY-MM-DD'.

    Weekdays are numbered Monday = 0, Wednesday = 2. A `date` that already
    falls on the requested weekday is returned unchanged.
    """
    anchor = pd.to_datetime(date)
    # The modulo keeps the offset within 0..6, so we never step backwards.
    offset_days = (day_of_week - anchor.day_of_week) % 7
    target = anchor + pd.to_timedelta(offset_days, 'D')
    return target.strftime('%Y-%m-%d')

# Fetch Data

First, let's set our time indices. We choose to trade weekly on Wednesdays, and skip the week if the Wednesday falls on a holiday.

In [154]:
start_date = '2017-12-23'
end_date = '2022-12-30'

# Calendar-day index covering the whole sample window.
daily_idx = pd.date_range(start_date, end_date)

# Weekly grid anchored on the first Wednesday on or after start_date.
first_wed = get_next_day_of_week(start_date, 2)
wed_idx_w_holidays = pd.date_range(first_wed, end_date, freq='7D')
assert all(date.day_of_week == 2 for date in wed_idx_w_holidays)

# Wednesdays to skip because they fall on holidays. Built once here rather
# than inside the comprehension, where it was re-created for every date.
# Entries earlier than start_date can never match; they are kept as-is.
holiday_wednesdays = pd.to_datetime([
    '2012-12-26', '2013-12-25', '2014-01-01', '2018-12-26',
    '2019-12-25', '2020-01-01',
])

wed_idx = [
    date for date in wed_idx_w_holidays
    if date not in holiday_wednesdays
]
# Sanity check: at least one holiday Wednesday was actually removed.
assert len(wed_idx_w_holidays) > len(wed_idx)

Now we load the 5-year CDS rates:

In [197]:
# Ordinal encoding of the implied rating labels: a higher number is assigned
# to a better rating (AA = 5 down to B = 1).
impliedratingmap = dict(AA=5, A=4, BBB=3, BB=2, B=1)

@memoize_df(cache_dir='./data/memoize', cache_lifetime_days=None)
def _get_cds_quotes(fp='./data/Liq5YCDS.delim'):
    """
    Read the whitespace-delimited CDS quote file at `fp` and tidy it.

    Steps: check that the file has no missing values, drop the
    `docclause`, `tier`, `currency` and `tenor` columns, upper-case and
    strip the tickers, check that each (date, ticker) pair occurs once, and
    replace the `impliedrating` label with its integer code from
    `impliedratingmap` (an unknown label raises KeyError).
    """
    # sep=r'\s+' is the documented equivalent of the deprecated
    # delim_whitespace=True.
    df = pd.read_csv(fp, sep=r'\s+')
    assert not df.isnull().any().any()
    df.drop(columns=['docclause', 'tier', 'currency', 'tenor'], inplace=True)
    df['ticker'] = df['ticker'].str.upper().str.strip()
    assert not df.duplicated(['date', 'ticker']).any()
    df['impliedrating'] = df['impliedrating'].apply(lambda x: impliedratingmap[x]).astype(int)
    return df

def get_cds_quotes(fp='./data/Liq5YCDS.delim'):
    """
    Return the CDS quotes from `fp` resampled to one row per ticker and week.

    Every quote date is mapped to a weekly bucket date via
    `get_next_day_of_week`, and the last observation inside each
    (ticker, bucket) group is kept. The result is indexed by
    (`ticker`, `date`), where `date` is the bucket date.
    """
    quotes = _get_cds_quotes(fp)
    quotes['date'] = pd.to_datetime(quotes['date'])
    quotes = quotes.convert_dtypes().sort_values(by=['date', 'ticker'])

    # NOTE(review): weekday 1 is Tuesday under the Monday = 0 convention of
    # get_next_day_of_week, although the column is called 'next_wed' --
    # confirm that bucketing on Tuesdays is intended.
    bucket_labels = quotes['date'].apply(get_next_day_of_week, args=(1, ))
    quotes['next_wed'] = pd.to_datetime(bucket_labels)

    weekly = quotes.groupby(['ticker', 'next_wed'], group_keys=True).aggregate('last')
    weekly = weekly.drop(columns=['date'])
    weekly.index = weekly.index.set_names(['ticker', 'date'])
    assert not weekly.isnull().any().any()
    return weekly

def get_cds_return():
    """
    Return the weekly CDS quotes together with their week-over-week gross
    changes.

    For every column `c` of `get_cds_quotes()` a column `c_ret` is added,
    holding 1 + (current - previous) / previous, where "previous" is the
    prior row of the same ticker. The first row of each ticker has no
    previous value, so its `_ret` entries are missing.
    """
    levels = get_cds_quotes()
    previous = levels.groupby(level=0).shift(1)
    gross_change = (1 + ((levels - previous) / previous)).add_suffix('_ret')
    return levels.merge(gross_change, how='left', left_index=True, right_index=True)


# Build the weekly CDS panel (quote columns plus their `_ret` columns) and display it.
cds = get_cds_return()
cds

Using cache fp='./data/memoize/_get_cds_quotes_558128f_20230215.csv' to write results of function _get_cds_quotes
Using cached call from ./data/memoize/_get_cds_quotes_558128f_20230215.csv


Unnamed: 0_level_0,Unnamed: 1_level_0,parspread,upfront,runningcoupon,cdsrealrecovery,cdsassumedrecovery,impliedrating,parspread_ret,upfront_ret,runningcoupon_ret,cdsrealrecovery_ret,cdsassumedrecovery_ret,impliedrating_ret
ticker,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
BA,2018-01-02,0.0018,-0.0390,0.0100,0.4000,0.4000,3,,,,,,
BA,2018-01-09,0.0016,-0.0398,0.0100,0.4000,0.4000,3,0.8847,1.0194,1.0000,1.0000,1.0000,1.0000
BA,2018-01-16,0.0017,-0.0389,0.0100,0.4000,0.4000,3,1.0939,0.9775,1.0000,1.0000,1.0000,1.0000
BA,2018-01-23,0.0018,-0.2248,0.0500,0.4000,0.4000,3,1.0296,5.7817,5.0000,1.0000,1.0000,1.0000
BA,2018-01-30,0.0017,-0.2239,0.0500,0.4000,0.4000,3,0.9890,0.9960,1.0000,1.0000,1.0000,1.0000
...,...,...,...,...,...,...,...,...,...,...,...,...,...
XRX,2022-12-06,0.0424,0.1306,0.0100,0.4000,0.4000,1,1.0050,-4.1325,0.2000,1.0000,1.0000,1.0000
XRX,2022-12-13,0.0410,0.1253,0.0100,0.4000,0.4000,1,0.9680,0.9596,1.0000,1.0000,1.0000,1.0000
XRX,2022-12-20,0.0422,0.1295,0.0100,0.4000,0.4000,1,1.0286,1.0332,1.0000,1.0000,1.0000,1.0000
XRX,2022-12-27,0.0424,0.1294,0.0100,0.4000,0.4000,1,1.0046,0.9996,1.0000,1.0000,1.0000,1.0000


Now, we fetch adjusted close prices for the corresponding equities:

In [198]:
# Every ticker present in the CDS panel, followed by SPY.
tickers = [*cds.index.get_level_values(0).unique().tolist(), 'SPY']

def get_eod_quotes():
    """
    Return weekly adjusted close prices for every symbol in `tickers`.

    Prices are fetched per ticker for the module-level `start_date` ..
    `end_date` window, combined into one wide frame, bucketed into weeks via
    `get_next_day_of_week` (keeping the last observation of each bucket) and
    reshaped to a long frame with a single `eod` column indexed by
    (`ticker`, `date`).
    """
    closes = dict()
    for ticker in tickers:
        prices = fetch_quandl_quotemedia_prices(
            start_date=start_date,
            end_date=end_date,
            ticker=ticker,
        )
        # Build the per-ticker series without assigning into a column-subset
        # slice of `prices`, which triggered SettingWithCopyWarning.
        closes[ticker] = (
            prices
            .assign(date=pd.to_datetime(prices['date']))
            .set_index('date')['adj_close']
            .rename(ticker)
            .sort_index()
        )
    df = pd.DataFrame(closes)
    df = df.reset_index().convert_dtypes()
    df.sort_values(by=['date'], inplace=True)
    # NOTE(review): weekday 1 is Tuesday under the Monday = 0 convention of
    # get_next_day_of_week, although the column is called 'next_wed' --
    # confirm that bucketing on Tuesdays is intended.
    df['next_wed'] = pd.to_datetime(df['date'].apply(get_next_day_of_week, args=(1, )))
    df = df.groupby('next_wed', group_keys=True).aggregate('last')
    df.drop(columns=['date'], inplace=True)
    # Wide (bucket date x ticker) -> long (ticker, date) with one value column.
    df = df.stack().swaplevel().to_frame(name='eod')
    df.index.set_names(['ticker', 'date'], inplace=True)
    df.sort_index(level=[0, 1], inplace=True)
    assert not df.isnull().any().any()
    return df

def get_eod_return():
    """
    Return the weekly close prices together with their week-over-week gross
    returns.

    Adds an `eod_ret` column holding 1 + (current - previous) / previous,
    where "previous" is the prior row of the same ticker. The first row of
    each ticker has no previous value, so its `eod_ret` is missing.
    """
    levels = get_eod_quotes()
    previous = levels.groupby(level=0).shift(1)
    gross_return = (1 + ((levels - previous) / previous)).add_suffix('_ret')
    return levels.merge(gross_return, how='left', left_index=True, right_index=True)

# Build the weekly equity panel (`eod` price and `eod_ret` gross return) and display it.
eod = get_eod_return()
eod

Using cache fp='data/memoize/fetch_quandl_quotemedia_prices_52347df_20230215.csv' to write results of function fetch_quandl_quotemedia_prices
Using cached call from data/memoize/fetch_quandl_quotemedia_prices_52347df_20230215.csv
Using cache fp='data/memoize/fetch_quandl_quotemedia_prices_913c85c_20230215.csv' to write results of function fetch_quandl_quotemedia_prices
Using cached call from data/memoize/fetch_quandl_quotemedia_prices_913c85c_20230215.csv
Using cache fp='data/memoize/fetch_quandl_quotemedia_prices_040b924_20230215.csv' to write results of function fetch_quandl_quotemedia_prices
Using cached call from data/memoize/fetch_quandl_quotemedia_prices_040b924_20230215.csv
Using cache fp='data/memoize/fetch_quandl_quotemedia_prices_8477762_20230215.csv' to write results of function fetch_quandl_quotemedia_prices
Using cached call from data/memoize/fetch_quandl_quotemedia_prices_8477762_20230215.csv
Using cache fp='data/memoize/fetch_quandl_quotemedia_prices_122b182_20230215.csv

Unnamed: 0_level_0,Unnamed: 1_level_0,eod,eod_ret
ticker,date,Unnamed: 2_level_1,Unnamed: 3_level_1
BA,2017-12-26,281.4210,
BA,2018-01-02,282.8312,1.0050
BA,2018-01-09,303.4022,1.0727
BA,2018-01-16,319.3427,1.0525
BA,2018-01-23,319.7524,1.0013
...,...,...,...
XRX,2022-12-06,15.3300,0.9796
XRX,2022-12-13,16.5300,1.0783
XRX,2022-12-20,14.7100,0.8899
XRX,2022-12-27,14.7000,0.9993


In [199]:
# First rows of every ticker's history up to and including 2018-01-09.
eod.loc[pd.IndexSlice[:, :'2018-01-09'], :].head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,eod,eod_ret
ticker,date,Unnamed: 2_level_1,Unnamed: 3_level_1
BA,2017-12-26,281.421,
BA,2018-01-02,282.8312,1.005
BA,2018-01-09,303.4022,1.0727
C,2017-12-26,63.4956,
C,2018-01-02,63.1389,0.9944
C,2018-01-09,63.9456,1.0128
DD,2017-12-26,92.849,
DD,2018-01-02,93.4468,1.0064
DD,2018-01-09,97.9177,1.0478
F,2017-12-26,9.8221,


# Scratch