In [1]:
import json
import re
from typing import List, Dict, Tuple
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import scipy as sp
import quandl
import functools
import seaborn as sns

%matplotlib inline

# 20230104_hw1

Ethan Ho 1/4/2023

----

This notebook was forked from `example_notebooks/Quandl_Options.ipynb`.

Homework prompt, copied from PDF:

> Obtain second month quarterly[^1] futures prices from the Quandl OWF database for two pairs $W, X$ and $Y, Z$ (where $W$, $X$, $Y$ and $Z$ depend on your student ID number) for 3 Dec 2020 through 31 Aug 2022. Our definition of second month is the contract where the number of days to futures expiration is the smallest available value greater than 30[^2]. Form the spreads $s^{(1)}_t$, $s^{(2)}_t$ between these numbers as the differences $s^{(1)}_t = X_t - W_t$ and $s^{(2)}_t = Z_t - Y_t$.
>
> Characterize the relative dynamics of $s^{(i)}_t$ in reasonable ways, using charts and statistics. For example, you could compute the median and standard deviation of the difference $d^{(N)}_t$ between $s^{(i)}_t$ and an $N$-day rolling average of $s_t$ for some values of $N$. Examine more quantiles than just the median. Look at tails. Consider some dynamics, for example do the spreads correlate? How about their difference ($d$) values? Do spreads exhibit patterns over time?

The last 4 digits of my student ID are `0006`, so I'll be analyzing the following pairs:

- `0. ICE_TFM_TFM (W) versus NYM_NG_NG (X) × 13.7261`
- `6. CBT_TU_TU (Y) versus CBT_US_US (Z) × 0.6873`

## Configuration & Helper Functions

The following cell contains helper functions and configuration options that I will use in this notebook.

In [2]:
def get_secrets(fp='./secrets.json'):
    """
    Load secret values (API keys and similar) from the JSON file at `fp`.

    Returns whatever `json.load` produces for that file's contents.
    """
    with open(fp, 'r') as secrets_file:
        return json.load(secrets_file)

def get_quandl_api_key() -> str:
    """
    Look up the Quandl API key in the secrets loaded by `get_secrets()`.

    Fails with an AssertionError when the `NASTAQ_DATA_API_KEY` entry is
    missing or empty.
    """
    # NOTE(review): "NASTAQ" looks like a misspelling of "NASDAQ"; it is kept
    # as-is because it has to match the field name stored in secrets.json.
    key = get_secrets().get('NASTAQ_DATA_API_KEY')
    assert key, f"NASTAQ_DATA_API_KEY field in secrets.json is empty or does not exist"
    return key

@functools.lru_cache(maxsize=1600)
def fetch_quandl(feeds: Tuple[str, ...], trim_start=None, trim_end=None) -> pd.DataFrame:
    """
    Download one or more Quandl datasets in a single `quandl.get` call,
    memoising the result per distinct set of arguments.

    Parameters
    ----------
    feeds
        Quandl codes to fetch. Must be hashable, i.e. a tuple rather than a
        list: `functools.lru_cache` hashes every argument before this body
        runs, so passing a list raises `TypeError: unhashable type`.
    trim_start, trim_end
        Forwarded to `quandl.get` as `start_date` and `end_date`.

    Returns
    -------
    The object returned by `quandl.get(..., returns="pandas")`.

    Notes
    -----
    Repeated calls with the same arguments return the *same* cached object,
    so callers should copy it before mutating. `lru_cache` also treats
    positional and keyword spellings of the same call as different keys.
    """
    return quandl.get(
        list(feeds),  # hand quandl a list of codes, independent of the hashable input
        returns="pandas",
        start_date=trim_start,
        end_date=trim_end,
        api_key=get_quandl_api_key(),
    )

I start by determining which Quandl codes we need to fetch data for.
This is not as easy as I'd hoped; Quandl uses strange identifiers for the contract expiration month (why do they do this?) and I had to infer the following mapping to help generate Quandl codes.

In [3]:
# Calendar month number (1-12) -> single-letter expiry identifier used in Quandl codes.
# These look like the conventional futures delivery-month letters.
EXP_ID_MAPPING = dict(enumerate(
    [
        "F",  # January
        "G",  # February
        "H",  # March
        "J",  # April
        "K",  # May
        "M",  # June
        "N",  # July
        "Q",  # August
        "U",  # September
        "V",  # October
        "X",  # November
        "Z",  # December
    ],
    start=1,
))

# Letter -> month number; used by the data-cleaning functions to decode column names.
EXP_ID_MAPPING_REV = dict(zip(EXP_ID_MAPPING.values(), EXP_ID_MAPPING.keys()))

These utilities will help generate the list of Quandl codes we need, using the mapping above.

In [4]:
def get_quandl_code_ivm(exp_month: int, exp_year: int, stub: str) -> str:
    """
    Build the Quandl IVM code `<stub>_<month letter><year>_IVM` for one contract.

    `exp_month` is translated to its letter through `EXP_ID_MAPPING`; a month
    that is not a key of that table raises `KeyError`.
    """
    month_letter = EXP_ID_MAPPING[exp_month]
    return f"{stub}_{month_letter}{exp_year}_IVM"

def get_quandl_codes_quarterly(
    start_year: int,
    end_year: int,
    stub: str,
    months: Tuple[int, ...] = tuple(range(1, 13)),
) -> List[str]:
    """
    List the Quandl IVM codes for contracts expiring in `[start_year, end_year]`.

    Despite the name, the default is *every* calendar month, not only
    quarter-end months. Pass `months` to narrow the selection, e.g.
    `months=(3, 6, 9, 12)` for a March/June/September/December cycle.

    Parameters
    ----------
    start_year, end_year
        Inclusive range of expiry years; `end_year` may equal `start_year`.
    stub
        Code prefix handed to `get_quandl_code_ivm`, e.g. 'OWF/ICE_TFM_TFM'.
    months
        Expiry months (1-12) to generate for each year, in the order given.

    Returns
    -------
    Codes ordered by year, then by the order of `months`.

    Raises
    ------
    AssertionError
        If `end_year` is earlier than `start_year`.
    """
    assert end_year >= start_year, "end_year must be after start_year"
    return [
        get_quandl_code_ivm(month, year, stub)
        for year in range(start_year, end_year + 1)
        for month in months
    ]

We test this function and see that it works as expected:

In [5]:
# Sanity check: two expiry years should yield 24 codes, F2019 through Z2020.
get_quandl_codes_quarterly(start_year=2019, end_year=2020, stub='OWF/ICE_TFM_TFM')

['OWF/ICE_TFM_TFM_F2019_IVM',
 'OWF/ICE_TFM_TFM_G2019_IVM',
 'OWF/ICE_TFM_TFM_H2019_IVM',
 'OWF/ICE_TFM_TFM_J2019_IVM',
 'OWF/ICE_TFM_TFM_K2019_IVM',
 'OWF/ICE_TFM_TFM_M2019_IVM',
 'OWF/ICE_TFM_TFM_N2019_IVM',
 'OWF/ICE_TFM_TFM_Q2019_IVM',
 'OWF/ICE_TFM_TFM_U2019_IVM',
 'OWF/ICE_TFM_TFM_V2019_IVM',
 'OWF/ICE_TFM_TFM_X2019_IVM',
 'OWF/ICE_TFM_TFM_Z2019_IVM',
 'OWF/ICE_TFM_TFM_F2020_IVM',
 'OWF/ICE_TFM_TFM_G2020_IVM',
 'OWF/ICE_TFM_TFM_H2020_IVM',
 'OWF/ICE_TFM_TFM_J2020_IVM',
 'OWF/ICE_TFM_TFM_K2020_IVM',
 'OWF/ICE_TFM_TFM_M2020_IVM',
 'OWF/ICE_TFM_TFM_N2020_IVM',
 'OWF/ICE_TFM_TFM_Q2020_IVM',
 'OWF/ICE_TFM_TFM_U2020_IVM',
 'OWF/ICE_TFM_TFM_V2020_IVM',
 'OWF/ICE_TFM_TFM_X2020_IVM',
 'OWF/ICE_TFM_TFM_Z2020_IVM']

Now we can fetch data for these Quandl codes:

In [6]:
# Fetch every TFM contract code for expiry years 2020-2022, limited to the
# homework's observation window (3 Dec 2020 - 31 Aug 2022).
# `feeds` is wrapped in `tuple(...)` because `fetch_quandl` is decorated with
# `functools.lru_cache`, which requires hashable arguments.
# NOTE(review): the 2020 expiries shown in the preview below are entirely NaN,
# and no 2023 expiries are requested - confirm this year range is the one the
# analysis actually needs.
tfm_fut_data_raw = fetch_quandl(
    feeds=tuple(get_quandl_codes_quarterly(2020, 2022, 'OWF/ICE_TFM_TFM')),
    trim_start="2020-12-03",
    trim_end="2022-08-31"
)
tfm_fut_data_raw.head()

Unnamed: 0_level_0,OWF/ICE_TFM_TFM_F2020_IVM - Future,OWF/ICE_TFM_TFM_F2020_IVM - AtM,OWF/ICE_TFM_TFM_F2020_IVM - RR25,OWF/ICE_TFM_TFM_F2020_IVM - RR10,OWF/ICE_TFM_TFM_F2020_IVM - Fly25,OWF/ICE_TFM_TFM_F2020_IVM - Fly10,OWF/ICE_TFM_TFM_F2020_IVM - Beta1,OWF/ICE_TFM_TFM_F2020_IVM - Beta2,OWF/ICE_TFM_TFM_F2020_IVM - Beta3,OWF/ICE_TFM_TFM_F2020_IVM - Beta4,...,OWF/ICE_TFM_TFM_Z2022_IVM - Beta1,OWF/ICE_TFM_TFM_Z2022_IVM - Beta2,OWF/ICE_TFM_TFM_Z2022_IVM - Beta3,OWF/ICE_TFM_TFM_Z2022_IVM - Beta4,OWF/ICE_TFM_TFM_Z2022_IVM - Beta5,OWF/ICE_TFM_TFM_Z2022_IVM - Beta6,OWF/ICE_TFM_TFM_Z2022_IVM - MinMoney,OWF/ICE_TFM_TFM_Z2022_IVM - MaxMoney,OWF/ICE_TFM_TFM_Z2022_IVM - DtE,OWF/ICE_TFM_TFM_Z2022_IVM - DtT
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-12-03,,,,,,,,,,,...,,,,,,,,,,
2020-12-04,,,,,,,,,,,...,,,,,,,,,,
2020-12-07,,,,,,,,,,,...,,,,,,,,,,
2020-12-08,,,,,,,,,,,...,,,,,,,,,,
2020-12-09,,,,,,,,,,,...,,,,,,,,,,


In [7]:
# Alphabetical listing of the raw column labels, to see the naming scheme.
sorted(tfm_fut_data_raw.columns)

['OWF/ICE_TFM_TFM_F2020_IVM - AtM',
 'OWF/ICE_TFM_TFM_F2020_IVM - Beta1',
 'OWF/ICE_TFM_TFM_F2020_IVM - Beta2',
 'OWF/ICE_TFM_TFM_F2020_IVM - Beta3',
 'OWF/ICE_TFM_TFM_F2020_IVM - Beta4',
 'OWF/ICE_TFM_TFM_F2020_IVM - Beta5',
 'OWF/ICE_TFM_TFM_F2020_IVM - Beta6',
 'OWF/ICE_TFM_TFM_F2020_IVM - DtE',
 'OWF/ICE_TFM_TFM_F2020_IVM - DtT',
 'OWF/ICE_TFM_TFM_F2020_IVM - Fly10',
 'OWF/ICE_TFM_TFM_F2020_IVM - Fly25',
 'OWF/ICE_TFM_TFM_F2020_IVM - Future',
 'OWF/ICE_TFM_TFM_F2020_IVM - MaxMoney',
 'OWF/ICE_TFM_TFM_F2020_IVM - MinMoney',
 'OWF/ICE_TFM_TFM_F2020_IVM - RR10',
 'OWF/ICE_TFM_TFM_F2020_IVM - RR25',
 'OWF/ICE_TFM_TFM_F2021_IVM - AtM',
 'OWF/ICE_TFM_TFM_F2021_IVM - Beta1',
 'OWF/ICE_TFM_TFM_F2021_IVM - Beta2',
 'OWF/ICE_TFM_TFM_F2021_IVM - Beta3',
 'OWF/ICE_TFM_TFM_F2021_IVM - Beta4',
 'OWF/ICE_TFM_TFM_F2021_IVM - Beta5',
 'OWF/ICE_TFM_TFM_F2021_IVM - Beta6',
 'OWF/ICE_TFM_TFM_F2021_IVM - DtE',
 'OWF/ICE_TFM_TFM_F2021_IVM - DtT',
 'OWF/ICE_TFM_TFM_F2021_IVM - Fly10',
 'OWF/ICE_TFM_TFM_F

This query returned 576 columns with long column names. To start organizing this data, I parse the expiry month out of each column name and add it as a secondary index level.

In [9]:
def parse_column(col: str, stub: str) -> Tuple[pd.Timestamp, str, str]:
    """
    Split a Quandl column name such as 'OWF/ICE_TFM_TFM_F2021_IVM - Beta4'
    into its component parts.

    Parameters
    ----------
    col
        Column label of the form '<stub>_<month letter><4-digit year>_<SUFFIX> - <metric>'.
    stub
        Code prefix expected at the start of `col`. It is matched literally.

    Returns
    -------
    `(expiry_month, stub, metric)`, where `expiry_month` is the timestamp
    parsed from '<year>-<month number>'.

    Raises
    ------
    AssertionError
        If `col` does not follow the expected format for `stub`.
    KeyError
        If the month letter is not a key of `EXP_ID_MAPPING_REV`.
    """
    # Raw f-string: `\s` and `\-` are regex escapes, not Python string escapes
    # (in a non-raw literal they trigger invalid-escape warnings).
    # `re.escape` keeps regex metacharacters in `stub` from being interpreted.
    pattern = rf"^{re.escape(stub)}_([A-Z])([0-9]{{4}})_[A-Z]+\s*\-\s*([a-zA-Z0-9]+)$"
    match = re.match(pattern, col)
    assert match is not None, f"no match found for {col=}"
    month_letter, exp_year, metric = match.groups()
    exp_month = EXP_ID_MAPPING_REV[month_letter]
    return (pd.to_datetime(f"{exp_year}-{exp_month}"), stub, metric)

def index_owf_futures_data(in_df: pd.DataFrame, stub: str = 'OWF/ICE_TFM_TFM') -> pd.DataFrame:
    """
    Reshape a wide Quandl frame so each row is keyed by (original index,
    `expiry_month`, `stub`) and each column is one metric.

    Expects the columns of `in_df` to be named like
    'OWF/ICE_TFM_TFM_F2021_IVM - Beta4'. `in_df` itself is left untouched.
    """
    # Replace each column label with the (expiry_month, stub, metric) tuple parsed from it.
    parsed_labels = {label: parse_column(label, stub) for label in in_df.columns}

    # Go long: one row per (original index, parsed tuple) with a single 'value' column.
    stacked = in_df.rename(columns=parsed_labels).stack().to_frame(name='value')

    # Expand the tuple-valued index level into three named levels.
    parsed_levels = pd.MultiIndex.from_tuples(
        stacked.index.get_level_values(level=1),
        names=['expiry_month', 'stub', 'metric']
    )

    return (
        stacked
        .set_index(parsed_levels, append=True)
        .droplevel(1)            # discard the tuple-valued level now that it is expanded
        .unstack(-1)             # pivot 'metric' back out into columns
        .droplevel(0, axis=1)    # remove the leftover 'value' column level
    )

# Reshape the raw TFM frame into one row per (Date, expiry_month, stub) with one
# column per metric, then display it.
# NOTE(review): `df` is a very generic notebook-global name; a stage-specific name
# would be clearer, but later cells may rely on `df` - confirm before renaming.
df = index_owf_futures_data(tfm_fut_data_raw, stub='OWF/ICE_TFM_TFM')
df

Unnamed: 0_level_0,Unnamed: 1_level_0,metric,AtM,Beta1,Beta2,Beta3,Beta4,Beta5,Beta6,DtE,DtT,Fly10,Fly25,Future,MaxMoney,MinMoney,RR10,RR25
Date,expiry_month,stub,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2020-12-03,2021-01-01,OWF/ICE_TFM_TFM,0.536395,0.14203,1.98206,-1.392426,-25.637269,8.496346,146.467732,20.97,27.0,0.042598,0.015737,14.143,0.295219,-0.251325,0.047383,0.029886
2020-12-03,2021-02-01,OWF/ICE_TFM_TFM,0.532873,0.074218,0.678464,0.100921,-2.741348,-0.368969,5.300285,54.97,56.0,0.044021,0.015472,14.164,0.52735,-0.399412,0.061804,0.030883
2020-12-03,2021-03-01,OWF/ICE_TFM_TFM,0.54636,0.055339,0.357607,-0.053551,-0.610864,0.214282,0.413099,82.97,84.0,0.04028,0.013645,13.877,0.665604,-0.490167,0.055579,0.028814
2020-12-03,2021-04-01,OWF/ICE_TFM_TFM,0.476411,-0.103419,0.391547,0.247471,-1.407798,-0.328127,2.241637,112.97,117.0,0.030908,0.008907,13.068,0.565328,-0.555263,-0.050637,-0.027356
2020-12-03,2021-05-01,OWF/ICE_TFM_TFM,0.432201,-0.190207,0.138459,0.553965,0.111594,-0.879162,-0.596056,143.97,147.0,0.017798,0.001102,12.686,0.527554,-0.594589,-0.094821,-0.059071
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-08-30,2022-11-01,OWF/ICE_TFM_TFM,1.42394,0.43524,0.284785,-0.662978,-1.017161,-0.34445,0.120205,57.97,59.0,,,269.943,0.393254,-0.89773,,
2022-08-30,2022-12-01,OWF/ICE_TFM_TFM,1.418362,0.41667,0.248323,-0.526927,-1.010242,-0.845885,-0.307945,89.97,91.0,,,270.401,0.391559,-1.056611,,
2022-08-31,2022-10-01,OWF/ICE_TFM_TFM,1.314304,0.082816,0.643933,2.295676,2.739447,-3.482284,-5.625811,27.97,29.0,,0.143579,239.907,0.628996,-0.67623,,0.295339
2022-08-31,2022-11-01,OWF/ICE_TFM_TFM,1.391645,0.43913,0.248203,-0.796241,-0.765982,0.63994,0.796434,56.97,58.0,,,244.418,0.492585,-0.873907,,


Unnamed: 0_level_0,OWF/ICE_TFM_TFM_F2020_IVM - Future,OWF/ICE_TFM_TFM_F2020_IVM - AtM,OWF/ICE_TFM_TFM_F2020_IVM - RR25,OWF/ICE_TFM_TFM_F2020_IVM - RR10,OWF/ICE_TFM_TFM_F2020_IVM - Fly25,OWF/ICE_TFM_TFM_F2020_IVM - Fly10,OWF/ICE_TFM_TFM_F2020_IVM - Beta1,OWF/ICE_TFM_TFM_F2020_IVM - Beta2,OWF/ICE_TFM_TFM_F2020_IVM - Beta3,OWF/ICE_TFM_TFM_F2020_IVM - Beta4,...,OWF/ICE_TFM_TFM_Z2022_IVM - Beta1,OWF/ICE_TFM_TFM_Z2022_IVM - Beta2,OWF/ICE_TFM_TFM_Z2022_IVM - Beta3,OWF/ICE_TFM_TFM_Z2022_IVM - Beta4,OWF/ICE_TFM_TFM_Z2022_IVM - Beta5,OWF/ICE_TFM_TFM_Z2022_IVM - Beta6,OWF/ICE_TFM_TFM_Z2022_IVM - MinMoney,OWF/ICE_TFM_TFM_Z2022_IVM - MaxMoney,OWF/ICE_TFM_TFM_Z2022_IVM - DtE,OWF/ICE_TFM_TFM_Z2022_IVM - DtT
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-12-03,,,,,,,,,,,...,,,,,,,,,,
2020-12-04,,,,,,,,,,,...,,,,,,,,,,
2020-12-07,,,,,,,,,,,...,,,,,,,,,,
2020-12-08,,,,,,,,,,,...,,,,,,,,,,
2020-12-09,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-08-25,,,,,,,,,,,...,0.407677,0.286689,0.030080,0.525118,0.778826,0.290356,-1.111821,0.197512,94.97,96.0
2022-08-26,,,,,,,,,,,...,0.417134,0.416164,0.583171,1.550571,1.639493,0.558539,-1.095388,0.133903,93.97,95.0
2022-08-29,,,,,,,,,,,...,0.423328,0.280651,-0.524176,-1.171777,-1.076503,-0.399320,-1.081339,0.295005,90.97,92.0
2022-08-30,,,,,,,,,,,...,0.416670,0.248323,-0.526927,-1.010242,-0.845885,-0.307945,-1.056611,0.391559,89.97,91.0
