In [16]:
%load_ext autoreload
%autoreload 2

import traceback
from enum import Enum
from typing import Optional

import pandas as pd
import numpy as np
import numba as nb
import matplotlib.pyplot as plt
from plotly.subplots import make_subplots
import plotly.express as px
import plotly.graph_objects as go
from tqdm.auto import tqdm

from pyquant.common import *
from pyquant.vol_surface import VolSurfaceChainSpace
from pyquant.heston import *

plt.style.use("dark_background")

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
class InstrumentId(Enum):
    BTC = 0
    ETH = 1

class InstrumentType(Enum):
    FUTURE = 1
    OPTION = 2
    FUTURE_COMBO = 3
    OPTION_COMBO = 4
    CALL_OPTION = 5
    PUT_OPTION = 6
    ASSET = 7

# Process data

In [3]:
def process_data(data: pd.DataFrame, instr_id: InstrumentId):
    YEAR_IN_MS = 365 * 24 * 3600 * 1000
    data['time_to_maturity'] = (data['maturity'] * 1000 - data['timestamp']) / YEAR_IN_MS
    data['price'] = -1.0

    # Where both best bid and ask are present, use midprice as a price
    data.loc[(data['best_bid_price'] > 0) & (data['best_ask_price'] > 0), "price"] = \
        (data['best_bid_price'] + data['best_ask_price']) / 2

    # At least one of best bid and best ask are present in data,
    # this was ensured at the data processing step.

    # Where best bid is not present, use best ask as a price
    data.loc[data['best_bid_price'] < 0, "price"] = data['best_ask_price']
    # Where best ask is not present, use best bid as a price
    data.loc[data['best_ask_price'] < 0, "price"] = data['best_bid_price']

    # Select coin
    data.query(f'instrument_id == {instr_id.value}', inplace=True)
    data.drop(columns='instrument_id', inplace=True)

    return data[['sample_idx', 'timestamp', 'instrument_type', 'strike', 'time_to_maturity', 'price']]

In [4]:
DATA_PATH = 'books_resampled_15min.hdf'
data = process_data(pd.read_hdf(DATA_PATH), InstrumentId.ETH)

In [5]:
data

Unnamed: 0,sample_idx,timestamp,instrument_type,strike,time_to_maturity,price
0,1,1701377983884,5,1850.0,0.002740,0.11250
1,1,1701377983884,6,1750.0,0.002740,0.00020
2,1,1701377983884,6,1800.0,0.002740,0.00010
3,1,1701377983884,5,1750.0,0.002740,0.16600
4,1,1701377983885,5,1800.0,0.002740,0.13950
...,...,...,...,...,...,...
13115794,12792,1714011211821,1,-1.0,0.922691,3430.50000
13115801,12792,1714011211876,6,2400.0,0.174746,0.02750
13115802,12792,1714011211882,6,2000.0,0.424061,0.03375
13115803,12792,1714011211883,6,1800.0,0.424061,0.02325


# Calibrate Heston model

In [6]:
@nb.njit()
def calibrate_heston_sample(
    heston_calc: HestonCalc,
    instr_type: np.ndarray,
    strike: np.ndarray,
    time_to_maturity: np.ndarray,
    price: np.ndarray,
    use_cached_params: bool
):
    calls_mask = (instr_type == InstrumentType.CALL_OPTION.value)
    puts_mask = (instr_type == InstrumentType.PUT_OPTION.value)
    spot_mask = (instr_type == InstrumentType.ASSET.value)
    futures_mask = (instr_type == InstrumentType.FUTURE.value)
    futures_sorted_idxs = np.argsort(time_to_maturity[futures_mask])

    spot_price = price[spot_mask].item()

    fwd_curve = forward_curve_from_forward_rates(
        Spot(spot_price),
        ForwardRates(price[futures_mask]),
        TimesToMaturity(price[futures_mask][futures_sorted_idxs])
    )

    buf_T = np.concatenate((time_to_maturity[calls_mask], time_to_maturity[puts_mask]))
    buf_K = np.concatenate((strike[calls_mask], strike[puts_mask]))
    buf_types = np.concatenate( (np.full(sum(calls_mask), True), np.full(sum(puts_mask), True)) )
    buf_pv = np.concatenate((price[calls_mask], price[puts_mask])) * spot_price
    sort_idxs = np.argsort(buf_T)

    volsurface_chain = VolSurfaceChainSpace(
        fwd_curve, 
        TimesToMaturity(buf_T[sort_idxs]),
        Strikes(buf_K[sort_idxs]),
        OptionTypes(buf_types[sort_idxs]),
        Premiums(buf_pv[sort_idxs])
    )

    flat_yield = FlatForwardYield(
        volsurface_chain.forward_curve().forward_yields(TimesToMaturity(np.unique(volsurface_chain.Ts))).data.mean())
    calibration_weights = CalibrationWeights(np.ones_like(volsurface_chain.Ks))

    if not use_cached_params:
        start_params = HestonParams(
            Variance(0.65),
            VarReversion(1.),
            AverageVar(1.3),
            VolOfVar(1.0),
            Correlation(-0.4),
            flat_yield
        )
        heston_calc.update_cached_params(start_params)

    heston_params, error = heston_calc.calibrate(volsurface_chain, flat_yield, calibration_weights)
    return heston_params, error, flat_yield

In [None]:
# def calibrate_heston(
#     sample_idx: np.ndarray,
#     instr_type: np.ndarray,
#     strike: np.ndarray,
#     time_to_maturity: np.ndarray,
#     price: np.ndarray
# ):
#     heston_calc = HestonCalc()
#     params_list = []
#     errors = []
#     flat_yields = []
#     exceptions = {}

#     unique_sample_idxs = np.unique(sample_idx)
#     for i in tqdm(range(len(unique_sample_idxs))):
#         curr_sample_idx = unique_sample_idxs[i]
#         sample_mask = (sample_idx == curr_sample_idx)

#         try:
#             heston_params, error, flat_yield = calibrate_heston_sample(
#                 heston_calc,
#                 instr_type[sample_mask],
#                 strike[sample_mask],
#                 time_to_maturity[sample_mask],
#                 price[sample_mask]
#             )
#             params_list.append(heston_params.array())
#             errors.append(error.v)
#             flat_yields.append(flat_yield.r)

#         except Exception:
#             params_list.append(np.full(5, np.nan))
#             errors.append(np.nan)
#             flat_yields.append(np.nan)
#             exceptions[curr_sample_idx] = traceback.format_exc()

#     return np.stack(params_list), np.array(errors), np.array(flat_yields), exceptions


# params_history, errors, flat_yields, exceptions = calibrate_heston(
#     data['sample_idx'].to_numpy(),
#     data['instrument_type'].to_numpy(),
#     data['strike'].to_numpy(),
#     data['time_to_maturity'].to_numpy(),
#     data['price'].to_numpy()
# )

In [9]:
heston_calc = HestonCalc()
params_list = []
errors = []
flat_yields = []
exceptions = {}

sample_idx = data['sample_idx'].to_numpy()
instr_type = data['instrument_type'].to_numpy()
strike = data['strike'].to_numpy()
time_to_maturity = data['time_to_maturity'].to_numpy()
price = data['price'].to_numpy()

unique_sample_idxs = np.unique(sample_idx)
for i in tqdm(range(len(unique_sample_idxs))):
    curr_sample_idx = unique_sample_idxs[i]
    sample_mask = (sample_idx == curr_sample_idx)

    try:
        heston_params, error, flat_yield = calibrate_heston_sample(
            heston_calc,
            instr_type[sample_mask],
            strike[sample_mask],
            time_to_maturity[sample_mask],
            price[sample_mask],
            use_cached_params = (i == 0)
        )
        params_list.append(heston_params.array())
        errors.append(error.v)
        flat_yields.append(flat_yield.r)

    except Exception:
        params_list.append(np.full(5, np.nan))
        errors.append(np.nan)
        flat_yields.append(np.nan)
        exceptions[curr_sample_idx] = traceback.format_exc()

100%|██████████| 12700/12700 [15:40<00:00, 13.51it/s] 


In [10]:
params_history = np.stack(params_list)

In [17]:
print('Number of samples:', len(errors))
print('% of samples with failed calibration:', (sum(np.isnan(errors)) / len(errors)) * 100)

Number of samples: 12700
% of samples with failed calibration: 90.46456692913387


In [28]:
with open('exceptions.txt', 'w') as f:
    for key, value in exceptions.items():
        print(f'Error at sample #{key}', file=f)
        print('---------------------------', file=f)
        print(value, file=f)

        # s = value.split('\n')[-2]
        # print(f'{key}: {s}', file=f)
        # if s != 'ValueError: No solution within implied vol interval':
        #     print(f'Error at sample #{key}', file=f)
        #     print('---------------------------', file=f)
        #     print(value, file=f)

## Pandas version

In [30]:
def calibrate_heston(data: pd.DataFrame):
    heston_calc = HestonCalc()
    params_list = []
    errors = []
    flat_yields = []

    for sample_idx in tqdm(data['sample_idx'].unique()):
        sample = data[data['sample_idx'] == sample_idx]

        calls = sample[sample['instrument_type'] == InstrumentType.CALL_OPTION.value]
        puts = sample[sample['instrument_type'] == InstrumentType.PUT_OPTION.value]
        spot = sample[sample['instrument_type'] == InstrumentType.ASSET.value]['price'].item()
        futures = sample[sample['instrument_type'] == InstrumentType.FUTURE.value] \
                .sort_values(by='time_to_maturity')

        calls.loc[:, 'price'] = calls['price'] * spot
        puts.loc[:, 'price'] = puts['price'] * spot

        try:
            fwd_curve = ForwardCurve.from_forward_rates(
                Spot(spot),
                ForwardRates(futures['price'].to_numpy()),
                TimesToMaturity(futures['time_to_maturity'].to_numpy())
            )

            buf_T = np.concatenate((calls['time_to_maturity'].to_numpy(), puts['time_to_maturity'].to_numpy()))
            buf_K = np.concatenate((calls['strike'].to_numpy(), puts['strike'].to_numpy()))
            buf_types = np.concatenate( (np.full(len(calls), True), np.full(len(puts), False)) )
            buf_pv = np.concatenate((calls['price'].to_numpy(), puts['price'].to_numpy()))
            sort_idxs = np.argsort(buf_T)

            volsurface_chain = VolSurfaceChainSpace(
                fwd_curve, 
                TimesToMaturity(buf_T[sort_idxs]),
                Strikes(buf_K[sort_idxs]),
                OptionTypes(buf_types[sort_idxs]),
                Premiums(buf_pv[sort_idxs])
            )

            flat_yield = FlatForwardYield(
                volsurface_chain.forward_curve().forward_yields(TimesToMaturity(np.unique(volsurface_chain.Ts))).data.mean())
            calibration_weights = CalibrationWeights(np.ones_like(volsurface_chain.Ks))

            start_params = HestonParams(
                Variance(0.65),
                VarReversion(1.),
                AverageVar(1.3),
                VolOfVar(1.0),
                Correlation(-0.4),
                flat_yield
            )
            heston_calc.update_cached_params(start_params)

            heston_params, error = heston_calc.calibrate(volsurface_chain, flat_yield, calibration_weights)
            return heston_params, error, flat_yield

        except Exception:
            params_list.append(np.full(5, np.nan))
            errors.append(np.nan)
            flat_yields.append(np.nan)

            print(f'ERROR AT SAMPLE {sample_idx}')
            traceback.print_exc()
            print('----------------------\n')

# Plot parameters dynamics

In [18]:
# np.save('params_history', params_history)
# np.save('errors', np.array(errors))
# np.save('flat_yields', np.array(flat_yields))

In [None]:
param_names = ['v₀ (Variance) ', 'κ (VarReversion)', 'θ (AverageVar) ', 'ε (VolOfVar)', 'ρ (Correlation)']
sample_ts = pd.to_datetime(data.groupby('sample_idx').agg({'timestamp': 'max'}).to_numpy().squeeze(), unit='ms')

fig = make_subplots(rows=7, cols=1)
for i in range(5):
    fig.add_trace(
        go.Scatter(x=sample_ts[:len(params_history)], y=params_history[:, i], name=param_names[i]),
        row=i+1, col=1
    )

fig.add_trace(
    go.Scatter(x=sample_ts[:len(params_history)], y=flat_yields, name='Flat yield'),
    row=6, col=1
)
fig.add_trace(
    go.Scatter(x=sample_ts[:len(params_history)], y=errors, name='Calibr. error'),
    row=7, col=1
)

subplot_titles = param_names + ['Flat yield', 'Calibr. error']
for i in range(7):
    if i == 0:
        fig['layout'][f'yaxis']['title'] = subplot_titles[i]
    else:
        fig['layout'][f'yaxis{i + 1}']['title'] = subplot_titles[i]

fig.update_layout(width=1500, height=1700)
fig.update_traces(connectgaps=True)
fig.show()