# Synthetic generation of Heston data

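In this notebook we generate synthetic labeled data for the Heston model. Each sample pairs randomly drawn Heston parameters, a maturity and a moneyness with the Black-Scholes implied volatility of the corresponding Heston call price. The samples are then shuffled, split into training, validation and test sets, and written to .csv files.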

In [None]:
# Standard library imports
import os
from os.path import dirname as up
import numpy as np
import pandas as pd

import rpy2
import rpy2.robjects as robjects
from py_vollib.black_scholes.implied_volatility import implied_volatility
from py_lets_be_rational.exceptions import BelowIntrinsicException
import sklearn.utils
import numba
import logging

# Important directories (both resolved relative to the current working directory).
deep_cal_dir = up(up(os.getcwd()))
code_dir = up(os.getcwd())

# Load the R implementation and grab a handle on the closed-form pricer.
r = robjects.r
r.source(code_dir +"/heston.R")
r_pricer = r('HestonCallClosedForm')

# Logging stuff
log_path = code_dir + "/logs/heston_simulation.log"

# FileHandler raises if the target directory is missing, so create it first.
os.makedirs(up(log_path), exist_ok=True)

logger = logging.getLogger("heston")
logger.setLevel(logging.INFO)

# logging.getLogger returns the same object on every call, so re-running this
# cell would otherwise stack a second FileHandler on the same file and every
# record would be written twice. Drop handlers left over from earlier runs.
stale_handlers = [
    handler for handler in logger.handlers
    if isinstance(handler, logging.FileHandler)
    and handler.baseFilename == os.path.abspath(log_path)
]
for handler in stale_handlers:
    logger.removeHandler(handler)
    handler.close()

fh = logging.FileHandler(log_path)
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
fh.setFormatter(formatter)
logger.addHandler(fh)

In [None]:
def heston_pricer(lambd, vbar, eta, rho, v0, r, tau, S0, K):
    """
    Computes European Call price under Heston dynamics with closed-form solution.

    The price comes from the R pricer bound to ``r_pricer`` and is then
    inverted to a Black-Scholes implied volatility. Expected failures are
    logged together with the offending parameters and reported through
    ``None`` return values instead of being raised.

    :param lambd: mean-reversion speed
    :param vbar: long-term average volatility
    :param eta: volatility of vol
    :param rho: correlation between stock and vol
    :param v0: initial volatility
    :param r: risk-free interest rate
    :param tau: time to maturity (year = 365 days)
    :param S0: initial share price
    :param K: strike price

    :return: ``(price, iv)`` on success; ``(price, None)`` if the implied
        volatility inversion raised ``BelowIntrinsicException``;
        ``(None, None)`` if the R pricer raised a runtime error.
    :rtype: (float or None, float or None)

    """
    params = (lambd, vbar, eta, rho, v0, r, tau, S0, K)

    try:
        price = r_pricer(*params)[0]
    except rpy2.rinterface.RRuntimeError:
        # Parameters go through a %s placeholder: logging formats lazily with
        # `msg % args`, and extra args without placeholders make that
        # formatting fail so the values never reach the log.
        logger.info('R Runtime Error with parameters: %s', params)
        return None, None

    try:
        iv = implied_volatility(price, S0, K, tau, r, 'c')
    except BelowIntrinsicException:
        logger.info('Below Intrinsic Exception with parameters: %s', params)
        iv = None

    return price, iv

### Preprocessing

In [None]:
# PARAMETERS
random_seed = 42
nb_samples = 10**6

# Heston parameter, bounds by Moodley (2005)
lambd_bounds = [0, 10]
vbar_bounds = [0, 1]
eta_bounds = [0, 5]
rho_bounds = [-1, 0]
v0_bounds = [0, 1]
S0 = 1
r = 0

# Import and preprocessing of import data.
# The grid is read into its own name so it is not confused with the output
# frame `df` created below.
grid_df = pd.read_csv(deep_cal_dir + '/data/raw_data/liquidity_bid_ask_spread/money_maturities.csv')

# The simulation consumes one (moneyness, maturity) pair per labeled sample,
# so fail here rather than with an IndexError deep into a long run.
if len(grid_df) < nb_samples:
    raise ValueError(
        "money_maturities.csv has {} rows but nb_samples = {} pairs are "
        "required".format(len(grid_df), nb_samples)
    )

moneyness = grid_df['moneyness'].values
maturity = grid_df['time to maturity (years)'].values
strike = S0/moneyness

# Initialisation of df for labeled data (one zero-filled row per sample).
columns = ['lambda', 'vbar', 'eta', 'rho', 'v0', 'maturity', 'moneyness', 'iv']
df = pd.DataFrame(np.zeros((nb_samples, len(columns))), columns=columns)

# Counter that tracks the amount of computed labeled pairs
count = 0

### Simulation

In [None]:
np.random.seed(random_seed)

# Header describing the fields of the diagnostic tuples logged below.
logger.info("lambda, vbar, eta, rho, v0, maturity, moneyness, price, \
       intrinsic, price < intrinsic, abs(price) < 1E-5, OK?  \n" )

# `count` only advances on success, so a failed draw is retried with fresh
# Heston parameters for the same (strike, maturity) pair.
while count < nb_samples:

    # Sample uniformly from Heston parameter space.
    lambd = np.random.uniform(lambd_bounds[0], lambd_bounds[1])
    vbar = np.random.uniform(vbar_bounds[0], vbar_bounds[1])
    eta = np.random.uniform(eta_bounds[0], eta_bounds[1])
    rho = np.random.uniform(rho_bounds[0], rho_bounds[1])
    v0 = np.random.uniform(v0_bounds[0], v0_bounds[1])

    # Take respective (strike, maturity) pair from precomputed data.
    K = strike[count]
    T = maturity[count]
    M = moneyness[count]

    # Calculate Black-Scholes implied vol from Heston price.
    price, iv = heston_pricer(lambd, vbar, eta, rho, v0, r, T, S0, K)

    # Running through possible cases returned by heston_pricer.
    if iv is not None:

        df.loc[count, columns] = [lambd, vbar, eta, rho, v0, T, M, iv]

        # Increase running counter.
        count += 1

    elif price is not None:

        # Intrinsic value of the call. The builtin max is required here:
        # np.max(x, 0) treats 0 as the `axis` argument and returns x itself,
        # which is negative for out-of-the-money strikes.
        intrinsic = max(S0 - K, 0.0)
        below_intrinsic = price < intrinsic
        negligible_price = np.abs(price) < 1E-5

        # Collect all necessary information to judge why it failed.
        error_data = (lambd, vbar, eta, rho, v0, T, M,
                      price, intrinsic, below_intrinsic,
                      negligible_price,
                      below_intrinsic or negligible_price)

        logger.info("%s \n" % repr(error_data))

    else:

        # No price was produced; only the inputs can be reported.
        error_data = (lambd, vbar, eta, rho, v0, T, M)

        logger.info("%s \n" % repr(error_data))

Shuffle output data, split into training/validation/test sets and write to .csv files.

In [None]:
df = sklearn.utils.shuffle(df)

# Dissecting labeled pairs into training, validation and testing sets
# (80% / 10% / 10%). The cut points are derived from the actual number of rows
# with integer arithmetic, so the proportions stay correct if nb_samples is
# changed; for 10**6 rows they are 800000 and 900000.
n_rows = len(df)
train_end = n_rows * 8 // 10
validation_end = n_rows * 9 // 10

# Positional slicing yields the same three consecutive partitions that
# np.split(df, [train_end, validation_end]) would.
train_df = df.iloc[:train_end]
validation_df = df.iloc[train_end:validation_end]
test_df = df.iloc[validation_end:]

print('Shapes: \n train {}, validation {}, test {}'.format(train_df.shape, validation_df.shape,
                                                  test_df.shape))

# Write labeled data to .csv file.
train_df.to_csv(deep_cal_dir +'/data/heston/training_data1.csv', index=False)
validation_df.to_csv(deep_cal_dir + '/data/heston/validation_data1.csv', index=False)
test_df.to_csv(deep_cal_dir + '/data/heston/testing_data1.csv', index=False)