In [15]:
!pip install lmfit




[notice] A new release of pip available: 22.3.1 -> 23.1.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [1]:
sample_size = 1485
sample_freq = 5
save_folder = '27.05 Heston w/vol'
colab_path = "M:/Master/"
colab_path = ".."
file = colab_path + '/data/processed_data/2010-2023_NSS_filtered_vF.csv'

In [5]:
import numpy as np
import pandas as pd
from numba import jit
from lmfit import Parameters, minimize
from scipy.integrate import quad
import time
from multiprocessing import Pool

i = complex (0,1) # Define complex number i

@jit
def charHeston(u, S0, v0, r, T, sigma, kappa, theta, rho):
    '''Implementation of the characteristic function of the Heston model'''
    # Frequent expression
    rsiu = rho*sigma*i*u

    # Calculate d
    d1 = (rsiu - kappa)**2
    d2 = sigma**2*(i*u + u**2)
    d = np.sqrt(d1 + d2)

    # Calculate g
    g1 = kappa - rsiu - d
    g2 = kappa - rsiu + d
    g = g1/g2

    #Calculate first exp
    exp1 = np.exp(r*T)

    #Calculate the first power
    base1 = S0
    exponent1 = i*u
    power1 = np.power(base1, exponent1)

    # Calculate second power
    base2 = (1-g*np.exp(-d*T)) / (1-g)
    exponent2 = -2*theta*kappa/(sigma**2)
    power2 = np.power(base2, exponent2)

    # Calculate the second exp
    part1 = theta*kappa*T/(sigma**2) * g1
    part2 = v0/(sigma**2) * g2 * (1 - np.exp(d*T))/(1 - g*np.exp(d*T))
    exp2 = np.exp(part1 + part2)

    # Main calculation
    return exp1*power1*power2*exp2

@jit
def integrand(u, S0, v0, K, r, T, sigma, kappa, theta, rho):
    '''Calculate the integrand of the Heston model'''
    numerator = np.exp(r*T)*charHeston(u-i, S0, v0, r, T, sigma, kappa, theta, rho) - K * charHeston(u, S0, v0, r, T, sigma, kappa, theta, rho)
    denominator = i*u *np.power(K, i*u)
    return np.real(numerator/denominator)

@jit(forceobj = True)
def priceHestonIntegral(S0, v0, K, r, T, sigma, kappa, theta, rho, maxIntegral = 100):
    '''Calculate integral for the price of a European call option using the Heston model'''
    integral = np.array([quad(integrand, 0, maxIntegral, args=(S0, v0, K_i, r_i, T_i, sigma, kappa, theta, rho))[0] for K_i, r_i, T_i in zip(K, r, T) ])
    if np.isnan(integral).any():
      print('NaN value detected in integral. Returning 10**5')
      print(integral)
      return 10**5
    return 0.5 * (S0 - K * np.exp(-r * T)) + integral/np.pi

def iter_cb(params, iter, resid):
    '''Callback function to print the parameters at each iteration of the minimizer for debuging'''
    parameters = [params['sigma'].value, 
                  params['kappa'].value, 
                  params['theta'].value, 
                  params['rho'].value, 
                  np.sum(resid)/len(resid)]
    print(parameters) 

def calibrateHeston(optionPrices, S0, v0, strikes, rates, maturities, initial_parameters):
    '''Calibrate the Heston model parameters using the Levenberg Marquardt algorithm'''

    # Define the parameters to calibrate
    params = Parameters()
    params.add('sigma',value = initial_parameters[0], min = 1e-3, max = 1)
    params.add('kappa',value = initial_parameters[1], min = 1e-3, max = 1)
    params.add('theta',value = initial_parameters[2], min = 1e-4, max = 0.8)
    params.add('rho', value = initial_parameters[3], min = -0.8, max = 0.8)

    # Define the objective function to minimize as squared errors
    objectiveFunctionHeston = lambda paramVect: (optionPrices - priceHestonIntegral(S0,
                                                                        v0,
                                                                        strikes,  
                                                                        rates, 
                                                                        maturities, 
                                                                        paramVect['sigma'].value,                         
                                                                        paramVect['kappa'].value,
                                                                        paramVect['theta'].value,
                                                                        paramVect['rho'].value)) **2   
    # Run the Levenberg Marquardt algorithm
    result = minimize(objectiveFunctionHeston, 
                      params, 
                      method = 'leastsq',
                      iter_cb = iter_cb,
                      ftol = 1e-4) 
    return(result)


@jit(forceobj=True)
def create_data_np(df):
    '''Create numpy arrays with required data for calibration and testing'''
    optionPrices = df['Price'].values
    S0 = df['Underlying_last'].values[0]
    v0 = df['Volatility'].values[0]
    strikes = df['Strike'].values
    rates = df['R'].values
    maturities = df['TTM'].values

    data_np = np.empty((len(optionPrices), 6)) # Increase to 6 columns
    data_np[:, 0] = optionPrices
    data_np[:, 1] = S0
    data_np[:, 2] = v0
    data_np[:, 3] = strikes
    data_np[:, 4] = rates
    data_np[:, 5] = maturities

    return data_np

def HestonYearMonth(df, year, parameters):
    '''Calculate the Heston model parameters for each day of the year and calculate the option price for the next day'''
    # Extract last date of previous year and apply yearly filter
    calibration_date = df[df['Quote_date'] < f'{year}-01-01']['Quote_date'].max()
    df_year = df[(df['Quote_date'] >= f'{year}-01-01') & (df['Quote_date'] <= f'{year}-12-31')]

    # Convert 'Quote_date' to datetime
    df_year['Month'] = pd.to_datetime(df_year['Quote_date']).dt.to_period('M')

    print(df_year.head(3))

    optionPrices_list = np.array([])
    parameters_list = np.array([])

    months = np.sort(df_year['Month'].unique())
    
    for month in months:
        t = time.time()
        training_data = create_data_np(df[df['Quote_date'] == calibration_date])
        np.random.shuffle(training_data)
        training_data = training_data[:sample_size]

        calibrationResult = calibrateHeston(
            training_data[:, 0],
            training_data[0, 1],
            training_data[0, 2],
            training_data[:, 3],
            training_data[:, 4],
            training_data[:, 5],
            parameters
        )

        # Extract the parameters
        parameters = np.array([
            calibrationResult.params['sigma'].value,
            calibrationResult.params['kappa'].value,
            calibrationResult.params['theta'].value,
            calibrationResult.params['rho'].value
        ])
        parameters_list = np.append(parameters_list, parameters)

        optionPrices_month = np.array([])
        df_month = df_year[df_year['Month'] == month]
        data_nps = df_month.groupby('Quote_date').apply(create_data_np)
        # Price the options
        for i in range(len(data_nps)):
            optionPrice = priceHestonIntegral(
                data_nps[i][0, 1],
                data_nps[i][0, 2],
                data_nps[i][:, 3],
                data_nps[i][:, 4],
                data_nps[i][:, 5],
                *parameters
            )
            optionPrices_month = np.append(optionPrices_month, optionPrice)
        optionPrices_list = np.append(optionPrices_list, optionPrices_month)

        calibration_date = df_month['Quote_date'].max()
        
        print(f'{(np.sum((optionPrices_month - df_month["Price"]) ** 2) / len(optionPrices_month))**(0.5):.4f} RMSE for {month} with time {time.time() - t} with parameters {parameters}')

    print(f'{(np.sum((optionPrices_list - df_year["Price"]) ** 2) / len(optionPrices_list))**(0.5):.4f} RMSE for {year} with time {time.time() - t}')

    # Save option prices
    df_year['Heston_price'] = optionPrices_list
    print('=====================')
    print(f'Total RMSE {year}: {(np.sum((df_year["Heston_price"] - df_year["Price"]) ** 2) / len(df_year["Price"]))**(0.5)}')
    df_year.to_csv(f'{colab_path}Heston/{save_folder}/Results/{year} Heston vol results.csv', index=False)

    # Save parameters
    df_params = pd.DataFrame(months, columns = ['Quote_date'])
    parameters_list = np.reshape(parameters_list, (len(df_params), 4))
    df_params['sigma'] = parameters_list[:,0]
    df_params['kappa'] = parameters_list[:,1]
    df_params['theta'] = parameters_list[:,2]
    df_params['rho'] = parameters_list[:,3]
    df_params.to_csv(f'{colab_path}Heston/{save_folder}/Parameters/{year} Heston vol parameters.csv', index=False)

    del df_year
    return parameters

In [3]:
df = pd.read_csv(file)

# Add volatility column with 30 day rolling standard deviation of Underlying_last

# New dataframe without duplicate Quote_dates
df2 = df.drop_duplicates(subset=['Quote_date'])

# Calculate volatility
df2['Volatility'] = np.log(df2["Underlying_last"] / df2["Underlying_last"].shift()).rolling(30).std()*(252**0.5)

# Matching volatility in df2 to df
df['Volatility'] = df['Quote_date'].map(df2.set_index('Quote_date')['Volatility'])

df = df[(df["Quote_date"] >= "2014-12-01")]

print(df)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['Volatility'] = np.log(df2["Underlying_last"] / df2["Underlying_last"].shift()).rolling(30).std()*(252**0.5)


          Unnamed: 0  Quote_date     Price  Underlying_last  Strike       TTM   
1456800      1742077  2014-12-01  1004.300          2053.83  1050.0  0.010959  \
1456801      1742078  2014-12-01   979.305          2053.83  1075.0  0.010959   
1456802      1742079  2014-12-01   954.300          2053.83  1100.0  0.010959   
1456803      1742080  2014-12-01   929.300          2053.83  1125.0  0.010959   
1456804      1742081  2014-12-01   904.305          2053.83  1150.0  0.010959   
...              ...         ...       ...              ...     ...       ...   
12057638    13739049  2023-03-31   217.750          4109.88  4700.0  1.726027   
12057639    13739050  2023-03-31   180.000          4109.88  4800.0  1.726027   
12057640    13739051  2023-03-31   146.550          4109.88  4900.0  1.726027   
12057641    13739052  2023-03-31   118.200          4109.88  5000.0  1.726027   
12057642    13739053  2023-03-31    94.400          4109.88  5100.0  1.726027   

                R  Volatili

In [6]:
df.info()
print(df.head(3))

parameters = [0.1541697429462341, 0.2508169330627191, 0.03304268331214607, 0.1106829336317251] #2015 start

for year in range(2015, 2024):
    parameters = HestonYearMonth(df, year, parameters)


<class 'pandas.core.frame.DataFrame'>
Index: 10600843 entries, 1456800 to 12057642
Data columns (total 8 columns):
 #   Column           Dtype  
---  ------           -----  
 0   Unnamed: 0       int64  
 1   Quote_date       object 
 2   Price            float64
 3   Underlying_last  float64
 4   Strike           float64
 5   TTM              float64
 6   R                float64
 7   Volatility       float64
dtypes: float64(6), int64(1), object(1)
memory usage: 727.9+ MB
         Unnamed: 0  Quote_date     Price  Underlying_last  Strike       TTM   
1456800     1742077  2014-12-01  1004.300          2053.83  1050.0  0.010959  \
1456801     1742078  2014-12-01   979.305          2053.83  1075.0  0.010959   
1456802     1742079  2014-12-01   954.300          2053.83  1100.0  0.010959   

              R  Volatility  
1456800  0.0001    0.091467  
1456801  0.0001    0.091467  
1456802  0.0001    0.091467  


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_year['Month'] = pd.to_datetime(df_year['Quote_date']).dt.to_period('M')


         Unnamed: 0  Quote_date     Price  Underlying_last  Strike       TTM   
1518156     1809055  2015-01-02  1007.250          2058.69  1050.0  0.019178  \
1518157     1809056  2015-01-02   957.095          2058.69  1100.0  0.019178   
1518158     1809057  2015-01-02   907.140          2058.69  1150.0  0.019178   

              R  Volatility    Month  
1518156  0.0002    0.130943  2015-01  
1518157  0.0002    0.130943  2015-01  
1518158  0.0002    0.130943  2015-01  
[0.1541697429462341, 0.2508169330627191, 0.033042683312146055, 0.1106829336317251, 3123.821666035849]
[0.1541697429462341, 0.2508169330627191, 0.033042683312146055, 0.1106829336317251, 3123.821666035849]
[0.1541697429462341, 0.2508169330627191, 0.033042683312146055, 0.1106829336317251, 3123.821666035849]
[0.15416974705538766, 0.2508169330627191, 0.033042683312146055, 0.1106829336317251, 3123.821624858235]
[0.1541697429462341, 0.2508169364371093, 0.033042683312146055, 0.1106829336317251, 3123.8216910297933]
[0.15416974