In [15]:
!pip install lmfit




[notice] A new release of pip available: 22.3.1 -> 23.1.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [17]:
sample_size = 1485
sample_freq = 5
save_folder = '11.05 Monthly'
colab_path = "M:/Master/"
colab_path = ".."
file = colab_path + '/data/processed_data/2010-2023_NSS_filtered_vF.csv'

In [28]:
import numpy as np
import pandas as pd
from numba import jit
from lmfit import Parameters, minimize
from scipy.integrate import quad
import time
from multiprocessing import Pool

i = complex (0,1) # Define complex number i

@jit
def charHeston(u, S0, r, T, sigma, kappa, theta, v0, rho):
    '''Implementation of the characteristic function of the Heston model'''
    # Frequent expression
    rsiu = rho*sigma*i*u

    # Calculate d
    d1 = (rsiu - kappa)**2
    d2 = sigma**2*(i*u + u**2)
    d = np.sqrt(d1 + d2)

    # Calculate g
    g1 = kappa - rsiu - d
    g2 = kappa - rsiu + d
    g = g1/g2

    #Calculate first exp
    exp1 = np.exp(r*T)

    #Calculate the first power
    base1 = S0
    exponent1 = i*u
    power1 = np.power(base1, exponent1)

    # Calculate second power
    base2 = (1-g*np.exp(-d*T)) / (1-g)
    exponent2 = -2*theta*kappa/(sigma**2)
    power2 = np.power(base2, exponent2)

    # Calculate the second exp
    part1 = theta*kappa*T/(sigma**2) * g1
    part2 = v0/(sigma**2) * g2 * (1 - np.exp(d*T))/(1 - g*np.exp(d*T))
    exp2 = np.exp(part1 + part2)

    # Main calculation
    return exp1*power1*power2*exp2

@jit
def integrand(u, S0, K, r, T, sigma, kappa, theta, v0, rho):
    '''Calculate the integrand of the Heston model'''
    numerator = np.exp(r*T)*charHeston(u-i, S0, r, T, sigma, kappa, theta, v0, rho) - K * charHeston(u, S0, r, T, sigma, kappa, theta, v0, rho)
    denominator = i*u *np.power(K, i*u)
    return np.real(numerator/denominator)

@jit(forceobj = True)
def priceHestonIntegral(S0, K, r, T, sigma, kappa, theta, v0, rho, maxIntegral = 100):
    '''Calculate integral for the price of a European call option using the Heston model'''
    integral = np.array([quad(integrand, 0, maxIntegral, args=(S0, K_i, r_i, T_i, sigma, kappa, theta, v0, rho))[0] for K_i, r_i, T_i in zip(K, r, T) ])
    if np.isnan(integral).any():
      print('NaN value detected in integral. Returning 10^10')
      print(integral)
      return 10**5
    return 0.5 * (S0 - K * np.exp(-r * T)) + integral/np.pi

def iter_cb(params, iter, resid):
    '''Callback function to print the parameters at each iteration of the minimizer for debuging'''
    parameters = [params['sigma'].value, 
                  params['kappa'].value, 
                  params['theta'].value, 
                  params['v0'].value, 
                  params['rho'].value, 
                  np.sum(resid)/len(resid)]
    print(parameters) 

def calibrateHeston(optionPrices, S0, strikes, rates, maturities, initial_parameters):
    '''Calibrate the Heston model parameters using the Levenberg Marquardt algorithm'''

    # Define the parameters to calibrate
    params = Parameters()
    params.add('sigma',value = initial_parameters[0], min = 1e-3, max = 1)
    params.add('kappa',value = initial_parameters[1], min = 1e-3, max = 1)
    params.add('theta',value = initial_parameters[2], min = 1e-4, max = 0.8)
    params.add('v0', value = initial_parameters[3], min = 1e-3, max = 0.4)
    params.add('rho', value = initial_parameters[4], min = -0.8, max = 0.8)

    # Define the objective function to minimize as squared errors
    objectiveFunctionHeston = lambda paramVect: (optionPrices - priceHestonIntegral(S0, strikes,  
                                                                        rates, 
                                                                        maturities, 
                                                                        paramVect['sigma'].value,                         
                                                                        paramVect['kappa'].value,
                                                                        paramVect['theta'].value,
                                                                        paramVect['v0'].value,
                                                                        paramVect['rho'].value)) **2   
    # Run the Levenberg Marquardt algorithm
    result = minimize(objectiveFunctionHeston, 
                      params, 
                      method = 'leastsq',
#                      iter_cb = iter_cb,
                      ftol = 1e-4) 
    return(result)

@jit(forceobj=True)
def create_data_np(df):
    '''Create numpy arrays with required data for calibration and testing'''
    optionPrices = df['Price'].values
    S0 = df['Underlying_last'].values[0]
    strikes = df['Strike'].values
    rates = df['R'].values
    maturities = df['TTM'].values

    data_np = np.empty((len(optionPrices), 5))
    data_np[:, 0] = optionPrices
    data_np[:, 1] = S0
    data_np[:, 2] = strikes
    data_np[:, 3] = rates
    data_np[:, 4] = maturities

    return data_np

def HestonYear(df, year, parameters, calibration_interval):
    '''Calculate the Heston model parameters for each day of the year and calculate the option price for the next day'''
    # Extract last date of previous year and apply yearly filter
    first_date = df[df['Quote_date'] < f'{year}-01-01']['Quote_date'].max()
    df_year = df[(df['Quote_date'] >= first_date) & (df['Quote_date'] <= f'{year}-12-31')]
    print(df_year.head(3))

    # Group the data by date extract numpy arrays with data for each day
    dates = np.sort(df_year['Quote_date'].unique())
    data_nps = df_year.groupby('Quote_date').apply(create_data_np)

    optionPrices_list = np.array([])
    parameters_list = np.array([])
    
    t = time.time()
    for i in range(len(data_nps)-1):
        t = time.time()
        if i % calibration_interval == 0:
            # Calibrate the Heston model

            training_data = data_nps[i]
            np.random.shuffle(training_data)
            training_data = training_data[:sample_size]

            calibrationResult = calibrateHeston(
                data_nps[i][:, 0],
                data_nps[i][0, 1],
                data_nps[i][:, 2],
                data_nps[i][:, 3],
                data_nps[i][:, 4],
                parameters
            )

            # Extract the parameters
            parameters = np.array([
                calibrationResult.params['sigma'].value,
                calibrationResult.params['kappa'].value,
                calibrationResult.params['theta'].value,
                calibrationResult.params['v0'].value,
                calibrationResult.params['rho'].value
            ])

        # Price the options
        optionPrice = priceHestonIntegral(
            data_nps[i+1][0, 1],
            data_nps[i+1][:, 2],
            data_nps[i+1][:, 3],
            data_nps[i+1][:, 4],
            *parameters
        )

        optionPrices_list = np.append(optionPrices_list, optionPrice)
        parameters_list = np.append(parameters_list, parameters)
        print(f'{(np.sum((optionPrice - data_nps[i+1][:, 0]) ** 2) / len(optionPrice))**(0.5):.4f} RMSE for {dates[i+1]} with time {time.time() - t} with parameters {parameters}')
    print(f'{year} finished with time: {time.time() - t}')

    # Save option prices
    df_year = df_year[df_year['Quote_date'] != dates[0]] # Remove the first date from datafram as it's only used to calibrate the first set of parameters
    df_year['Heston_price'] = optionPrices_list
    print('=====================')
    print(f'Total RMSE {year}: {(np.sum((df_year["Heston_price"] - df_year["Price"]) ** 2) / len(df_year["Price"]))**(0.5)}')
    df_year.to_csv(f'{colab_path}Heston/{save_folder}/Results/{dates[1]}_{dates[-1]} Heston results.csv', index=False)

    # Save parameters
    df_params = pd.DataFrame(dates[1:], columns = ['Quote_date'])
    parameters_list = np.reshape(parameters_list, (len(df_params), 5))
    df_params['sigma'] = parameters_list[:,0]
    df_params['kappa'] = parameters_list[:,1]
    df_params['theta'] = parameters_list[:,2]
    df_params['v0'] = parameters_list[:,3]
    df_params['rho'] = parameters_list[:,4]
    df_params.to_csv(f'{colab_path}Heston/{save_folder}/Parameters/{dates[1]}_{dates[-1]} Heston parameters.csv', index=False)
    return parameters

def HestonYearMonth(df, year, parameters):
    '''Calculate the Heston model parameters for each day of the year and calculate the option price for the next day'''
    # Extract last date of previous year and apply yearly filter
    calibration_date = df[df['Quote_date'] < f'{year}-01-01']['Quote_date'].max()
    df_year = df[(df['Quote_date'] >= f'{year}-01-01') & (df['Quote_date'] <= f'{year}-12-31')]

    # Convert 'Quote_date' to datetime
    df_year['Month'] = pd.to_datetime(df_year['Quote_date']).dt.to_period('M')

    print(df_year.head(3))

    optionPrices_list = np.array([])
    parameters_list = np.array([])

    months = np.sort(df_year['Month'].unique())
    
    for month in months:
        t = time.time()
        training_data = create_data_np(df[df['Quote_date'] == calibration_date])
        np.random.shuffle(training_data)
        training_data = training_data[:sample_size]

        calibrationResult = calibrateHeston(
            training_data[:, 0],
            training_data[0, 1],
            training_data[:, 2],
            training_data[:, 3],
            training_data[:, 4],
            parameters
        )

        # Extract the parameters
        parameters = np.array([
            calibrationResult.params['sigma'].value,
            calibrationResult.params['kappa'].value,
            calibrationResult.params['theta'].value,
            calibrationResult.params['v0'].value,
            calibrationResult.params['rho'].value
        ])
        parameters_list = np.append(parameters_list, parameters)

        optionPrices_month = np.array([])
        df_month = df_year[df_year['Month'] == month]
        data_nps = df_month.groupby('Quote_date').apply(create_data_np)
        # Price the options
        for i in range(len(data_nps)):
            optionPrice = priceHestonIntegral(
                data_nps[i][0, 1],
                data_nps[i][:, 2],
                data_nps[i][:, 3],
                data_nps[i][:, 4],
                *parameters
            )
            optionPrices_month = np.append(optionPrices_month, optionPrice)
        optionPrices_list = np.append(optionPrices_list, optionPrices_month)

        calibration_date = df_month['Quote_date'].max()
        
        print(f'{(np.sum((optionPrices_month - df_month["Price"]) ** 2) / len(optionPrices_month))**(0.5):.4f} RMSE for {month} with time {time.time() - t} with parameters {parameters}')

    print(f'{(np.sum((optionPrices_list - df_year["Price"]) ** 2) / len(optionPrices_month))**(0.5):.4f} RMSE for {year} with time {time.time() - t}')

    # Save option prices
    df_year['Heston_price'] = optionPrices_list
    print('=====================')
    print(f'Total RMSE {year}: {(np.sum((df_year["Heston_price"] - df_year["Price"]) ** 2) / len(df_year["Price"]))**(0.5)}')
    df_year.to_csv(f'{colab_path}Heston/{save_folder}/Results/{year} Heston results.csv', index=False)

    # Save parameters
    df_params = pd.DataFrame(df_year['Quote_date'].unique(), columns = ['Quote_date'])
    parameters_list = np.reshape(parameters_list, (len(df_params), 5))
    df_params['sigma'] = parameters_list[:,0]
    df_params['kappa'] = parameters_list[:,1]
    df_params['theta'] = parameters_list[:,2]
    df_params['v0'] = parameters_list[:,3]
    df_params['rho'] = parameters_list[:,4]
    df_params.to_csv(f'{colab_path}Heston/{save_folder}/Parameters/{year} Heston parameters.csv', index=False)

    del df_year
    return parameters

In [29]:
df = pd.read_csv(file)
df.info()
print(df.head(3))

parameters = [0.1541697429462341, 0.2508169330627191, 0.03304268331214607, 0.015914849408431134, 0.1106829336317251] #2015 start

for year in range(2015, 2024):
    parameters = HestonYearMonth(df, year, parameters)


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12057643 entries, 0 to 12057642
Data columns (total 7 columns):
 #   Column           Dtype  
---  ------           -----  
 0   Unnamed: 0       int64  
 1   Quote_date       object 
 2   Price            float64
 3   Underlying_last  float64
 4   Strike           float64
 5   TTM              float64
 6   R                float64
dtypes: float64(5), int64(1), object(1)
memory usage: 643.9+ MB
   Unnamed: 0  Quote_date   Price  Underlying_last  Strike       TTM       R
0           0  2010-01-04  207.49          1132.99   925.0  0.008219  0.0005
1           1  2010-01-04  182.50          1132.99   950.0  0.008219  0.0005
2           2  2010-01-04  157.50          1132.99   975.0  0.008219  0.0005


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_year['Month'] = pd.to_datetime(df_year['Quote_date']).dt.to_period('M')


         Unnamed: 0  Quote_date     Price  Underlying_last  Strike       TTM  \
1518156     1809055  2015-01-02  1007.250          2058.69  1050.0  0.019178   
1518157     1809056  2015-01-02   957.095          2058.69  1100.0  0.019178   
1518158     1809057  2015-01-02   907.140          2058.69  1150.0  0.019178   

              R    Month  
1518156  0.0002  2015-01  
1518157  0.0002  2015-01  
1518158  0.0002  2015-01  
Months: [Period('2015-01', 'M') Period('2015-02', 'M') Period('2015-03', 'M')
 Period('2015-04', 'M') Period('2015-05', 'M') Period('2015-06', 'M')
 Period('2015-07', 'M') Period('2015-08', 'M') Period('2015-09', 'M')
 Period('2015-10', 'M') Period('2015-11', 'M') Period('2015-12', 'M')]
Calibration date: 2014-12-31
3.8592 RMSE for 2015-01 with time 55.43536591529846 with parameters [0.12005797 0.20011216 0.04192356 0.01586924 0.11769891]
Calibration date: 2015-01-30
6.6384 RMSE for 2015-02 with time 83.7592842578888 with parameters [0.00149046 0.00234491 0.7992463

KeyboardInterrupt: 