# Using ML for paramter estimation

In [1]:
import matplotlib.pyplot as plt
import csv
import pandas as pd
import numpy as np
import scipy as sp
import sklearn as sl
from scipy import stats
from sklearn import datasets
from sklearn import linear_model
from scipy.optimize import minimize

np.random.seed(1333)

"""Loading the Data"""

dir       = 'data/'
file_name = 'grid1_zheb51fo.xlsx'
UX1       = pd.read_excel(dir+file_name, sheet_name='UX1_Index')
UX2       = pd.read_excel(dir+file_name, sheet_name='UX2_Index')
UX1       = UX1.set_index('Date')
UX2       = UX2.set_index('Date')
UX1.sort_index(inplace=True)
UX2.sort_index(inplace=True)
dataset = np.array(UX1.PX_LAST)
dataset = dataset[4:]              #we drop 4 data for simpler illustratoins

In [2]:
"""Specifying the Input & Output (Labels)"""

n=0     #Looking at n previous days to estimate paramteres
X=np.array([[dataset[j] for j in range(i, i+n+1)] for i in range(len(dataset) - n)])
Y=np.array([dataset[i+n] for i in range(len(dataset) - n)])

In [3]:
"""Splitting Data into Train and Test set"""

m_training= 2000
m_test= 1000

X_training=X[:m_training]
Y_training=Y[:m_training]

X_test=X[m_training:]
Y_test=Y[m_training:]

#for the case of n=0
X_training= np.ravel(X_training)  
X_test    = np.ravel(X_test)

In [4]:
#Removing NaN values:
nan_array = np.isnan(X_test)
not_nan_array = ~ nan_array
X_test = X_test[not_nan_array]

nan_array = np.isnan(Y_test)
not_nan_array = ~ nan_array
Y_test = Y_test[not_nan_array]

In [5]:
"""Building the Hypothesis"""

def heston_pde_milstein(V0, k, theta, rho, sigma):
    WT  = np.sqrt( 1 ) * np.random.multivariate_normal( np.array( [0,  0] ), np.array( [[1, rho], [rho, 1]] ), size=1)
    V1 = np.abs(V0+ k * (theta - V0) * 1 + sigma * np.sqrt(V0) * WT[:, 1] + .25 * sigma**2 * (WT[:,1]**2 - 1))
    return V1

In [6]:
"""Building the Loss Function"""

#The difference between real label and the predicted one to the power of 2
#l = (heston_pde_milstein(X_training[i], r, k, theta, rho, sigma) - Y_training[i])**2

m=len(X_training)  #Training set size
# k: x[0], theta:x[1], rho:x[2], sigma:x[3]

def Ls(X):
    def heston_inner_func(i):   #calculates the predicted lable for each training sample
        WT  = np.random.multivariate_normal(np.array([0, 0]), np.array([[1, X[2]], [X[2], 1]]), size=1)
        V1 =  np.abs(X_training[i] + X[0] * (X[0] - X_training[i]) * 1 + 
                     X[3] * np.sqrt(X_training[i]) * WT[:, 1] + .25 * X[3]**2 * (WT[:,1]**2 - 1))
        return V1
    Ls = (1/m) * np.sum(np.array([(heston_inner_func(i) - Y_training[i])**2 for i in range(m)]))
    return Ls

In [7]:
"""ERM: Empirical Risk Minimization"""

result = minimize(Ls, (2,2,2,2))

  WT  = np.random.multivariate_normal(np.array([0, 0]), np.array([[1, X[2]], [X[2], 1]]), size=1)
  WT  = np.random.multivariate_normal(np.array([0, 0]), np.array([[1, X[2]], [X[2], 1]]), size=1)


In [8]:
best_params = result.x
print("Best Paramteres:", best_params)
print("Minimum Ls:", result.fun)

Best Paramteres: [2.00000025 1.99999996 2.00000009 2.00000002]
Minimum Ls: 84.16463517753044


In [11]:
"""Evaluating the model on test Set"""

k, theta, rho, sigma = best_params[0], best_params[1], best_params[2], best_params[3]
Y_pred = np.array([heston_pde_milstein(X_test[i], k, theta, rho, sigma) for i in range(len(X_test))])
Y_pred = np.ravel(Y_pred)

#True Error
Ld =  (1/len(Y_test)) * np.sum((Y_pred - Y_test)**2)
print(Ld)

112.51648225044893


  WT  = np.sqrt( 1 ) * np.random.multivariate_normal( np.array( [0,  0] ), np.array( [[1, rho], [rho, 1]] ), size=1)


In [10]:
# Y_pred_train = np.array([heston_pde_milstein(X_training[i], k, theta, rho, sigma) for i in range(len(X_training))])
# L =  (1/m) * np.sum((Y_pred_train - Y_training)**2)
# print(L)