# Using ML for paramter estimation

In [1]:
import matplotlib.pyplot as plt
import csv
import pandas as pd
import numpy as np
import scipy as sp
import sklearn as sl
from scipy import stats
from sklearn import datasets
from sklearn import linear_model
from scipy.optimize import minimize

"""Loading the Data"""

dir       = 'data/'
file_name = 'grid1_zheb51fo.xlsx'
UX1       = pd.read_excel(dir+file_name, sheet_name='UX1_Index')
UX2       = pd.read_excel(dir+file_name, sheet_name='UX2_Index')
UX1       = UX1.set_index('Date')
UX2       = UX2.set_index('Date')
UX1.sort_index(inplace=True)
UX2.sort_index(inplace=True)
dataset = np.array(UX1.PX_LAST)
dataset = dataset[4:]              #we drop 4 data for simpler illustratoins

In [2]:
"""Specifying the Input & Output (Labels)"""

n=0     #Looking at n previous days to estimate paramteres
X=np.array([[dataset[j] for j in range(i, i+n+1)] for i in range(len(dataset) - n)])
Y=np.array([dataset[i+n] for i in range(len(dataset) - n)])

In [3]:
"""Splitting Data into Train and Test set"""

m_training= 2000
m_test= 1000

X_training=X[ :m_training  ]
Y_training=Y[1:m_training+1]   #The labels are shifted 1 to the right since they are tomorrow's value

X_test=X[m_training:-1]
Y_test=Y[m_training+1:]

#for the case of n=0
X_training= np.ravel(X_training)  
X_test    = np.ravel(X_test)

In [12]:
#Removing NaN values:
nan_array = np.isnan(X_test)
not_nan_array = ~ nan_array
X_test = X_test[not_nan_array]

nan_array = np.isnan(Y_test)
not_nan_array = ~ nan_array
Y_test = Y_test[not_nan_array]

In [13]:
"""Building the Hypothesis"""

def heston_pde_milstein(V0, k, theta, rho, sigma):
    WT  = np.sqrt( 1 ) * np.random.multivariate_normal(np.array([0, 0]), np.array([[1, rho], [rho, 1]]))[1]
    V1 = np.abs(V0+ k * (theta - V0) * 1 + sigma * np.sqrt(V0) * WT + .25 * sigma**2 * (WT**2 - 1))
    return V1

In [14]:
"""Building the Loss Function"""

#The difference between real label and the predicted one to the power of 2
#l = (heston_pde_milstein(X_training[i], r, k, theta, rho, sigma) - Y_training[i])**2

m=len(X_training)  #Training set size
# k: x[0], theta:x[1], rho:x[2], sigma:x[3]

def Ls(X):
    def heston_inner_func(i):   #calculates the predicted lable for each training sample
        WT  = np.random.multivariate_normal(np.array([0, 0]), np.array([[1, X[2]], [X[2], 1]]))[1]
        V1 =  np.abs(X_training[i] + X[0] * (X[0] - X_training[i]) * 1 + 
                     X[3] * np.sqrt(X_training[i]) * WT + .25 * X[3]**2 * (WT**2 - 1))
        return V1
    Ls = (1/m) * np.sum(np.array([(heston_inner_func(i) - Y_training[i])**2 for i in range(m)]))
    return Ls

In [20]:
"""ERM: Empirical Risk Minimization"""

result = minimize(Ls, (0,0,0,0))  #initial values should be given

In [21]:
best_params = result.x
print("Best Paramteres:", best_params)
print("Minimum Ls:", result.fun)

Best Paramteres: [ 4.48473614e-07 -2.40657956e-08  1.10410470e-07 -5.16934758e-08]
Minimum Ls: 0.8302061008517941


In [22]:
"""Evaluating the model on test Set"""

k, theta, rho, sigma = best_params[0], best_params[1], best_params[2], best_params[3]
Y_pred = np.array([heston_pde_milstein(X_test[i], k, theta, rho, sigma) for i in range(len(X_test))])
Y_pred = np.ravel(Y_pred)

#True Error
Ld =  (1/len(Y_test)) * np.sum((Y_pred - Y_test)**2)
print(Ld)

2.1170046816827566


In [23]:
Y_pred_train = np.array([heston_pde_milstein(X_training[i], k, theta, rho, sigma) for i in range(len(X_training))])
L =  (1/m) * np.sum((Y_pred_train - Y_training)**2)
print(L)

print(Y_pred_train[:50])
print(Y_training[:50])

0.8302060864968489
[22.54999034 22.24999017 22.79998963 22.49999012 21.99999013 22.54998993
 21.54999078 21.39999014 22.64998955 24.74998872 24.34998832 24.54998878
 23.74998943 23.99998896 24.64998923 24.24998922 23.44998958 23.54998998
 25.64998851 25.84998797 26.19998881 25.64998865 25.54998844 24.94998887
 24.89998885 23.5999894  22.74998992 21.79998991 21.34999013 20.94999025
 21.64999015 21.09999049 20.94999024 20.39999101 23.04998982 22.79998947
 22.59998967 22.49998972 21.59999083 21.2499907  21.24999033 21.3999904
 21.59999065 21.44999058 21.49999051 20.89999049 20.09999126 19.7999913
 20.19999111 19.89999109]
[22.25 22.8  22.5  22.   22.55 21.55 21.4  22.65 24.75 24.35 24.55 23.75
 24.   24.65 24.25 23.45 23.55 25.65 25.85 26.2  25.65 25.55 24.95 24.9
 23.6  22.75 21.8  21.35 20.95 21.65 21.1  20.95 20.4  23.05 22.8  22.6
 22.5  21.6  21.25 21.25 21.4  21.6  21.45 21.5  20.9  20.1  19.8  20.2
 19.9  19.45]


In [None]:
"""Using Neural Network for Whole task: A Non-Physical Experiment"""

#Let's, just for a momemnt, Ignore any pre_knowledge about the subject and see what happens!