# Using ML for paramter estimation

In [20]:
import matplotlib.pyplot as plt
import csv
import pandas as pd
import numpy as np
import scipy as sp
import sklearn as sl
from scipy import stats
from sklearn import datasets
from sklearn import linear_model
from scipy.optimize import minimize
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import GridSearchCV


"""Loading the Data"""

dir       = 'data/'
file_name = 'grid1_zheb51fo.xlsx'
UX1       = pd.read_excel(dir+file_name, sheet_name='UX1_Index')
UX2       = pd.read_excel(dir+file_name, sheet_name='UX2_Index')
UX1       = UX1.set_index('Date')
UX2       = UX2.set_index('Date')
UX1.dropna(subset = ["PX_LAST"], inplace=True)   #Getting rid of NaN values
UX2.dropna(subset = ["PX_LAST"], inplace=True)
UX1.sort_index(inplace=True)
UX2.sort_index(inplace=True)
dataset = np.array(UX1.PX_LAST)

In [43]:
"""Specifying the Input & Output (Labels)"""

n=2     #Looking at n previous days to estimate paramteres + today's volatility
X= [[dataset[j] for j in range(i, i+n+1)] for i in range(len(dataset) - n - 1)]
Y= [dataset[i+n+1] for i in range(len(dataset) - n -1)]

# Putting more emphasis on today's data:

emphasis = 0    #repearing today's value emphasis times
for i in range(len(X)):
    for j in range(emphasis-1):
        X[i].append(X[i][-1])        
        
print(X[:10])
print("\n")
print(Y[:10])

[[24.85, 24.45, 23.55], [24.45, 23.55, 23.15], [23.55, 23.15, 22.55], [23.15, 22.55, 22.25], [22.55, 22.25, 22.8], [22.25, 22.8, 22.5], [22.8, 22.5, 22.0], [22.5, 22.0, 22.55], [22.0, 22.55, 21.55], [22.55, 21.55, 21.4]]


[23.15, 22.55, 22.25, 22.8, 22.5, 22.0, 22.55, 21.55, 21.4, 22.65]


In [3]:
"""Splitting Data into Train and Test set"""

m_training= 2000
m_test= 1000

X_training=X[ :m_training  ]
Y_training=Y[1:m_training+1]   #The labels are shifted 1 to the right since they are tomorrow's value

X_test=X[m_training:m_test]
Y_test=Y[m_training+1:m_test+1]

#for the case of n=0
X_training= np.ravel(X_training)  
X_test    = np.ravel(X_test)

In [4]:
"""Building the Hypothesis"""

def heston_pde_milstein(V0, k, theta, rho, sigma):
    WT  = np.sqrt( 1 ) * np.random.multivariate_normal(np.array([0, 0]), np.array([[1, rho], [rho, 1]]))[1]
    V1 = np.abs(V0+ k * (theta - V0) * 1 + sigma * np.sqrt(V0) * WT + .25 * sigma**2 * (WT**2 - 1))
    return V1

In [5]:
"""Building the Loss Function"""

#The difference between real label and the predicted one to the power of 2
#l = (heston_pde_milstein(X_training[i], r, k, theta, rho, sigma) - Y_training[i])**2

m=len(X_training)  #Training set size
# k: x[0], theta:x[1], rho:x[2], sigma:x[3]

def Ls(X):
    def heston_inner_func(i):   #calculates the predicted lable for each training sample
        WT  = np.random.multivariate_normal(np.array([0, 0]), np.array([[1, X[2]], [X[2], 1]]))[1]
        V1 =  np.abs(X_training[i] + X[0] * (X[0] - X_training[i]) * 1 + 
                     X[3] * np.sqrt(X_training[i]) * WT + .25 * X[3]**2 * (WT**2 - 1))
        return V1
    Ls = (1/m) * np.sum(np.array([(heston_inner_func(i) - Y_training[i])**2 for i in range(m)]))
    return Ls

In [6]:
"""ERM: Empirical Risk Minimization"""

result = minimize(Ls, (1,1,1,1))  #initial values should be given

In [7]:
best_params = result.x
print("Best Paramteres:", best_params)
print("Minimum Ls:", result.fun)

Best Paramteres: [0.99999987 1.00000008 0.99999968 1.        ]
Minimum Ls: 257.73406718244416


In [8]:
#Are we trapped inside a loval minima?!!

In [10]:
"""Evaluating the model on test Set"""

k, theta, rho, sigma = best_params[0], best_params[1], best_params[2], best_params[3]
Y_pred = np.array([heston_pde_milstein(X_test[i], k, theta, rho, sigma) for i in range(len(X_test))])
Y_pred = np.ravel(Y_pred)

#True Error
Ld =  (1/len(Y_test)) * np.sum((Y_pred - Y_test)**2)
print(Ld)

ZeroDivisionError: division by zero

In [None]:
Y_pred_train = np.array([heston_pde_milstein(X_training[i], k, theta, rho, sigma) for i in range(len(X_training))])
L =  (1/m) * np.sum((Y_pred_train - Y_training)**2)
print(L)

print(Y_pred_train[:50])
print(Y_training[:50])

In [None]:
"""Using Neural Network for the Whole task: A Non-Physical Experiment"""

#Let's, just for a momemnt, Ignore any pre_knowledge about the subject and see what happens

m_training= 2500
m_test= 500

X_training=X[ :m_training  ]
Y_training=Y[1:m_training+1]   #The labels are shifted 1 to the right since they should be tomorrow's volatility

X_test=X[m_training: m_training + m_test]
Y_test=Y[m_training+1: m_training+m_test+1]


NN_R = MLPRegressor(hidden_layer_sizes=(100,100,100), max_iter=1000, 
                    alpha=1e-4, solver='adam', momentum=0.9,
                    activation='relu', tol=1e-4, learning_rate_init=0.0001)

NN_R.fit(X_training, Y_training)
Y_pred = NN_R.predict(X_test)

# print("Predicted Volatilty:\n", Y_pred[100:120])
# print("\nTrue Volatility:\n",     Y_test[100:120])
# print("\nThe score:", NN_R.score(X_test, Y_test))

NN_R = MLPRegressor(max_iter=10000, alpha=1e-4, solver='adam', momentum=0.9,
                    activation='relu', tol=1e-4, learning_rate_init=0.0001)
parameters = {'hidden_layer_sizes': [(10,), (20,), (40,), (20,20,),(20,20,20), (40,20,10) ]}

gsc = GridSearchCV(NN_R, parameters, cv=6)

gsc.fit(X_training, Y_training)


print("Best parameters set found:")
print(gsc.best_params_)
print("\n")

print("Score with best parameters:")
print(gsc.best_score_)
print("\n")

print("All scores on the grid:")
print(gsc.cv_results_.get('split0_test_score'))
print(gsc.cv_results_.get('split1_test_score'))
print(gsc.cv_results_.get('split2_test_score'))
print(gsc.cv_results_.get('split3_test_score'))
print(gsc.cv_results_.get('split4_test_score'))
print(gsc.cv_results_.get('split5_test_score'))