In [None]:
import sys  
sys.path.insert(0, '../')

In [None]:
import os
import numpy as np
import GPyOpt
from feature_extraction.loan_struct_reader import LoanStructReader
from feature_extraction.loan_options_maker import LoanOptionsMaker
from feature_extraction.loan_concatenator import LoanConcatenator
from feature_extraction.loan_markuper import LoanMarkuper
from feature_extraction.hist_market_worker import HistMarketDataWorker

from scipy.stats.contingency import margins
import matplotlib.pyplot as plt
import scipy.stats as ss
import math

In [None]:
# Notebook-wide matplotlib defaults: larger label/tick/legend fonts
# and a wide 15 x 6 figure canvas.
plt.rcParams.update({
    "axes.labelsize": 16.,
    "xtick.labelsize": 14.,
    "ytick.labelsize": 14.,
    "legend.fontsize": 12.,
    "figure.figsize": [15., 6.],
})

In [None]:
# Parent directory of the current working directory; used below to locate
# the sample JSON file.
PROJECT_PATH = os.path.dirname(os.getcwd())

# Search space handed to GPyOpt: ten continuous variables named
# var_1 .. var_10, each bounded to (0, 2).
DOMAIN = [
    {'name': 'var_%d' % idx, 'type': 'continuous', 'domain': (0, 2)}
    for idx in range(1, 11)
]

# Single-entry description of a 10-dimensional continuous variable on (0, 1).
# Not referenced by the visible cells.
DOMAIN_1 = dict(name='var_1', type='continuous', domain=(0, 1), dimensionality=10)

In [None]:
def f(X, json_example_path=os.path.join(PROJECT_PATH, r"json_real", r"sample_1.json")):
    """Objective function for the optimiser: mark up the sample loan once per row of X.

    Parameters
    ----------
    X : iterable of rows
        Each row is passed as ``ir_curve`` to ``LoanOptionsMaker().Make``.
    json_example_path : str
        Path of the loan JSON read by ``LoanStructReader().Read``.

    Returns
    -------
    numpy.ndarray
        One single-element row per input row, holding the value returned by
        ``LoanMarkuper().MarkupLoan`` (2-D in, 2-D out, one evaluation per row).
    """
    def _markup_for_curve(ir_curve):
        # The JSON is re-read for every row, so each evaluation starts from
        # freshly loaded structure and options.
        struct, original_options = LoanStructReader().Read(json_example_path, returnOptions=True)
        options = LoanOptionsMaker().Make(original_options, ir_curve=ir_curve)
        loan = LoanConcatenator(struct, options, {}).Parse()
        return [LoanMarkuper().MarkupLoan(loan)]

    return np.array([_markup_for_curve(row) for row in X])

In [None]:
# Smoke test: evaluate f on a single hand-written 10-element row.
# Several entries lie outside the (0, 2) bounds declared in DOMAIN.
f([[10,1,1.9,-1,-1,1,1,1,14,1]])

-----------
### Test on empirical observations

In [None]:
# Hard-coded 10-element mean vector.
mu = np.array([
    0.999791, 0.99853703, 0.99374632, 0.98132504, 0.96271221,
    0.92621573, 0.85829915, 0.67985138, 0.4574298, 0.45154276,
])
# 10 x 10 covariance of uniform random noise. No seed is set, so this
# matrix differs on every run.
cov = np.cov(np.random.rand(10, 10))

In [None]:
# Load the historical market data worker; the cells below reuse it for
# sampling (get_sample) and for the fitted mean/cov (get_func).
hmdw = HistMarketDataWorker().load()

In [None]:
# Multivariate normal using the mean and covariance exposed by hmdw.get_func().
normal_dist_1 = ss.multivariate_normal(
    mean=hmdw.get_func().mean,
    cov=hmdw.get_func().cov,
    allow_singular=True,
)
# Same mean, but with the random covariance matrix `cov`.
normal_dist_2 = ss.multivariate_normal(
    mean=hmdw.get_func().mean,
    cov=cov,
    allow_singular=True,
)


def rho_foo_1(X):
    """Return the normal_dist_1 density of each row of X, one single-element row per input row."""
    return np.array([[normal_dist_1.pdf(row)] for row in X])

In [None]:
# Display the covariance matrix exposed by hmdw.get_func().
hmdw.get_func().cov

---------

In [None]:
# Initial design: a single all-ones point with as many entries as one market sample.
X_init = np.ones(HistMarketDataWorker().load().get_sample().shape).reshape(1, -1)
print(X_init.shape)
Y_init = f(X_init)
print(Y_init)

# Bayesian optimisation of f over DOMAIN with the LCB acquisition function.
bo = GPyOpt.methods.BayesianOptimization(f=f, domain=DOMAIN,
                                        initial_design_numdata = 5,
                                        X=X_init, Y=Y_init,
                                        acquisition_type='LCB',
                                        exact_feval = True,
                                        normalize_Y = False,
                                        optimize_restarts = 10,
                                        acquisition_weight = 2,
                                        de_duplication = True)
                                        #rho_func = rho_foo_1, #hmdw.rho_normal,
                                        #exploration_weight=1000,
                                        #with_noise=False)

# Hold-out points: 20 samples drawn from the historical market data worker.
X_test = []
for i in range(20):
    X_test.append(hmdw.get_sample())
X_test = np.array(X_test)

bo.run_optimization(30)#, test_X = X_test)
print('evaluations_list: ', bo.get_evaluations())

print('test_sample_values:')
# predict returns a pair; the prints below label element 0 as mu and element 1 as sigma.
predict = bo.model.predict(X_test)
# Raw strings: '\m' and '\s' are invalid escape sequences in a normal string
# literal and trigger a warning on recent Python versions. The printed text is unchanged.
print(r'\mu_values: ', predict[0])
print(r'\sigma_values: ', predict[1])
# NOTE(review): the value printed as 'MAE' is the mean of sigma / mu, not a
# mean absolute error against observed values -- confirm the intended metric.
print('MAE: ', np.mean(predict[1] / predict[0]))
#print('MAE_list: ', bo.error_list)
#print('MAE_list: ', bo.error_list)

In [None]:
# Query the fitted surrogate model at one hand-written 10-element point.
bo.model.predict(np.array([[1, 1, 0.5, 0.5, 1, 1, 1, 1, 1, 0.33]]))

In [None]:
# Display the first hold-out sample.
X_test[0]

In [None]:
# Surrogate-model prediction for the first hold-out sample.
# NOTE(review): X_test[0] is a single row taken from a stacked array, so it has
# one dimension fewer than the batch passed to predict earlier -- confirm predict accepts it.
bo.model.predict(X_test[0])

-----------
### Check statistics from X_test

In [None]:
# Draw 20000 samples from hmdw and stack them into one array, one sample per
# row. This replaces the 20-sample X_test built earlier.
X_test = np.array([hmdw.get_sample() for _ in range(20000)])

In [None]:
# Display the stacked test samples (numpy abbreviates the repr of large arrays).
X_test

In [None]:
# Mean of every argument, averaged over all test samples.
mean_values = np.mean(X_test.T, axis=1)
mean_values

In [None]:
# Standard deviation of every argument over all test samples.
np.std(np.transpose(X_test), 1)

------------

In [None]:
# Error histories recorded on the optimiser, with the first entry dropped.
# NOTE(review): error_list / test_error_list are not set anywhere in the visible
# cells -- presumably provided by a customised GPyOpt build; confirm.
a1 = bo.error_list[1:]
b1 = bo.test_error_list[1:]

In [None]:
# Overlay the two error histories on a single axis.
fig, ax = plt.subplots()
ax.plot(a1, label='train_MAE')
ax.plot(b1, label='model_MAE')
#plt.plot([np.mean(predict[1] / predict[0]) for _ in range(len(bo.error_list)-1)], color = 'red', label = 'test_MAE')
ax.legend()
plt.show()

In [None]:
# Step-to-step change of the test error (first entry dropped), skipping the
# first 20 differences.
test_error_steps = np.diff(bo.test_error_list[1:])[20:]
plt.plot(test_error_steps)
print('maximum_diff: ', max(test_error_steps))
plt.show()

In [None]:
# Standard deviation of the first argument over the test samples.
np.std(np.transpose(X_test)[0])

In [None]:
def rho(x):
    """Normal pdf centred on the sample mean of the first argument, with the fitted variance cov[0][0]."""
    return ss.norm.pdf(x, loc = np.mean(np.transpose(X_test)[0]), scale = np.sqrt(hmdw.get_func().cov[0][0]))

# Compare the theoretical pdf with the empirical histogram of the first argument.
x = np.linspace(0.9997, 0.9999, 10000)
plt.plot(x, rho(x), 'k-', lw=2, label='frozen pdf')
# `density=True` replaces `normed=True`, which was removed from matplotlib's hist.
plt.hist(np.transpose(X_test)[0], bins = 30, density = True)
plt.grid()
plt.title('first argument distribution')
plt.show()

In [None]:
def rho(x):
    """Normal pdf centred on the sample mean of the second argument, with the fitted variance cov[1][1]."""
    return ss.norm.pdf(x, loc = np.mean(np.transpose(X_test)[1]), scale = np.sqrt(hmdw.get_func().cov[1][1]))

# Compare the theoretical pdf with the empirical histogram of the second argument.
x = np.linspace(0.9984, 0.9988, 10000)
plt.plot(x, rho(x), 'k-', lw=2, label='frozen pdf')
# `density=True` replaces `normed=True`, which was removed from matplotlib's hist.
plt.hist(np.transpose(X_test)[1], bins = 30, density = True)
plt.grid()
# Title corrected: this cell plots index 1, i.e. the second argument.
plt.title('second argument distribution')
plt.show()

In [None]:
def rho(x):
    """Normal pdf centred on the sample mean of the last argument, with the fitted variance cov[-1][-1]."""
    return ss.norm.pdf(x, loc = np.mean(np.transpose(X_test)[-1]), scale = np.sqrt(hmdw.get_func().cov[-1][-1]))

# Compare the theoretical pdf with the empirical histogram of the last argument.
x = np.linspace(0.4, 0.5, 10000)
plt.plot(x, rho(x), 'k-', lw=2, label='frozen pdf')
# `density=True` replaces `normed=True`, which was removed from matplotlib's hist.
plt.hist(np.transpose(X_test)[-1], bins = 30, density = True)
plt.grid()
plt.title('last argument distribution')
plt.show()

In [None]:
# First diagonal entry of the fitted covariance, multiplied by 1e7.
hmdw.get_func().cov[0][0]*1e7

### Check data on test points
---------------

In [None]:
# Points visited by the optimiser (X) and the first component of each
# recorded objective value (y).
vals = bo.get_evaluations()
X = vals[0]
y = [row[0] for row in vals[1]]

In [None]:
# Evaluated values of the first argument, with the test-sample mean marked in red.
first_arg_values = X[:, 0]
plt.hist(first_arg_values, bins=40, label="First_argument")
plt.axvline(mean_values[0], color='red', label='mean_test_value')
plt.legend()
plt.show()

In [None]:
# Evaluated values of the second argument, with the test-sample mean marked in red.
second_arg_values = X[:, 1]
plt.hist(second_arg_values, bins=40, label="Second_argument")
plt.axvline(mean_values[1], color='red', label='mean_test_value')
plt.legend()
plt.show()

In [None]:
# Evaluated values of the third argument, with the test-sample mean marked in red.
third_arg_values = X[:, 2]
plt.hist(third_arg_values, bins=40, label="Third_argument")
plt.axvline(mean_values[2], color='red', label='mean_test_value')
plt.legend()
plt.show()

In [None]:
# Evaluated values of the fourth argument, with the test-sample mean marked in red.
fourth_arg_values = X[:, 3]
plt.hist(fourth_arg_values, bins=40, label="Fourth_argument")
plt.axvline(mean_values[3], color='red', label='mean_test_value')
plt.legend()
plt.show()

In [None]:
# Evaluated values of the last argument, with the test-sample mean marked in red.
last_arg_values = X[:, -1]
plt.hist(last_arg_values, bins=40, label="Last_argument")
plt.axvline(mean_values[-1], color='red', label='mean_test_value')
plt.legend()
plt.show()

----------
### Let's look at the histogram of the available values relative to the theoretical distribution for the model covariance matrix

In [None]:
# Third recorded sigma curve plotted over [0, 1) in steps of 0.0001.
# NOTE(review): bo.sigma_arr is not set in the visible cells -- presumably
# provided by a customised GPyOpt build; confirm.
sigma_grid = np.arange(0, 1, 0.0001)
third_sigma_curve = np.array(bo.sigma_arr)[2]
plt.plot(sigma_grid, third_sigma_curve)
plt.title('sigma_array')
plt.show()

--------------
#### Get sigma arrays

In [None]:
# First ten recorded sigma curves on a shared grid of [0, 1) in steps of 0.0001.
sigma_grid = np.arange(0, 1, 0.0001)
sigma_curves = np.array(bo.sigma_arr)
for k in range(10):
    plt.plot(sigma_grid, sigma_curves[k], label='sigma_' + str(k))
plt.title('sigma_arrays')
plt.legend()
plt.show()

-------------

In [None]:
def get_prob_arr(k):
    """Normal pdf of argument k (fitted mean[k], variance cov[k][k]) on 10000 evenly spaced points of [0, 1]."""
    return ss.norm.pdf(
        x=np.linspace(0, 1, 10000),
        loc=hmdw.get_func().mean[k],
        scale=np.sqrt(hmdw.get_func().cov[k][k]),
    )


# Stack the ten per-argument pdfs and keep the second marginal returned by
# scipy's `margins`.
marginal_dist = margins(np.array([get_prob_arr(i) for i in range(10)]))[1]

In [None]:
# Grid over [0, 1) in steps of 0.0001.
unit_grid = np.arange(0, 1, 0.0001)
# Normal pdf of the last argument (fitted mean[-1], variance cov[-1][-1]) on that grid.
rho_arr = ss.norm.pdf(
    x=unit_grid,
    loc=hmdw.get_func().mean[-1],
    scale=np.sqrt(hmdw.get_func().cov[-1][-1]),
)
# Ten identical copies of the grid stacked as rows; transposed below so that
# every row passed to rho_foo_1 repeats one grid value ten times.
arr = np.array([unit_grid] * 10)
mass = rho_foo_1(arr.T)

In [None]:
# One figure per recorded sigma curve: the curve weighted by rho_arr, with a
# vertical line at the k-th first-argument value after the initial point.
sigma_curves = np.array(bo.sigma_arr)
unit_grid = np.arange(0, 1, 0.0001)
first_arg_after_init = X.T[0][1:]
for k in range(sigma_curves.shape[0]):
    plt.plot(unit_grid, sigma_curves[k].reshape(-1) * rho_arr)
    plt.axvline(first_arg_after_init[k])
    plt.show()

In [None]:
# Display the raw sigma curves stored on the optimiser.
bo.sigma_arr

In [None]:
# Same weighted sigma curves as above, overlaid on a single figure with faint
# green markers at the first-argument values.
sigma_curves = np.array(bo.sigma_arr)
unit_grid = np.arange(0, 1, 0.0001)
first_arg_after_init = X.T[0][1:]
for k in range(sigma_curves.shape[0]):
    plt.plot(unit_grid, sigma_curves[k].reshape(-1) * rho_arr)
    plt.axvline(first_arg_after_init[k], color='green', alpha=0.2, linewidth=0.5)
plt.show()

In [None]:
# rho_arr over [0, 1) against the normalised histogram of evaluated first-argument values.
# NOTE(review): rho_arr is built from the LAST argument's mean/variance while the
# histogram shows the FIRST argument -- confirm this pairing is intended.
plt.plot(np.arange(0,1,0.0001), rho_arr)
# `density=True` replaces `normed=True`, which was removed from matplotlib's hist.
plt.hist(X.T[0], bins = 40, label = "First_argument", density = True)
plt.show()

In [None]:
# First row of marginal_dist over 10000 evenly spaced points of [0, 1].
fig, ax = plt.subplots()
ax.plot(np.linspace(0, 1, 10000), marginal_dist[0])
ax.set_title('marginal distribution')
plt.show()

In [None]:
# Diagonal of the fitted covariance matrix (one variance per argument).
np.diag(hmdw.get_func().cov)

In [None]:
# Fitted mean vector exposed by hmdw.get_func().
hmdw.get_func().mean

In [None]:
# Normal pdf of the first argument on [0, 1]. Only index 0 is plotted, so it is
# evaluated directly instead of building all ten pdf arrays and discarding nine.
plt.plot(np.linspace(0., 1., 10000), get_prob_arr(0))
plt.show()

In [None]:
#plt.plot(np.linspace(0,1,10000), marginal_dist[0])
# Sum of all arguments for every evaluated point, scaled by the largest sum.
evaluation_sums = np.sum(X.T, 0)
# `density=True` replaces `normed=True`, which was removed from matplotlib's hist.
plt.hist(evaluation_sums / max(evaluation_sums), density = True, bins = 20)
plt.title('Is it legal?')
plt.show()

In [None]:
# Display the grid over [0, 1) repeated in ten columns (the same layout passed to rho_foo_1 above).
np.array([np.arange(0,1,0.0001) for _ in range(10)]).T