In [1]:
# Prepend the parent of the working directory to the module search path.
# presumably so the `feature_extraction` package imported in the next cell resolves — confirm.
import sys
sys.path.insert(0, '../')

In [2]:
import os
import numpy as np
import pandas as pd
import GPyOpt
from feature_extraction.loan_struct_reader import LoanStructReader
from feature_extraction.loan_options_maker import LoanOptionsMaker
from feature_extraction.loan_concatenator import LoanConcatenator
from feature_extraction.loan_markuper import LoanMarkuper
from feature_extraction.hist_market_worker import HistMarketDataWorker

from scipy.stats.contingency import margins
import matplotlib.pyplot as plt
import scipy.stats as ss
import math



In [3]:
# Global matplotlib defaults used by every figure in this notebook:
# larger axis/tick/legend fonts and a wide 15x6 figure.
plt.rcParams.update({
    "axes.labelsize": 16.,
    "xtick.labelsize": 14.,
    "ytick.labelsize": 14.,
    "legend.fontsize": 12.,
    "figure.figsize": [15., 6.],
})

In [4]:
# Parent directory of the current working directory; used to locate project data files.
PROJECT_PATH = os.path.dirname(os.getcwd())

# Search space handed to the optimizer: ten continuous variables
# named var_1 .. var_10, each bounded to the interval (0, 2).
DOMAIN = [{'name': 'var_{}'.format(idx), 'type': 'continuous', 'domain': (0, 2)}
          for idx in range(1, 11)]

In [5]:
def f(X, json_example_path=os.path.join(PROJECT_PATH, r"json_real", r"sample_1.json")):
    """Objective function: mark up the sample loan once per row of ``X``.

    Parameters
    ----------
    X : iterable of rows
        Each row is passed to ``LoanOptionsMaker().Make`` as ``ir_curve``.
        The optimizer is expected to supply a 2-dimensional numpy array.
    json_example_path : str
        Path of the loan description read by ``LoanStructReader().Read``.

    Returns
    -------
    numpy.ndarray
        One single-element row per input row, holding the value returned by
        ``LoanMarkuper().MarkupLoan`` for that row.
    """
    def _markup_for_curve(ir_curve):
        # The loan structure is read again for every row, exactly once per evaluation.
        struct, original_options = LoanStructReader().Read(json_example_path, returnOptions=True)
        options = LoanOptionsMaker().Make(original_options, ir_curve=ir_curve)
        loan = LoanConcatenator(struct, options, {}).Parse()
        return [LoanMarkuper().MarkupLoan(loan)]

    return np.array([_markup_for_curve(row) for row in X])

In [6]:
# Load the historical market data worker; hmdw.get_func() exposes .mean and .cov.
hmdw = HistMarketDataWorker().load()

# Multivariate normal centred on the historical mean, with the historical covariance
# multiplied by a 10x10 diagonal matrix of 1e6.
# Other covariance scalings that were tried and left disabled:
#   np.eye(10, 10)*1e6, np.diag(np.linspace(1e6, 1, 10)),
#   and per-entry diagonal scaling of hmdw.get_func().cov by 1e8 .. 1e4.
hist_mean = hmdw.get_func().mean
scaled_cov = np.dot(hmdw.get_func().cov, np.eye(10, 10)*1e6)
normal_dist_1 = ss.multivariate_normal(mean=hist_mean, cov=scaled_cov, allow_singular=True)


def rho_foo_1(X):
    """Return the density of ``normal_dist_1`` at every row of ``X`` as a column (one value per row)."""
    return np.array([[normal_dist_1.pdf(row)] for row in X])

In [7]:
# Draw 4 samples from a multivariate normal built from the historical mean and the
# unscaled historical covariance, to eyeball what typical points look like.
ss.multivariate_normal(mean = hmdw.get_func().mean, cov = hmdw.get_func().cov, allow_singular=True).rvs(4)

array([[ 0.99979397,  0.99855781,  0.99379672,  0.98154416,  0.96326492,
         0.92768494,  0.86225126,  0.68966642,  0.46623126,  0.46021215],
       [ 0.99979492,  0.99856444,  0.99388491,  0.9815624 ,  0.96291011,
         0.92635009,  0.85780885,  0.68614275,  0.47030904,  0.46472462],
       [ 0.99978988,  0.99852919,  0.99368285,  0.98129973,  0.96308238,
         0.92771967,  0.86264798,  0.68509945,  0.4458918 ,  0.43969428],
       [ 0.99979677,  0.99857739,  0.99393924,  0.98180224,  0.96349921,
         0.92720898,  0.85856234,  0.68482169,  0.46646948,  0.460986  ]])

In [8]:
# Display the mean vector of the historical distribution.
hmdw.get_func().mean

array([ 0.99979098,  0.99853687,  0.99374547,  0.98132326,  0.96271213,
        0.92622281,  0.85831136,  0.67986163,  0.45735066,  0.45146176])

In [9]:
# Evaluate the objective at one hard-coded point (a copy of the historical mean printed earlier).
f([np.array([ 0.99979098,  0.99853687,  0.99374547,  0.98132326,  0.96271213,
        0.92622281,  0.85831136,  0.67986163,  0.45735066,  0.45146176])])

array([[ 93200697.6221437]])

In [None]:
# Same point with only the first coordinate changed (0.99979098 -> 0.19979098).
# NOTE(review): the saved output is identical to the unperturbed evaluation; either the
# output is stale or f does not react to this coordinate — re-run to confirm.
f([np.array([ 0.19979098,  0.99853687,  0.99374547,  0.98132326,  0.96271213,
        0.92622281,  0.85831136,  0.67986163,  0.45735066,  0.45146176])])

array([[ 93200697.6221437]])

In [None]:
# Seed the optimizer with a single evaluated point: a row of ones with as many
# entries as one market sample. The already-loaded `hmdw` is reused instead of
# loading the historical data a second time.
X_init = np.ones(hmdw.get_sample().shape).reshape(1, -1)
print(X_init.shape)
Y_init = f(X_init)
print(Y_init)

# NOTE(review): acquisition_type='NEW', rho_func, with_noise and the test_X argument of
# run_optimization do not look like upstream GPyOpt options — presumably a patched
# local build is required; confirm before running elsewhere.
bo = GPyOpt.methods.BayesianOptimization(f=f, domain=DOMAIN,
                                        initial_design_numdata = 5,
                                        X=X_init, Y=Y_init,
                                        acquisition_type='NEW',
                                        exact_feval = True,
                                        normalize_Y = False,
                                        optimize_restarts = 10,
                                        acquisition_weight = 2,
                                        de_duplication = True,
                                        rho_func = rho_foo_1,
                                        with_noise=False
                                        )

# 100 market samples used as a held-out set while the optimizer runs.
X_test = np.array([hmdw.get_sample() for _ in range(100)])

bo.run_optimization(300, test_X = X_test)
print('evaluations_list: ', bo.get_evaluations())

print('test_sample_values:')
predict = bo.model.predict(X_test)
# Raw strings: '\m' and '\s' are invalid escape sequences in ordinary string literals.
# The printed text is unchanged.
print(r'\mu_values: ', predict[0])
print(r'\sigma_values: ', predict[1])
# NOTE(review): this is the mean ratio of the second to the first predict output,
# not a mean absolute error against true values — the label is kept for continuity.
print('MAE: ', np.mean(predict[1] / predict[0]))

(1, 10)
[[ 99999999.99999982]]
optimization_running...
step:  0
anchor_points:  [[ 0.99978232  0.99847622  0.99348537  0.9804638   0.9606945   0.92102011
   0.8460293   0.65697977  0.4309632   0.42500074]
 [ 0.99977837  0.99844859  0.99340586  0.98024291  0.9604058   0.92071681
   0.84568089  0.65581331  0.4349522   0.42915471]
 [ 0.9997783   0.99844812  0.99337086  0.98047016  0.96162378  0.92517312
   0.85582212  0.66847904  0.43122158  0.42516845]
 [ 0.99978099  0.99846694  0.99343899  0.98049463  0.96129754  0.92406399
   0.85425239  0.67026359  0.43168661  0.42533799]
 [ 0.99978924  0.9985247   0.99371805  0.98159458  0.96402714  0.92979506
   0.86406251  0.6769035   0.42866846  0.42215512]]
step:  1
anchor_points:  [[ 0.99980283  0.99861983  0.99408235  0.98210789  0.96368905  0.92684292
   0.85851028  0.68735614  0.48600083  0.48031058]
 [ 0.99980188  0.9986132   0.99402099  0.98216845  0.96452526  0.9299692
   0.86624579  0.69935492  0.48307269  0.47722386]
 [ 0.99979394  0.998

anchor_points:  [[ 0.99978297  0.99848082  0.99354438  0.98099     0.96265136  0.92682843
   0.85799092  0.67135252  0.42874162  0.4222285 ]
 [ 0.99978227  0.99847589  0.99351053  0.9805216   0.96069797  0.92094478
   0.84530432  0.65807565  0.43930397  0.43313062]
 [ 0.99977814  0.99844697  0.99339511  0.98010932  0.96014962  0.92121797
   0.84867872  0.66558036  0.43252212  0.42665505]
 [ 0.99978131  0.99846916  0.99349553  0.98062818  0.96122361  0.92272469
   0.84956199  0.6638694   0.43449795  0.42858845]
 [ 0.99978585  0.99850095  0.99358004  0.98112706  0.96289008  0.92768929
   0.85958082  0.67363483  0.4315471   0.42497894]]
step:  14
anchor_points:  [[ 0.99978374  0.99848619  0.99351319  0.98089582  0.96243719  0.92643752
   0.85870322  0.67254772  0.43209609  0.42547805]
 [ 0.99978483  0.99849381  0.99356238  0.98088577  0.96209809  0.92523056
   0.8543948   0.66838245  0.43054299  0.42413586]
 [ 0.99978399  0.99848797  0.99350751  0.98089659  0.96265084  0.92772166
   0.860

anchor_points:  [[ 0.99979767  0.99858371  0.99389335  0.98198618  0.96486417  0.93261185
   0.87307101  0.70620749  0.46428878  0.45810855]
 [ 0.9997936   0.99855518  0.99381367  0.98191793  0.96481414  0.93270982
   0.87177663  0.69561237  0.44873949  0.44217713]
 [ 0.99979156  0.9985409   0.99373372  0.98161252  0.96441251  0.93227984
   0.87147541  0.69907216  0.44894177  0.44259658]
 [ 0.99979372  0.99855603  0.99382175  0.98170598  0.96435904  0.93179078
   0.87265619  0.7010149   0.45809609  0.45161524]
 [ 0.99978697  0.99850883  0.99355251  0.98100951  0.96330913  0.93061834
   0.87184664  0.70010719  0.44763102  0.44092351]]
step:  27
anchor_points:  [[ 0.99979817  0.99858721  0.99395195  0.98232036  0.96563742  0.93387073
   0.8741346   0.69775511  0.45430505  0.44791322]
 [ 0.99979596  0.99857172  0.993901    0.98200804  0.96513999  0.93337761
   0.87436339  0.70570484  0.46582601  0.45931256]
 [ 0.99980111  0.99860775  0.99403885  0.9824377   0.96559561  0.93310626
   0.872

anchor_points:  [[ 0.99977927  0.99845489  0.99342332  0.980268    0.96016701  0.91969073
   0.84443233  0.65580399  0.43772431  0.4319021 ]
 [ 0.99978024  0.99846171  0.99345815  0.98045651  0.96087238  0.92190322
   0.84832838  0.65697178  0.4368383   0.43107715]
 [ 0.99979857  0.99858999  0.99395689  0.98227568  0.9656701   0.93430848
   0.87815296  0.70949492  0.4665682   0.46030043]
 [ 0.9997825   0.99847748  0.99347586  0.98035313  0.96030031  0.92009343
   0.8447861   0.65895844  0.44066857  0.43474175]
 [ 0.99978554  0.9984988   0.9936007   0.98080428  0.96105622  0.92130602
   0.84759437  0.66310316  0.43787866  0.43189717]]
step:  40
anchor_points:  [[ 0.99978726  0.99851081  0.99366868  0.98140087  0.96375396  0.93041046
   0.86895747  0.6905703   0.44216718  0.43606882]
 [ 0.9997839   0.99848728  0.99354379  0.98076326  0.96203734  0.92642487
   0.86164809  0.68925744  0.44328926  0.43736849]
 [ 0.99979455  0.99856187  0.99382413  0.98187215  0.96459776  0.93164038
   0.869

anchor_points:  [[ 0.99978832  0.99851824  0.99367631  0.981432    0.96396716  0.93076913
   0.86888137  0.69146189  0.44426297  0.43769271]
 [ 0.99979053  0.99853369  0.99372387  0.98151326  0.96388425  0.93042964
   0.86818799  0.69277127  0.44626861  0.43987235]
 [ 0.99979118  0.99853827  0.99367426  0.98147951  0.96396429  0.93094387
   0.87024816  0.69596609  0.45019939  0.44414344]
 [ 0.99979108  0.99853759  0.99369344  0.98134104  0.9632968   0.92888984
   0.86456943  0.6906537   0.44658215  0.44021545]
 [ 0.99979413  0.99855895  0.99384282  0.98185742  0.96440284  0.93125801
   0.86884149  0.69526523  0.45146901  0.44492552]]
step:  53
anchor_points:  [[ 0.99979323  0.99855263  0.99375725  0.98185721  0.9648116   0.93260787
   0.87048798  0.68760476  0.44545609  0.43855937]
 [ 0.99978927  0.99852488  0.99368414  0.98138694  0.96362837  0.92957089
   0.86678297  0.69040046  0.44531738  0.4389665 ]
 [ 0.99978938  0.99852569  0.99368792  0.98149783  0.96404457  0.93143888
   0.870

anchor_points:  [[ 0.9997998   0.99859859  0.9940234   0.98187245  0.9626915   0.92368527
   0.85006562  0.67623924  0.48626239  0.48143548]
 [ 0.99979825  0.99858773  0.99397953  0.98200973  0.96416963  0.92875791
   0.86287567  0.68161224  0.46516129  0.45950327]
 [ 0.99979407  0.99855852  0.9938403   0.98162225  0.96323984  0.92670574
   0.85871927  0.67619384  0.46308315  0.45701481]
 [ 0.99980137  0.9986096   0.99409118  0.98243382  0.96515015  0.93208112
   0.87128583  0.70712771  0.48144229  0.47591296]
 [ 0.99980157  0.99861098  0.99404704  0.98230945  0.96454237  0.92928609
   0.86210277  0.68328772  0.46773124  0.46190181]]
step:  66
anchor_points:  [[ 0.99979446  0.99856123  0.99386532  0.98180399  0.96385647  0.9285628
   0.86095899  0.67769163  0.46137596  0.45561339]
 [ 0.99979636  0.9985745   0.99390045  0.98183518  0.96359936  0.92724737
   0.86047823  0.67905606  0.46745219  0.46154586]
 [ 0.99979444  0.99856112  0.99387527  0.9817194   0.96330327  0.9265464
   0.85758

anchor_points:  [[ 0.99978562  0.99849934  0.99354188  0.98123886  0.96409206  0.93221653
   0.87243874  0.69377624  0.43280575  0.42578653]
 [ 0.99979896  0.99859271  0.99401639  0.98172496  0.96251107  0.92411324
   0.85382659  0.68474299  0.4922521   0.48738528]
 [ 0.99978768  0.99851374  0.99366011  0.98139307  0.96373839  0.93015588
   0.86782355  0.68487559  0.4391421   0.43259572]
 [ 0.99978524  0.99849672  0.99352498  0.98087219  0.9628762   0.9291805
   0.86745779  0.68820636  0.43745763  0.4307294 ]
 [ 0.99978983  0.99852884  0.99369231  0.98153199  0.96397075  0.93034695
   0.86689886  0.68364268  0.44211449  0.43557034]]
step:  79
anchor_points:  [[ 0.99977951  0.99845657  0.99340352  0.98064879  0.96195236  0.92549254
   0.85618189  0.66294454  0.41820319  0.41183802]
 [ 0.99978948  0.99852639  0.99372178  0.98163848  0.96432401  0.9307275
   0.8668186   0.68079516  0.440326    0.43389722]
 [ 0.99979784  0.99858489  0.99395221  0.98184142  0.96315596  0.92566832
   0.85544

anchor_points:  [[ 0.99978778  0.9985145   0.99361796  0.98140784  0.96408026  0.9313398
   0.87030082  0.6897817   0.4366187   0.42987169]
 [ 0.99978969  0.99852782  0.99369364  0.98153067  0.96410739  0.93093361
   0.86835758  0.68592467  0.44011833  0.4331495 ]
 [ 0.99978558  0.99849906  0.99362306  0.98125417  0.96359599  0.92993622
   0.86603753  0.68285424  0.43836889  0.43174948]
 [ 0.99979802  0.99858617  0.99396206  0.98204108  0.96420763  0.92903903
   0.86204099  0.68279268  0.47433507  0.46842505]
 [ 0.99978607  0.99850251  0.9936348   0.98126758  0.96329564  0.92868341
   0.86343504  0.67990815  0.43583859  0.42943436]]
step:  92
anchor_points:  [[ 0.99979504  0.9985653   0.99381654  0.98189412  0.96479627  0.93204838
   0.87005637  0.6916061   0.4350314   0.42844598]
 [ 0.99978758  0.99851305  0.99363435  0.98122457  0.9631231   0.92846514
   0.8649709   0.67950557  0.43326001  0.42636993]
 [ 0.99978593  0.99850151  0.9935906   0.98124398  0.96338772  0.92942262
   0.8649

anchor_points:  [[ 0.99978536  0.99849751  0.99354258  0.98102731  0.9633723   0.93073958
   0.87012626  0.69205703  0.4330542   0.42642112]
 [ 0.99978748  0.9985124   0.99360656  0.98123202  0.96365159  0.93030105
   0.86905076  0.6911534   0.4348952   0.42827597]
 [ 0.99978324  0.99848266  0.99350268  0.98091294  0.96270637  0.92852738
   0.86352298  0.68057736  0.4290451   0.42274656]
 [ 0.99979161  0.99854127  0.99376944  0.98137827  0.96295859  0.92718746
   0.86112344  0.69258884  0.4661303   0.46039277]
 [ 0.99980025  0.99860174  0.99399815  0.98215293  0.96433452  0.92905256
   0.86277288  0.68671095  0.47700982  0.47141643]]
step:  105
anchor_points:  [[ 0.99978138  0.99846969  0.99343762  0.9806826   0.96229711  0.9275514
   0.86168741  0.67662884  0.42660264  0.42016007]
 [ 0.99979762  0.99858337  0.99396725  0.98197604  0.96383394  0.92830789
   0.86244704  0.68705246  0.4739225   0.46849447]
 [ 0.99980089  0.99860621  0.994017    0.98221087  0.96453434  0.93017967
   0.863

anchor_points:  [[ 0.99980718  0.99865024  0.99424013  0.98253785  0.96442426  0.92822729
   0.86354065  0.70285458  0.5011376   0.49596372]
 [ 0.99979217  0.99854519  0.99374204  0.98121668  0.96272398  0.92732981
   0.86500519  0.69488408  0.46119334  0.45512582]
 [ 0.99978735  0.99851148  0.99360823  0.98137269  0.96387472  0.93086878
   0.86979422  0.68879625  0.43974861  0.43295145]
 [ 0.99979519  0.99856636  0.99380659  0.98148821  0.96321354  0.92804336
   0.86394063  0.69290165  0.46245949  0.45647093]
 [ 0.99979903  0.99859319  0.99396953  0.98193506  0.96388182  0.92843311
   0.86401877  0.69224209  0.46761107  0.46191177]]
step:  118
anchor_points:  [[ 0.99977991  0.99845939  0.99341654  0.98058216  0.96162836  0.92470383
   0.85447219  0.66452378  0.4231048   0.41678748]
 [ 0.99979513  0.99856594  0.9938392   0.98159769  0.9632943   0.92785485
   0.86245452  0.69375834  0.4662531   0.46074076]
 [ 0.99978293  0.99848055  0.99349453  0.98090446  0.96286985  0.92876169
   0.86

anchor_points:  [[ 0.9997821   0.99847473  0.99352732  0.98054524  0.9603643   0.91944075
   0.84146759  0.64726843  0.43512289  0.42931757]
 [ 0.99978448  0.99849134  0.99356068  0.98116196  0.96360763  0.93055341
   0.86723915  0.68311536  0.43461416  0.42842918]
 [ 0.99979103  0.99853723  0.99372843  0.98120199  0.96276003  0.92695267
   0.86409509  0.69216779  0.46362027  0.45767135]
 [ 0.99979444  0.99856108  0.99384837  0.98169876  0.96368588  0.9287125
   0.86558842  0.69051535  0.46701806  0.46145139]
 [ 0.99979541  0.99856788  0.99385221  0.9816258   0.96343763  0.92824188
   0.86283985  0.69473509  0.46987922  0.46387017]]
step:  131
anchor_points:  [[ 0.99979735  0.99858144  0.99394673  0.98201578  0.96422644  0.92954383
   0.86489379  0.69045838  0.47005326  0.46419746]
 [ 0.99979173  0.99854215  0.99376344  0.98129572  0.96272759  0.92733164
   0.86358928  0.69521708  0.46901128  0.46323409]
 [ 0.99979241  0.99854687  0.99376636  0.98140673  0.9631353   0.9277228
   0.8645

In [None]:
# Inspect the first held-out market sample.
X_test[0]

In [None]:
# True objective value at the first held-out sample.
f([X_test[0]])

In [None]:
# Surrogate-model prediction at the same sample, for comparison with the true value.
# NOTE(review): X_test[0] is 1-dimensional; assumes bo.model.predict accepts a single row — confirm.
bo.model.predict(X_test[0])

In [None]:
# Error histories recorded on the optimizer object, with the first two entries dropped.
# NOTE(review): error_list / test_error_list are presumably filled by the patched
# run_optimization(test_X=...) — confirm what each one measures.
a1 = bo.error_list[2:]
b1 = bo.test_error_list[2:]

In [None]:
# Plot both error histories (a1 from bo.error_list, b1 from bo.test_error_list) on one axis.
plt.plot(a1, label = 'train_MAE')
plt.plot(b1, label = 'model_MAE')
plt.legend()
plt.show()

In [None]:
# Step-to-step change of the test error: skip the first history entry,
# difference consecutive values, then ignore the first 10 differences.
test_error_steps = np.diff(bo.test_error_list[1:])[10:]
plt.plot(test_error_steps)
print('maximum_diff: ', max(test_error_steps))
plt.show()

In [None]:
# Re-evaluate the true objective at every visited point except the first one,
# and reshape the optimizer's recorded predictions into a column vector.
# assumes bo.predictions_list holds one prediction per row of bo.X[1:] — TODO confirm
real_data = f(bo.X[1:])
test_data = np.array(bo.predictions_list).reshape(-1, 1)

In [None]:
# Point-wise relative absolute error of the predictions against the true values.
# The denominator uses the magnitude of the true value so the error stays
# non-negative even if a true value is negative.
mae_val = np.abs(real_data - test_data) / np.abs(real_data)

In [None]:
# Relative error per optimizer step together with its 15-step rolling mean.
# NOTE(review): the title says "log" but both curves are drawn on a linear scale.
rolling_mean_15 = pd.DataFrame(mae_val).rolling(15).mean()
plt.plot(mae_val, label='real_MAE_loss')
plt.plot(rolling_mean_15, label='real_MAE_average_loss')
plt.title('log_MAE_model_values')
plt.legend()
plt.show()

In [None]:
# Average relative error after discarding the first 20 steps.
np.mean(mae_val[20:])

In [None]:
def get_prob_arr(k):
    """Return the marginal normal density of coordinate ``k`` on a fixed grid.

    The density uses the ``k``-th entry of the historical mean and the square
    root of the ``k``-th diagonal entry of the historical covariance, and is
    evaluated at 10000 evenly spaced points on [0, 2].

    Parameters
    ----------
    k : int
        Index of the coordinate.

    Returns
    -------
    numpy.ndarray
        Density values, one per grid point.
    """
    market_dist = hmdw.get_func()
    # Take the square root of the single variance only: applying np.sqrt to the whole
    # covariance matrix yields NaN (and a RuntimeWarning) for negative off-diagonal entries.
    std_k = np.sqrt(market_dist.cov[k][k])
    return ss.norm.pdf(x=np.linspace(0, 2, 10000), loc=market_dist.mean[k], scale=std_k)

#np.dot(hmdw.get_func().cov, np.eye(10, 10)*1e6
# np.eye(10, 10)*1e6
#np.diag(np.linspace(1e6, 1, 10))

In [None]:
def f_test(x, y):
    """One-sided F-test for equality of variances.

    Computes the ratio of the unbiased sample variances of ``x`` and ``y`` and
    the upper-tail probability of that ratio under an F distribution with
    ``x.size - 1`` and ``y.size - 1`` degrees of freedom.

    Parameters
    ----------
    x, y : array_like
        Samples to compare. Anything ``np.asarray`` accepts.

    Returns
    -------
    (f_stat, p_val) : tuple
        The variance ratio and its right-tail p-value.
    """
    x = np.asarray(x)
    y = np.asarray(y)
    # Named f_stat so it does not shadow the notebook's objective function `f`.
    f_stat = np.var(x, ddof=1) / np.var(y, ddof=1)
    # Survival function instead of 1 - cdf: the subtraction rounds to exactly 0.0
    # once the cdf is within machine epsilon of 1.
    p_val = ss.f.sf(f_stat, x.size - 1, y.size - 1)
    return f_stat, p_val

In [None]:
# Sanity check: two identical samples should give a variance ratio of 1.
f_test(np.array([1, 2, 3, 4, 5, 6]), np.array([1,2,3,4,5,6]))

In [None]:
# Variance of column index 6 across all points held in bo.X.
np.var(bo.X[:, 6])

In [None]:
def show_image(num):
    """Compare the optimizer's visited values of coordinate ``num`` with the historical marginal.

    Draws a normalized 100-bin histogram of ``bo.X[:, num]`` with the density from
    ``get_prob_arr(num)`` overlaid, then prints the result of ``f_test`` between the
    visited values and 1000 draws from a normal distribution with the historical
    mean and standard deviation of that coordinate.

    Parameters
    ----------
    num : int
        Index of the coordinate to inspect.
    """
    market_dist = hmdw.get_func()
    mean_num = market_dist.mean[num]
    # Square root of the single variance; np.sqrt over the whole covariance matrix
    # would produce NaN for negative off-diagonal entries.
    std_num = np.sqrt(market_dist.cov[num][num])

    visited = bo.X[:, num]
    # `density` replaces the `normed` keyword, which newer Matplotlib no longer accepts.
    plt.hist(visited, bins=100, density=True)
    plt.plot(np.linspace(0, 2, 10000), get_prob_arr(num))
    plt.title('first_vslues_distribution')
    plt.show()

    reference = ss.norm(loc=mean_num, scale=std_num).rvs(1000)
    print('f_test: ', f_test(visited, reference))

In [None]:
# Histogram, density overlay and F-test for the first coordinate.
show_image(0)