In [9]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats as sps
sns.set()

plt.rc('font', size=30)
plt.rc('axes', titlesize=30)
plt.rc('axes', labelsize=30)
plt.rc('xtick', labelsize=30)
plt.rc('ytick', labelsize=30)
plt.rc('legend', fontsize=30)
plt.rc('figure', titlesize=30)

from sklearn.linear_model import LinearRegression
from sklearn.decomposition import PCA
from sklearn.decomposition import KernelPCA
from sklearn.covariance import MinCovDet
import cvxpy as cvx
from tqdm import tqdm
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import TimeSeriesSplit

Датафрейм returns - это таблица доходностей, то есть $\frac{p_{new} - p_{old}}{p_{old}}$.

In [13]:
returns = pd.read_csv('work/data/returns.csv')
returns = returns.fillna(0)
returns['date'] = returns['date'].astype(np.datetime64)
returns = returns.set_index('date')

train_size = 1500

returns_train = returns[:train_size]
returns_test = returns[train_size:]

In [34]:
from portfolio_optimizer import PortfolioOptimizer

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [30]:
%run helper_functions.ipynb

In [51]:
param_grid = {
    'size_of_window': [None] * 20 + np.linspace(200, 1800, 321, dtype=int).tolist(),
    'n_top_companies': np.linspace(1, 100, 100, dtype=int).tolist(),
    'period_change_portfolio': [None, 120, 240, 360, 480, 720, 900],
    'R': np.linspace(1e-3, 3e-3, 21),
}

In [23]:
tscv = TimeSeriesSplit(n_splits=5)

In [52]:
po_randomsearch = RandomizedSearchCV(
    estimator=PortfolioOptimizer(risk_free_return=1.02), 
    param_distributions=param_grid,
    cv=tscv,
    verbose=0,
    n_jobs=-2,  # кол-во параллельных процессов
    n_iter=100,  # кол-во итераций случайного выбора гиперпараметров
    random_state=16  # seed для фиксации генератора случайных чисел
)

In [53]:
%%time
po_randomsearch.fit(returns_train)

CPU times: user 1.38 s, sys: 161 ms, total: 1.54 s
Wall time: 26 s



One or more of the test scores are non-finite: [        nan  0.60510835  0.60498829  0.63663476  0.46627716  0.52854226
 -0.84793225  0.52173034  0.44071898  0.58104927  0.68014066  1.06911948
  0.65022586  0.55277175         nan  0.82020673  0.66929337  0.48310162
  0.91089273  0.59796291  0.90181722  0.74425925  0.6348584   0.55134675
  1.28768408  0.63848998  0.5675734   0.45409051  0.68096789  0.76452113
         nan  0.37840755  0.49895669  0.68911361  0.60493304  0.58350989
  0.59996703  1.24084325  0.590926    0.66263271  0.67107076  0.48994933
  2.65430875  0.60992804  0.7273605   0.5789285   0.62751098  0.52507276
  0.64761974  0.35471496         nan  0.41543353  0.65196913  0.5995646
  0.56963882  0.56608971  0.60226496         nan  0.58461037  0.77760871
  0.63614918  0.62535472  0.87035352  0.37185676  0.80695647  0.55673349
  0.63512394  0.79257334  0.3200348   0.60336213  0.78738668  0.80872709
  0.64048991         nan  0.589442    0.42777605  0.9300652          nan
  0.

RandomizedSearchCV(cv=TimeSeriesSplit(gap=0, max_train_size=None, n_splits=5, test_size=None),
                   estimator=PortfolioOptimizer(risk_free_return=1.02),
                   n_iter=100, n_jobs=-2,
                   param_distributions={'R': array([0.001 , 0.0011, 0.0012, 0.0013, 0.0014, 0.0015, 0.0016, 0.0017,
       0.0018, 0.0019, 0.002 , 0.0021, 0.0022, 0.0023, 0.0024, 0.0025,
       0.0026, 0.0027, 0.0028, 0.0029, 0.003 ]),
                                        'n_top_companies': [1, 2, 3, 4, 5, 6, 7,
                                                            8, 9, 10, 11, 12,
                                                            13, 14, 15, 16, 17,
                                                            18, 19, 20, 21, 22,
                                                            23, 24, 25, 26, 27,
                                                            28, 29, 30, ...],
                                        'period_change_portfolio': [None, 120,

In [27]:
po_randomsearch.best_params_

{'size_of_window': 200,
 'period_change_portfolio': 120,
 'n_top_companies': 35,
 'R': 0.0025}

In [109]:
po_randomsearch.best_estimator_.w_

array([-8.01133699e-18, -1.39186237e-18, -1.59573235e-18,  1.43866430e-03,
       -7.95867453e-19, -1.08467400e-18, -3.63159818e-19, -6.74107126e-19,
       -3.67786464e-19,  4.00303660e-02, -2.72430548e-19, -5.45264211e-19,
        5.12324321e-02, -2.26563663e-19, -5.60537235e-19, -2.43585107e-19,
       -3.89057765e-19,  2.24835127e-02, -1.56802494e-19, -8.05538286e-20,
        2.76792768e-19, -1.38627976e-19, -1.22250303e-19,  2.75091270e-01,
        3.32658600e-02,  2.61237458e-19,  1.49689562e-01,  1.32282630e-19,
       -1.12606158e-19,  4.45727710e-20,  5.43002688e-02, -3.89426029e-21,
        2.59442783e-01,  1.13025281e-01,  1.80918557e-20])

In [66]:
po = PortfolioOptimizer(risk_free_return=1.02, size_of_window=200,
                        period_change_portfolio=120, n_top_companies=35,
                        R=0.0025)
po.fit(returns_train)
po.w_

array([ 1.61158879e-03, -6.02237885e-18,  5.15134528e-03,  7.93810517e-18,
        7.24437422e-03,  1.44368016e-02, -7.14793261e-19,  1.91825039e-17,
       -4.79698206e-18, -1.56038545e-17, -3.44463547e-18, -8.66602656e-18,
       -3.93944215e-18,  2.02474219e-02,  5.06441126e-02, -4.15262665e-18,
        1.86528659e-02,  3.61245009e-03,  2.52610863e-02,  8.99754397e-03,
        1.41046833e-01, -4.69463712e-18,  2.82174679e-01,  7.78905194e-03,
       -1.19819163e-18,  3.25593774e-02,  1.11737353e-01,  9.73508633e-02,
        2.72678072e-02, -3.70177291e-18,  7.25005019e-02,  1.33165705e-02,
        5.49195983e-18,  5.83973723e-02, -3.75119307e-18])

In [79]:
param_grid_PCA_preproc = {
    'size_of_window': [None] * 20 + np.linspace(200, 1800, 321, dtype=int).tolist(),
    'n_top_companies': np.linspace(10, 100, 91, dtype=int).tolist(),
    'period_change_portfolio': [None, 120, 240, 360, 480, 720, 900],
    'R': np.linspace(1e-3, 3e-3, 21),
    'preprocessing_kept_dim': np.linspace(2, 20, 20, dtype=int)
}

In [80]:
po_randomsearch_PCA_preproc = RandomizedSearchCV(
    estimator=PortfolioOptimizer(risk_free_return=1.02, is_PCA_preprocessing=True), 
    param_distributions=param_grid_PCA_preproc,
    cv=tscv,
    verbose=0,
    n_jobs=-2,  # кол-во параллельных процессов
    n_iter=200,  # кол-во итераций случайного выбора гиперпараметров
    random_state=16  # seed для фиксации генератора случайных чисел
)

In [81]:
%%time
po_randomsearch_PCA_preproc.fit(returns_train)


One or more of the test scores are non-finite: [0.55771556 0.73047467 0.94700166        nan 0.81398104 0.64148111
 0.67731578 0.57705985 0.7818914         nan 0.48162965 0.69388188
 0.74405548 0.710865   0.84904389 0.73736019        nan 0.61132114
 0.72658489 0.83046331        nan 0.80760548 0.55895568 0.53237888
 0.72023142 0.61567404 0.46970198 0.60666805 0.56050985 0.46384576
 0.61230974 0.55993264 0.91385473        nan 0.77620864        nan
 0.62090192 0.26445578 0.76460278 0.80370754 0.35551537 0.64197955
 0.46944604 0.73551993        nan 0.58800535 0.6555216  0.74870316
 0.65740173 0.52112908 0.68552166        nan 0.3134295  0.77690629
 0.481702   0.5116878  0.72615143 0.61223219 0.829933          nan
        nan 0.72764065 0.74091378 0.62655841 0.77279152 0.75629818
 0.74740091 0.6002004  0.72181464 0.47899111 0.73675333 0.8286496
 0.71833503        nan 0.70613766        nan 0.68552166 0.42934193
 0.7055523  0.60049942 0.4942372  0.12266521 0.5942761  0.64148111
 0.79317866 0.9

CPU times: user 2.57 s, sys: 317 ms, total: 2.88 s
Wall time: 47 s


RandomizedSearchCV(cv=TimeSeriesSplit(gap=0, max_train_size=None, n_splits=5, test_size=None),
                   estimator=PortfolioOptimizer(is_PCA_preprocessing=True,
                                                risk_free_return=1.02),
                   n_iter=200, n_jobs=-2,
                   param_distributions={'R': array([0.001 , 0.0011, 0.0012, 0.0013, 0.0014, 0.0015, 0.0016, 0.0017,
       0.0018, 0.0019, 0.002 , 0.0021, 0.0022, 0.0023, 0.0024, 0.0025,
       0.0026, 0.0027, 0.00...
                                                            30, 31, 32, 33, 34,
                                                            35, 36, 37, 38, 39, ...],
                                        'period_change_portfolio': [None, 120,
                                                                    240, 360,
                                                                    480, 720,
                                                                    900],
                       

In [82]:
po_randomsearch_PCA_preproc.best_params_

{'size_of_window': 1380,
 'preprocessing_kept_dim': 8,
 'period_change_portfolio': 120,
 'n_top_companies': 56,
 'R': 0.0022}

In [84]:
param_grid_PCA = {
    'size_of_window': [None] * 20 + np.linspace(200, 1800, 65, dtype=int).tolist(),
    'n_top_companies': np.linspace(20, 100, 17, dtype=int).tolist(),
    'period_change_portfolio': [None, 120, 240, 360, 480, 720, 900],
    'R': np.linspace(1e-3, 3e-3, 21),
    'n_components': np.linspace(1, 20, 20, dtype=int)
}

In [96]:
po_randomsearch_PCA = RandomizedSearchCV(
    estimator=PortfolioOptimizer(risk_free_return=1.02, is_PCA=True), 
    param_distributions=param_grid_PCA,
    cv=tscv,
    verbose=0,
    n_jobs=-2,  # кол-во параллельных процессов
    n_iter=100,  # кол-во итераций случайного выбора гиперпараметров
    random_state=16  # seed для фиксации генератора случайных чисел
)

In [98]:
%%time
po_randomsearch_PCA.fit(returns_train)

CPU times: user 1.43 s, sys: 158 ms, total: 1.59 s
Wall time: 28.5 s



One or more of the test scores are non-finite: [0.83405769 0.78979741 0.45162568 0.70216089 0.69411384 0.55079279
 0.73371487 0.46278074 0.71102661 0.66764486 0.58144396 0.64807528
 0.78315379 0.70747787 0.57834795 0.62233641 0.80212867 0.76365275
 0.81351869 0.62841141 0.54353096 0.64273861 0.74237949 0.57417968
 0.76260173 0.74955017 0.3982633  0.62310329 0.72342414 0.80096405
 0.75165859 0.70549917 0.68653926 1.02932585 0.75273811 0.67349662
 0.54342966 0.58852376 0.60006381 0.72713756 0.71859617 0.88622523
 0.5491222  0.77354443 0.66227657 0.6231107  0.4604923  0.59464437
 0.70473798 0.68407965 0.64082948 0.76862613 0.6247771  0.68717954
 0.97977168 0.64045241 0.75924791 0.78459751 0.69170597 0.73829763
 0.57716601 0.68201737 0.56943981 0.8505188  0.76517643 0.78629842
 0.65499837 0.78851512 0.4980878  1.13159785 0.68244537 0.6305012
 0.60392799 0.86910611 0.59413362 0.8197375  0.76689753 0.49592059
 0.66719715 0.69470205 0.66314599 0.68871527 0.76309403 0.74843662
 1.06301546 0.7

RandomizedSearchCV(cv=TimeSeriesSplit(gap=0, max_train_size=None, n_splits=5, test_size=None),
                   estimator=PortfolioOptimizer(is_PCA=True,
                                                risk_free_return=1.02),
                   n_iter=100, n_jobs=-2,
                   param_distributions={'R': array([0.001 , 0.0011, 0.0012, 0.0013, 0.0014, 0.0015, 0.0016, 0.0017,
       0.0018, 0.0019, 0.002 , 0.0021, 0.0022, 0.0023, 0.0024, 0.0025,
       0.0026, 0.0027, 0.0028, 0.0029, 0.00...
                                        'n_components': array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19, 20]),
                                        'n_top_companies': [20, 25, 30, 35, 40,
                                                            45, 50, 55, 60, 65,
                                                            70, 75, 80, 85, 90,
                                                            95, 100],
                                  

In [90]:
param_grid_PCA_PCA_preproc = {
    'size_of_window': [None] * 20 + np.linspace(200, 1800, 65, dtype=int).tolist(),
    'n_top_companies': np.linspace(20, 100, 17, dtype=int).tolist(),
    'period_change_portfolio': [None, 120, 240, 360, 480, 720, 900],
    'R': np.linspace(1e-3, 3e-3, 21),
    'n_components': np.linspace(1, 20, 20, dtype=int),
    'preprocessing_kept_dim': np.linspace(2, 20, 20, dtype=int)
}

In [94]:
po_randomsearch_PCA_PCA_preproc = RandomizedSearchCV(
    estimator=PortfolioOptimizer(risk_free_return=1.02, is_PCA=True, is_PCA_preprocessing=True), 
    param_distributions=param_grid_PCA_PCA_preproc,
    cv=tscv,
    verbose=0,
    n_jobs=-2,  # кол-во параллельных процессов
    n_iter=200,  # кол-во итераций случайного выбора гиперпараметров
    random_state=16  # seed для фиксации генератора случайных чисел
)

In [95]:
po_randomsearch_PCA_PCA_preproc.fit(returns_train)


One or more of the test scores are non-finite: [0.78981929 0.71515635 0.71755892 0.72144586 0.70032134 0.78176313
 0.97407499 0.6369114  0.58361858 0.9055623  0.44058937 0.6223079
 0.6042911  0.52805717 0.72513396 0.75381759 0.59569232 0.75646081
 0.60852562 0.67433224 0.7544174  0.62788791 0.51241563 0.70717087
 0.62068361 0.72473668 0.52965464 0.69614101 0.71097322 0.64409888
 0.64946958 0.75892527 0.73161758 0.36899526 0.60075637 0.4412236
        nan 0.46834172 1.63543028 0.69375375 1.01043065 0.55996065
 0.61440257 0.64871377 0.37295943 1.02548291 0.7850438  0.7068691
 0.65795309 0.81916629 0.80227087 0.89023106 0.39054279 0.68021197
 0.52849628 0.71930163 0.72864898 0.72350659 0.67771725 0.66755886
 0.49624471 0.73535581 0.59239244 0.73005342 0.79279113 0.59569232
 0.71688949 0.74315574 0.78494333 0.59644917 0.85750052 0.47236131
 0.67265896 0.46638868 0.64175597 0.67978847 0.69944278 0.71932599
 0.67711326 0.86735135 0.77756752 0.70215418 0.77150024 0.59091802
 0.74053605 0.762

RandomizedSearchCV(cv=TimeSeriesSplit(gap=0, max_train_size=None, n_splits=5, test_size=None),
                   estimator=PortfolioOptimizer(is_PCA=True,
                                                is_PCA_preprocessing=True,
                                                risk_free_return=1.02),
                   n_iter=200, n_jobs=-2,
                   param_distributions={'R': array([0.001 , 0.0011, 0.0012, 0.0013, 0.0014, 0.0015, 0.0016, 0.0017,
       0.0018, 0.0019, 0.002 , 0.0021, 0.0022, 0.0023, 0.0024, 0.0025,
       0.0026...
                                                            45, 50, 55, 60, 65,
                                                            70, 75, 80, 85, 90,
                                                            95, 100],
                                        'period_change_portfolio': [None, 120,
                                                                    240, 360,
                                                                

In [110]:
param_grid_rbf = {
    'size_of_window': [None] * 20 + np.linspace(200, 1800, 65, dtype=int).tolist(),
    'n_top_companies': np.linspace(40, 100, 13, dtype=int).tolist(),
    'period_change_portfolio': [None, 120, 240, 360, 480, 720],
    'R': np.linspace(1e-3, 3e-3, 21),
    'n_components': np.linspace(1, 20, 20, dtype=int),
    'kernelgamma': np.linspace(0.01, 0.1, 21)
}

In [111]:
po_randomsearch_rbf = RandomizedSearchCV(
    estimator=PortfolioOptimizer(risk_free_return=1.02, is_kenrel_PCA=True, kernel='rbf'), 
    param_distributions=param_grid_rbf,
    cv=tscv,
    verbose=0,
    n_jobs=-2,  # кол-во параллельных процессов
    n_iter=100,  # кол-во итераций случайного выбора гиперпараметров
    random_state=16  # seed для фиксации генератора случайных чисел
)

In [112]:
%%time
po_randomsearch_rbf.fit(returns_train)


One or more of the test scores are non-finite: [nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan]



AttributeError: 'KernelPCA' object has no attribute 'eigenvectors_'

In [113]:
param_grid_no_window = {
    'n_top_companies': np.linspace(1, 100, 100, dtype=int).tolist(),
    'period_change_portfolio': [None, 120, 240, 360, 480, 720, 900],
    'R': np.linspace(1e-3, 3e-3, 21),
}

In [114]:
po_randomsearch_no_window = RandomizedSearchCV(
    estimator=PortfolioOptimizer(risk_free_return=1.02), 
    param_distributions=param_grid_no_window,
    cv=tscv,
    verbose=0,
    n_jobs=-2,  # кол-во параллельных процессов
    n_iter=100,  # кол-во итераций случайного выбора гиперпараметров
    random_state=16  # seed для фиксации генератора случайных чисел
)

In [115]:
%%time
po_randomsearch_no_window.fit(returns_train)

CPU times: user 1.46 s, sys: 233 ms, total: 1.69 s
Wall time: 26.8 s



One or more of the test scores are non-finite: [1.39717631 0.6199254  0.54764057        nan 0.54987293        nan
 0.56075505 0.4924026         nan 0.44027877 0.62378478 0.59722687
 0.83705182 0.50047404 0.46936974 0.68005927 0.44143766 0.44027877
 0.60019122 0.55314598        nan 0.44328032 0.6043434  0.568082
 0.59112144 0.63661141 0.56635454 0.45043539 0.6558304  0.52195909
        nan 0.60736714 0.40133186 0.4844703  0.60844932 0.49928292
 0.56901215 0.52044141 0.60550858 0.62387883 0.60369314 0.56691043
 0.38730035 0.48228789 0.66712057 0.55479099 0.60992804 0.69824528
 0.64038569 0.62510201 0.52421695 0.6350204  0.56450586 0.76285401
 0.46792216        nan 0.55667435 0.58620578 0.56335572 0.63083961
 0.62310569 0.50670581 0.58958802 0.65690262 0.62549506 0.59181204
 0.62387883 0.53668694 0.47749993 0.68446579 0.59184928 0.64401689
 0.8351361  0.55278099 0.4182812  0.67548516 1.00318369 0.43803617
 0.52437736 0.59020095 0.6054787  0.50152684 0.51467785        nan
 0.58503145 0.60

RandomizedSearchCV(cv=TimeSeriesSplit(gap=0, max_train_size=None, n_splits=5, test_size=None),
                   estimator=PortfolioOptimizer(risk_free_return=1.02),
                   n_iter=100, n_jobs=-2,
                   param_distributions={'R': array([0.001 , 0.0011, 0.0012, 0.0013, 0.0014, 0.0015, 0.0016, 0.0017,
       0.0018, 0.0019, 0.002 , 0.0021, 0.0022, 0.0023, 0.0024, 0.0025,
       0.0026, 0.0027, 0.0028, 0.0029, 0.003 ]),
                                        'n_top_companies': [1, 2, 3, 4, 5, 6, 7,
                                                            8, 9, 10, 11, 12,
                                                            13, 14, 15, 16, 17,
                                                            18, 19, 20, 21, 22,
                                                            23, 24, 25, 26, 27,
                                                            28, 29, 30, ...],
                                        'period_change_portfolio': [None, 120,

In [163]:
po_randomsearch.best_estimator_.predict(returns_test[:-10])

                MPWR       CCI       ESS         D       MRK       LYB  \
date                                                                     
2017-12-18  0.020630 -0.009215  0.000324 -0.025445 -0.000351  0.033289   
2017-12-19 -0.006617 -0.009846 -0.028217 -0.017041  0.000186 -0.005571   
2017-12-20  0.008421 -0.006079 -0.010380 -0.009222 -0.002151  0.008905   
2017-12-21 -0.010960  0.002687 -0.011362  0.000753  0.008745  0.003273   
2017-12-22 -0.004750  0.003421  0.007659 -0.002604 -0.004232 -0.002723   
...              ...       ...       ...       ...       ...       ...   
2018-06-05  0.008017 -0.011407 -0.004010  0.002072 -0.010164  0.018078   
2018-06-06  0.010532 -0.002524  0.007013 -0.016252  0.008803  0.022516   
2018-06-07 -0.027846 -0.003985 -0.007749  0.010362  0.000968  0.004240   
2018-06-08  0.006673  0.005463  0.001080  0.003375  0.009521  0.004637   
2018-06-11  0.022944  0.002819 -0.007094  0.009749 -0.003997 -0.004743   

                 MLM       CAG       

                MPWR       CCI       ESS         D       MRK       LYB  \
date                                                                     
2019-05-28 -0.021614 -0.013376 -0.005910 -0.018911 -0.011205 -0.012061   
2019-05-29  0.006420 -0.004842 -0.013483 -0.013511 -0.011208  0.010256   
2019-05-30  0.007750  0.017373  0.000387 -0.012099  0.004022 -0.004752   
2019-05-31 -0.016638  0.018162  0.021999  0.011704 -0.005891 -0.040815   
2019-06-03  0.023267  0.014000 -0.007233  0.010384  0.013252  0.046328   
...              ...       ...       ...       ...       ...       ...   
2019-11-07  0.020047 -0.014934 -0.009811 -0.007772 -0.004448  0.004436   
2019-11-08  0.010280 -0.008337  0.000286 -0.000374  0.008576 -0.001029   
2019-11-11 -0.006907  0.002296 -0.004797 -0.008223 -0.003716 -0.002049   
2019-11-12 -0.008235 -0.003436 -0.013972  0.001254  0.010086 -0.006282   
2019-11-13 -0.014024  0.005441  0.013395  0.014046  0.008317 -0.013669   

                 MLM       CAG       

                MPWR       CCI       ESS         D       MRK       LYB  \
date                                                                     
2020-10-28 -0.050393 -0.023408 -0.046179 -0.032375 -0.023210 -0.055075   
2020-10-29  0.048958  0.005483  0.060433  0.014407 -0.003802  0.041546   
2020-10-30  0.003046 -0.009575  0.035476 -0.007526 -0.008961 -0.052075   
2020-11-02 -0.013581  0.021007  0.060644  0.016429  0.019939  0.050562   
2020-11-03 -0.002760  0.005451  0.032405  0.015299  0.002741  0.013216   
...              ...       ...       ...       ...       ...       ...   
2021-04-12 -0.007829  0.013125  0.001375 -0.000515 -0.001175  0.012389   
2021-04-13  0.006953 -0.002422  0.009516  0.015865  0.002755 -0.007590   
2021-04-14 -0.017530 -0.008017 -0.017489 -0.000651 -0.000400  0.022663   
2021-04-15  0.031487  0.021512  0.027542  0.011500  0.003411  0.001502   
2021-04-16 -0.007732  0.000836  0.001142  0.007404  0.010556  0.004195   

                 MLM       CAG       

(array([1.19855355, 0.88849474, 1.18647662, 1.23971795, 2.24045653,
        1.39758131, 1.82595557]),
 2017-12-18    1.005000
 2017-12-19    1.001974
 2017-12-20    1.003025
 2017-12-21    1.008258
 2017-12-22    1.005118
                 ...   
 2021-04-12    8.325674
 2021-04-13    8.511394
 2021-04-14    8.936606
 2021-04-15    8.858884
 2021-04-16    8.955672
 Length: 837, dtype: float64)

In [119]:
arr = np.array([1.05647182, 0.88849474, 1.18647662, 1.23971795, 2.24045653,
        1.39758131, 1.82398691, 1.0319652])
np.cumprod(arr)

array([1.05647182, 0.93866966, 1.1137096 , 1.38068578, 3.09336648,
       4.32323117, 7.88551707, 8.1375792 ])

In [154]:
np.argsort(po_randomsearch.best_estimator_.w_)

array([17, 14, 16,  7,  9, 30, 21, 19, 29,  8, 32, 27, 28, 10, 12, 15, 20,
       22, 13, 33, 34,  2,  3, 31, 11,  4,  0, 25, 23, 18,  5, 24, 26,  1,
        6])

In [156]:
np.sort(po_randomsearch.best_estimator_.w_)

array([-3.05264573e-19, -2.38582660e-19, -1.27860529e-19, -7.96617470e-20,
       -7.60353199e-20, -6.84100214e-20, -4.79207222e-20,  2.32642134e-20,
        7.44728278e-20,  9.11250667e-20,  1.50278408e-19,  1.95958345e-19,
        2.17350653e-19,  2.30164378e-19,  2.52524865e-19,  2.70036084e-19,
        3.86452222e-19,  5.26302151e-19,  5.80747013e-19,  6.38237346e-19,
        8.25159458e-19,  5.79400428e-03,  8.46348361e-03,  1.29246399e-02,
        1.62809631e-02,  3.73797207e-02,  3.91053855e-02,  5.14913089e-02,
        6.69183328e-02,  9.05818584e-02,  9.52452482e-02,  9.75293625e-02,
        1.19004857e-01,  1.44161501e-01,  2.15119334e-01])

In [143]:
np.max(po_randomsearch.best_estimator_.top_returns_.to_numpy()[:,0])

1.3483523457054825

Вот норм!!!

In [171]:
np.max(returns_test[po_randomsearch.best_estimator_.top_returns_.columns].loc['2021-01-26'])

0.9269831078186896

In [175]:
rets = returns_test[po_randomsearch.best_estimator_.top_returns_.columns].loc['2021-01-26'].to_numpy()
ws = po_randomsearch.best_estimator_.w_
ans = 0

for w, ret in zip(ws, rets):
    ans += w * (ret + 1)
ans

1.0240599042316625

In [177]:
rets = returns_test[po_randomsearch.best_estimator_.top_returns_.columns].loc['2021-01-27'].to_numpy()
ws = po_randomsearch.best_estimator_.w_
ans = 0

for w, ret in zip(ws, rets):
    ans += w * (ret + 1)
ans

0.9716855844674731

In [144]:
po_randomsearch.best_estimator_.top_returns_.to_numpy()[:,0]

array([-3.53724404e-02,  4.15616204e-02, -1.17370542e-02,  3.08744754e-02,
       -1.84310940e-02, -4.22550122e-02,  2.69566637e-02, -4.77409373e-03,
       -5.03555978e-02, -2.77786953e-02,  4.15518926e-02,  2.49350774e-02,
        0.00000000e+00, -1.94668174e-02, -4.96358271e-03, -1.74581195e-02,
        3.04626450e-02,  9.85050621e-03, -2.19535654e-02,  3.49187295e-02,
        6.74698725e-02,  4.51383945e-02, -4.31936437e-02, -6.09423040e-02,
        4.08727033e-02,  4.61873330e-03,  3.90728092e-02,  2.65464574e-02,
        2.37102049e-02, -2.52636471e-02,  3.88841863e-02, -1.87113282e-02,
       -2.33063896e-02,  9.11045162e-02, -3.18132509e-02,  2.25905288e-02,
        2.61022451e-02,  2.73925512e-02,  2.66640769e-02,  2.39333451e-01,
        1.45185120e-01,  7.84238964e-03,  1.42698643e-02, -2.17350787e-02,
        6.53602183e-03, -4.54461512e-02, -1.52367487e-01, -2.24726071e-02,
        1.34664343e-01,  2.60458421e-02,  2.24297601e-01,  5.98968777e-02,
        2.93427054e-02, -

In [165]:
po_randomsearch.best_estimator_.top_returns_

Unnamed: 0_level_0,TSLA,MRNA,ENPH,PENN,GME,ETSY,CARR,PBI,BIG,RRC,...,SWN,AMZN,DXCM,CZR,NOW,TWTR,ROL,CTLT,MKTX,FOSL
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-01-14,0.024887,-0.012831,-0.019164,0.016963,-0.132589,0.024813,0.000000,0.015067,-0.006668,0.010988,...,0.000000,-0.011560,-0.003307,-0.006234,-0.007735,0.003977,0.002005,0.014468,-0.005398,0.093921
2020-01-15,-0.036103,0.011998,-0.001325,-0.023724,-0.021231,-0.005900,0.000000,0.029960,-0.007031,-0.028258,...,-0.039020,-0.003970,-0.003063,-0.002543,0.011179,0.012494,0.005438,0.008788,0.004064,-0.015149
2020-01-16,-0.009662,0.038038,0.022219,-0.002658,0.002169,0.041755,0.000000,0.016991,-0.029312,-0.024607,...,-0.015228,0.008551,0.000299,0.029409,0.016893,0.028893,0.011336,0.010356,0.005188,0.038454
2020-01-17,-0.005822,-0.018559,0.021729,-0.010276,0.028139,0.010608,0.000000,0.016707,-0.016715,-0.091739,...,-0.041231,-0.007039,-0.015441,0.013048,0.008065,0.000877,0.006162,-0.009597,-0.002567,0.026548
2020-01-21,0.071878,0.015517,-0.007618,-0.031543,-0.033691,-0.014388,0.000000,-0.060995,-0.028593,-0.083336,...,-0.075279,0.014630,0.003596,-0.029994,0.001472,-0.003798,0.000850,0.010514,0.004417,0.003006
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-10-21,0.001659,-0.041229,-0.125807,-0.023509,0.017315,-0.081369,0.003590,0.002784,-0.019377,-0.003452,...,0.003496,-0.009968,-0.011696,-0.037083,-0.015266,0.083930,-0.009054,-0.008051,0.014711,0.010508
2020-10-22,0.007454,0.036122,0.019012,0.009028,0.057451,0.022032,0.016185,0.010826,-0.019562,0.084300,...,0.090596,-0.002681,0.020513,0.032032,-0.008468,0.000796,0.004069,0.010714,0.016646,0.091841
2020-10-23,-0.012120,-0.004377,-0.001257,-0.014128,0.006037,0.014014,0.030692,0.024164,-0.025441,-0.031945,...,0.003195,0.008816,0.038420,0.010346,0.015913,0.003182,0.012616,0.006638,-0.014911,-0.034922
2020-10-26,-0.000832,-0.004112,0.003290,-0.033755,-0.103335,0.013672,-0.034925,-0.009116,0.009239,-0.038498,...,-0.041404,0.000824,0.017029,-0.053583,-0.024123,-0.028549,-0.011628,-0.028820,-0.020753,0.026316


In [176]:
names = ['Без оптимизаций', 'PCA preprocessing', 'PCA', 'PCA + PCA preproc', 'No window']
optimizers = [po_randomsearch, po_randomsearch_PCA_preproc, po_randomsearch_PCA, 
              po_randomsearch_PCA_PCA_preproc, po_randomsearch_no_window]
plot_results(names=names, optimizers=optimizers, df_train=returns_train, df_test=returns_test, 
             title='Return лучших моделей из каждого метода')

0it [00:00, ?it/s]

                MPWR       CCI       ESS         D       MRK       LYB  \
date                                                                     
2017-12-18  0.020630 -0.009215  0.000324 -0.025445 -0.000351  0.033289   
2017-12-19 -0.006617 -0.009846 -0.028217 -0.017041  0.000186 -0.005571   
2017-12-20  0.008421 -0.006079 -0.010380 -0.009222 -0.002151  0.008905   
2017-12-21 -0.010960  0.002687 -0.011362  0.000753  0.008745  0.003273   
2017-12-22 -0.004750  0.003421  0.007659 -0.002604 -0.004232 -0.002723   
...              ...       ...       ...       ...       ...       ...   
2018-06-05  0.008017 -0.011407 -0.004010  0.002072 -0.010164  0.018078   
2018-06-06  0.010532 -0.002524  0.007013 -0.016252  0.008803  0.022516   
2018-06-07 -0.027846 -0.003985 -0.007749  0.010362  0.000968  0.004240   
2018-06-08  0.006673  0.005463  0.001080  0.003375  0.009521  0.004637   
2018-06-11  0.022944  0.002819 -0.007094  0.009749 -0.003997 -0.004743   

                 MLM       CAG       

Name: ENPH, Length: 120, dtype: float64
                MPWR       CCI       ESS         D       MRK       LYB  \
date                                                                     
2020-05-08  0.040387  0.019459 -0.009677  0.003843  0.010577  0.050623   
2020-05-11 -0.003647  0.013255 -0.015119  0.010707  0.019760 -0.006908   
2020-05-12 -0.004750 -0.034858 -0.053603 -0.003656 -0.009240 -0.003563   
2020-05-13 -0.046390 -0.006679  0.003830  0.000758  0.009073 -0.058526   
2020-05-14  0.005908 -0.007505 -0.025002  0.007850  0.027730  0.013313   
...              ...       ...       ...       ...       ...       ...   
2020-10-21 -0.015002 -0.004071  0.000399 -0.010488 -0.002549 -0.015933   
2020-10-22  0.011727 -0.046032  0.012958  0.011852  0.014606  0.006398   
2020-10-23  0.021027  0.014207  0.003507 -0.000493  0.007828 -0.002329   
2020-10-26 -0.014002  0.009149 -0.020714  0.007142 -0.012402 -0.019117   
2020-10-27  0.009248  0.004316 -0.018494  0.009177 -0.010791 -0.027313  

1it [00:00,  1.30it/s]

                MPWR       CCI       ESS         D       MRK       LYB  \
date                                                                     
2021-04-22 -0.014278  0.007218 -0.008921 -0.009731 -0.016030 -0.056593   
2021-04-23  0.023140  0.016124  0.010120 -0.000508 -0.001029  0.023624   
2021-04-26  0.022948 -0.005771 -0.001548 -0.008816 -0.004498  0.005077   
2021-04-27 -0.011632 -0.007309  0.000539 -0.004958 -0.002191 -0.000279   
2021-04-28 -0.019426  0.002436 -0.033900 -0.008295 -0.003492  0.013819   
2021-04-29 -0.016980  0.014958 -0.002789  0.017766 -0.044237  0.010624   
2021-04-30 -0.026216  0.005855  0.015202  0.011138  0.011125 -0.034893   

                 MLM       CAG       XEL       AMG  ...        FL      ZBRA  \
date                                                ...                       
2021-04-22 -0.009574 -0.009838 -0.002808 -0.015368  ... -0.001737 -0.008181   
2021-04-23  0.006482 -0.014681 -0.000566  0.016056  ...  0.023926  0.020643   
2021-04-26  0.018

Name: PAYC, Length: 120, dtype: float64
                MPWR       CCI       ESS         D       MRK       LYB  \
date                                                                     
2019-11-14 -0.000873  0.017501  0.003449  0.008910 -0.003180  0.006511   
2019-11-15  0.014799  0.007557  0.015117  0.006740  0.004144  0.001251   
2019-11-18 -0.008432  0.009362  0.005954  0.005726 -0.009075 -0.011254   
2019-11-19  0.003908 -0.011917  0.004954 -0.001937  0.006189 -0.019595   
2019-11-20 -0.016137  0.019132 -0.011723  0.005822  0.007325 -0.008916   
...              ...       ...       ...       ...       ...       ...   
2020-05-01 -0.054571 -0.018942 -0.029866 -0.019715 -0.021047 -0.048132   
2020-05-04  0.004072  0.008441  0.023146  0.002512 -0.010433 -0.014692   
2020-05-05  0.037788  0.006023  0.009204  0.028245  0.014964  0.009207   
2020-05-06  0.019345 -0.023257 -0.012801 -0.002707 -0.011408 -0.049943   
2020-05-07  0.011559 -0.001938  0.031783  0.004767 -0.019704  0.031086  

2it [00:01,  1.15it/s]


                MPWR       CCI       ESS         D       MRK       LYB  \
date                                                                     
2017-12-18  0.020630 -0.009215  0.000324 -0.025445 -0.000351  0.033289   
2017-12-19 -0.006617 -0.009846 -0.028217 -0.017041  0.000186 -0.005571   
2017-12-20  0.008421 -0.006079 -0.010380 -0.009222 -0.002151  0.008905   
2017-12-21 -0.010960  0.002687 -0.011362  0.000753  0.008745  0.003273   
2017-12-22 -0.004750  0.003421  0.007659 -0.002604 -0.004232 -0.002723   
...              ...       ...       ...       ...       ...       ...   
2018-11-23  0.006065 -0.013803 -0.008503 -0.003422 -0.001610 -0.016657   
2018-11-26  0.021235  0.003989 -0.000467  0.012178  0.010315  0.013870   
2018-11-27 -0.026386  0.019278  0.007410  0.001627  0.011922 -0.009991   
2018-11-28  0.043436  0.014849  0.007432 -0.010661  0.011664  0.009207   
2018-11-29  0.003100  0.007856 -0.003460  0.005863  0.008809  0.010864   

                 MLM       CAG      

3it [00:02,  1.22it/s]

                MPWR       CCI       ESS         D       MRK       LYB  \
date                                                                     
2020-10-28 -0.050393 -0.023408 -0.046179 -0.032375 -0.023210 -0.055075   
2020-10-29  0.048958  0.005483  0.060433  0.014407 -0.003802  0.041546   
2020-10-30  0.003046 -0.009575  0.035476 -0.007526 -0.008961 -0.052075   
2020-11-02 -0.013581  0.021007  0.060644  0.016429  0.019939  0.050562   
2020-11-03 -0.002760  0.005451  0.032405  0.015299  0.002741  0.013216   
...              ...       ...       ...       ...       ...       ...   
2021-04-26  0.022948 -0.005771 -0.001548 -0.008816 -0.004498  0.005077   
2021-04-27 -0.011632 -0.007309  0.000539 -0.004958 -0.002191 -0.000279   
2021-04-28 -0.019426  0.002436 -0.033900 -0.008295 -0.003492  0.013819   
2021-04-29 -0.016980  0.014958 -0.002789  0.017766 -0.044237  0.010624   
2021-04-30 -0.026216  0.005855  0.015202  0.011138  0.011125 -0.034893   

                 MLM       CAG       

4it [00:02,  1.77it/s]

date
2017-12-18    1.018963
2017-12-19    1.015997
2017-12-20    1.016443
2017-12-21    1.011518
2017-12-22    1.009403
                ...   
2021-04-26    2.820613
2021-04-27    2.818891
2021-04-28    2.809865
2021-04-29    2.789299
2021-04-30    2.739818
Name: NVDA, Length: 847, dtype: float64
                MPWR       CCI       ESS         D       MRK       LYB  \
date                                                                     
2017-12-18  0.020630 -0.009215  0.000324 -0.025445 -0.000351  0.033289   
2017-12-19 -0.006617 -0.009846 -0.028217 -0.017041  0.000186 -0.005571   
2017-12-20  0.008421 -0.006079 -0.010380 -0.009222 -0.002151  0.008905   
2017-12-21 -0.010960  0.002687 -0.011362  0.000753  0.008745  0.003273   
2017-12-22 -0.004750  0.003421  0.007659 -0.002604 -0.004232 -0.002723   
...              ...       ...       ...       ...       ...       ...   
2021-04-26  0.022948 -0.005771 -0.001548 -0.008816 -0.004498  0.005077   
2021-04-27 -0.011632 -0.007309  0.00

5it [00:03,  1.65it/s]

date
2017-12-18    1.010547
2017-12-19    1.006282
2017-12-20    1.006523
2017-12-21    1.002073
2017-12-22    1.002516
                ...   
2021-04-26    2.745557
2021-04-27    2.722191
2021-04-28    2.714073
2021-04-29    2.676291
2021-04-30    2.690691
Name: NFLX, Length: 847, dtype: float64





In [152]:
np.max(returns_test.loc['2021-01-26'])

0.9269831078186896

In [161]:
ws = po_randomsearch.best_estimator_.w_
rets = returns_test.loc['2021-01-27'][po_randomsearch.best_estimator_.top_returns_.columns]
ans = 0
for w, ret in zip(ws, rets):
    ans += w * (1 + ret)

ans

1.0368521685176713

In [153]:
returns_test.loc['2021-01-26'][po_randomsearch.best_estimator_.top_returns_.columns]

TSLA    0.002600
MRNA    0.033536
ENPH   -0.075905
PENN   -0.043541
GME     0.926983
ETSY   -0.021071
CARR   -0.040376
PBI     0.810196
BIG     0.059345
RRC    -0.028981
BBBY    0.201789
EQT    -0.030361
BBWI   -0.021179
SIG     0.094476
NVDA   -0.015964
GNRC   -0.015648
MPWR   -0.020626
FDX    -0.015027
WST    -0.001930
PYPL   -0.027248
ALGN   -0.008100
AMD     0.006161
CNX    -0.049771
ABMD   -0.021335
POOL   -0.014256
SWN    -0.032256
AMZN    0.009755
DXCM   -0.039773
CZR    -0.051538
NOW    -0.010077
TWTR    0.038256
ROL    -0.002980
CTLT   -0.020569
MKTX    0.000903
FOSL    0.077524
Name: 2021-01-26 00:00:00, dtype: float64

In [72]:
returns_test.loc['2021-01-27'][po.top_returns_.columns]

NKTR    0.029609
ENPH   -0.044404
ALGN   -0.047572
FSLR   -0.017961
ETSY   -0.054005
PENN   -0.018094
NVDA   -0.038519
ANET   -0.004268
TTWO   -0.022023
CZR    -0.100721
HFC     0.030003
IPGP   -0.010449
PYPL   -0.045893
CPRI   -0.036280
NVR    -0.002358
MU     -0.055591
NRG    -0.031160
VRTX   -0.052134
WYNN   -0.048250
MTCH   -0.049636
BA     -0.039736
LRCX   -0.068083
CBOE   -0.040648
ANF    -0.061501
FMC    -0.025200
CAT    -0.035148
EL     -0.038560
ABBV   -0.055239
IR     -0.063771
ABMD   -0.036057
DHI    -0.040235
BBY    -0.009853
NOW    -0.029975
LYV    -0.002620
MAR    -0.024656
Name: 2021-01-27 00:00:00, dtype: float64