In [1]:
import numpy as np
import pandas as pd

import datetime

import yfinance as yf

from sklearn.cluster import KMeans
from sklearn.preprocessing import MinMaxScaler
import sklearn.metrics as metrics

import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

import scipy.optimize as opt

import tensorflow as tf
from tensorflow_addons.metrics import RSquare
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
from keras.callbacks import EarlyStopping
from keras.regularizers import L1L2

from IPython.display import display

2024-08-04 00:01:47.666414: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2024-08-04 00:01:47.871279: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-08-04 00:01:47.871450: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-08-04 00:01:47.905744: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-08-04 00:01:47.985152: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2024-08-04 00:01:47.986545: I tensorflow/core/platform/cpu_feature_guard.cc:1

In [2]:
pd.options.mode.chained_assignment = None
pd.options.display.max_columns = 15
pd.options.display.max_rows = 60
pd.options.display.max_colwidth = 50
pd.DataFrame.iteritems = pd.DataFrame.items

In [3]:
# Pandas: числа в процентах
pd.options.display.float_format = '{:.2%}'.format

In [4]:
# Pandas: числа в десятичной записи
pd.reset_option('^display.', silent=True)

# Чтение и загрузка данных

In [5]:
def load_history(tickers, start, end=datetime.date.today()):
    info_table = pd.DataFrame()
    for ticker in tickers:
        ticker_ = yf.Ticker(ticker)
        info = pd.DataFrame.from_dict(ticker_.info, orient="index")
        info.reset_index(inplace=True)
        info.columns = ['Ind', 'Value']
        info['Ticker'] = ticker
    info_table = pd.concat([info_table, info])
    history = yf.download(tickers, start, end)
    history = history[['Adj Close', 'Volume', 'High', 'Low']]
    history = history.melt(ignore_index=False).reset_index()
    history = history[['Ticker', 'Date', 'Price', 'value']]
    history.columns = ['Ticker', 'Date', 'Ind', 'Value']
    return history, info_table

In [6]:
def load_sp500_index_structure(wiki_link='https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'):
    sp500_index_structure = pd.read_html(wiki_link)[0]
    sp500_index_structure.Symbol = sp500_index_structure.Symbol.apply(lambda x: x.replace('.', '-')).to_list()
    return sp500_index_structure

In [7]:
def load_sp500_values_returns(start, end=datetime.date.today()):
    values = yf.download('^GSPC', start=start, end=end, interval='1d')['Adj Close']
    return pd.DataFrame(values), values.pct_change()[1:]

In [8]:
START = '2013-01-01'
END = '2019-12-31'

In [12]:
sp500_index_structure = load_sp500_index_structure()
tickers = list(sp500_index_structure.Symbol)

In [13]:
sp500_index_structure.to_csv('SP500IndexStructure.csv', index=False)

In [14]:
history, info = load_history(tickers, START, END)
history.to_csv('history.csv', index=False)

[*********************100%%**********************]  503 of 503 completed

10 Failed downloads:
['SW', 'CARR', 'VLTO', 'GEV', 'GEHC', 'ABNB', 'SOLV', 'CEG', 'OTIS', 'KVUE']: Exception("%ticker%: Data doesn't exist for startDate = 1357016400, endDate = 1577768400")


In [15]:
history = history.dropna()

In [16]:
history.head()

Unnamed: 0,Ticker,Date,Ind,Value
0,A,2013-01-02,Adj Close,27.142788
1,A,2013-01-03,Adj Close,27.240002
2,A,2013-01-04,Adj Close,27.777933
3,A,2013-01-07,Adj Close,27.577028
4,A,2013-01-08,Adj Close,27.356667


In [17]:
sp500_index_values_all, sp500_index_retuns_all = load_sp500_values_returns(START, END)

[*********************100%%**********************]  1 of 1 completed


In [18]:
sp500_index_values_all.head()

Unnamed: 0_level_0,Adj Close
Date,Unnamed: 1_level_1
2013-01-02,1462.420044
2013-01-03,1459.369995
2013-01-04,1466.469971
2013-01-07,1461.890015
2013-01-08,1457.150024


In [19]:
quotes = history[history.Ind == 'Adj Close']
quotes = quotes.pivot_table(values='Value', index='Date', columns='Ticker', aggfunc='first')
quotes = quotes.dropna(axis=1, how='any')

In [20]:
tickers = list(quotes.columns)

In [21]:
quotes.head()

Ticker,A,AAL,AAPL,ABBV,ABT,ACGL,ACN,ADBE,ADI,ADM,...,WST,WTW,WY,WYNN,XEL,XOM,XYL,YUM,ZBH,ZBRA
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2013-01-02,27.142788,13.179523,16.725039,21.801891,25.612389,14.793333,55.955956,38.34,33.990078,21.04842,...,25.900219,73.861305,18.716156,95.058746,18.827536,55.200218,23.618406,38.742626,59.842171,40.959999
2013-01-03,27.240002,12.877849,16.513931,21.621872,26.587339,14.75,55.75338,37.75,33.441605,20.879562,...,25.994001,74.615883,18.716156,96.08123,18.77253,55.100643,23.541111,38.984898,60.69957,41.0
2013-01-04,27.777933,13.886581,16.053944,21.348726,26.427521,14.876667,56.061295,38.130001,32.846775,21.452206,...,26.083101,74.594315,18.870253,97.24057,18.841295,55.355747,23.506752,39.411793,61.008957,40.669998
2013-01-07,27.577028,13.990285,15.9595,21.392176,26.643282,14.73,55.818203,37.939999,32.947212,20.563868,...,25.92366,74.378723,18.928038,97.570648,18.641872,54.714836,23.197571,39.163734,61.159245,40.900002
2013-01-08,27.356667,14.291959,16.002453,20.926596,26.651274,14.75,56.142296,38.139999,32.607292,20.806143,...,25.857996,74.012222,19.056448,97.490173,18.676266,55.057095,22.91415,37.519661,61.247597,40.93


In [22]:
comp_desc = sp500_index_structure[['Symbol', 'Security', 'GICS Sector']]
comp_desc.columns = ['TICKER', 'SECURITY', 'SECTOR']
comp_desc.set_index('TICKER', inplace = True)
comp_desc = comp_desc[comp_desc.index.isin(tickers)]
comp_desc = comp_desc.sort_values(by='TICKER')

In [23]:
comp_desc

Unnamed: 0_level_0,SECURITY,SECTOR
TICKER,Unnamed: 1_level_1,Unnamed: 2_level_1
A,Agilent Technologies,Health Care
AAL,American Airlines Group,Industrials
AAPL,Apple Inc.,Information Technology
ABBV,AbbVie,Health Care
ABT,Abbott,Health Care
...,...,...
XOM,ExxonMobil,Energy
XYL,Xylem Inc.,Industrials
YUM,Yum! Brands,Consumer Discretionary
ZBH,Zimmer Biomet,Health Care


In [24]:
returns = quotes.pct_change()[1:]

In [25]:
returns.head()

Ticker,A,AAL,AAPL,ABBV,ABT,ACGL,ACN,ADBE,ADI,ADM,...,WST,WTW,WY,WYNN,XEL,XOM,XYL,YUM,ZBH,ZBRA
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2013-01-03,0.003582,-0.02289,-0.012622,-0.008257,0.038066,-0.002929,-0.00362,-0.015389,-0.016136,-0.008022,...,0.003621,0.010216,0.0,0.010756,-0.002922,-0.001804,-0.003273,0.006253,0.014328,0.000977
2013-01-04,0.019748,0.078331,-0.027855,-0.012633,-0.006011,0.008588,0.005523,0.010066,-0.017787,0.027426,...,0.003428,-0.000289,0.008233,0.012066,0.003663,0.00463,-0.00146,0.01095,0.005097,-0.008049
2013-01-07,-0.007233,0.007468,-0.005883,0.002035,0.008164,-0.009859,-0.004336,-0.004983,0.003058,-0.04141,...,-0.006113,-0.00289,0.003062,0.003394,-0.010584,-0.011578,-0.013153,-0.006294,0.002463,0.005655
2013-01-08,-0.007991,0.021563,0.002691,-0.021764,0.0003,0.001358,0.005806,0.005272,-0.010317,0.011782,...,-0.002533,-0.004927,0.006784,-0.000825,0.001845,0.006255,-0.012218,-0.041979,0.001445,0.000733
2013-01-09,0.027008,-0.001979,-0.015628,0.005636,0.006597,0.003616,0.007072,0.013634,-0.002605,0.00494,...,0.003083,-0.000291,0.022911,0.003551,0.001472,-0.003843,0.015742,0.001777,0.018474,0.01075


Рассматриваем период с 2013-01-01 по 2019-11-27

In [26]:
recent_returns = returns.loc['2018-11-28':'2019-11-27']
prev_returns = returns.loc['2013-01-01':'2019-11-27']

In [27]:
sp500_index_value = sp500_index_values_all[sp500_index_values_all.index <= '2019-11-27']
sp500_index_value = sp500_index_value[sp500_index_value.index >= '2018-11-27']

In [28]:
sp500_index_returns = sp500_index_value.pct_change()[1:]

In [29]:
sp500_index_returns

Unnamed: 0_level_0,Adj Close
Date,Unnamed: 1_level_1
2018-11-28,0.022974
2018-11-29,-0.002183
2018-11-30,0.008171
2018-12-03,0.010941
2018-12-04,-0.032365
...,...
2019-11-21,-0.001583
2019-11-22,0.002175
2019-11-25,0.007507
2019-11-26,0.002196


# Расчет параметров кластеризации

In [30]:
def calculate_beta(ind_ret, ret):
    covar = np.cov(ret, ind_ret)
    beta = covar[0][1] / covar[1][1]
    return beta.squeeze()

In [31]:
def calculate_downside_sigma(returns):
    T = len(returns)
    return np.sqrt(((returns[returns < 0]) ** 2).sum() / T)

In [32]:
def calculate_measures(data, T, recent_returns, returns, index_returns):
    data['MEAN_RETURN'] = recent_returns.mean() * T
    data['STD'] = recent_returns.std() * np.sqrt(T)
    data['SEMI_DEVIATION'] = recent_returns.apply(calculate_downside_sigma, axis=0) * np.sqrt(T)
    data['VAR'] = abs(returns.quantile(0.05)) * T
    data['CVAR'] = abs(returns[returns < -data['VAR']/T].mean()) * T
    data['BETA'] = recent_returns.apply(calculate_beta, args=(index_returns,), axis=0)
    data['SHARPE_RATIO'] = data['MEAN_RETURN'] / data['STD']
    data['TREYNOR_RATIO'] = data['MEAN_RETURN'] / data['BETA']
    data['SORTINO_RATIO'] = data['MEAN_RETURN'] / data['SEMI_DEVIATION']
    return data

In [33]:
comp_desc = calculate_measures(comp_desc, 21, recent_returns, returns, sp500_index_returns.values.flatten())

In [34]:
comp_desc.head()

Unnamed: 0_level_0,SECURITY,SECTOR,MEAN_RETURN,STD,SEMI_DEVIATION,VAR,CVAR,BETA,SHARPE_RATIO,TREYNOR_RATIO,SORTINO_RATIO
TICKER,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
A,Agilent Technologies,Health Care,0.016612,0.075236,0.056296,0.47771,0.704138,0.394366,0.220799,0.042124,0.295086
AAL,American Airlines Group,Industrials,-0.015869,0.11294,0.083665,0.763176,1.093786,0.232852,-0.140512,-0.068153,-0.189678
AAPL,Apple Inc.,Information Technology,0.040572,0.083188,0.057026,0.514674,0.764497,0.410359,0.487715,0.098869,0.711466
ABBV,AbbVie,Health Care,0.00877,0.083068,0.066192,0.551972,0.84249,0.250555,0.105581,0.035004,0.1325
ABT,Abbott,Health Care,0.020355,0.063963,0.04476,0.412937,0.620907,0.497918,0.318233,0.040881,0.45476


# Кластеризация

In [35]:
def kmeans(data, n_clusters, max_iter=1000, n_init=10, random_state=0):
    scaler = MinMaxScaler()
    if 'CLUSTER' in data.columns:
        data = data.drop('CLUSTER', axis=1)
    kmeans = KMeans(n_clusters=n_clusters, max_iter=max_iter, n_init=n_init, random_state=random_state)
    kmeans.fit(scaler.fit_transform(data[data.columns]))
    data['CLUSTER'] = kmeans.labels_.astype(int)
    data = data.sort_values(by='CLUSTER')
    data.CLUSTER = data.CLUSTER.astype(str)
    return data

In [36]:
def plot_kmeans(data, dimensions, size_col, title, height=600, width=600):
    fig = px.scatter_matrix(data,
                            dimensions=dimensions,
                            color='CLUSTER',
                            title=title,
                            color_discrete_sequence=px.colors.qualitative.Dark24,
                            size=data[size_col]+abs(min(data[size_col]))+0.1)
    fig.update_traces(diagonal_visible=False, showupperhalf=False)
    fig.update_layout(height=height, width=width)
    fig.show()

### Кластеризация отраслей

In [37]:
sect_desc = comp_desc.drop('SECURITY', axis=1).groupby(['SECTOR']).agg(np.mean)

In [38]:
sect_desc

Unnamed: 0_level_0,MEAN_RETURN,STD,SEMI_DEVIATION,VAR,CVAR,BETA,SHARPE_RATIO,TREYNOR_RATIO,SORTINO_RATIO
SECTOR,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Communication Services,0.019395,0.079309,0.052271,0.507904,0.766997,0.306009,0.239107,0.089536,0.390724
Consumer Discretionary,0.018184,0.084373,0.058618,0.521236,0.813693,0.288944,0.241055,0.065519,0.374537
Consumer Staples,0.013717,0.067471,0.047293,0.389394,0.62413,0.275623,0.222179,0.056159,0.345167
Energy,-0.005183,0.095348,0.067697,0.656365,0.965205,0.276939,-0.022679,-0.048135,-0.022667
Financials,0.019923,0.065541,0.04565,0.431461,0.641647,0.438121,0.310519,0.051102,0.459156
Health Care,0.016683,0.082118,0.057878,0.483397,0.751862,0.305823,0.20881,0.066049,0.311867
Industrials,0.020418,0.075515,0.051866,0.465484,0.707126,0.389741,0.282234,0.060757,0.432088
Information Technology,0.03062,0.094033,0.063421,0.577551,0.898133,0.323511,0.320855,0.160582,0.492256
Materials,0.014348,0.079421,0.054555,0.535138,0.789419,0.307944,0.222939,0.008075,0.34304
Real Estate,0.018472,0.057205,0.04048,0.42846,0.62224,0.300065,0.322775,0.069098,0.466068


In [39]:
sect_clust = kmeans(sect_desc, 5)

In [40]:
sect_clust

Unnamed: 0_level_0,MEAN_RETURN,STD,SEMI_DEVIATION,VAR,CVAR,BETA,SHARPE_RATIO,TREYNOR_RATIO,SORTINO_RATIO,CLUSTER
SECTOR,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Financials,0.019923,0.065541,0.04565,0.431461,0.641647,0.438121,0.310519,0.051102,0.459156,0
Industrials,0.020418,0.075515,0.051866,0.465484,0.707126,0.389741,0.282234,0.060757,0.432088,0
Energy,-0.005183,0.095348,0.067697,0.656365,0.965205,0.276939,-0.022679,-0.048135,-0.022667,1
Information Technology,0.03062,0.094033,0.063421,0.577551,0.898133,0.323511,0.320855,0.160582,0.492256,2
Consumer Staples,0.013717,0.067471,0.047293,0.389394,0.62413,0.275623,0.222179,0.056159,0.345167,3
Real Estate,0.018472,0.057205,0.04048,0.42846,0.62224,0.300065,0.322775,0.069098,0.466068,3
Utilities,0.013791,0.060207,0.042776,0.37991,0.584394,0.248489,0.311439,-0.029544,0.443664,3
Communication Services,0.019395,0.079309,0.052271,0.507904,0.766997,0.306009,0.239107,0.089536,0.390724,4
Consumer Discretionary,0.018184,0.084373,0.058618,0.521236,0.813693,0.288944,0.241055,0.065519,0.374537,4
Health Care,0.016683,0.082118,0.057878,0.483397,0.751862,0.305823,0.20881,0.066049,0.311867,4


In [41]:
plot_kmeans(sect_clust, ['CVAR', 'MEAN_RETURN', 'SEMI_DEVIATION', 'TREYNOR_RATIO'], 'SHARPE_RATIO', 'Кластеризация секторов')

In [42]:
sectors_choice = sect_clust[sect_clust.CLUSTER.isin(['2', '3'])]

In [43]:
sectors_choice

Unnamed: 0_level_0,MEAN_RETURN,STD,SEMI_DEVIATION,VAR,CVAR,BETA,SHARPE_RATIO,TREYNOR_RATIO,SORTINO_RATIO,CLUSTER
SECTOR,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Information Technology,0.03062,0.094033,0.063421,0.577551,0.898133,0.323511,0.320855,0.160582,0.492256,2
Consumer Staples,0.013717,0.067471,0.047293,0.389394,0.62413,0.275623,0.222179,0.056159,0.345167,3
Real Estate,0.018472,0.057205,0.04048,0.42846,0.62224,0.300065,0.322775,0.069098,0.466068,3
Utilities,0.013791,0.060207,0.042776,0.37991,0.584394,0.248489,0.311439,-0.029544,0.443664,3


### Кластеризация активов

In [44]:
def companies_clusterization(data, sector, n_clust, dimensions):
    clusters = kmeans(data[data['SECTOR'] == sector].drop(['SECURITY', 'SECTOR'], axis=1), n_clust)
    plot_kmeans(clusters, dimensions, 'SHARPE_RATIO', sector)
    return clusters

In [45]:
ch_tickers = {}

In [46]:
sector = 'Consumer Staples'
n_clust = 10
clusters = companies_clusterization(comp_desc, sector, n_clust, ['CVAR', 'MEAN_RETURN', 'SEMI_DEVIATION', 'TREYNOR_RATIO'])
ch_tickers[sector] = clusters[clusters.CLUSTER.isin(['3', '8'])]

In [47]:
sector = 'Real Estate'
n_clust = 9
clusters = companies_clusterization(comp_desc, sector, n_clust, ['CVAR', 'MEAN_RETURN', 'SEMI_DEVIATION', 'TREYNOR_RATIO'])
ch_tickers[sector] = clusters[clusters.CLUSTER.isin(['0'])]

In [48]:
sector = 'Utilities'
n_clust = 7
clusters = companies_clusterization(comp_desc, sector, n_clust, ['CVAR', 'MEAN_RETURN', 'SEMI_DEVIATION', 'TREYNOR_RATIO'])
ch_tickers[sector] = clusters[clusters.CLUSTER.isin(['3'])]

In [49]:
sector = 'Information Technology'
n_clust = 10
clusters = companies_clusterization(comp_desc, sector, n_clust, ['CVAR', 'MEAN_RETURN', 'SEMI_DEVIATION', 'TREYNOR_RATIO'])
ch_tickers[sector] = clusters[clusters.CLUSTER.isin(['1'])]

In [50]:
ch_tickers_list = []
for item in ch_tickers.values():
    ch_tickers_list += list(item.index)
ch_comp_desc = comp_desc[comp_desc.index.isin(ch_tickers_list)]

In [51]:
ch_comp_desc

Unnamed: 0_level_0,SECURITY,SECTOR,MEAN_RETURN,STD,SEMI_DEVIATION,VAR,CVAR,BETA,SHARPE_RATIO,TREYNOR_RATIO,SORTINO_RATIO
TICKER,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
ARE,Alexandria Real Estate Equities,Real Estate,0.027195,0.051244,0.035715,0.392445,0.564157,0.420039,0.530695,0.064744,0.761437
AVB,AvalonBay Communities,Real Estate,0.015054,0.046477,0.034693,0.376469,0.528247,0.392595,0.323904,0.038345,0.433926
CCI,Crown Castle,Real Estate,0.020388,0.05463,0.037618,0.363587,0.553339,0.226165,0.3732,0.090146,0.54197
CPT,Camden Property Trust,Real Estate,0.018737,0.046601,0.034499,0.378886,0.544645,0.353491,0.402084,0.053007,0.543135
EPAM,EPAM Systems,Information Technology,0.049419,0.092788,0.066067,0.611301,0.965306,0.308676,0.532604,0.160101,0.748021
EQR,Equity Residential,Real Estate,0.020527,0.045777,0.033989,0.388208,0.540787,0.358028,0.448404,0.057332,0.603924
ESS,Essex Property Trust,Real Estate,0.019374,0.0474,0.034565,0.396528,0.54203,0.354658,0.408733,0.054627,0.560501
ETR,Entergy,Utilities,0.029308,0.043975,0.028606,0.366531,0.527239,0.218476,0.666475,0.13415,1.024551
FICO,Fair Isaac,Information Technology,0.065429,0.093606,0.060995,0.484328,0.733281,0.274291,0.698978,0.238537,1.072685
HSY,Hershey's,Consumer Staples,0.030732,0.049458,0.031959,0.335626,0.521906,0.194258,0.621379,0.158201,0.961608


In [52]:
cumret = (1 + returns[returns.index <= '2019-11-27']).cumprod() - 1

In [53]:
cumret = (1 + returns[returns.index <= '2019-11-27']).cumprod() - 1
fig = go.Figure()
for ticker in ch_comp_desc.index:
    fig.add_trace(go.Scatter(x=cumret.index, y=cumret[ticker],
                             mode='lines',
                             name=ticker))
fig.update_layout(title='Кумулятивная доходность акций',
                  xaxis_title="Время",
    yaxis_title="Накопленная доходность",
            legend_title="Акции",
    height=800)
fig.show()


# Обучение моделей

In [54]:
class TickerModel:
    def __init__(self, ticker, data):
        self.ticker = ticker
        self.data = data
        
        
    def preprocessing(self, input_steps, output_steps, val_share, test=None):
        self.input_steps = input_steps
        self.output_steps = output_steps
        self.val_share = val_share
        self.train_point = int(len(self.data) * (1 - self.val_share))
        self.val_point = - int(len(self.data) * self.val_share)
        
        self.prep_data = np.log(self.data)
        self.scaler = MinMaxScaler()
        self.scaler.fit(self.prep_data.loc[:self.train_point])
        self.prep_data = self.scaler.transform(self.prep_data)
        
        self.X_train, self.y_train = TickerModel.to_sequences(self.prep_data[:self.train_point], input_steps, output_steps)
        self.X_validation, self.y_validation = TickerModel.to_sequences(self.prep_data, input_steps, output_steps)
        if test:
            self.X_test, self.y_test = np.array([self.X_validation[test:]]), np.array([self.y_validation[test:]])
            self.X_validation, self.y_validation = self.X_validation[self.val_point:test], self.y_validation[self.val_point:test]
        else:
            self.X_validation, self.y_validation = self.X_validation[self.val_point:], self.y_validation[self.val_point:]
            
          
    def to_sequences(data, input_steps, steps_out):
        x = []
        y = []
        for i in range(len(data)-input_steps-steps_out+1):
            window = data[i:(i+input_steps)]
            windowI = data[i+input_steps:i+input_steps+steps_out]
            x.append(window)
            y.append(windowI[:, 0])
        return np.array(x), np.array(y)
        
    def LSTM(self, N1, N2, batch, dropout, l1, l2):
        reg = L1L2(l1=l1, l2=l2)
        self.loss = []
        self.val_loss = []
        self.model = Sequential()
        self.model.add(LSTM(N1,
                            return_sequences=True,
                            kernel_initializer = "glorot_uniform", 
                            batch_input_shape = (batch, self.input_steps, self.X_train.shape[2]),
                            bias_regularizer=reg))
        self.model.add(Dropout(dropout))
        self.model.add(LSTM(N2,
                            kernel_initializer = "glorot_uniform",
                            bias_regularizer=reg))
        self.model.add(Dropout(dropout))
        self.model.add(Dense(self.output_steps))
        self.model.compile(optimizer = tf.keras.optimizers.Adam(1e-3),
                           loss = 'mse', 
                           metrics = [tf.keras.metrics.RootMeanSquaredError(),
                                      tf.keras.metrics.MeanAbsoluteError(), 
                                      tf.keras.metrics.MeanAbsolutePercentageError(),
                                      RSquare()])
        
           
    def fit(self, batch, epochs):
        callbacks = [tf.keras.callbacks.EarlyStopping(monitor='val_loss', 
                              patience=4,
                              verbose=1,
                              restore_best_weights=True),
                    tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', 
                                factor=0.1, 
                                patience= 2, 
                                verbose=1, 
                                min_delta=1e-5, 
                                mode='min')]
        history = self.model.fit(self.X_train, 
                                 self.y_train, 
                                 epochs = epochs, 
                                 batch_size = batch, 
                                validation_data=(self.X_validation, self.y_validation),
                                callbacks=callbacks)
        self.loss = self.loss + history.history["loss"]
        self.val_loss = self.val_loss + history.history["val_loss"]

    
    def evaluate(self):
        train = self.model.evaluate(self.X_train, self.y_train, verbose = 0)
        validation = self.model.evaluate(self.X_validation, self.y_validation, verbose = 0)
        self.summary = pd.DataFrame([train, validation], 
                                        index = ["Train", "Test"], 
                                        columns = ["MSE", 'RMSE', "MAE", "MAPE", "R2"])
        
    def save(self, note=''):
        self.model.save(self.ticker + str(note) + ".keras")
        
        
    def load(self, note=''):
        self.model = tf.keras.models.load_model(self.ticker + str(note) + ".keras")
    
    def scale_predict(self, X):
        X = np.log(X)
        X = self.scaler.transform(X)
        X, _ = TickerModel.to_sequences(X, 252, 0)
        pred = self.model.predict(X, verbose=0)
        pred = pd.DataFrame(np.tile(pred, (X.shape[2], 1))).transpose()
        pred = self.scaler.inverse_transform(pred)
        pred = np.exp(pred[:, 0])
        return pred
    
    def predict(self, X):
        pred = self.model.predict(X, verbose=0)
        pred = pd.DataFrame(np.tile(pred, (X.shape[2], 1))).transpose()
        pred = self.scaler.inverse_transform(pred)
        pred = np.exp(pred[:, 0])
        return pred


In [55]:
%%time
# Fit and save
models = []
for ticker in ch_comp_desc.index:
    print(ticker)
    ticker_data = history[history.Ticker == ticker]
    ticker_data = ticker_data[['Date', 'Ind', 'Value']]
    ticker_data.index = ticker_data.Date
    ticker_data = ticker_data.pivot_table(values='Value', index=ticker_data.index, columns='Ind', aggfunc='first').reset_index(drop=True)
    ticker_data = ticker_data[['Adj Close', 'Volume', 'High', 'Low']]
    model = TickerModel(ticker, ticker_data)
    model.preprocessing(252, 21, 0.2, -1)
    try:
        model.load('')
    except:
        model.LSTM(64, 64, 32, 0.2, 0.001, 0.001)
        model.fit(32, 100)
        model.save('')
    model.evaluate()
    models.append(model)
    

ARE
AVB
CCI
CPT
EPAM
EQR
ESS
ETR
FICO
HSY
JBL
KLAC
LRCX
MAA
NEE
SO
SRE
TER
TGT
TSN
UDR
CPU times: user 2min 36s, sys: 21.7 s, total: 2min 57s
Wall time: 2min 14s


In [56]:
def models_summary(models):
    summs = pd.DataFrame()
    for model in models:
        summ = model.summary
        summ['TICKER'] = [model.ticker, model.ticker]
        summs = pd.concat([summs, summ])
    summs['SAMPLE'] = summs.index
    summs = summs.reset_index(drop=True)
    summs = summs[['TICKER', 'SAMPLE', 'MSE', 'RMSE', 'MAE', 'MAPE', 'R2']]
    summs = summs.set_index(['TICKER', 'SAMPLE'])
    summs.columns = ['MSE', 'RMSE', 'MAE', 'MAPE, %', 'R\u00b2']
    return summs

In [57]:
summ = models_summary(models)

In [58]:
summ

Unnamed: 0_level_0,Unnamed: 1_level_0,MSE,RMSE,MAE,"MAPE, %",R²
TICKER,SAMPLE,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
ARE,Train,0.001852,0.042242,0.031704,6.831201,0.968613
ARE,Test,0.001471,0.037462,0.026558,2.556861,0.893594
AVB,Train,0.004361,0.052945,0.040939,8.033228,0.932707
AVB,Test,0.005577,0.0634,0.052251,4.820434,0.764353
CCI,Train,0.002155,0.046015,0.035352,9.476138,0.965372
CCI,Test,0.003697,0.060497,0.046796,3.93649,0.860113
CPT,Train,0.002703,0.046906,0.035661,7.230665,0.952976
CPT,Test,0.004216,0.060937,0.050043,4.33259,0.756901
EPAM,Train,0.001304,0.035716,0.027063,5.018954,0.95771
EPAM,Test,0.001343,0.036254,0.027764,2.647833,0.841072


# Линейная оптимизация

In [59]:
def portfolio_std(weights, cov, T):
    return np.sqrt(weights @ cov @ weights) * np.sqrt(T)

In [60]:
def portfolio_return(weights, returns):
    return weights @ returns

In [61]:
def efficient_portfolio(target_return, returns, cov, T, init_boundaries=(0,1)):
    init = np.ones(len(returns)) / len(returns)
    constraints = ({'type': 'eq', 'fun': lambda x: np.sum(x)-1},
                   {'type': 'eq', 'fun': lambda x: x @ returns - target_return})
    result = opt.minimize(fun=portfolio_std,
                          x0=init,
                          args=(cov, T),
                          method='SLSQP',
                          bounds=tuple(init_boundaries for _ in range(len(returns))),
                          constraints=constraints
                          )
    return result['fun'], result['x']

In [62]:
def generate(n, returns, cov, T):
    gen = []
    for i in range(n):
        weights = np.random.random(len(returns))
        weights /= np.sum(weights)
        std = portfolio_std(weights, cov, T)
        ret = portfolio_return(weights, returns)
        sr = ret / std
        gen.append([ret, std, sr])
    gen = pd.DataFrame(gen)
    return gen

In [63]:
def utility_function(return_, std, coef):
    return return_ - coef * (std ** 2)

In [64]:
def utility_reverse(std, U, coef):
    return U + coef * (std ** 2)

In [65]:
def gen_u_line(util, A, min_std, max_std, step):
    u_line = []
    u_std = np.arange(min_std, max_std, step)
    u_ret = utility_reverse(u_std, util, A)
    u_line.append(u_ret)
    u_line.append(u_std)
    u_line = pd.DataFrame(u_line)
    return u_line.transpose()

In [66]:
def select_portfolio(frontier, pred_returns, cov, T, gen_n, A, min_std, max_std, std_step=0.0001):
    max_util = frontier.UTILITY.max()
    gen = generate(gen_n, pred_returns, cov, T)
    u_line = gen_u_line(max_util, A, min_std, max_std, std_step)
    portfolio = frontier[frontier.UTILITY == frontier.UTILITY.max()]
    fig = px.scatter(x=gen[1], y=gen[0], color=gen[2], labels = {'color': "Коэффициент<br>Шарпа"})
    fig.add_scatter(x=frontier.STD, y=frontier.RETURN, mode='lines', name='Граница эффективных портфелей')
    fig.add_scatter(x=u_line[1], y=u_line[0], mode='lines', name='Кривая безразличия')
    fig.add_scatter(x=portfolio.STD, y=portfolio.RETURN, mode='markers', name='Оптимальный портфель', marker=dict(size=15))
    fig.update_layout(
        xaxis_title='Риск портфеля',
        yaxis_title='Доходность портфеля',
        height=800,
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.02,
            xanchor="right",
            x=1
        ),
        font = dict(size=18)
    )
    fig.show()
    return portfolio

In [67]:
def optimize_porfolios(min_return, max_return, step, returns, cov, T, A, init_boundaries=(0, 1)):
    frontier = []
    target_return = np.arange(min_return, max_return, step)
    for i in range(len(target_return)):
        std, w = efficient_portfolio(target_return[i], returns, cov, T, init_boundaries)
        frontier.append([target_return[i], std, w, utility_function(target_return[i], std, A)])
    frontier = pd.DataFrame(frontier)
    frontier.columns = ['RETURN', 'STD', 'WEIGHTS', 'UTILITY']
    frontier['SHARPE_RATIO'] = frontier.RETURN / frontier.STD
    return frontier

In [68]:
def plot_weights(weights, names):
    fig = px.pie(values=weights, names=names, title='Доля акций в портфеле')
    fig.update_traces(textposition='inside')
    fig.update_layout(height=600, width=600)
    fig.show()

In [69]:
real_quotes = quotes.loc['2019-11-27':'2019-12-30', ch_comp_desc.index]
real_last_quote = real_quotes.head(1)
real_returns = real_quotes.pct_change()[1:]
real_cum_returns = (1 + real_returns).cumprod() - 1
real_return = real_cum_returns.tail(1)
real_return = np.array(real_return)[0]

In [70]:
real_return

array([-0.00990728, -0.0298178 ,  0.06646654, -0.05451683, -0.01182934,
       -0.05733973, -0.04233762,  0.02018477,  0.01389153, -0.01171234,
        0.05390272,  0.0767884 ,  0.08437489, -0.04784474,  0.03526828,
        0.01653026,  0.03022904,  0.07487898,  0.02374902,  0.00277492,
       -0.03539456])

In [71]:
pred_quotes = []
for model in models:
    pred_quotes.append(model.predict(model.X_test[0]))



In [72]:
pred_quotes = pd.DataFrame(pred_quotes).transpose()
pred_quotes.columns = real_last_quote.columns
pred_quotes = pd.concat([real_last_quote, pred_quotes])
pred_returns = pred_quotes.pct_change()[1:]
pred_cum_returns = (1 + pred_returns).cumprod() - 1
pred_return = pred_cum_returns.loc[20]
pred_return = np.array(pred_return)

In [73]:
tmp = pred_quotes.transpose()
tmp = real_last_quote.transpose().join(tmp[20])
tmp.columns = [0, 1]
tmp[2] = (tmp[1] - tmp[0]) / tmp[0] * 100
tmp[0] = tmp[0].round(2)
tmp[1] = tmp[1].round(2)
tmp[2] = tmp[2].round(2)
tmp.columns = pd.MultiIndex.from_tuples([('Котировка', '2019-11-27'), ('Котировка', '2019-12-30'), ('Доходность, %', '')])
tmp.index.names = ['Тикер']
tmp[13:]

Unnamed: 0_level_0,Котировка,Котировка,"Доходность, %"
Unnamed: 0_level_1,2019-11-27,2019-12-30,Unnamed: 3_level_1
Тикер,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
MAA,117.26,102.25,-12.8
NEE,52.49,54.34,3.52
SO,51.93,45.17,-13.01
SRE,63.15,60.32,-4.49
TER,62.77,50.0,-20.34
TGT,113.84,84.32,-25.93
TSN,79.33,74.3,-6.35
UDR,40.32,38.34,-4.92


In [74]:
pred_return

array([-0.01169358, -0.07126158, -0.02511075, -0.06802185,  0.00143828,
       -0.13490122, -0.05905879, -0.21775433, -0.00972746, -0.13681265,
       -0.00999311, -0.01498815,  0.03372253, -0.12799876,  0.03522955,
       -0.13011748, -0.04485266, -0.20339056, -0.25931815, -0.06345781,
       -0.04915958])

In [75]:
returns_before_forecast = quotes[ch_comp_desc.index]
returns_before_forecast = returns_before_forecast[-273:-21].pct_change()[1:]
returns_before_forecast = np.array(returns_before_forecast.transpose())
cov = np.cov(returns_before_forecast)

In [76]:
cov

array([[ 1.24892400e-04,  8.91959294e-05,  6.76633924e-05,
         8.60901967e-05,  9.13955078e-05,  8.53904383e-05,
         8.37632666e-05,  4.71659937e-05,  7.84751916e-05,
         4.06422442e-05,  4.97253079e-05,  7.24529523e-05,
         6.70975557e-05,  8.56443697e-05,  4.72231129e-05,
         4.39194648e-05,  4.62162754e-05,  4.98349164e-05,
         4.26210292e-05,  4.24863276e-05,  9.27736098e-05],
       [ 8.91959294e-05,  1.03221309e-04,  7.17426081e-05,
         8.97181624e-05,  6.18426498e-05,  9.05849451e-05,
         9.01367168e-05,  5.18384087e-05,  6.19426427e-05,
         4.63051729e-05,  3.27073919e-05,  5.30493899e-05,
         4.21016993e-05,  8.72689924e-05,  5.03146586e-05,
         4.81797242e-05,  4.80693555e-05,  3.36237005e-05,
         3.87224800e-05,  3.26975520e-05,  9.61586023e-05],
       [ 6.76633924e-05,  7.17426081e-05,  1.41911045e-04,
         7.02220447e-05,  6.41801332e-05,  7.10644178e-05,
         7.13702167e-05,  5.34643100e-05,  7.38664110e

In [77]:
A = 100

In [78]:
frontier = optimize_porfolios(-0.041, 0.033, 0.0005, pred_return, cov, 21, A, init_boundaries=(0, 1))

In [79]:
portfolio = select_portfolio(frontier, pred_return, cov, 21, 20000, A, 0.03, 0.04)

In [80]:
portfolio

Unnamed: 0,RETURN,STD,WEIGHTS,UTILITY,SHARPE_RATIO
117,0.0175,0.036554,"[0.09655070397344623, 3.2526065174565133e-19, ...",-0.116121,0.478741


In [81]:
portfolio_info = ch_comp_desc[['SECURITY', 'SECTOR']]
portfolio_info['0'] = portfolio.WEIGHTS.values[0]
portfolio_info.columns = ['Акция', 'Сектор', 'Доля']
portfolio_info = portfolio_info[portfolio_info['Доля'] > 1e-16]
portfolio_info = portfolio_info.sort_values(by='Доля', ascending=False)
portfolio_info = portfolio_info.rename_axis('Тикер')
portfolio_info

Unnamed: 0_level_0,Акция,Сектор,Доля
Тикер,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
NEE,NextEra Energy,Utilities,0.648295
JBL,Jabil,Information Technology,0.122202
ARE,Alexandria Real Estate Equities,Real Estate,0.096551
CCI,Crown Castle,Real Estate,0.046437
TSN,Tyson Foods,Consumer Staples,0.042133
LRCX,Lam Research,Information Technology,0.024377
EPAM,EPAM Systems,Information Technology,0.020006


In [82]:
plot_weights(portfolio_info['Доля'], list(portfolio_info.index))

In [83]:
weights = portfolio.WEIGHTS.values[0]
weights

array([9.65507040e-02, 3.25260652e-19, 4.64367015e-02, 0.00000000e+00,
       2.00064739e-02, 5.58364119e-18, 0.00000000e+00, 0.00000000e+00,
       3.57786717e-18, 0.00000000e+00, 1.22201740e-01, 1.95156391e-18,
       2.43766237e-02, 9.21571847e-19, 6.48294568e-01, 0.00000000e+00,
       0.00000000e+00, 0.00000000e+00, 8.78203760e-18, 4.21331886e-02,
       0.00000000e+00])

# Тестирование параметров оптимального портфеля

In [84]:
def GBM(mu, sigma, dt, M, n_forecast):
    S0 = 1
    St = np.exp(
        (mu - sigma ** 2 / 2) * dt
        + sigma * np.random.normal(0, np.sqrt(dt), size=(M, n_forecast)).T
    )
    St = np.vstack([np.ones(M), St])
    St = S0 * St.cumprod(axis=0)
    return St.flatten()

In [85]:
def simulate_GBM(returns, n_forecast):
    point = np.random.randint(0, len(returns)-n_forecast)
    returns = returns[point:point+n_forecast]
    mu = returns.mean()
    sigma = returns.std()
    return GBM(mu, sigma, 1, 1, n_forecast)[1:]

In [86]:
def simulate_portfolio(returns, weights, n_forecast):
    simulated_GBM = []
    for col in returns:
        simulated_GBM.append(simulate_GBM(returns[col], n_forecast))
    simulated_GBM = np.array(simulated_GBM).T
    return simulated_GBM @ weights

In [87]:
def simulate_portfolios(returns, weights, n_forecast, n_iterations):
    simulated_portfolios = []
    for i in range(n_iterations):
        simulated_portfolios.append(simulate_portfolio(returns, weights, n_forecast).T)
    simulated_portfolios = pd.DataFrame(simulated_portfolios).transpose()
    return simulated_portfolios

In [88]:
def calculate_cvar(returns, var):
    low = returns[returns <= var]
    return np.mean(low)

In [89]:
def test_porfolio(simulated_portfolios):
    cum_prod_simulated_portfolios = simulated_portfolios - 1
    portfolio_avg = cum_prod_simulated_portfolios.apply(lambda x : np.mean(x), axis=1)
    portfolio_5th = cum_prod_simulated_portfolios.apply(lambda x : np.percentile(x, 5), axis=1)
    portfolio_95th = cum_prod_simulated_portfolios.apply(lambda x: np.percentile(x, 95), axis=1)
    var5 = -portfolio_5th
    cvar5 = []
    for i in range(len(var5)):
        cvar5.append(calculate_cvar(np.array(cum_prod_simulated_portfolios.iloc[i]), -var5[i]))
    cvar5 = -np.array(cvar5)
    var1 = -cum_prod_simulated_portfolios.apply(lambda x : np.percentile(x, 1), axis=1)
    cvar1 = []
    for i in range(len(var1)):
        cvar1.append(calculate_cvar(np.array(cum_prod_simulated_portfolios.iloc[i]), -var1[i]))
    cvar1 = -np.array(cvar1)
    return cum_prod_simulated_portfolios, portfolio_avg, portfolio_5th, portfolio_95th, var1, var5, cvar1, cvar5

In [90]:
def plot_simulations(cum_prod_simulated_portfolios, T, n):
    x = [x for x in range(T)]
    fig = go.Figure()
    for i in range(n):
        fig.add_trace(go.Scatter(x=x, y=cum_prod_simulated_portfolios[i]))
    fig.update_layout(showlegend=False,
                    height=800,
                    title='Результаты симуляции',
                    xaxis_title='Дни прогноза',
        yaxis_title='Накопленная доходность портфеля',
        font = dict(size=18))
    fig.show()

In [91]:
def plot_predict(time, portfolio_avg, portfolio_low, portfolio_high, portfolio_pred):
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=time, y=portfolio_low, line_color="#a7d5ed", showlegend=False))
    fig.add_trace(go.Scatter(x=time, y=portfolio_high, fill='tonexty', line_color="#a7d5ed", showlegend=False))
    fig.add_trace(go.Scatter(x=time, y=portfolio_pred, name='Прогноз<br>LSTM', line_color='#d45087'))
    fig.add_trace(go.Scatter(x=time, y=portfolio_avg, name='Базовый<br>сценарий', line_color='#003f5c'))
    fig.update_layout(
        title='Тестирование оптимального портфеля',
        xaxis_title='Дата',
        yaxis_title='Накопленная доходность портфеля',
        height=800,
        font = dict(size=18))
    fig.show()

In [92]:
def plot_var(time, var1, cvar1, var5, cvar5):
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=time, y=cvar1 * 100, name='CVaR 1%'))
    fig.add_trace(go.Scatter(x=time, y=var1 * 100, name='VaR 1%'))
    fig.add_trace(go.Scatter(x=time, y=cvar5 * 100, name='CVaR 5%'))
    fig.add_trace(go.Scatter(x=time, y=var5 * 100, name='VaR 5%'))
    fig.update_layout(
        title='VaR, CVaR',
        xaxis_title='Дата',
        yaxis_title='Потенциальный убыток, %',
        height=800,
        font=dict(size=18))
    fig.show()

In [93]:
def plot_probability(cum_prod_simulated_portfolios, target_return):
    target_prob_port = cum_prod_simulated_portfolios.apply(
      lambda x : np.mean(x >= target_return), axis=1)
    fig = go.Figure()
    fig.add_trace(go.Bar(x=target_prob_port.index+1, y=target_prob_port.values * 100))
    fig.update_traces(marker_color="#54bebe")
    fig.update_layout(
        title='Вероятность достичь целевой доходности',
        xaxis_title='Дни прогноза',
        yaxis_title='Вероятность, %',
        height=800,
        font=dict(size=18))
    fig.show()

In [94]:
N_FORECAST = 21
N_ITERATIONS = 2000

In [95]:
ch_returns = returns[ch_comp_desc.index]
ch_returns = ch_returns[:-21]

In [96]:
portfolio_returns = portfolio_return(weights, pred_cum_returns.transpose())

In [97]:
simulated_portfolios = simulate_portfolios(ch_returns, weights, N_FORECAST, N_ITERATIONS)

In [98]:
cum_prod_simulated_portfolios, portfolio_avg, portfolio_5th, portfolio_95th, var1, var5, cvar1, cvar5 = test_porfolio(simulated_portfolios)

In [99]:
plot_simulations(cum_prod_simulated_portfolios, 21, 1000)

In [100]:
plot_predict(portfolio_returns.index, portfolio_avg, portfolio_5th, portfolio_95th, portfolio_returns)

In [101]:
plot_var(portfolio_returns.index, var1, cvar1, var5, cvar5)

In [102]:
plot_probability(cum_prod_simulated_portfolios, portfolio.RETURN.values[0])

# Сравнение

In [106]:
real_portfolio_returns = portfolio_return(weights, real_cum_returns.transpose())

In [107]:
x = real_returns.index
fig = go.Figure()
fig.add_trace(go.Scatter(x=x, y=portfolio_5th, line_color="#a7d5ed", showlegend=False))
fig.add_trace(go.Scatter(x=x, y=portfolio_95th, fill='tonexty', line_color="#a7d5ed", showlegend=False))
fig.add_trace(go.Scatter(x=x, y=portfolio_avg, name='Базовый<br>сценарий', line_color='#003f5c'))
fig.add_trace(go.Scatter(x=x, y=portfolio_returns, name='Прогноз<br>LSTM', line_color='#d45087'))
fig.add_trace(go.Scatter(x=x, y=real_portfolio_returns, name='Действительные<br>значения', line_color='#665191'))
fig.update_layout(title='Тестирование оптимального портфеля',
                  xaxis_title='Дата',
    yaxis_title='Накопленная доходность портфеля',
    height=800,
    font=dict(size=18))
fig.show()

In [108]:
x = real_returns.index
fig = go.Figure()
fig.add_trace(go.Scatter(x=x, y=portfolio_return(weights, real_returns.transpose()), name='Действительные<br>значения',
                        line_color='#665191'))
fig.add_trace(go.Scatter(x=x, y=portfolio_return(weights, pred_returns.transpose()), name='Прогноз<br>LSTM', line_color='#d45087'))
fig.update_layout(title='Тестирование оптимального портфеля',
                  xaxis_title='Дата',
    yaxis_title='Доходность портфеля',
    height=500,
    font=dict(size=(18)))
fig.show()