In [1]:
import os
import warnings

import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import numpy as np
import pandas as pd
import torch

%load_ext autoreload
%autoreload 2

from src.features import *
from src.utils import *
from src.models import *
from src.optimizers import *
from src.loss_functions import *
from src.dataloaders import *
from src.security import NEPTUNE_TOKEN
from src.pipeline_functions import *

# Notebook-wide display configuration.
plt.style.use("ggplot")

# show all columns
pd.options.display.max_columns = None

# NOTE(review): this silences every warning for the whole session, including
# ones that may matter (deprecations, numerical issues) — consider narrowing it.
warnings.filterwarnings("ignore")

In [2]:
# Investment universe: ETF tickers, kept in alphabetical order.
stocks = sorted([
    "SPY", "VTV", "VUG", "VYM", "QQQ", "VNQ", "GLD", "AGG", "SOXX", "OIH", "IYT",
    "XLF", "XLV", "VOX", "XLY", "XLP", "BND", "BSV", "IEMG", "VEA", "VWO",
])

# Asset class of every ticker.
# AGG is an aggregate bond fund, so it belongs with BND / BSV under FIXED_INCOME
# (it was previously labelled EQUITY).
stock_classes = {
    'AGG': 'FIXED_INCOME', 'BND': 'FIXED_INCOME', 'BSV': 'FIXED_INCOME',
    'GLD': 'COMMODITY',
    'VNQ': 'REAL_ESTATE',
    'IEMG': 'EQUITY', 'IYT': 'EQUITY', 'OIH': 'EQUITY', 'QQQ': 'EQUITY', 'SOXX': 'EQUITY',
    'SPY': 'EQUITY', 'VEA': 'EQUITY', 'VOX': 'EQUITY', 'VTV': 'EQUITY', 'VUG': 'EQUITY',
    'VWO': 'EQUITY', 'VYM': 'EQUITY', 'XLF': 'EQUITY', 'XLP': 'EQUITY', 'XLV': 'EQUITY',
    'XLY': 'EQUITY',
}

# Colour assigned to each asset class.
classes_color = {'EQUITY': 'salmon', 'COMMODITY': 'cyan', 'FIXED_INCOME': 'lightgreen', "REAL_ESTATE": "lightblue"}

# Invariants: every ticker has a class, and every class has a colour.
assert set(stock_classes) == set(stocks), "stock_classes and stocks are out of sync"
assert set(stock_classes.values()) <= set(classes_color), "asset class without a colour"

len(stocks)

21

In [3]:
# Location of the raw price files handed to get_data().
# Set the DIPLOMA_DATA_DIR environment variable to run the notebook on another
# machine; when it is unset, the original local path is used.
path = os.environ.get(
    "DIPLOMA_DATA_DIR",
    "C:\\Users\\Андрей\\Documents\\diploma_python\\data",
)

# get_data() returns five frames: adjusted close, close, high, low and volume.
df_adj_close, df_close, df_high, df_low, df_volume = get_data(path, stocks)

AGG Start of history: 2003-09-29
BND Start of history: 2007-04-10
BSV Start of history: 2007-04-10
GLD Start of history: 2004-11-18
IEMG Start of history: 2012-10-24
IYT Start of history: 2004-01-02
OIH Start of history: 2001-02-26
QQQ Start of history: 1999-03-10
SOXX Start of history: 2001-07-13
SPY Start of history: 1993-01-29
VEA Start of history: 2007-07-26
VNQ Start of history: 2004-09-29
VOX Start of history: 2004-09-29
VTV Start of history: 2004-01-30
VUG Start of history: 2004-01-30
VWO Start of history: 2005-03-10
VYM Start of history: 2006-11-16
XLF Start of history: 1998-12-22
XLP Start of history: 1998-12-22
XLV Start of history: 1998-12-22
XLY Start of history: 1998-12-22


In [4]:
# Build the model inputs from the raw price/volume frames.
# This is the slow step of the notebook (several minutes according to the
# progress bars captured below).
(
    nodes_matrix,
    combined_adj_matrix,
    cov_adj_matrix,
    future_return,
    df_return,
    df_features,
) = features_pipeline(
    df_adj_close,
    df_close,
    df_high,
    df_low,
    df_volume,
    stocks=stocks,
)

Features generated and scaled
Return features generated
Correlation and covariance matrices generated
Common index length 2416


100%|██████████| 21/21 [00:07<00:00,  2.86it/s]
100%|██████████| 2416/2416 [00:56<00:00, 43.01it/s]
100%|██████████| 2416/2416 [00:04<00:00, 543.98it/s]
100%|██████████| 2416/2416 [00:04<00:00, 535.72it/s]
100%|██████████| 2416/2416 [05:06<00:00,  7.89it/s]
100%|██████████| 2416/2416 [05:17<00:00,  7.62it/s]

Adjacency matrices generated
Pipeline finished





In [5]:
# Chronological split: the first TRAIN samples are used for training and the
# test set starts PERIOD samples later, leaving a gap between the two.
# NOTE(review): the gap presumably keeps feature windows of the first test
# samples from overlapping the training range — confirm.
TRAIN = 1800
PERIOD = 90  # max window for features' calculation

# The four pipeline outputs are sliced identically and passed positionally.
_pipeline_arrays = (nodes_matrix, combined_adj_matrix, cov_adj_matrix, future_return)

train_dataset = Dataset(*(arr[:TRAIN] for arr in _pipeline_arrays))
test_dataset = Dataset(*(arr[TRAIN + PERIOD:] for arr in _pipeline_arrays))

len(test_dataset)


526

In [6]:
# Experiment grid: run index -> value passed as `storage_size` to GrossModel.
cases = {
    0: 10,
    1: 100,
    2: 500,
    3: 0,  # the run with storage_size 0 is trained with per=False
}

In [7]:
# Train one model per entry of `cases` and keep each model with its optimizer.
model_dic = {}
SEED = 45

# Drive the loop from `cases` itself, so adding or removing a scenario needs no
# change here (previously the count 4 and the index 3 were hard-coded).
for i, storage_size in cases.items():
    # The same seed is applied before every run.
    set_seed(SEED)

    model = GrossModel(
        num_features=23,
        num_relations=5,
        num_assets=len(stocks),
        sample_size=10,
        pred_window=5,
        n_heads=1,
        storage_size=storage_size,
        train_gamma=False,
        gamma=0.01,
    )

    optim = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=0.0001)

    # `per` is switched off only for the scenario whose storage size is 0.
    # The return value of train() is not used, and not binding it keeps
    # IPython's `_` intact.
    train(
        model,
        optim,
        train_dataset,
        epochs=4,
        eval_func='sharpe_loss',
        exp_name=f'exp_2705_per_v{i}',
        neptune_token=NEPTUNE_TOKEN,
        neptune_project="aibabynin/graphs",
        tags=["weights_constraints"],
        per=storage_size != 0,
    )

    model_dic[i] = {"model": model,
                    "optimizer": optim}

    print(f"{i} is trained")

https://app.neptune.ai/aibabynin/graphs/e/GRAP-43
Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 2 operations to synchronize with Neptune. Do not kill this process.
All 2 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/aibabynin/graphs/e/GRAP-43/metadata
0 is trained
https://app.neptune.ai/aibabynin/graphs/e/GRAP-44
Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 2 operations to synchronize with Neptune. Do not kill this process.
All 2 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/aibabynin/graphs/e/GRAP-44/metadata
1 is trained
https://app.neptune.ai/aibabynin/graphs/e/GRAP-45
Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 4 operations to synchronize with Neptune. Do not kill this process.
All 4 operations synced, thanks for waiting!
Explore the metadata in 

In [8]:
# Run every trained model on the test set and store what evaluate() returns
# next to the model, under 'test_weights_matrix'.
for i, entry in model_dic.items():
    test_weights_matrix = entry['test_weights_matrix'] = evaluate(entry['model'], test_dataset)

    print(f"{i} is evaluated")

100%|██████████| 511/511 [00:22<00:00, 22.72it/s]


0 is evaluated


100%|██████████| 511/511 [00:25<00:00, 20.15it/s]


1 is evaluated


100%|██████████| 511/511 [00:25<00:00, 20.26it/s]


2 is evaluated


100%|██████████| 511/511 [00:25<00:00, 20.24it/s]

3 is evaluated





In [44]:
# All models in model_dic were built with the same sample_size / pred_window,
# so read them from a stored model instead of relying on the loop variable
# `model` that happens to be left over from the training cell.
ref_model = model_dic[0]['model']

# First row of df_return used for scoring.
# NOTE(review): the extra sample_size + pred_window offset presumably skips the
# rows for which evaluate() produces no weights — confirm against evaluate().
test_start = TRAIN + PERIOD + ref_model.sample_size + ref_model.pred_window

# Slice once, then take values and index from the same frame.
test_returns = df_return.iloc[test_start:]
ret = test_returns.values
index = test_returns.index

# One row per scenario: weights times returns, summed over axis 1.
returns_matrix = np.zeros((len(model_dic), len(ret)))

for i, entry in model_dic.items():
    returns_matrix[i] = (ret * entry['test_weights_matrix']).sum(1)

In [1]:
# Cumulative return of every scenario against the equally weighted benchmark.
plt.style.use('ggplot')
fig, ax = plt.subplots(figsize=(8, 6))

# Benchmark: row-wise mean of `ret`, compounded over time.
ax.plot(index, (ret.mean(1) + 1).cumprod() - 1, color='green', label='Equally weighted', linewidth=2)

# One dashed curve per scenario, labelled with its entry in `cases`.
for i, buffer_size in cases.items():
    ax.plot(index, (returns_matrix[i] + 1).cumprod() - 1, alpha=0.7, label=f'PER = {buffer_size}', linestyle='--')

# Values are fractions, so 1.0 corresponds to 100%.
ax.yaxis.set_major_formatter(mtick.PercentFormatter(1.0))
ax.set_xlabel('Date', fontsize=11, fontname='Georgia')
ax.set_ylabel('Cumulative return', fontsize=11, fontname='Georgia')
ax.set_title('Portfolio return under different PER buffers', fontsize=13, fontname='Georgia')
# Legend sits outside the axes, to the right of the plot (title typo fixed).
ax.legend(loc='upper left', bbox_to_anchor=(1, 1), title='Scenarios', fontsize=9)
plt.show()

NameError: name 'plt' is not defined

In [21]:
# Persist the weights (state_dict) of every trained model, one file per scenario.
for i, entry in model_dic.items():
    checkpoint_path = f"model_buffer_{i}_sr.pt"
    torch.save(entry['model'].state_dict(), checkpoint_path)

In [55]:
# Benchmark statistics do not depend on the scenario, so compute them once.
bench_daily = ret.mean(1)                      # equally weighted return per row
cum_ret_b = (bench_daily + 1).cumprod() - 1    # benchmark cumulative return
n_days = len(cum_ret_b)
up_days = bench_daily > 0                      # rows where the benchmark gained
down_days = bench_daily < 0                    # rows where the benchmark lost

# NOTE(review): the loop starts at 1, so scenario 0 (PER = 10) is not reported
# here although it is plotted — confirm this is intended.
for i in range(1, 4):
    label = f"PER = {cases[i]}"

    # Days on which the strategy's cumulative return is above the benchmark's.
    cum_ret_p = (returns_matrix[i] + 1).cumprod() - 1
    days_outperformance = cum_ret_p > cum_ret_b
    n_out = days_outperformance.sum()
    print(f"{label}: {n_out} days out of {n_days} days, pct: {n_out / n_days:.2f}")

    # Total return over the period divided by volatility scaled to the same
    # horizon (std * sqrt(number of observations)); no risk-free rate is used.
    std = returns_matrix[i].std() * np.sqrt(len(returns_matrix[i]))
    p_ret = cum_ret_p[-1]
    sharpe = p_ret / std
    print(f"{label}: Sharpe ratio: {sharpe:.2f}, return: {p_ret:.2f}")

    # Share of benchmark-up / benchmark-down days that were outperformance days.
    n_out_up = days_outperformance[up_days].sum()
    n_out_down = days_outperformance[down_days].sum()
    out_high = n_out_up / up_days.sum()
    out_low = n_out_down / down_days.sum()
    print(f"{label}: Outperformance on positive days: {out_high:.2f}, days {n_out_up}")
    print(f"{label}: Outperformance on negative days: {out_low:.2f}, days {n_out_down}")
   

PER = 100: 435 days out of 511 days, pct: 0.85
PER = 100: Sharpe ratio: 0.72, return: 0.17
PER = 100: Outperformance on positive days: 0.81, days 211
PER = 100: Outperformance on negative days: 0.89, days 224
PER = 500: 487 days out of 511 days, pct: 0.95
PER = 500: Sharpe ratio: 1.02, return: 0.25
PER = 500: Outperformance on positive days: 0.95, days 245
PER = 500: Outperformance on negative days: 0.96, days 242
PER = 0: 395 days out of 511 days, pct: 0.77
PER = 0: Sharpe ratio: 0.14, return: 0.03
PER = 0: Outperformance on positive days: 0.78, days 201
PER = 0: Outperformance on negative days: 0.77, days 194


In [47]:
# Sanity check: number of strictly positive entries in `ret`, counted over the
# whole array (all rows and columns together).
(ret>0).sum()

5403

In [51]:
# Sanity check: averaging `ret` over axis 1 leaves one value per row.
ret.mean(axis=1).shape

(511,)