# Vectorized Backtest

In [173]:
import warnings
# Silence every warning category for the rest of the notebook session.
warnings.simplefilter('ignore')

In [174]:
from pathlib import Path
from time import time
import datetime

import numpy as np
import pandas as pd
import pandas_datareader.data as web

from scipy.stats import spearmanr

import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
import seaborn as sns

In [175]:
# Fix NumPy's global random seed and select the seaborn grid style for all figures.
np.random.seed(42)
sns.set_style('whitegrid')

## Initial Settings

In [176]:
# Target horizon, in periods: 1 means we predict the next period
# (one week when the data is weekly).
periodo_target = 1

## Load Data

### Return Predictions

In [None]:


# List the keys stored in the predictions HDF5 file.
# The context manager closes the file handle even if reading the keys raises,
# and mode='r' keeps this inspection step from creating or modifying the file.
with pd.HDFStore('../data/predictions.h5', mode='r') as store:
    keys = store.keys()

# Show the available keys
print(keys)

In [178]:
# Stack the train and test predictions; the test frame's 'y_test' column is
# dropped before concatenating.
train_preds = pd.read_hdf('../data/predictions.h5', 'lgb/train/01')
test_preds = pd.read_hdf('../data/predictions.h5', 'lgb/test/01').drop('y_test', axis=1)
predictions = pd.concat([train_preds, test_preds])

# For a duplicated index label only the first occurrence is kept, i.e. the
# train row, because train comes first in the concat.
first_occurrence = ~predictions.index.duplicated()

# Row-wise mean of the first 10 columns (per the original note, the 10 best
# predictor configurations), sorted by index with all-NaN rows removed.
predictions = (predictions.loc[first_occurrence]
               .iloc[:, :10]
               .mean(axis=1)
               .sort_index()
               .dropna()
               .to_frame('prediction'))

# Unique values of the 'ticker' index level, as a plain list
tickers = predictions.index.get_level_values('ticker').unique().tolist()


In [179]:
# Load the unmodified train and test prediction frames for inspection
datos_train, datos_test = (
    pd.read_hdf('../data/predictions.h5', key)
    for key in ('lgb/train/01', 'lgb/test/01')
)

In [None]:
# End of the train set: last row under the 'XLY' label after sorting the index
datos_train.sort_index().loc['XLY'].iloc[-1:]

In [None]:
# Start of the test set: first row under the 'XLY' label after sorting the index
datos_test.sort_index().loc['XLY'].iloc[:1]

In [184]:
# Wide form of the predictions: the 'ticker' index level becomes the columns
predicciones = predictions['prediction'].unstack('ticker')


In [None]:
# Structure summary (index, columns, dtypes, non-null counts) of the wide predictions
predicciones.info()

### SP500 Benchmark

In [186]:
#sp500 = web.DataReader('SP500', 'fred', '2014', '2018').pct_change()

In [187]:
#sp500.info()

## Compute Forward Returns

In [188]:
# Path of the HDF5 store from which the 'data_raw' table is read below
DATA_STORE = '../data/assets.h5'

In [189]:
# Shorthand for building multi-level slices inside .loc[...]
idx = pd.IndexSlice

In [190]:
# Read the 'data_raw' table, then sort it and keep the rows whose second
# index level is at or before '2024'.
with pd.HDFStore(DATA_STORE) as store:
    data = store['data_raw']
data = data.sort_index().loc[idx[:, :'2024'], :]

In [None]:
# Structure summary of the loaded data
data.info()

In [192]:
# Restrict the data to the tickers and dates that appear in the predictions,
# sorted in descending index order.
pred_tickers = predictions.index.get_level_values('ticker').unique()
pred_dates = predictions.index.get_level_values('date').unique()
fwd_returns = data.loc[idx[pred_tickers, pred_dates], :].sort_index(ascending=False)

In [None]:
# Inspect the selected rows
fwd_returns

## Define the Target Period

In [194]:
# Build the target column: within each ticker, the `return_{periodo_target}m`
# series is shifted by -periodo_target rows, so each row carries the value
# found that many rows later.
source_col = f'return_{periodo_target}m'
target_col = f'target_{periodo_target}m'
fwd_returns[target_col] = data.groupby(level='ticker')[source_col].shift(-periodo_target)

In [195]:
# Keep only the forward-return target and pivot it to wide form
# (one column per ticker), sorted by the remaining index.
target_col = f'target_{periodo_target}m'
fwd_returns = fwd_returns[target_col].unstack('ticker').sort_index()

In [None]:
# Optional step (currently disabled): drop the SPY column
#fwd_returns.drop('SPY.US', axis=1, inplace=True)
fwd_returns.head()

In [197]:

# Cross-sectional mean of the forward returns per row, moved one row later.
# NOTE(review): the strategy returns further down use the unshifted fwd_returns,
# so this series is offset by one row relative to them. Confirm the lag is intended.
media = fwd_returns.mean(axis=1).shift(1).rename('mediasec')

## Generate Signals

In [None]:
# Optional step (currently disabled): drop SPY when comparing against a benchmark
#predicciones.drop('SPY.US', axis=1, inplace=True)
predicciones

In [199]:
# Rank cutoff used when building the long and the short signals (same value for both)
N_LONG = 4
N_SHORT = N_LONG

In [200]:
# Long signal: 1 for the N_LONG highest strictly positive predictions in each
# row, 0 otherwise. Non-positive predictions are masked to NaN before ranking,
# so they are never selected.
# The comparison is inclusive (<=): ranks start at 1, so `< N_LONG` would
# select only N_LONG - 1 names.
long_signals = ((predicciones
                 .where(predicciones > 0)
                 .rank(axis=1, ascending=False, method='first') <= N_LONG)
                .astype(int))
# Short signal: mirror image, the N_SHORT lowest strictly negative predictions.
short_signals = ((predicciones
                  .where(predicciones < 0)
                  .rank(axis=1, method='first') <= N_SHORT)
                 .astype(int))

## Compute Portfolio Returns

In [201]:
# Equal-weighted mean forward return of the selected names on each row.
# Unselected names are masked out using the signal itself rather than by
# replacing 0 with NaN: a held position whose return is exactly 0 must still
# count in the mean. Rows with no selected name get a return of 0.
long_returns = (long_signals.mul(fwd_returns)
                .where(long_signals == 1)
                .mean(axis=1)
                .fillna(0))
# Short side: the forward return is negated, so a falling price is a gain.
short_returns = (short_signals.mul(-fwd_returns)
                 .where(short_signals == 1)
                 .mean(axis=1)
                 .fillna(0))
# Long-short strategy return: sum of the long leg and the short leg
strategy = long_returns.add(short_returns).to_frame('Strategy')

In [None]:
# Summary statistics of the short signals from 2019 onward
short_signals.loc['2019':].describe()

In [None]:
# Number of short signals per column from 2019 onward
short_signals.loc['2019':].sum()

In [None]:
# Summary statistics of the long signals from 2019 onward
long_signals.loc['2019':].describe()

In [None]:
# Number of long signals per column from 2019 onward
long_signals.loc['2019':].sum()

## Plot results

In [None]:
# Left panel: compounded cumulative return of the strategy from 2019 onward,
# next to the `media` series. Right panel: kernel density of the returns.
fig, axes = plt.subplots(ncols=2, figsize=(14,5))
strategy.loc['2019':].join(media).add(1).cumprod().sub(1).plot(ax=axes[0], title='Cumulative Return')
#strategy.join(sp500).add(1).cumprod().sub(1).plot(ax=axes[0], title='Cumulative Return')
# sns.distplot is deprecated; kdeplot draws the density curve that
# distplot(hist=False) was used for. NaNs are dropped explicitly.
sns.kdeplot(strategy['Strategy'].dropna(), ax=axes[1], label='Strategy')
sns.kdeplot(media.dropna(), ax=axes[1], label='media')
axes[1].legend()
# The panel shows a distribution of returns, not a standard deviation
axes[1].set_title('Distribution of Returns')
# Render both return axes as percentages
axes[0].yaxis.set_major_formatter(FuncFormatter(lambda y, _: '{:.0%}'.format(y)))
axes[1].xaxis.set_major_formatter(FuncFormatter(lambda y, _: '{:.0%}'.format(y)))
sns.despine()
fig.tight_layout();

In [None]:
# Same comparison with the cross-sectional mean (`media`) in place of the S&P.
# Left panel: compounded cumulative return from 2019 onward.
# Right panel: kernel density of the returns.
fig, axes = plt.subplots(ncols=2, figsize=(14,5))
strategy.loc['2019':].join(media).add(1).cumprod().sub(1).plot(ax=axes[0], title='Cumulative Return')
# sns.distplot is deprecated; kdeplot draws the density curve that
# distplot(hist=False) was used for. NaNs are dropped explicitly.
sns.kdeplot(strategy['Strategy'].dropna(), ax=axes[1], label='Strategy')
sns.kdeplot(media.dropna(), ax=axes[1], label='media')
axes[1].legend()
# The panel shows a distribution of returns, not a standard deviation
axes[1].set_title('Distribution of Returns')
# Render both return axes as percentages
axes[0].yaxis.set_major_formatter(FuncFormatter(lambda y, _: '{:.0%}'.format(y)))
axes[1].xaxis.set_major_formatter(FuncFormatter(lambda y, _: '{:.0%}'.format(y)))
sns.despine()
fig.tight_layout();

In [208]:
# Strategy returns next to the `media` series, keeping only complete rows
strategy_vs_media = strategy.join(media)
res = strategy_vs_media.dropna()

In [None]:
# Standard deviation of each column of `res`
res.std()

In [None]:
# Running sum of each column's forward returns from 2019 onward
cumulative_fwd = fwd_returns.loc['2019':].cumsum()
cumulative_fwd.plot(figsize=(14,5))

### Long Only

In [211]:
# Long-only variant: the long leg on its own, as a one-column frame named 'Strategy'
strategy_long = long_returns.rename('Strategy').to_frame()

In [None]:
# Long-only strategy. Left panel: compounded cumulative return from 2019
# onward, next to the `media` series. Right panel: kernel density of the returns.
fig, axes = plt.subplots(ncols=2, figsize=(14,5))
strategy_long.loc['2019':].join(media).add(1).cumprod().sub(1).plot(ax=axes[0], title='Cumulative Return')
# sns.distplot is deprecated; kdeplot draws the density curve that
# distplot(hist=False) was used for. NaNs are dropped explicitly.
sns.kdeplot(strategy_long['Strategy'].dropna(), ax=axes[1], label='Strategy')
sns.kdeplot(media.dropna(), ax=axes[1], label='media')
axes[1].legend()
# The panel shows a distribution of returns, not a standard deviation
axes[1].set_title('Distribution of Returns')
# Render both return axes as percentages
axes[0].yaxis.set_major_formatter(FuncFormatter(lambda y, _: '{:.0%}'.format(y)))
axes[1].xaxis.set_major_formatter(FuncFormatter(lambda y, _: '{:.0%}'.format(y)))
sns.despine()
fig.tight_layout();

In [None]:
# Number of long signals per column for rows up to and including 2019.
# NOTE(review): the other signal summaries slice from 2019 onward ('2019':).
# Confirm that the ':2019' direction here is intended.
long_signals.loc[:'2019'].sum()

### Short Only

In [214]:
# Short-only variant: the short leg on its own, as a one-column frame named 'Strategy'
strategy_short = short_returns.rename('Strategy').to_frame()

In [None]:
# Short-only strategy. Left panel: compounded cumulative return from 2019
# onward, next to the `media` series. Right panel: kernel density of the returns.
fig, axes = plt.subplots(ncols=2, figsize=(14,5))
strategy_short.loc['2019':].join(media).add(1).cumprod().sub(1).plot(ax=axes[0], title='Cumulative Return')
# sns.distplot is deprecated; kdeplot draws the density curve that
# distplot(hist=False) was used for. NaNs are dropped explicitly.
sns.kdeplot(strategy_short['Strategy'].dropna(), ax=axes[1], label='Strategy')
sns.kdeplot(media.dropna(), ax=axes[1], label='media')
axes[1].legend()
# The panel shows a distribution of returns, not a standard deviation
axes[1].set_title('Distribution of Returns')
# Render both return axes as percentages
axes[0].yaxis.set_major_formatter(FuncFormatter(lambda y, _: '{:.0%}'.format(y)))
axes[1].xaxis.set_major_formatter(FuncFormatter(lambda y, _: '{:.0%}'.format(y)))
sns.despine()
fig.tight_layout();