# Evaluamos los Boosting Model Signals

## Imports & Settings

In [20]:
import warnings
warnings.filterwarnings('ignore')

In [21]:
%matplotlib inline

from time import time
from io import StringIO
import sys, os
import warnings
from pathlib import Path
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

import lightgbm as lgb

from scipy.stats import spearmanr, pearsonr

from alphalens import plotting
from alphalens import performance as perf
from alphalens.utils import get_clean_factor_and_forward_returns, rate_of_return, std_conversion
from alphalens.tears import (create_summary_tear_sheet,
                             create_full_tear_sheet)

In [22]:
sys.path.insert(1, os.path.join(sys.path[0], '..'))
from new_utils import MultipleTimeSeriesCV

In [23]:
sns.set_style('whitegrid')

In [24]:
#YEAR = 252
#YEAR = 12
idx = pd.IndexSlice

In [25]:
# Cross-validation design columns; their values are parsed from the store keys below.
scope_params = ['lookahead', 'train_length', 'test_length']
# Summary columns of the per-day IC that accompany each configuration's metrics.
daily_ic_metrics = ['daily_ic_mean', 'daily_ic_mean_n', 'daily_ic_median', 'daily_ic_median_n']
# Hyperparameter columns used as identifiers for each model family.
lgb_train_params = ['learning_rate', 'num_leaves', 'feature_fraction', 'min_data_in_leaf']
rf_train_params = ['bagging_fraction', 'feature_fraction', 'min_data_in_leaf','max_depth']
# NOTE(review): catboost_train_params is not referenced in the visible part of this notebook.
catboost_train_params = ['max_depth', 'min_child_samples']

In [26]:
# Directory that holds the tuning stores read below. exist_ok=True replaces the
# separate exists() check, so the cell is safe to re-run and free of the
# check-then-create race.
results_path = Path('results', 'us_stocks')
results_path.mkdir(parents=True, exist_ok=True)

### LightGBM

#### Summary Metrics by Fold

In [27]:
# Gather the summary metrics of every configuration stored under
# 'metrics/<lookahead>/<train_length>/<test_length>/...'.
lgb_records = []
with pd.HDFStore(results_path / 'tuning_lgb.h5') as store:
    metric_keys = [k[1:] for k in store.keys() if k[1:].startswith('metrics')]
    for key in metric_keys:
        _, t, train_length, test_length = key.split('/')[:4]
        record = store[key].to_dict()
        record.update(lookahead=t,
                      train_length=train_length,
                      test_length=test_length)
        lgb_records.append(record)

# Fail with a clear message instead of a NameError further down when the store
# contains no metrics at all.
if not lgb_records:
    raise ValueError(f"No 'metrics' keys found in {results_path / 'tuning_lgb.h5'}")

# One row per configuration, built in a single pass rather than by inserting
# one column per key; the columns follow the first record, as before.
lgb_metrics = pd.DataFrame(lgb_records, columns=list(lgb_records[0]))

# Reshape to long format: one row per (configuration, boost_rounds) with its IC.
id_vars = scope_params + lgb_train_params + daily_ic_metrics
lgb_metrics = pd.melt(lgb_metrics.drop('t', axis=1),
                      id_vars=id_vars,
                      value_name='ic',
                      var_name='boost_rounds').dropna().apply(pd.to_numeric)

In [None]:
# Persist the tidy metrics. `key` is passed by keyword because newer pandas
# releases deprecate positional arguments to `to_hdf` other than the path.
lgb_metrics.to_hdf('../data/model_tuning.h5', key='lgb/metrics')
lgb_metrics.info()

In [None]:
lgb_metrics.groupby(scope_params).size()

#### Information Coefficient by Day

Next, we retrieve the IC per day computed during cross-validation:

In [31]:
# Columns cast to int after the daily IC frame has been melted to long format.
int_cols = ['lookahead', 'train_length', 'test_length', 'boost_rounds']

In [32]:
# Collect the per-day IC frame of every configuration stored under 'daily_ic/...'.
lgb_ic_frames = []
with pd.HDFStore(results_path / 'tuning_lgb.h5') as store:
    # Filter before unpacking so that keys with fewer than four path components
    # (or unrelated keys) can never break the split below.
    daily_ic_keys = [k[1:] for k in store.keys() if k[1:].startswith('daily_ic')]
    for key in daily_ic_keys:
        _, t, train_length, test_length = key.split('/')[:4]
        df = (store[key]
              .drop(['boosting', 'objective', 'verbose'], axis=1)
              .assign(lookahead=t,
                      train_length=train_length,
                      test_length=test_length))
        lgb_ic_frames.append(df)

# Separate names for the list of frames and the combined frame keep the cell readable.
lgb_ic = pd.concat(lgb_ic_frames).reset_index()

In [None]:
lgb_ic

In [34]:
# Long format: one row per (date, configuration, boost_rounds) with its IC.
id_vars = ['date'] + scope_params + lgb_train_params
# Cast with astype on the frame: assigning through `.loc[:, cols] = ...` may write
# into the existing object columns without changing their dtype in recent pandas.
lgb_ic = (pd.melt(lgb_ic,
                  id_vars=id_vars,
                  value_name='ic',
                  var_name='boost_rounds')
          .dropna()
          .astype({col: int for col in int_cols}))

In [None]:
# Persist the long-format daily IC; `key` is passed by keyword because newer
# pandas releases deprecate positional arguments to `to_hdf` other than the path.
lgb_ic.to_hdf('../data/model_tuning.h5', key='lgb/ic')
lgb_ic.info()

In [None]:
# Average the per-day IC over all dates for each parameter combination
# (id_vars[1:] leaves out the leading 'date' column).
lgb_daily_ic = lgb_ic.groupby(id_vars[1:] + ['boost_rounds']).ic.mean().to_frame('ic').reset_index()
# `key` by keyword: newer pandas deprecates positional arguments to `to_hdf` other than the path.
lgb_daily_ic.to_hdf('../data/model_tuning.h5', key='lgb/daily_ic')
lgb_daily_ic.info()

In [37]:
# Reload the persisted LightGBM IC frames from disk, replacing the in-memory versions.
lgb_ic = pd.read_hdf('../data/model_tuning.h5', 'lgb/ic')
lgb_daily_ic = pd.read_hdf('../data/model_tuning.h5', 'lgb/daily_ic')

### Random Forest

#### Summary Metrics by Fold

In [None]:
# Gather the summary metrics of every configuration stored under
# 'metrics/<lookahead>/<train_length>/<test_length>/...'.
rf_records = []
with pd.HDFStore(results_path / 'tuning_rf.h5') as store:
    metric_keys = [k[1:] for k in store.keys() if k[1:].startswith('metrics')]
    for key in metric_keys:
        _, t, train_length, test_length = key.split('/')[:4]
        record = store[key].to_dict()
        record.update(lookahead=t,
                      train_length=train_length,
                      test_length=test_length)
        rf_records.append(record)

# Fail with a clear message instead of a NameError further down when the store
# contains no metrics at all.
if not rf_records:
    raise ValueError(f"No 'metrics' keys found in {results_path / 'tuning_rf.h5'}")

# One row per configuration, built in a single pass rather than by inserting
# one column per key; the columns follow the first record, as before.
rf_metrics = pd.DataFrame(rf_records, columns=list(rf_records[0]))

# Reshape to long format: one row per (configuration, boost_rounds) with its IC.
id_vars = scope_params + rf_train_params + daily_ic_metrics
rf_metrics = pd.melt(rf_metrics.drop('t', axis=1),
                     id_vars=id_vars,
                     value_name='ic',
                     var_name='boost_rounds').dropna().apply(pd.to_numeric)

In [None]:
id_vars

In [None]:
# Print every key in the random-forest tuning store. The context manager closes
# the HDF5 file even if reading the keys raises.
with pd.HDFStore(results_path / 'tuning_rf.h5') as store:
    for key in store.keys():
        print(key)


In [None]:
# Persist the tidy metrics. `key` is passed by keyword because newer pandas
# releases deprecate positional arguments to `to_hdf` other than the path.
rf_metrics.to_hdf('../data/model_tuning.h5', key='rf/metrics')
rf_metrics.info()

In [None]:
rf_metrics.groupby(scope_params).size()

#### Information Coefficient by Day

In [43]:
# Columns cast to int after the daily IC frame has been melted to long format.
int_cols = ['lookahead', 'train_length', 'test_length', 'boost_rounds']

In [44]:
# Collect the per-day IC frame of every configuration stored under 'daily_ic/...'.
rf_ic_frames = []
# Metadata columns that some stored frames carry; dropped only when all are present.
meta_cols = ['boosting_type', 'objective', 'verbose']
with pd.HDFStore(results_path / 'tuning_rf.h5') as store:
    # Filter before unpacking so that keys with fewer than four path components
    # (or unrelated keys) can never break the split below.
    daily_ic_keys = [k[1:] for k in store.keys() if k[1:].startswith('daily_ic')]
    for key in daily_ic_keys:
        _, t, train_length, test_length = key.split('/')[:4]
        # Read each frame once (it was previously loaded twice: once for the
        # column check and once for the actual use).
        df = store[key]
        if set(meta_cols).issubset(df.columns):
            df = df.drop(meta_cols, axis=1)
        df = df.assign(lookahead=t,
                       train_length=train_length,
                       test_length=test_length)
        rf_ic_frames.append(df)

rf_ic = pd.concat(rf_ic_frames).reset_index()

In [45]:
# Drop bagging_freq (it is not one of rf_train_params). errors='ignore' keeps the
# cell re-runnable and tolerant of stores that never wrote the column.
rf_ic = rf_ic.drop(['bagging_freq'], axis=1, errors='ignore')

In [46]:
# Long format: one row per (date, configuration, boost_rounds) with its IC.
id_vars = ['date'] + scope_params + rf_train_params
# Cast with astype on the frame: assigning through `.loc[:, cols] = ...` may write
# into the existing object columns without changing their dtype in recent pandas.
rf_ic = (pd.melt(rf_ic,
                 id_vars=id_vars,
                 value_name='ic',
                 var_name='boost_rounds')
         .dropna()
         .astype({col: int for col in int_cols}))

In [None]:
rf_train_params

In [None]:
# Persist the long-format daily IC; `key` is passed by keyword because newer
# pandas releases deprecate positional arguments to `to_hdf` other than the path.
rf_ic.to_hdf('../data/model_tuning.h5', key='rf/ic')
rf_ic.info()

In [None]:
# Average the per-day IC over all dates for each parameter combination
# (id_vars[1:] leaves out the leading 'date' column).
rf_daily_ic = rf_ic.groupby(id_vars[1:] + ['boost_rounds']).ic.mean().to_frame('ic').reset_index()
# `key` by keyword: newer pandas deprecates positional arguments to `to_hdf` other than the path.
rf_daily_ic.to_hdf('../data/model_tuning.h5', key='rf/daily_ic')
rf_daily_ic.info()

In [50]:
# Reload the persisted random-forest IC frames from disk, replacing the in-memory versions.
rf_ic = pd.read_hdf('../data/model_tuning.h5', 'rf/ic')
rf_daily_ic = pd.read_hdf('../data/model_tuning.h5', 'rf/daily_ic')

## Validation Performance: Daily vs Overall Information Coefficient

In [None]:
# Two side-by-side boxen plots comparing both models per lookahead:
# the 'Overall IC' from the metrics frames and the 'Daily IC' from the daily frames.
fig, axes = plt.subplots(ncols=2, figsize=(15, 5), sharey=True)

# Stack the results of both models (pd.concat rather than DataFrame.append).
data_combined = pd.concat([rf_metrics.assign(model='rf'),
                           lgb_metrics.assign(model='lightgbm')])
data_combined_daily = pd.concat([rf_daily_ic.assign(model='rf'),
                                 lgb_daily_ic.assign(model='lightgbm')])

panels = [('Overall IC', data_combined), ('Daily IC', data_combined_daily)]
for ax, (panel_title, panel_data) in zip(axes, panels):
    sns.boxenplot(x='lookahead', y='ic', hue='model', data=panel_data, ax=ax)
    # Zero line as visual reference.
    ax.axhline(0, ls='--', lw=1, c='k')
    ax.set_title(panel_title)

fig.tight_layout()


## HyperParameter Impact: Linear Regression

In [52]:
# Regress the daily IC on one-hot encoded columns (every column except 'ic',
# including the date) to see how each hyperparameter value relates to the IC.
lin_reg = {}
# for t in [1, 2]:  # use this when two lookaheads were tuned
for t in [1]:
    df_ = lgb_ic[lgb_ic.lookahead==t]
    y, X = df_.ic, df_.drop(['ic'], axis=1)
    # drop_first avoids perfect collinearity between the dummies and the constant.
    X = sm.add_constant(pd.get_dummies(X, columns=X.columns, drop_first=True, dtype=float))
    model = sm.OLS(endog=y, exog=X)
    lin_reg[t] = model.fit()

    # Build the coefficient table straight from the fitted results instead of
    # round-tripping through the formatted summary text, which pads the
    # variable names and only carries the displayed precision.
    result = lin_reg[t]
    ci = result.conf_int()
    coefs = pd.DataFrame({'variable': result.params.index,
                          'coef': result.params.to_numpy(),
                          'std_err': result.bse.to_numpy(),
                          't': result.tvalues.to_numpy(),
                          'p_value': result.pvalues.to_numpy(),
                          'ci_low': ci[0].to_numpy(),
                          'ci_high': ci[1].to_numpy()})
    coefs.to_csv(f'results/linreg_result_{t:02}.csv', index=False)

In [53]:
def visualize_lr_result(model, ax):
    """Draw the coefficients of a fitted regression with error bars on ``ax``.

    ``model`` must expose ``params`` and ``conf_int()``. Coefficients whose
    name starts with 'date' and the 'const' term are not shown. Each error bar
    extends half the confidence-interval width on either side of the estimate.
    """
    bounds = model.conf_int()
    half_width = bounds[1].sub(bounds[0]).div(2)

    table = model.params.to_frame('coef').assign(error=half_width).reset_index()
    table = table.rename(columns={'index': 'variable'})

    # Keep only the hyperparameter dummies.
    is_date = table['variable'].str.startswith('date')
    is_const = table.variable == 'const'
    coefs = table[~is_date & ~is_const]

    # Invisible bars supply the error bars; the estimates are drawn as dashes below.
    coefs.plot(x='variable', y='coef', kind='bar',
               ax=ax, color='none', capsize=3,
               yerr='error', legend=False)
    ax.set_ylabel('IC')
    ax.set_xlabel('')
    positions = np.arange(len(coefs))
    ax.scatter(x=positions, marker='_', s=120, y=coefs['coef'], color='black')
    ax.axhline(y=0, linestyle='--', color='black', linewidth=1)
    ax.xaxis.set_ticks_position('none')

In [None]:
# One panel per fitted lookahead, so no empty axis is drawn when only one
# lookahead was regressed. squeeze=False keeps `axes` an array even for one panel.
lookaheads = sorted(lin_reg)
fig, axes = plt.subplots(nrows=1, ncols=len(lookaheads), figsize=(20, 8),
                         sharey=True, squeeze=False)
axes = axes.flatten()
for i, t in enumerate(lookaheads):
    visualize_lr_result(lin_reg[t], axes[i])
    axes[i].set_title(f'Lookahead: {t} Day(s)')
fig.suptitle('OLS Coefficients & Confidence Intervals', fontsize=20)
fig.tight_layout()
fig.subplots_adjust(top=.92);

### The same analysis for Random Forest

In [55]:
# Regress the daily IC on one-hot encoded columns (every column except 'ic',
# including the date) to see how each hyperparameter value relates to the IC.
lin_reg = {}
# for t in [1, 2]:  # use this when two lookaheads were tuned
for t in [1]:
    df_ = rf_ic[rf_ic.lookahead==t]
    y, X = df_.ic, df_.drop(['ic'], axis=1)
    # drop_first=True: a full set of dummies plus a constant is perfectly
    # collinear, which leaves the individual coefficients unidentified.
    X = sm.add_constant(pd.get_dummies(X, columns=X.columns, drop_first=True, dtype=float))
    model = sm.OLS(endog=y, exog=X)
    lin_reg[t] = model.fit()

    # Build the coefficient table straight from the fitted results instead of
    # round-tripping through the formatted summary text, which pads the
    # variable names and only carries the displayed precision.
    result = lin_reg[t]
    ci = result.conf_int()
    coefs = pd.DataFrame({'variable': result.params.index,
                          'coef': result.params.to_numpy(),
                          'std_err': result.bse.to_numpy(),
                          't': result.tvalues.to_numpy(),
                          'p_value': result.pvalues.to_numpy(),
                          'ci_low': ci[0].to_numpy(),
                          'ci_high': ci[1].to_numpy()})
    # Separate file name so the LightGBM regression result written earlier in
    # the notebook to linreg_result_{t:02}.csv is not overwritten.
    coefs.to_csv(f'results/linreg_result_rf_{t:02}.csv', index=False)

In [None]:
coefs.tail(20)

In [None]:
# One panel per fitted lookahead, so no empty axis is drawn when only one
# lookahead was regressed. squeeze=False keeps `axes` an array even for one panel.
lookaheads = sorted(lin_reg)
fig, axes = plt.subplots(nrows=1, ncols=len(lookaheads), figsize=(20, 8),
                         sharey=True, squeeze=False)
axes = axes.flatten()
for i, t in enumerate(lookaheads):
    visualize_lr_result(lin_reg[t], axes[i])
    axes[i].set_title(f'Lookahead: {t} Day(s)')
fig.suptitle('OLS Coefficients & Confidence Intervals', fontsize=20)
fig.tight_layout()
fig.subplots_adjust(top=.92);

## Cross-validation Result: Best Hyperparameters

### LightGBM

In [None]:
# NOTE(review): group_cols is assigned here but not used by this cell.
group_cols = scope_params + lgb_train_params + ['boost_rounds']
# Ten parameter combinations with the highest mean daily IC for each lookahead.
lgb_daily_ic.groupby('lookahead', group_keys=False).apply(lambda x: x.nlargest(10, 'ic'))

In [59]:
# Top three configurations per lookahead by IC: computed once, saved, then
# shown as the cell output (the first expression used to be discarded).
best_lgb_models = lgb_metrics.groupby('lookahead', group_keys=False).apply(lambda x: x.nlargest(3, 'ic'))
best_lgb_models.to_csv('results/best_lgb_model.csv', index=False)
best_lgb_models

In [None]:
lgb_metrics.groupby('lookahead', group_keys=False).apply(lambda x: x.nlargest(10, 'daily_ic_mean'))

### Random Forest

In [None]:
# NOTE(review): group_cols is assigned here but not used by this cell.
group_cols = scope_params + rf_train_params + ['boost_rounds']
# Ten parameter combinations with the highest mean daily IC for each lookahead.
rf_daily_ic.groupby('lookahead', group_keys=False).apply(lambda x: x.nlargest(10, 'ic'))

In [62]:
# Top three configurations per lookahead by IC: computed once, saved, then
# shown as the cell output (the first expression used to be discarded).
best_rf_models = rf_metrics.groupby('lookahead', group_keys=False).apply(lambda x: x.nlargest(3, 'ic'))
best_rf_models.to_csv('results/best_rf_model.csv', index=False)
best_rf_models

In [None]:
rf_metrics.groupby('lookahead', group_keys=False).apply(lambda x: x.nlargest(10, 'daily_ic_mean'))

### Visualization

#### LightGBM

In [None]:
# Box plots of the LightGBM IC per lookahead, one panel for each
# train_length (columns) x test_length (rows) combination.
g = sns.catplot(x='lookahead', y='ic',
                col='train_length', row='test_length',
                data=lgb_metrics,
                kind='box')

In [None]:
# Lookahead whose results are plotted.
t=1
# Box plots of the mean daily IC by number of boosting rounds, one panel for
# each train_length (columns) x test_length (rows) combination.
g=sns.catplot(x='boost_rounds',
            y='ic',
            col='train_length',
            row='test_length',
            data=lgb_daily_ic[lgb_daily_ic.lookahead == t],
            kind='box')

#### Random Forest

In [None]:
# Box plots of the random-forest IC per lookahead, one panel for each
# train_length (columns) x test_length (rows) combination.
g = sns.catplot(x='lookahead', y='ic',
                col='train_length', row='test_length',
                data=rf_metrics,
                kind='box')

In [None]:
# Lookahead whose results are plotted.
t=1
# Box plots of the mean daily IC by number of boosting rounds, one panel for
# each train_length (columns) x test_length (rows) combination.
g=sns.catplot(x='boost_rounds',
            y='ic',
            col='train_length',
            row='test_length',
            data=rf_daily_ic[rf_daily_ic.lookahead == t],
            kind='box')

## AlphaLens Analysis - Validation Performance

### LightGBM

#### Select Parameters

In [None]:
lgb_daily_ic = pd.read_hdf('../data/model_tuning.h5', 'lgb/daily_ic')
lgb_daily_ic.info()

In [69]:
def get_lgb_params(data, t=5, best=0):
    """Return the parameters of the ``best``-th ranked LightGBM configuration.

    Rows of ``data`` with ``lookahead == t`` are ordered by descending ``ic``;
    ``best=0`` picks the top row. The returned Series holds the train/test
    lengths, the LightGBM training parameters and ``boost_rounds``.
    """
    wanted = scope_params[1:] + lgb_train_params + ['boost_rounds']
    ranked = data[data.lookahead == t].sort_values('ic', ascending=False)
    chosen = ranked.iloc[best]
    return chosen.loc[wanted]

In [70]:
def get_lgb_key(t, p):
    """Build the '/'-separated store key for lookahead ``t`` and parameters ``p``.

    Lengths, ``num_leaves`` and ``min_data_in_leaf`` are written as integers;
    ``learning_rate`` and ``feature_fraction`` are written as they are.
    """
    parts = [
        t,
        int(p.train_length),
        int(p.test_length),
        p.learning_rate,
        int(p.num_leaves),
        p.feature_fraction,
        int(p.min_data_in_leaf),
    ]
    return '/'.join(f'{part}' for part in parts)

In [None]:
best_params = get_lgb_params(lgb_daily_ic, t=1, best=0)
best_params

In [72]:
# `key` by keyword: newer pandas deprecates positional arguments to `to_hdf` other than the path.
best_params.to_hdf('data.h5', key='best_params')

#### Plot rolling IC

In [73]:
def select_ic(params, ic_data, lookahead):
    """Return the IC series of exactly one configuration, indexed by date.

    Parameters
    ----------
    params : mapping or pd.Series
        Parameter name -> value for the configuration to select.
    ic_data : pd.DataFrame
        Long-format IC data with 'date', 'ic', 'lookahead' and one column per
        parameter.
    lookahead : int
        Lookahead to select.

    Returns
    -------
    pd.DataFrame
        Column 'ic' indexed by 'date' for the rows matching ``lookahead`` and
        every entry of ``params`` that is a column of ``ic_data``.

    Notes
    -----
    Matching on all supplied parameters (rather than a fixed subset) prevents
    rows from configurations that differ only in an unlisted parameter, such as
    ``min_data_in_leaf``, from being mixed into one series. It also lets the
    same function serve any model's parameter set.
    """
    reserved = {'date', 'ic', 'lookahead'}
    mask = ic_data['lookahead'] == lookahead
    for name, value in params.items():
        if name in ic_data.columns and name not in reserved:
            mask = mask & (ic_data[name] == value)
    return ic_data.loc[mask, ['date', 'ic']].set_index('date')

In [None]:
# Lookaheads to plot (a single one here); one panel each, so no empty axis is drawn.
lookaheads = [1]
fig, axes = plt.subplots(nrows=1, ncols=len(lookaheads), figsize=(20, 5), squeeze=False)
axes = axes.flatten()

for i, t in enumerate(lookaheads):
    params = get_lgb_params(lgb_daily_ic, t=t)
    data = select_ic(params, lgb_ic, lookahead=t).sort_index()
    # 12-observation window; the original note marks the data as weekly, i.e.
    # roughly three months (a 63-observation window was used for daily data).
    rolling = data.rolling(3*4).ic.mean().dropna()
    avg = data.ic.mean()
    med = data.ic.median()
    rolling.plot(ax=axes[i], title=f'Horizon: {t} Day(s) | IC: Mean={avg*100:.2f}   Median={med*100:.2f}')
    # Mean IC (red) and zero (dashed) as reference lines.
    axes[i].axhline(avg, c='darkred', lw=1)
    axes[i].axhline(0, ls='--', c='k', lw=1)

fig.suptitle('3-Month Rolling Information Coefficient', fontsize=16)
fig.tight_layout()
fig.subplots_adjust(top=0.92);

In [None]:
t

#### Get Predictions for Validation Period

In [76]:
# Collect the validation predictions of the `topn` best LightGBM configurations,
# one column per rank (0 = best).
lookahead = 1
topn = 10
# Open the store once instead of re-opening the file for every configuration.
with pd.HDFStore(results_path / 'tuning_lgb.h5', mode='r') as store:
    for best in range(topn):
        best_params = get_lgb_params(lgb_daily_ic, t=lookahead, best=best)
        key = get_lgb_key(lookahead, best_params)
        # Prediction columns are labelled by the number of boosting rounds as a string.
        rounds = str(int(best_params.boost_rounds))
        predictions = store['predictions/' + key][rounds]
        if best == 0:
            best_predictions = predictions.to_frame(best)
        else:
            # Column assignment aligns on the index of the top-ranked predictions.
            best_predictions[best] = predictions
best_predictions = best_predictions.sort_index()

In [None]:
# `key` by keyword: newer pandas deprecates positional arguments to `to_hdf` other than the path.
best_predictions.to_hdf('../data/predictions.h5', key=f'lgb/train/{lookahead:02}')
best_predictions.info()

In [78]:
#modificado
def get_trade_prices(tickers):
    idx = pd.IndexSlice
    DATA_STORE = '../data/assets.h5'
    with pd.HDFStore(DATA_STORE) as store:
        prices = (store['data_close']) 
    
    return (prices.loc[idx[tickers, '2012': '2024'], 'open']
            .unstack('ticker')
            .sort_index()
            .shift(-1)
            .tz_localize('UTC')
            .resample('W').last())

In [79]:
test_tickers = best_predictions.index.get_level_values('ticker').unique()#modificado

In [None]:
test_tickers

In [None]:
trade_prices = get_trade_prices(test_tickers)
trade_prices.info()

In [82]:
# Persist the trade prices and read them back so later cells can start from the file.
# `key` by keyword: newer pandas deprecates positional arguments to `to_hdf` other than the path.
trade_prices.to_hdf('../data/model_tuning.h5', key='trade_prices/model_selection')
trade_prices = pd.read_hdf('../data/model_tuning.h5', 'trade_prices/model_selection')

In [None]:
trade_prices.tail()

In [84]:
# Factor = row-wise mean of the first five prediction columns (NaNs dropped); the
# 'date' index level is localized to UTC and the two index levels are swapped.
factor = best_predictions.iloc[:, :5].mean(1).dropna().tz_localize('UTC', level='date').swaplevel()

In [None]:
trade_prices.tail()

In [None]:
freq = pd.infer_freq(trade_prices.index)
print(freq)

#### Create AlphaLens Inputs

In [None]:
# Build the AlphaLens input from the factor and trade prices: five quantiles,
# forward returns over 1 and 2 periods.
# NOTE(review): max_loss=2 lies above 1, so it presumably just disables the
# data-loss check — confirm against the alphalens documentation.
factor_data = get_clean_factor_and_forward_returns(factor=factor,
                                                   prices=trade_prices,
                                                   quantiles=5,
                                                   # periods=(1, 2))  # modified
                                                   periods=(1,2),max_loss=2)  # modified

#### Compute Alphalens metrics

In [90]:
# Demeaned mean return (and its standard error) per factor quantile and date.
# NOTE(review): the identical call is repeated in a later cell of this section.
mean_quant_ret_bydate, std_quant_daily = perf.mean_return_by_quantile(
    factor_data,
    by_date=True,
    by_group=False,
    demeaned=True,
    group_adjust=False,
)

In [91]:
factor_returns = perf.factor_returns(factor_data)

In [92]:
# Demeaned mean return (and its standard error) per factor quantile over all dates.
mean_quant_ret, std_quantile = perf.mean_return_by_quantile(factor_data,
                                                            by_group=False,
                                                            demeaned=True)



# Apply rate_of_return column-wise, using the first forward-return period as base period.
mean_quant_rateret = mean_quant_ret.apply(rate_of_return, axis=0,
                                          base_period=mean_quant_ret.columns[0])

In [93]:
# Demeaned mean return (and its standard error) per factor quantile and date.
mean_quant_ret_bydate, std_quant_daily = perf.mean_return_by_quantile(
    factor_data,
    by_date=True,
    by_group=False,
    demeaned=True,
    group_adjust=False,
)

# Apply rate_of_return to the by-date returns, with the first period as base period.
mean_quant_rateret_bydate = mean_quant_ret_bydate.apply(
    rate_of_return,
    base_period=mean_quant_ret_bydate.columns[0],
)

# Apply std_conversion to the standard errors with the same base period.
compstd_quant_daily = std_quant_daily.apply(std_conversion,
                                            base_period=std_quant_daily.columns[0])

# Alpha and beta of the factor, computed by alphalens on demeaned returns.
alpha_beta = perf.factor_alpha_beta(factor_data,
                                    demeaned=True)

# Return spread between the highest and the lowest factor quantile.
mean_ret_spread_quant, std_spread_quant = perf.compute_mean_returns_spread(
    mean_quant_rateret_bydate,
    factor_data["factor_quantile"].max(),
    factor_data["factor_quantile"].min(),
    std_err=compstd_quant_daily,
)

In [None]:
mean_ret_spread_quant.mean().mul(10000).to_frame('Mean Period Wise Spread (bps)').join(alpha_beta.T).T

In [None]:
fig, axes = plt.subplots(ncols=3, figsize=(18, 4))

# Use the first forward-return period actually present instead of assuming a
# '1D' column: the trade prices are resampled weekly, so the period label
# produced by alphalens need not be '1D'.
period = mean_quant_ret_bydate.columns[0]

# Left: mean return by factor quantile.
plotting.plot_quantile_returns_bar(mean_quant_rateret, ax=axes[0])
plt.setp(axes[0].xaxis.get_majorticklabels(), rotation=0)
axes[0].set_xlabel('Quantile')

# Middle: cumulative return by quantile for the selected period.
# NOTE(review): freq is kept as business days; confirm it suits weekly prices.
plotting.plot_cumulative_returns_by_quantile(mean_quant_ret_bydate[period],
                                             freq=pd.tseries.offsets.BDay(),
                                             period=period,
                                             ax=axes[1])
axes[1].set_title(f'Cumulative Return by Quantile ({period} Period)')

# Right: cumulative return of the factor-weighted long/short portfolio.
title = f"Cumulative Return - Factor-Weighted Long/Short PF ({period} Period)"
plotting.plot_cumulative_returns(factor_returns[period],
                                 period=period,
                                 freq=pd.tseries.offsets.BDay(),
                                 title=title,
                                 ax=axes[2])

fig.suptitle('Alphalens - Validation Set Performance', fontsize=14)
fig.tight_layout()
fig.subplots_adjust(top=.85);

In [None]:
create_summary_tear_sheet(factor_data)

In [None]:
create_full_tear_sheet(factor_data)

### Random Forest

#### Select Parameters

In [None]:
rf_daily_ic = pd.read_hdf('../data/model_tuning.h5', 'rf/daily_ic')
rf_daily_ic.info()

In [99]:
def get_rf_params(data, t=5, best=0):
    """Return the parameters of the ``best``-th ranked random-forest configuration.

    Rows of ``data`` with ``lookahead == t`` are ordered by descending ``ic``;
    ``best=0`` picks the top row. The returned Series holds the train/test
    lengths, the random-forest training parameters and ``boost_rounds``.
    """
    wanted = scope_params[1:] + rf_train_params + ['boost_rounds']
    ranked = data[data.lookahead == t].sort_values('ic', ascending=False)
    chosen = ranked.iloc[best]
    return chosen.loc[wanted]

In [100]:
def get_rf_key(t, p):
    """Build the '/'-separated store key for lookahead ``t`` and parameters ``p``.

    Lengths, ``min_data_in_leaf`` and ``max_depth`` are written as integers;
    ``bagging_fraction`` and ``feature_fraction`` are written as they are.
    """
    parts = [
        t,
        int(p.train_length),
        int(p.test_length),
        p.bagging_fraction,
        p.feature_fraction,
        int(p.min_data_in_leaf),
        int(p.max_depth),
    ]
    return '/'.join(f'{part}' for part in parts)

In [None]:
best_params_rf = get_rf_params(rf_daily_ic, t=1, best=0)
best_params_rf

In [102]:
# `key` by keyword: newer pandas deprecates positional arguments to `to_hdf` other than the path.
best_params_rf.to_hdf('data.h5', key='best_params_rf')

#### Plot rolling IC

In [103]:
def select_ic(params, ic_data, lookahead):
    """Return the IC series of exactly one configuration, indexed by date.

    Parameters
    ----------
    params : mapping or pd.Series
        Parameter name -> value for the configuration to select.
    ic_data : pd.DataFrame
        Long-format IC data with 'date', 'ic', 'lookahead' and one column per
        parameter.
    lookahead : int
        Lookahead to select.

    Returns
    -------
    pd.DataFrame
        Column 'ic' indexed by 'date' for the rows matching ``lookahead`` and
        every entry of ``params`` that is a column of ``ic_data``.

    Notes
    -----
    Matching on all supplied parameters (rather than a fixed subset) prevents
    rows from configurations that differ only in an unlisted parameter, such as
    ``min_data_in_leaf``, from being mixed into one series. It also lets the
    same function serve any model's parameter set.
    """
    reserved = {'date', 'ic', 'lookahead'}
    mask = ic_data['lookahead'] == lookahead
    for name, value in params.items():
        if name in ic_data.columns and name not in reserved:
            mask = mask & (ic_data[name] == value)
    return ic_data.loc[mask, ['date', 'ic']].set_index('date')

In [None]:
# Lookaheads to plot (a single one here); one panel each, so no empty axis is drawn.
lookaheads = [1]
fig, axes = plt.subplots(nrows=1, ncols=len(lookaheads), figsize=(20, 5), squeeze=False)
axes = axes.flatten()
for i, t in enumerate(lookaheads):
    params = get_rf_params(rf_daily_ic, t=t)
    data = select_ic(params, rf_ic, lookahead=t).sort_index()
    # 12-observation window; the original note marks the data as weekly, i.e.
    # roughly three months (a 63-observation window was used for daily data).
    rolling = data.rolling(3*4).ic.mean().dropna()
    avg = data.ic.mean()
    med = data.ic.median()
    rolling.plot(ax=axes[i], title=f'Horizon: {t} Day(s) | IC: Mean={avg*100:.2f}   Median={med*100:.2f}')
    # Mean IC (red) and zero (dashed) as reference lines.
    axes[i].axhline(avg, c='darkred', lw=1)
    axes[i].axhline(0, ls='--', c='k', lw=1)

fig.suptitle('3-Month Rolling Information Coefficient', fontsize=16)
fig.tight_layout()
fig.subplots_adjust(top=0.92);

#### Get Predictions for Validation Period

In [105]:
# Collect the validation predictions of the `topn` best random-forest
# configurations, one column per rank (0 = best).
lookahead = 1
topn = 10
# Open the store once instead of re-opening the file for every configuration.
with pd.HDFStore(results_path / 'tuning_rf.h5', mode='r') as store:
    for best in range(topn):
        best_params = get_rf_params(rf_daily_ic, t=lookahead, best=best)
        key = get_rf_key(lookahead, best_params)
        # Prediction columns are labelled by the number of boosting rounds as a string.
        rounds = str(int(best_params.boost_rounds))
        predictions = store['predictions/' + key][rounds]
        if best == 0:
            best_predictions = predictions.to_frame(best)
        else:
            # Column assignment aligns on the index of the top-ranked predictions.
            best_predictions[best] = predictions
best_predictions = best_predictions.sort_index()

In [None]:
key

In [None]:
# `key` by keyword: newer pandas deprecates positional arguments to `to_hdf` other than the path.
best_predictions.to_hdf('../data/predictions.h5', key=f'rf/train/{lookahead:02}')
best_predictions.info()

#### Get Trade Prices

In [108]:
test_tickers = best_predictions.index.get_level_values('ticker').unique()#modificado

In [None]:
trade_prices = get_trade_prices(test_tickers)
trade_prices.info()

In [110]:
# Persist the result in case we want to rerun, then read it back.
# `key` by keyword: newer pandas deprecates positional arguments to `to_hdf` other than the path.
trade_prices.to_hdf('../data/model_tuning.h5', key='trade_prices/model_selection')
trade_prices = pd.read_hdf('../data/model_tuning.h5', 'trade_prices/model_selection')

In [None]:
trade_prices

In [112]:
# Factor = row-wise mean of the first five prediction columns (NaNs dropped); the
# 'date' index level is localized to UTC and the two index levels are swapped.
factor = best_predictions.iloc[:, :5].mean(1).dropna().tz_localize('UTC', level='date').swaplevel()

In [None]:
best_predictions.iloc[:, :5]

In [None]:
freq = pd.infer_freq(trade_prices.index)
print(freq)

#### Create AlphaLens Inputs

In [None]:
# Build the AlphaLens input from the factor and trade prices: five quantiles,
# forward returns over 1 and 2 periods.
# NOTE(review): max_loss=2 lies above 1, so it presumably just disables the
# data-loss check — confirm against the alphalens documentation.
factor_data = get_clean_factor_and_forward_returns(factor=factor,
                                                   prices=trade_prices,
                                                   quantiles=5,
                                                   periods=(1, 2),max_loss=2)  # modified

#### Compute Alphalens metrics

In [118]:
# Demeaned mean return (and its standard error) per factor quantile and date.
# NOTE(review): the identical call is repeated in a later cell of this section.
mean_quant_ret_bydate, std_quant_daily = perf.mean_return_by_quantile(
    factor_data,
    by_date=True,
    by_group=False,
    demeaned=True,
    group_adjust=False,
)

In [119]:
factor_returns = perf.factor_returns(factor_data)

In [120]:
# Demeaned mean return (and its standard error) per factor quantile over all dates.
mean_quant_ret, std_quantile = perf.mean_return_by_quantile(factor_data,
                                                            by_group=False,
                                                            demeaned=True)



# Apply rate_of_return column-wise, using the first forward-return period as base period.
mean_quant_rateret = mean_quant_ret.apply(rate_of_return, axis=0,
                                          base_period=mean_quant_ret.columns[0])

In [121]:
# Demeaned mean return (and its standard error) per factor quantile and date.
mean_quant_ret_bydate, std_quant_daily = perf.mean_return_by_quantile(
    factor_data,
    by_date=True,
    by_group=False,
    demeaned=True,
    group_adjust=False,
)

# Apply rate_of_return to the by-date returns, with the first period as base period.
mean_quant_rateret_bydate = mean_quant_ret_bydate.apply(
    rate_of_return,
    base_period=mean_quant_ret_bydate.columns[0],
)

# Apply std_conversion to the standard errors with the same base period.
compstd_quant_daily = std_quant_daily.apply(std_conversion,
                                            base_period=std_quant_daily.columns[0])

# Alpha and beta of the factor, computed by alphalens on demeaned returns.
alpha_beta = perf.factor_alpha_beta(factor_data,
                                    demeaned=True)

# Return spread between the highest and the lowest factor quantile.
mean_ret_spread_quant, std_spread_quant = perf.compute_mean_returns_spread(
    mean_quant_rateret_bydate,
    factor_data["factor_quantile"].max(),
    factor_data["factor_quantile"].min(),
    std_err=compstd_quant_daily,
)

In [None]:
mean_ret_spread_quant.mean().mul(10000).to_frame('Mean Period Wise Spread (bps)').join(alpha_beta.T).T

In [None]:
fig, axes = plt.subplots(ncols=3, figsize=(18, 4))

# Use the first forward-return period actually present instead of assuming a
# '1D' column: the trade prices are resampled weekly, so the period label
# produced by alphalens need not be '1D'.
period = mean_quant_ret_bydate.columns[0]

# Left: mean return by factor quantile.
plotting.plot_quantile_returns_bar(mean_quant_rateret, ax=axes[0])
plt.setp(axes[0].xaxis.get_majorticklabels(), rotation=0)
axes[0].set_xlabel('Quantile')

# Middle: cumulative return by quantile for the selected period.
# NOTE(review): freq is kept as business days; confirm it suits weekly prices.
plotting.plot_cumulative_returns_by_quantile(mean_quant_ret_bydate[period],
                                             freq=pd.tseries.offsets.BDay(),
                                             period=period,
                                             ax=axes[1])
axes[1].set_title(f'Cumulative Return by Quantile ({period} Period)')

# Right: cumulative return of the factor-weighted long/short portfolio.
title = f"Cumulative Return - Factor-Weighted Long/Short PF ({period} Period)"
plotting.plot_cumulative_returns(factor_returns[period],
                                 period=period,
                                 freq=pd.tseries.offsets.BDay(),
                                 title=title,
                                 ax=axes[2])

fig.suptitle('Alphalens - Validation Set Performance', fontsize=14)
fig.tight_layout()
fig.subplots_adjust(top=.85);

#### Summary Tearsheet

In [None]:
create_summary_tear_sheet(factor_data)

In [None]:
create_full_tear_sheet(factor_data)