# Interpretación de los resultados

## Imports & Settings

In [None]:
%matplotlib inline

from pathlib import Path
import warnings
from random import randint
import joblib
from itertools import product

import numpy as np
import pandas as pd

import shap
import lightgbm as lgb
#from sklearn.inspection import (plot_partial_dependence, 
#                                partial_dependence)

import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
from mpl_toolkits.mplot3d import Axes3D
import seaborn as sns

In [8]:
# Silence all Python warnings for the rest of the session.
warnings.filterwarnings('ignore')
sns.set_style('darkgrid')
# Shorthand used by the later cells to slice the two-level index with .loc.
idx = pd.IndexSlice
# Seed NumPy's global random generator so random draws are repeatable.
np.random.seed(42)

In [9]:
# HDF5 store that the later cells read the 'engineered_features' table from.
DATA_STORE = Path('../data/assets.h5')

In [10]:
tiporf=0 # model switch: 0 = LightGBM 'gbdt' boosting; the later cells treat 1 as LightGBM's random-forest ('rf') mode

In [11]:
# Load the tuned hyper-parameters for the selected model type.
# Only the key that is actually needed is read, so running in LGBM mode no
# longer fails when the random-forest results ('best_params_rf') are absent.
# The store is opened read-only because nothing is written here.
params_key = 'best_params' if tiporf == 0 else 'best_params_rf'
with pd.HDFStore('data.h5', mode='r') as store:
    best_params = store[params_key]

In [None]:
# Display the hyper-parameters that were just loaded.
best_params

## Obteniendo Datos

In [None]:
# List every key available in the asset store.
# The store is opened read-only because it is only being inspected; the
# default append mode would create the file if it were missing.
with pd.HDFStore(DATA_STORE, mode='r') as store:
    keys = store.keys()
# Print one key per line.
for key in keys:
    print(key)

In [14]:
# Load the engineered features sorted by index; `data0` keeps the full,
# unfiltered history as an independent frame.
data0 = pd.read_hdf(DATA_STORE, key='engineered_features').sort_index()
# Work on the 2000-2019 date window so that a more recent training set is included.
data = data0.copy().loc[idx[:, '2000':'2019'], :]

# XLE, XLB, XLI, XLK, XLF, XLP, XLY, XLV, XLU, IYR, VOX
# data = data[data['sector'] == 'VOX']
# AQUIIIII

In [None]:
# Inspect the factor exposures: take the last entry of each first-level index
# group and keep the columns at positions 6 through 10.
latest_per_group = data.groupby(level=0).last()
resultado = latest_per_group.iloc[:, 6:11]
print(resultado)

In [17]:
# Boolean mask of the rows whose sector label is 'Unknown'.
sector_is_unknown = data['sector'].eq('Unknown')
# First index level of every such row (duplicates are kept here).
unknown_tickers = data.loc[sector_is_unknown].index.get_level_values(0)

In [None]:
# Display the distinct first-level index values that have an 'Unknown' sector.
unknown_tickers.unique()

## Tratamiento NaN

In [20]:
# Forward-fill gaps with the previous period's value so the latest observation
# is not left as NaN. The fill runs per first-level index group (the ticker
# level used elsewhere in this notebook), so values never carry over from the
# end of one ticker into the start of the next. `ffill()` is used because
# `fillna(method=...)` is deprecated in recent pandas releases.
data = data.groupby(level=0).ffill()

## Datos binarios

In [21]:
# Ascending list of the distinct values found in the 'date' index level.
dates = sorted(data.index.unique(level='date'))

In [22]:
# Alternative (window of train_length periods just before the test block):
# train_dates = dates[-int(best_params.train_length+best_params.test_length):-int(best_params.test_length)]
# Hold out the last `test_length` dates and train on everything before them.
# The end position is computed explicitly because `dates[:-0]` would be an
# empty list when test_length is 0.
n_test_dates = int(best_params.test_length)
train_dates = dates[:max(len(dates) - n_test_dates, 0)]

In [None]:
# Display how many dates ended up in the training window.
len(train_dates)

In [25]:
# Keep a copy of the frame before `data` is narrowed to the training dates.
data_copia=data.copy()

In [26]:
# Keep only the rows whose second index level is one of the training dates.
data = data.loc[idx[:, train_dates], :]

In [27]:
# Every column whose name contains 'target' is a label candidate.
target_columns = data.filter(like='target').columns
labels = sorted(target_columns)
# All remaining columns are model inputs.
features = list(data.columns.difference(labels))

In [29]:
# NOTE(review): `lookahead` is not referenced by any later visible cell - confirm it is still needed.
lookahead = 1
label = 'target_1m'# modified: target column used as the training label below

In [30]:
categoricals = ['month','sector', 'fase']# modified: columns that are integer-encoded below and passed to LightGBM as categorical features

In [31]:
# Replace each categorical column with integer codes; sort=True numbers the
# categories in sorted order.
for feature in categoricals:
    data[feature] = data[feature].factorize(sort=True)[0]

In [32]:
# Wrap the training features and target in a LightGBM Dataset.
# free_raw_data=False is passed so the raw frame stays attached to the Dataset.
lgb_train = lgb.Dataset(
    data.loc[:, features],
    label=data.loc[:, label],
    categorical_feature=categoricals,
    free_raw_data=False,
)

## Train LightGBM Model

In [33]:
# Base LightGBM settings: gradient boosting when tiporf is 0, otherwise
# random-forest mode (which additionally sets bagging_freq).
if tiporf == 0:
    params = {'boosting': 'gbdt', 'objective': 'regression', 'verbose': -1}
else:
    params = {'boosting': 'rf', 'objective': 'regression', 'bagging_freq': 1, 'verbose': -1}

In [34]:
# Names of the tuned hyper-parameters to copy from best_params for the chosen mode.
if tiporf == 0:
    train_params = ['learning_rate', 'num_leaves', 'feature_fraction', 'min_data_in_leaf']
else:
    train_params = ['bagging_fraction', 'feature_fraction', 'min_data_in_leaf', 'max_depth']

In [35]:
# Parameters that must be cast to int, keyed by model switch value.
# Any other switch value leaves `params` untouched.
int_params_by_mode = {
    1: ['min_data_in_leaf', 'max_depth'],
    0: ['min_data_in_leaf', 'num_leaves'],
}

if tiporf in int_params_by_mode:
    # Copy the tuned values into the LightGBM settings.
    params.update(best_params.loc[train_params].to_dict())
    for p in int_params_by_mode[tiporf]:
        params[p] = int(params[p])

In [None]:
# Display the tuned hyper-parameters for comparison with `params`.
best_params

In [None]:
# Display the final settings that are passed to lgb.train.
params

In [40]:
# Fit the booster on the training Dataset using the tuned number of rounds.
n_boost_rounds = int(best_params.boost_rounds)
lgb_model = lgb.train(params, lgb_train, num_boost_round=n_boost_rounds)

In [41]:
# Integer-encode the same categorical columns on the copy of the frame.
# NOTE(review): the codes are derived from data_copia on its own, so they only
# match the training encoding if both frames contain the same categories.
for feature in categoricals:
    data_copia[feature] = data_copia[feature].factorize(sort=True)[0]

## Compute Feature Importance

In [42]:
def get_feature_importance(model, importance_type='split'):
    """Return each feature's share of the model's total importance.

    Args:
        model: Object exposing ``feature_importance(importance_type=...)``
            and ``feature_name()``.
        importance_type: Importance measure forwarded to the model
            (defaults to ``'split'``).

    Returns:
        A pandas Series indexed by feature name, holding the raw importances
        divided by their sum.
    """
    raw_scores = model.feature_importance(importance_type=importance_type)
    names = model.feature_name()
    importance = pd.Series(raw_scores, index=names)
    return importance.div(importance.sum())

In [43]:
# Normalized importances by split count and by gain, side by side in one
# frame indexed by feature name.
split_share = get_feature_importance(lgb_model).to_frame('Split')
gain_share = get_feature_importance(lgb_model, 'gain').to_frame('Gain')
feature_importance = split_share.join(gain_share)

In [None]:
# The 20 features with the largest gain share, ordered from highest to lowest.
top_by_gain = (feature_importance
               .nlargest(20, columns='Gain')
               .sort_values('Gain', ascending=False))
# One bar chart per importance column, stacked vertically with a shared y-axis.
top_by_gain.plot(kind='bar',
                 subplots=True,
                 layout=(2, 1),
                 figsize=(14, 6),
                 legend=False,
                 sharey=True,
                 rot=0)
plt.suptitle('Normalized Importance (Top 20 Features)', fontsize=14)
plt.tight_layout()
# Leave room at the top of the figure for the super-title.
plt.subplots_adjust(top=.9);

In [None]:
# Names of the 30 features with the largest gain share, highest first.
top_30_by_gain = feature_importance.nlargest(30, columns='Gain').sort_values('Gain', ascending=False)
top_30_by_gain.index

# AQUIIIII

## Partial Dependence Plots

## SHAP Values

### Summary Plot

In [49]:
# Sample as many rows as the frame has (without replacement by default), i.e.
# a full shuffle of the training feature matrix.
X = data.loc[:, features].sample(n=data.shape[0])

In [None]:
# Load the JavaScript that SHAP's interactive plots need inside the notebook.
shap.initjs()

# Build a tree explainer for the fitted booster and compute SHAP values for
# every row of X.
explainer = shap.TreeExplainer(lgb_model)
shap_values = explainer.shap_values(X)

# show=False is passed so the layout can be tightened after the plot is drawn.
shap.summary_plot(shap_values, features=X, show=False)
plt.tight_layout();

In [None]:
# Same SHAP values drawn with the 'bar' plot type; show=False is passed so the
# layout can be tightened afterwards.
shap.summary_plot(shap_values, X, plot_type="bar",show=False)
plt.tight_layout();

### Feature Interaction

### Force Plots

In [None]:
# Explain the prediction for one randomly chosen row of X.
# `randint` includes both endpoints, so the upper bound must be len(X) - 1;
# using len(X) could select a position one past the last row and raise an
# IndexError.
i = randint(0, len(X) - 1)
shap.force_plot(explainer.expected_value, shap_values[i,:], X.iloc[i,:])

In [None]:
# Force plot covering the first 1000 rows of the shuffled sample X.
shap.force_plot(explainer.expected_value, shap_values[:1000,:], X.iloc[:1000])

### Interaction Plot

In [None]:
# SHAP dependence plot for the 'sentiment' feature; interaction_index is left
# commented out, so SHAP's default is used.
# NOTE(review): the title 'Interaction between' looks unfinished - confirm the intended text.
shap.dependence_plot(ind='sentiment',
                     shap_values=shap_values,
                     features=X,
                    #  interaction_index='sentiment',
                     title='Interaction between')

In [None]:
# SHAP dependence plot for the 'return_12m' feature; interaction_index is left
# commented out, so SHAP's default is used.
# NOTE(review): the title 'Interaction between' looks unfinished - confirm the intended text.
shap.dependence_plot(ind='return_12m',
                     shap_values=shap_values,
                     features=X,
                     #interaction_index='return_9m',
                     title='Interaction between')

In [None]:
# SHAP dependence plot for the 'eu_hy_oas_diff' feature; interaction_index is
# left commented out, so SHAP's default is used.
# NOTE(review): the title 'Interaction between' looks unfinished - confirm the intended text.
shap.dependence_plot(ind='eu_hy_oas_diff',
                     shap_values=shap_values,
                     features=X,
                     #interaction_index='return_9m',
                     title='Interaction between')

In [None]:
# SHAP dependence plot for the 'return_52m' feature; interaction_index is left
# commented out, so SHAP's default is used.
# NOTE(review): the title 'Interaction between' looks unfinished - confirm the intended text.
shap.dependence_plot(ind='return_52m',
                     shap_values=shap_values,
                     features=X,
                     #interaction_index='return_9m',
                     title='Interaction between')

In [None]:
# SHAP dependence plot for the 'vixoil' feature; interaction_index is left
# commented out, so SHAP's default is used.
# NOTE(review): the title 'Interaction between' looks unfinished - confirm the intended text.
shap.dependence_plot(ind='vixoil',
                     shap_values=shap_values,
                     features=X,
                     #interaction_index='return_9m',
                     title='Interaction between')

In [None]:
# SHAP dependence plot for the 'RMW' feature; interaction_index is left
# commented out, so SHAP's default is used.
# NOTE(review): the title 'Interaction between' looks unfinished - confirm the intended text.
shap.dependence_plot(ind='RMW',
                     shap_values=shap_values,
                     features=X,
                     #interaction_index='return_9m',
                     title='Interaction between')