# Predicciones multivariadas


In [1]:
# Enable IPython's autoreload extension in mode 2 so edits to imported modules
# are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime
from pylab import rcParams
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.tsa.stattools import acf, pacf, adfuller, arma_order_select_ic
from statsmodels.tsa.vector_ar.var_model import VAR
from statsmodels.tsa.vector_ar.vecm import select_coint_rank
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
from sklearn.model_selection import ParameterGrid
import itertools
import warnings
import seaborn as sns
from dateutil.relativedelta import relativedelta




#Own packages
import download
import descriptive
import models_multivariate as mv
import grid

# Render up to 100 columns and 100 rows when a DataFrame is displayed.
for display_option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(display_option, 100)

# Suppress every warning for the rest of the session.
warnings.filterwarnings(action="ignore")

In [3]:
# Experiment configuration read by the modelling cells of this notebook.
# Every entry below is a top-level key of `params`.
params = {
    # Forecast start dates. The full back-test list is
    # ['2015-07-01', '2016-07-01', '2017-07-01', '2018-07-01', '2019-07-01'].
    'beginings': ['2015-07-01'],
    'periods': 18,
    # Keys used to pick a search grid from the `grid` module.
    'econometric_models': 'small',
    'ml_models': 'small',
    'us_economy_model': {
        'endog_vars': [
            'tbill_3meses_mensual',
            'cons_price_index_us',
            'ind_prod_ind_us',
            'trade_weighted_exchange_rate',
            'commodity_price_index',
        ],
        'endog_transformation': 'log_diff',
        'year_dummies': [2000, 2009],
        'start_var': '1992-01-01',
    },
    'mx_economy_model': {
        'endog_vars': [
            'igae',
            'tc_mensual',
            'tasa_cetes_91_mensual',
            'inpc',
            'asegurados_imss',
            'ing_trib_sin_gasol_neto_(mdp)_r',
        ],
        'endog_transformation': 'log_diff',
        'year_dummies': [2009, 2015],
        'exog_transformation': 'log_diff',
        'start_var': '2000-01-01',
    },
    'ingresos_fiscales_var': {
        'endog_vars': ['isr_neto_(mdp)_r', 'iva_neto_(mdp)_r'],
        'endog_transformation': 'log_diff',
        'year_dummies': [2009, 2015],
        'exog_transformation': 'log_diff',
        'month_dummies': [1, 4, 12],
        'start_var': '2000-01-01',
    },
    'ingresos_fiscales_arima': {
        'endog_vars': [
            'isr_neto_(mdp)_r',
            'iva_neto_(mdp)_r',
            'ing_trib_sin_gasol_neto_(mdp)_r',
        ],
        'endog_transformation': 'log_diff',
        'year_dummies': [2009, 2015],
        'exog_transformation': 'log_diff',
        'month_dummies': [1, 4, 12],
        # Three regressor sets: none, IGAE only, and the full macro set.
        'exog_vars': [
            False,
            ['igae'],
            [
                'igae',
                'inpc',
                'tasa_cetes_91_mensual',
                'tc_mensual',
                'asegurados_imss',
                'commodity_price_index',
            ],
        ],
        'start_arima': '2000-01-01',
    },
    'ml_model': {
        'endog_vars': ['isr_neto_(mdp)_r', 'iva_neto_(mdp)_r'],
        'endog_transformation': 'log_diff',
        'endog_lags': range(1, 13),
        'year_dummies': True,
        'exog_transformation': 'log_diff',
        'exog_pct_changes': [1, 12],
        'exog_lags': range(1, 13),
        'month_dummies': True,
        'start': '2000-01-01',
    },
}
# Merge the selected econometric and ML grids into one mapping; if both define
# the same model name, the ML entry overrides the econometric one.
selected_econometric_grid = grid.econometric_models[params['econometric_models']]
selected_ml_grid = grid.ml_models[params['ml_models']]
params['models'] = {**selected_econometric_grid, **selected_ml_grid}

In [4]:
# Expand each model's specification grid into the explicit list of parameter
# combinations produced by ParameterGrid.
all_models_params = {
    model: list(ParameterGrid(specifications))
    for model, specifications in params['models'].items()
}

## Descarga

In [6]:
# Call the downloader with every source flag set to False.
# NOTE(review): presumably this skips all downloads and relies on files already
# on disk - confirm against download.get_files.
download.get_files(**dict.fromkeys(
    (
        'inpc_2018', 'pibr_2013', 'pibr_2013_sa',
        'fiscal_current', 'fiscal_hist', 'igae',
        'igae_sa', 'igae_prim',
        'igae_secun', 'igae_terc', 'confianza_consumidor',
        'indic_mens_consumo', 'indic_adelant', 'pea',
        'pobl_ocupada', 'asegurados_imss',
        'imai', 'imai_mineria', 'imai_construccion',
        'imai_manufacturas', 'imai_egergia_gas_agua_gas',
        'tc_diario', 'tc_mensual', 'indice_tc_real',
        'tasa_cetes_28_diario', 'tasa_cetes_91_diario',
        'tasa_cetes_28_mensual', 'tasa_cetes_91_mensual',
        'pibr_us_2012', 'pibr_us_2012_sa', 'ind_prod_ind_us_sa',
        'ind_prod_ind_us', 'tbill_3meses_mensual', 'tbill_3meses_diario',
        'cons_price_index_us', 'cons_price_index_us_sa', 'trade_weighted_exchange_rate',
        'commodity_price_index',
    ),
    False,
))

## Importamos datos

In [7]:
# Net tax revenue, read from the updated CSV.
ingresos_netos = download.load_ingresos_fiscales_netos(
    '../inputs/downloads/ingresos_tributarios_netos_updated.csv'
)

# INEGI indicators: IGAE, IMAI and employment (IMSS-insured workers).
igae, imai, asegurados_imss = map(
    download.load_inegi_indic, ('igae', 'imai', 'asegurados_imss')
)

# Exchange rates.
tc_mensual, trade_weighted_exchange_rate = map(
    download.load_banxico, ('tc_mensual', 'trade_weighted_exchange_rate')
)

# Interest rates.
tasa_cetes_91_mensual = download.load_banxico('tasa_cetes_91_mensual')
tbill_3meses_mensual = download.load_fed('tbill_3meses_mensual')

# Prices.
cons_price_index_us = download.load_fed('cons_price_index_us')
inpc = download.load_inpc()
commodity_price_index = download.load_fed('commodity_price_index')

# US indicators.
ind_prod_ind_us = download.load_fed('ind_prod_ind_us')

# Holy Week indicator: rows present in the spreadsheet get 1; months added by the
# reindex over 1990-01 .. 2022-12 (month-start frequency) are filled with 0.
semana_santa = (
    pd.read_excel('../inputs/semana_santa.xlsx', index_col='fecha')
    .assign(semana_santa=1)
    .reindex(pd.date_range('1990-01-01', '2022-12-01', freq='MS'))
    .fillna(0)
)

In [8]:
# Keep only the real-revenue columns (the ones suffixed with `_r`).
ingresos_reales = [
    'ieps_neto_(mdp)_r',
    'ieps_sin_gas_neto_(mdp)_r',
    'ing_gob_fed_neto_(mdp)_r',
    'ing_trib_neto_(mdp)_r',
    'isr_neto_(mdp)_r',
    'iva_neto_(mdp)_r',
    'importaciones_neto_(mdp)_r',
    'ing_trib_sin_gasol_neto_(mdp)_r',
    'ieps_gasolina_neto_(mdp)_r',
]
ingresos_netos_reales = ingresos_netos[ingresos_reales]

## Construimos DF conjunto

In [9]:
# Build the US frame first: join the five US series column-wise, put them on a
# month-start frequency and keep observations from January 1992 onwards.
us_series = [
    tbill_3meses_mensual,
    cons_price_index_us,
    ind_prod_ind_us,
    trade_weighted_exchange_rate,
    commodity_price_index,
]
df_netos_us = (
    pd.concat(us_series, axis=1)
    .asfreq('MS')
    .loc['1992-01-01':]
)

In [10]:
# Nowcast the US series that have missing values in the last five rows.
last_us_date = df_netos_us.index.max()
vars_to_predict_us = df_netos_us.columns[df_netos_us.tail().isna().any()]
for var in vars_to_predict_us:
    # Recompute the incomplete columns on every pass: each nowcast is written
    # back into the frame, so a filled series can serve as a covariate next time.
    empty_vars = df_netos_us.columns[df_netos_us.tail().isna().any()]
    us_vars_to_use_as_covars = [
        column for column in df_netos_us.columns if column not in empty_vars
    ]
    # Regressors: the complete series, with month dummies and 2000/2009 year
    # dummies requested from construct_exog.
    exog = mv.construct_exog(
        df_netos_us[us_vars_to_use_as_covars].copy(),
        month_dummies=True,
        year_dummies=[2000, 2009],
    )
    # Forecast from the month after the last observed value to the end of the frame.
    observed_dates = df_netos_us.index[df_netos_us[var].notna()]
    prediction_start = observed_dates.max() + relativedelta(months=1)
    _, prediction = mv.predict_with_econometric_model(
        model_name='SARIMA',
        params={'order': (3, 0, 3), 'seasonal_order': (1, 0, 1, 12)},
        df=df_netos_us,
        endog_vars=[var],
        prediction_start=prediction_start,
        prediction_end=last_us_date,
        transformation='log_diff',
        lags_endog=None,
        exog_df=exog,
        train_start=None,
    )
    nowcast_dates = pd.date_range(prediction_start, last_us_date, freq='MS')
    df_netos_us.loc[nowcast_dates, var] = prediction
    print('Nowcasted {}'.format(var))

Nowcasted commodity_price_index


In [11]:
# Build the joint frame: Mexican revenue and macro series plus the US frame,
# on a month-start frequency, restricted to 2000 onwards, with the Holy Week
# indicator left-joined on the date index.
joint_series = [
    ingresos_netos_reales,
    igae,
    asegurados_imss,
    tc_mensual,
    tasa_cetes_91_mensual,
    imai,
    inpc,
    df_netos_us,
]
df_netos = (
    pd.concat(joint_series, axis=1)
    .asfreq('MS')
    .loc['2000-01-01':]
    .merge(semana_santa, left_index=True, right_index=True, how='left')
)

In [12]:
# Columns with at least one missing value in the last five rows; these are the
# series that need a nowcast.
vars_to_predict = df_netos.columns[df_netos.tail(5).isna().any(axis=0)]
vars_to_predict

Index(['igae', 'asegurados_imss', 'imai'], dtype='object')

In [13]:
# Nowcast the joint-frame series that have missing values in the last five rows.
last_date = df_netos.index.max()
vars_to_predict = df_netos.columns[df_netos.tail().isna().any()]
for var in vars_to_predict:
    # Recompute the incomplete columns on every pass: each nowcast is written
    # back into the frame, so a filled series can serve as a covariate next time.
    empty_vars = df_netos.columns[df_netos.tail().isna().any()]
    vars_to_use_as_covars = [
        column for column in df_netos.columns if column not in empty_vars
    ]
    # Regressors: the complete series, with month dummies and 2000/2009 year
    # dummies requested from construct_exog.
    exog = mv.construct_exog(
        df_netos[vars_to_use_as_covars].copy(),
        month_dummies=True,
        year_dummies=[2000, 2009],
    )
    # Forecast from the month after the last observed value to the end of the frame.
    observed_dates = df_netos.index[df_netos[var].notna()]
    prediction_start = observed_dates.max() + relativedelta(months=1)
    sarima_params = {
        'order': (3, 0, 3),
        'seasonal_order': (1, 0, 1, 12),
        'enforce_stationarity': False,
        'enforce_invertibility': False,
    }
    _, prediction = mv.predict_with_econometric_model(
        model_name='SARIMA',
        params=sarima_params,
        df=df_netos,
        endog_vars=[var],
        prediction_start=prediction_start,
        prediction_end=last_date,
        transformation='log_diff',
        lags_endog=None,
        exog_df=exog,
        train_start=None,
    )
    nowcast_dates = pd.date_range(prediction_start, last_date, freq='MS')
    df_netos.loc[nowcast_dates, var] = prediction
    print('Nowcasted {}'.format(var))

Nowcasted igae
Nowcasted asegurados_imss
Nowcasted imai


In [15]:
# Build the back-test splits from the configured start dates and number of periods.
begin_and_ends = mv.get_start_and_end_dates(params['beginings'], params['periods'])

# Run every VAR specification on the US block and collect forecasts and accuracies.
prediction_us, accuracies_us = mv.run_predictions(
    model_name='VAR',
    params=all_models_params['VAR'],
    begin_and_ends=begin_and_ends,
    df=df_netos_us,
    train_start=params['us_economy_model']['start_var'],
    # Endogenous side.
    endog_cols=params['us_economy_model']['endog_vars'],
    endog_transformation=params['us_economy_model']['endog_transformation'],
    endog_lags=None,
    # Exogenous side: only year dummies are configured for this model.
    exog=True,
    exog_year_dummies=params['us_economy_model']['year_dummies'],
    exog_month_dummies=None,
    exog_pct_changes=None,
    exog_lags=None,
    semana_santa=None,
    us_prediction_dict=None,
    mex_prediction_dict=None,
)

# Save the accuracy records and show their mean per specification and variable,
# which is the table used to pick the best specification.
us_var_accuracies = pd.DataFrame(accuracies_us)
us_var_accuracies.to_csv('../results/results_var_us.csv')
us_var_accuracies.groupby(['params', 'variable']).mean()

COMPLETED VAR with params {'ic': 'aic', 'maxlags': 12, 'trend': 'c'} for split 2015-07-01 00:00:00
COMPLETED VAR with params {'ic': 'aic', 'maxlags': 12, 'trend': 'ct'} for split 2015-07-01 00:00:00
COMPLETED VAR with params {'ic': 'bic', 'maxlags': 12, 'trend': 'c'} for split 2015-07-01 00:00:00
COMPLETED VAR with params {'ic': 'bic', 'maxlags': 12, 'trend': 'ct'} for split 2015-07-01 00:00:00
COMPLETED VAR with params {'ic': 'fpe', 'maxlags': 12, 'trend': 'c'} for split 2015-07-01 00:00:00
COMPLETED VAR with params {'ic': 'fpe', 'maxlags': 12, 'trend': 'ct'} for split 2015-07-01 00:00:00
COMPLETED VAR with params {'ic': 'aic', 'maxlags': 12, 'trend': 'c'} for split 2016-07-01 00:00:00
COMPLETED VAR with params {'ic': 'aic', 'maxlags': 12, 'trend': 'ct'} for split 2016-07-01 00:00:00
COMPLETED VAR with params {'ic': 'bic', 'maxlags': 12, 'trend': 'c'} for split 2016-07-01 00:00:00
COMPLETED VAR with params {'ic': 'bic', 'maxlags': 12, 'trend': 'ct'} for split 2016-07-01 00:00:00
COMPL

Unnamed: 0_level_0,Unnamed: 1_level_0,forecast_biass,mae,mape,rmse
params,variable,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
"{'ic': 'aic', 'maxlags': 12, 'trend': 'c'}",commodity_price_index,-10.368454,11.562484,0.107717,13.167238
"{'ic': 'aic', 'maxlags': 12, 'trend': 'c'}",cons_price_index_us,-1.878735,1.940145,0.007948,2.099118
"{'ic': 'aic', 'maxlags': 12, 'trend': 'c'}",ind_prod_ind_us,-0.726239,2.060474,0.019783,2.375832
"{'ic': 'aic', 'maxlags': 12, 'trend': 'c'}",tbill_3meses_mensual,0.361742,0.385617,0.499361,0.462562
"{'ic': 'aic', 'maxlags': 12, 'trend': 'c'}",trade_weighted_exchange_rate,4.633248,5.16239,0.04165,5.759568
"{'ic': 'aic', 'maxlags': 12, 'trend': 'ct'}",commodity_price_index,3.117787,9.007729,0.0804,10.385336
"{'ic': 'aic', 'maxlags': 12, 'trend': 'ct'}",cons_price_index_us,-0.013607,1.112796,0.004533,1.304005
"{'ic': 'aic', 'maxlags': 12, 'trend': 'ct'}",ind_prod_ind_us,1.542896,2.402951,0.022604,2.895623
"{'ic': 'aic', 'maxlags': 12, 'trend': 'ct'}",tbill_3meses_mensual,0.030699,0.410566,0.503906,0.494518
"{'ic': 'aic', 'maxlags': 12, 'trend': 'ct'}",trade_weighted_exchange_rate,3.457626,4.318589,0.034995,4.827105


In [16]:
# Hard-coded specification choice; keep only its forecasts for each split.
best_specification = "{'ic': 'aic', 'maxlags': 12, 'trend': 'ct'}"
best_prediction_us = {
    split: predictions[best_specification]
    for split, predictions in prediction_us.items()
}

In [17]:
# Run every VAR specification on the Mexican block, passing the selected US
# forecasts and the Holy Week indicator.
prediction_mx, accuracies_mx = mv.run_predictions(
    model_name='VAR',
    params=all_models_params['VAR'],
    begin_and_ends=begin_and_ends,
    df=df_netos,
    train_start=params['mx_economy_model']['start_var'],
    # Endogenous side.
    endog_cols=params['mx_economy_model']['endog_vars'],
    endog_transformation=params['mx_economy_model']['endog_transformation'],
    endog_lags=None,
    # Exogenous side.
    exog=True,
    exog_transformation=params['mx_economy_model']['exog_transformation'],
    exog_year_dummies=params['mx_economy_model']['year_dummies'],
    exog_month_dummies=None,
    exog_pct_changes=None,
    exog_lags=None,
    semana_santa=semana_santa,
    us_prediction_dict=best_prediction_us,
    mex_prediction_dict=None,
)

# Save the accuracy records and show their mean per specification and variable,
# which is the table used to pick the best specification.
mx_var_accuracies = pd.DataFrame(accuracies_mx)
mx_var_accuracies.to_csv('../results/results_var_mx.csv')
mx_var_accuracies.groupby(['params', 'variable']).mean()

COMPLETED VAR with params {'ic': 'aic', 'maxlags': 12, 'trend': 'c'} for split 2015-07-01 00:00:00
COMPLETED VAR with params {'ic': 'aic', 'maxlags': 12, 'trend': 'ct'} for split 2015-07-01 00:00:00
COMPLETED VAR with params {'ic': 'bic', 'maxlags': 12, 'trend': 'c'} for split 2015-07-01 00:00:00
COMPLETED VAR with params {'ic': 'bic', 'maxlags': 12, 'trend': 'ct'} for split 2015-07-01 00:00:00
COMPLETED VAR with params {'ic': 'fpe', 'maxlags': 12, 'trend': 'c'} for split 2015-07-01 00:00:00
COMPLETED VAR with params {'ic': 'fpe', 'maxlags': 12, 'trend': 'ct'} for split 2015-07-01 00:00:00
COMPLETED VAR with params {'ic': 'aic', 'maxlags': 12, 'trend': 'c'} for split 2016-07-01 00:00:00
COMPLETED VAR with params {'ic': 'aic', 'maxlags': 12, 'trend': 'ct'} for split 2016-07-01 00:00:00
COMPLETED VAR with params {'ic': 'bic', 'maxlags': 12, 'trend': 'c'} for split 2016-07-01 00:00:00
COMPLETED VAR with params {'ic': 'bic', 'maxlags': 12, 'trend': 'ct'} for split 2016-07-01 00:00:00
COMPL

Unnamed: 0_level_0,Unnamed: 1_level_0,forecast_biass,mae,mape,rmse
params,variable,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
"{'ic': 'aic', 'maxlags': 12, 'trend': 'c'}",asegurados_imss,54958.926702,463740.926894,0.017485,587240.093977
"{'ic': 'aic', 'maxlags': 12, 'trend': 'c'}",igae,0.422983,1.806044,0.016196,2.192484
"{'ic': 'aic', 'maxlags': 12, 'trend': 'c'}",ing_trib_sin_gasol_neto_(mdp)_r,-1811.322735,17243.029841,0.07319,20199.341534
"{'ic': 'aic', 'maxlags': 12, 'trend': 'c'}",inpc,0.118792,0.475857,0.005079,0.578736
"{'ic': 'aic', 'maxlags': 12, 'trend': 'c'}",tasa_cetes_91_mensual,1.255689,1.290036,0.18668,1.607244
"{'ic': 'aic', 'maxlags': 12, 'trend': 'c'}",tc_mensual,-0.440817,1.704516,0.090585,2.012077
"{'ic': 'aic', 'maxlags': 12, 'trend': 'ct'}",asegurados_imss,-146437.814601,552545.675048,0.020723,689928.340153
"{'ic': 'aic', 'maxlags': 12, 'trend': 'ct'}",igae,0.067639,2.105408,0.018837,2.523434
"{'ic': 'aic', 'maxlags': 12, 'trend': 'ct'}",ing_trib_sin_gasol_neto_(mdp)_r,1829.412532,17857.794847,0.075642,20698.767338
"{'ic': 'aic', 'maxlags': 12, 'trend': 'ct'}",inpc,0.27473,0.615534,0.006542,0.752117


In [18]:
# Hard-coded specification choice; keep only its forecasts for each split.
best_specification = "{'ic': 'aic', 'maxlags': 12, 'trend': 'c'}"
best_prediction_mx = {
    split: predictions[best_specification]
    for split, predictions in prediction_mx.items()
}

In [19]:
# Year-over-year change of the annual means for the most recent forecast split.
# The split is taken from the dictionary instead of hard-coding 2019-07-01:
# that key only exists when params['beginings'] includes it, and the current
# single-split configuration ('2015-07-01') would raise KeyError.
# With the full five-split list this still selects 2019-07-01.
latest_split = max(best_prediction_mx)
best_prediction_mx[latest_split].resample('YS').mean().pct_change()

Unnamed: 0,asegurados_imss,igae,ing_trib_sin_gasol_neto_(mdp)_r,inpc,tasa_cetes_91_mensual,tc_mensual
2000-01-01,,,,,,
2001-01-01,0.022792,-0.005092,0.035114,0.063677,-0.242249,-0.012438
2002-01-01,0.011199,-0.000869,0.033167,0.050307,-0.392266,0.034979
2003-01-01,0.020787,0.013444,0.056377,0.045469,-0.124454,0.116869
2004-01-01,0.038711,0.03913,0.005536,0.046884,0.0902,0.045438
2005-01-01,0.038507,0.0234,0.067471,0.039881,0.313813,-0.034731
2006-01-01,0.051956,0.044024,0.131751,0.036295,-0.218044,0.000626
2007-01-01,0.0495,0.023721,0.083697,0.039668,0.007996,0.002572
2008-01-01,0.027006,0.009939,0.098258,0.05125,0.072643,0.02043
2009-01-01,-0.007736,-0.054572,-0.118258,0.052974,-0.300687,0.211227


In [20]:
# Run every VAR specification on the revenue series, with the selected Mexican
# and US forecasts supplied for the exogenous regressors.
prediction_var, results_var = mv.run_predictions(
    model_name='VAR',
    params=all_models_params['VAR'],
    begin_and_ends=begin_and_ends,
    df=df_netos,
    train_start=params['ingresos_fiscales_var']['start_var'],
    # Endogenous side.
    endog_cols=params['ingresos_fiscales_var']['endog_vars'],
    endog_transformation=params['ingresos_fiscales_var']['endog_transformation'],
    endog_lags=None,
    # Exogenous side.
    exog=True,
    exog_vars=[
        'igae',
        'inpc',
        'tasa_cetes_91_mensual',
        'tc_mensual',
        'asegurados_imss',
        'commodity_price_index',
    ],
    exog_transformation=params['ingresos_fiscales_var']['exog_transformation'],
    exog_month_dummies=params['ingresos_fiscales_var']['month_dummies'],
    exog_year_dummies=params['ingresos_fiscales_var']['year_dummies'],
    exog_pct_changes=None,
    exog_lags=None,
    semana_santa=semana_santa,
    mex_prediction_dict=best_prediction_mx,
    us_prediction_dict=best_prediction_us,
)

# The accuracy records are rebound to a DataFrame, saved, and summarised by
# specification and variable.
results_var = pd.DataFrame(results_var)
results_var.to_csv('../results/results_var.csv')
results_var.groupby(['params', 'variable']).mean()

COMPLETED VAR with params {'ic': 'aic', 'maxlags': 12, 'trend': 'c'} for split 2015-07-01 00:00:00
COMPLETED VAR with params {'ic': 'aic', 'maxlags': 12, 'trend': 'ct'} for split 2015-07-01 00:00:00
COMPLETED VAR with params {'ic': 'bic', 'maxlags': 12, 'trend': 'c'} for split 2015-07-01 00:00:00
COMPLETED VAR with params {'ic': 'bic', 'maxlags': 12, 'trend': 'ct'} for split 2015-07-01 00:00:00
COMPLETED VAR with params {'ic': 'fpe', 'maxlags': 12, 'trend': 'c'} for split 2015-07-01 00:00:00
COMPLETED VAR with params {'ic': 'fpe', 'maxlags': 12, 'trend': 'ct'} for split 2015-07-01 00:00:00
COMPLETED VAR with params {'ic': 'aic', 'maxlags': 12, 'trend': 'c'} for split 2016-07-01 00:00:00
COMPLETED VAR with params {'ic': 'aic', 'maxlags': 12, 'trend': 'ct'} for split 2016-07-01 00:00:00
COMPLETED VAR with params {'ic': 'bic', 'maxlags': 12, 'trend': 'c'} for split 2016-07-01 00:00:00
COMPLETED VAR with params {'ic': 'bic', 'maxlags': 12, 'trend': 'ct'} for split 2016-07-01 00:00:00
COMPL

Unnamed: 0_level_0,Unnamed: 1_level_0,forecast_biass,mae,mape,rmse
params,variable,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
"{'ic': 'aic', 'maxlags': 12, 'trend': 'c'}",isr_neto_(mdp)_r,-473.924458,8780.429062,0.062043,12829.252832
"{'ic': 'aic', 'maxlags': 12, 'trend': 'c'}",iva_neto_(mdp)_r,-1006.206922,8053.399032,0.109996,9525.820166
"{'ic': 'aic', 'maxlags': 12, 'trend': 'ct'}",isr_neto_(mdp)_r,-4264.73345,10447.532785,0.074657,13950.195279
"{'ic': 'aic', 'maxlags': 12, 'trend': 'ct'}",iva_neto_(mdp)_r,544.217803,8060.936523,0.108364,9464.359721
"{'ic': 'bic', 'maxlags': 12, 'trend': 'c'}",isr_neto_(mdp)_r,-4136.820309,10106.531065,0.072727,13475.538214
"{'ic': 'bic', 'maxlags': 12, 'trend': 'c'}",iva_neto_(mdp)_r,-899.33925,8174.987031,0.111192,9688.591901
"{'ic': 'bic', 'maxlags': 12, 'trend': 'ct'}",isr_neto_(mdp)_r,-8485.679108,13110.152758,0.096351,16255.351928
"{'ic': 'bic', 'maxlags': 12, 'trend': 'ct'}",iva_neto_(mdp)_r,1357.243547,8260.423375,0.109354,9858.138079
"{'ic': 'fpe', 'maxlags': 12, 'trend': 'c'}",isr_neto_(mdp)_r,-473.924458,8780.429062,0.062043,12829.252832
"{'ic': 'fpe', 'maxlags': 12, 'trend': 'c'}",iva_neto_(mdp)_r,-1006.206922,8053.399032,0.109996,9525.820166


In [None]:
# Back-test ARIMA and SARIMA for each revenue series under every configured
# set of exogenous regressors.
#
# Resulting structure:
#   predictions_arima[model_name][endog_var][exog_key] -> forecasts from run_predictions
#   accuracies_arima -> flat list of accuracy records across all runs
arima_cfg = params['ingresos_fiscales_arima']
predictions_arima = {}
accuracies_arima = []
for model_name in ['ARIMA', 'SARIMA']:
    predictions_arima[model_name] = {}
    for var in arima_cfg['endog_vars']:
        predictions_arima[model_name][var] = {}
        for exog_var in arima_cfg['exog_vars']:
            # The regressor sets are lists (or False for "no regressors"). Lists are
            # unhashable and cannot be dict keys, so store results under a tuple.
            exog_key = tuple(exog_var) if isinstance(exog_var, list) else exog_var
            preds, accurs = mv.run_predictions(
                model_name=model_name,
                params=all_models_params[model_name],
                begin_and_ends=begin_and_ends,
                df=df_netos,
                endog_cols=[var],
                endog_transformation=arima_cfg['endog_transformation'],
                endog_lags=None,
                exog=True,
                mex_prediction_dict=best_prediction_mx,
                us_prediction_dict=best_prediction_us,
                exog_vars=exog_var,
                exog_transformation=arima_cfg['exog_transformation'],
                semana_santa=semana_santa,
                exog_pct_changes=None,
                # Read dummies and training start from this model's own config
                # rather than from the VAR config (the values are currently equal).
                exog_month_dummies=arima_cfg['month_dummies'],
                exog_year_dummies=arima_cfg['year_dummies'],
                exog_lags=None,
                train_start=arima_cfg['start_arima'])
            predictions_arima[model_name][var][exog_key] = preds
            accuracies_arima += accurs

In [None]:
# Save the ARIMA/SARIMA accuracy records, then show their mean per model,
# specification, regressor set and variable, sorted by MAPE ascending.
results_arima = pd.DataFrame(accuracies_arima)
results_arima.to_csv('../results/results_arima.csv')
(
    results_arima
    .groupby(['model', 'params', 'exog_vars', 'variable'])
    .mean()
    .sort_values('mape')
)

In [112]:
# Back-test the 'DT' and 'RF' model grids on the revenue series, collecting
# forecasts per model and a flat list of accuracy records.
predictions_ml = {}
accuracies_ml = []
for model_name in ('DT', 'RF'):
    preds, accurs = mv.run_predictions(
        model_name=model_name,
        params=all_models_params[model_name],
        begin_and_ends=begin_and_ends,
        df=df_netos,
        train_start=params['ml_model']['start'],
        # Endogenous side.
        endog_cols=params['ml_model']['endog_vars'],
        endog_transformation=params['ml_model']['endog_transformation'],
        endog_lags=params['ml_model']['endog_lags'],
        # Exogenous side: exog_vars=None leaves the regressor choice to run_predictions.
        exog=True,
        exog_vars=None,
        exog_transformation=params['ml_model']['exog_transformation'],
        exog_pct_changes=params['ml_model']['exog_pct_changes'],
        exog_month_dummies=params['ml_model']['month_dummies'],
        exog_year_dummies=params['ml_model']['year_dummies'],
        exog_lags=params['ml_model']['exog_lags'],
        semana_santa=semana_santa,
        mex_prediction_dict=best_prediction_mx,
        us_prediction_dict=best_prediction_us,
    )
    predictions_ml[model_name] = preds
    accuracies_ml += accurs

            isr_neto_(mdp)_r  iva_neto_(mdp)_r
2015-07-01     115934.347074      74556.719133
2015-08-01     123165.965968      79207.332108
2015-09-01     130848.670439      84148.035651
2015-10-01     139010.597782      89396.924698
2015-11-01     147681.640408      94973.222888
2015-12-01     156893.555324     100897.352970
2016-01-01     166680.080437     107191.011601
2016-02-01     126730.500917      81499.664258
2016-03-01     134635.549835      86583.356250
2016-04-01     102366.345320      65831.214384
2016-05-01      77831.365245      50052.908260
2016-06-01      82686.240315      53175.050802
2016-07-01      87843.947180      56491.942748
2016-08-01      93323.375530      60015.731951
2016-09-01      99144.593338      63759.324011
2016-10-01     105328.920352      67736.429535
2016-11-01     111899.006178      71961.614357
2016-12-01     118878.913235      76450.352881
            isr_neto_(mdp)_r  iva_neto_(mdp)_r
fecha                                         
2000-01-01   

            isr_neto_(mdp)_r  iva_neto_(mdp)_r
2015-07-01     106941.867674      68773.706782
2015-08-01     104800.174401      67396.396021
2015-09-01     102701.372189      66046.668257
2015-10-01     100644.602070      64723.971093
2015-11-01     119404.421121      68364.786793
2015-12-01     125035.625477      52035.984559
2016-01-01     179195.912973      75165.972482
2016-02-01     122261.029332      57845.290176
2016-03-01     155134.696995      59354.556333
2016-04-01     112639.625101      68369.814643
2016-05-01      80182.421938      69982.911166
2016-06-01      90809.671662      84661.622712
2016-07-01      88991.053126      82966.129340
2016-08-01      87208.855527      81304.591115
2016-09-01      85462.349474      79676.328026
2016-10-01     102934.505665      93170.295121
2016-11-01     104184.479496     113410.514058
2016-12-01     125484.239093     132617.696202
            isr_neto_(mdp)_r  iva_neto_(mdp)_r
fecha                                         
2000-01-01   

            isr_neto_(mdp)_r  iva_neto_(mdp)_r
2016-07-01     127237.133764      71193.666561
2016-08-01     122953.988200      68797.095465
2016-09-01     118815.024884      66481.199425
2016-10-01     127822.688102      68551.162085
2016-11-01     137513.244722      70685.575228
2016-12-01     209308.773395     107590.443942
2017-01-01     225176.993270     110940.386535
2017-02-01     136185.509163      80472.732127
2017-03-01     118689.507587      78552.276237
2017-04-01      81596.194940      90044.156460
2017-05-01      56095.430540      85154.129549
2017-06-01      75840.727059      87805.493695
2017-07-01      73287.723356      84849.723632
2017-08-01      70820.660653      81993.452773
2017-09-01      68436.645943      79233.331705
2017-10-01      73624.998671      81700.345530
2017-11-01      79206.693352      84244.172447
2017-12-01     120560.429387     128227.971321
            isr_neto_(mdp)_r  iva_neto_(mdp)_r
fecha                                         
2000-01-01   

            isr_neto_(mdp)_r  iva_neto_(mdp)_r
2017-07-01     135300.238465      78053.249361
2017-08-01     141816.217158      81812.247242
2017-09-01     148646.001494      85752.276217
2017-10-01     155804.704165      89882.054634
2017-11-01     163308.165683      94210.720714
2017-12-01     171172.989426      98747.852768
2018-01-01     179416.578384     103503.490392
2018-02-01     188057.173665     108488.156685
2018-03-01     197113.894854     113712.881529
2018-04-01     206606.782328     119189.226002
2018-05-01     151529.350946      87415.649440
2018-06-01     158826.914005      91625.534919
2018-07-01     166475.923343      96038.165968
2018-08-01     174493.304404     100663.306694
2018-09-01     182896.797749     105511.191435
2018-10-01     191704.998316     110592.547411
2018-11-01     200937.396563     115918.618457
2018-12-01     210614.421596     121501.189903
            isr_neto_(mdp)_r  iva_neto_(mdp)_r
fecha                                         
2000-01-01   

            isr_neto_(mdp)_r  iva_neto_(mdp)_r
2017-07-01     127477.524416      73540.409936
2017-08-01     125891.385906      72625.383724
2017-09-01     124324.982917      71721.742722
2017-10-01     134958.177718      81496.688036
2017-11-01     103541.323126      84564.073621
2017-12-01     110689.484415      59808.520740
2018-01-01     120156.470233      66039.253821
2018-02-01     118661.423906      65217.560710
2018-03-01     122965.859622      53650.275996
2018-04-01      83396.256164      74211.965120
2018-05-01      53663.450887      75962.899557
2018-06-01      62603.920917      81656.307766
2018-07-01      61824.971920      80640.299533
2018-08-01      61055.714992      79636.932977
2018-09-01      57278.944278      74710.769452
2018-10-01      59356.734392      52085.363133
2018-11-01      61868.526827      59453.772102
2018-12-01      44936.021877      43697.020353
            isr_neto_(mdp)_r  iva_neto_(mdp)_r
fecha                                         
2000-01-01   

            isr_neto_(mdp)_r  iva_neto_(mdp)_r
2018-07-01     137298.171346      82979.508263
2018-08-01     147161.856764      88940.867818
2018-09-01     145199.461667      87754.846340
2018-10-01     146580.078465      88589.255873
2018-11-01     144625.441349      87407.923120
2018-12-01     155015.527706      93687.425958
2019-01-01     167098.738958     100990.210242
2019-02-01     132232.010147      96425.681804
2019-03-01     142539.282821     103941.908731
2019-04-01     140638.529488     102555.849214
2019-05-01      88107.896146     117559.355055
2019-06-01      94975.765048     126722.917846
2019-07-01     102378.973294     136600.765629
2019-08-01      96289.269327     128475.481720
2019-09-01      95005.257327     126762.268386
2019-10-01      86587.957428     115531.352761
2019-11-01      85433.311878     113990.748659
2019-12-01      92092.701234     122876.144317
            isr_neto_(mdp)_r  iva_neto_(mdp)_r
fecha                                         
2000-01-01   

            isr_neto_(mdp)_r  iva_neto_(mdp)_r
2019-07-01     148613.081901      84020.886054
2019-08-01     157284.142970      88923.215141
2019-09-01     166461.130564      94111.578232
2019-10-01     176173.563752      99602.664425
2019-11-01     186452.683937     105414.136570
2019-12-01     197331.555355     111564.688083
2020-01-01     208845.171421     118074.103077
2020-02-01     164222.720354      92846.055663
2020-03-01     173804.550021      98263.302976
2020-04-01     183945.446422     103996.627998
2020-05-01     144643.141149      81776.413796
2020-06-01     153082.569860      86547.785663
2020-07-01     162014.410145      91597.550632
2020-08-01     171467.392524      96941.951980
2020-09-01     181471.923841     102598.180726
2020-10-01     192060.185075     108584.430922
2020-11-01     203266.234855     114919.958182
2020-12-01     215126.119013     121625.141621
            isr_neto_(mdp)_r  iva_neto_(mdp)_r
fecha                                         
2000-01-01   

            isr_neto_(mdp)_r  iva_neto_(mdp)_r
2019-07-01     164134.263347      69750.447498
2019-08-01     162202.953362      68929.718584
2019-09-01     154719.491196      65749.548739
2019-10-01     150574.428962      69397.160470
2019-11-01     148802.672765      68580.588560
2019-12-01     191173.876822      72236.366630
2020-01-01     216678.620242      87387.733889
2020-02-01     171466.581310      72326.781637
2020-03-01     172190.937963      56412.454285
2020-04-01     201270.627778      54752.879918
2020-05-01     139888.590980      56813.677708
2020-06-01     190465.142563      61597.533295
2020-07-01     222630.988989      54119.149359
2020-08-01     220011.368665      53482.348417
2020-09-01     217422.572490      52853.040487
2020-10-01     206185.762707      46834.039811
2020-11-01     241006.515151      45456.248807
2020-12-01     309632.542108      47879.353669
            isr_neto_(mdp)_r  iva_neto_(mdp)_r
fecha                                         
2000-01-01   

            isr_neto_(mdp)_r  iva_neto_(mdp)_r
2015-07-01     103119.796114      66315.754304
2015-08-01     105497.298973      67844.712868
2015-09-01     103805.012147      66756.412837
2015-10-01     107118.432322      68887.254503
2015-11-01     109479.358481      70405.552686
2015-12-01     113979.270533      73299.420530
2016-01-01     132601.028182      85274.966948
2016-02-01      98649.763655      63441.101856
2016-03-01     106543.218503      68517.337768
2016-04-01     108224.163774      69598.344111
2016-05-01      98452.580397      63314.294425
2016-06-01      98730.005562      63492.704970
2016-07-01      98889.655695      63595.375063
2016-08-01      99342.828487      63886.807907
2016-09-01     100997.384415      64950.843413
2016-10-01     108560.168938      69814.427120
2016-11-01     109161.562285      70201.179761
2016-12-01     111761.804740      71873.380892
            isr_neto_(mdp)_r  iva_neto_(mdp)_r
fecha                                         
2000-01-01   

            isr_neto_(mdp)_r  iva_neto_(mdp)_r
2016-07-01     124863.437071      69865.499493
2016-08-01     121317.444222      67881.391355
2016-09-01     119795.005982      67029.533433
2016-10-01     121083.312901      67750.386619
2016-11-01     124712.550319      69781.073030
2016-12-01     132700.411876      74250.563464
2017-01-01     167279.502101      93598.784747
2017-02-01     118970.267479      66568.063135
2017-03-01     130375.031792      72949.431244
2017-04-01     135924.878527      78356.156674
2017-05-01     126827.044877      74645.531274
2017-06-01     137816.039459      83567.689452
2017-07-01     119609.022783      72527.477286
2017-08-01     117577.699470      71295.741151
2017-09-01     116430.271872      70599.974000
2017-10-01     121583.642914      73724.830240
2017-11-01     122733.407413      74422.014421
2017-12-01     130216.480174      78959.534895
            isr_neto_(mdp)_r  iva_neto_(mdp)_r
fecha                                         
2000-01-01   

            isr_neto_(mdp)_r  iva_neto_(mdp)_r
2017-07-01     120830.232506      69705.658874
2017-08-01     120766.372393      69668.818663
2017-09-01     117966.032298      68053.332644
2017-10-01     116757.496367      67356.141290
2017-11-01     119666.942751      69034.569552
2017-12-01     122637.257014      70748.111838
2018-01-01     139774.805386      80634.578792
2018-02-01     111900.820785      65738.670454
2018-03-01     113692.233445      66791.076376
2018-04-01     112188.019971      65907.392117
2018-05-01      95486.883233      56095.931249
2018-06-01      99281.672298      58325.265995
2018-07-01     102264.986339      60077.881367
2018-08-01     104644.514359      61475.788971
2018-09-01     100348.385300      58951.930697
2018-10-01      99644.523448      58538.431117
2018-11-01      98500.747368      57866.493966
2018-12-01     103189.922889      60621.256282
            isr_neto_(mdp)_r  iva_neto_(mdp)_r
fecha                                         
2000-01-01   

            isr_neto_(mdp)_r  iva_neto_(mdp)_r
2018-07-01     143679.037736      86835.940947
2018-08-01     140569.255785      84956.468157
2018-09-01     142432.016831      86082.273360
2018-10-01     144006.265709      87033.709176
2018-11-01     144841.526163      87538.519262
2018-12-01     169182.491165     102249.576859
2019-01-01     251002.311974     151699.386935
2019-02-01     209206.480452     128835.705090
2019-03-01     209069.150599     128751.133195
2019-04-01     215084.034840     132455.281607
2019-05-01     188350.297668     115991.834247
2019-06-01     193487.717921     119155.613683
2019-07-01     193588.063199     119217.409353
2019-08-01     196565.107114     121050.763420
2019-09-01     186032.562498     114564.502531
2019-10-01     188501.393299     116084.883526
2019-11-01     190264.243228     117170.500057
2019-12-01     194827.188376     119980.500274
            isr_neto_(mdp)_r  iva_neto_(mdp)_r
fecha                                         
2000-01-01   

            isr_neto_(mdp)_r  iva_neto_(mdp)_r
2019-07-01     146207.867041      82661.057693
2019-08-01     139779.429261      79026.633110
2019-09-01     140939.986164      79682.773323
2019-10-01     140169.582734      79738.113361
2019-11-01     138809.518208      78964.414980
2019-12-01     146013.204353      83062.367839
2020-01-01     179262.931875     101096.030044
2020-02-01     137859.987218      77746.677820
2020-03-01     146048.560031      84379.362499
2020-04-01     146900.370441      84871.494837
2020-05-01     132964.113094      80094.396748
2020-06-01     139602.282896      84093.071228
2020-07-01     138605.457752      83492.607639
2020-08-01     137818.657116      83018.657782
2020-09-01     139638.531686      84114.906630
2020-10-01     136634.473160      84384.938935
2020-11-01     139710.504829      86284.684575
2020-12-01     145329.773902      89755.124110
            isr_neto_(mdp)_r  iva_neto_(mdp)_r
fecha                                         
2000-01-01   

In [None]:
# Tabulate the ML accuracy records gathered in `accuracies_ml`
# (filled by earlier cells; its exact structure is not visible here — TODO confirm)
# and persist the table for later comparison.
results_ml = pd.DataFrame(data=accuracies_ml)

# pandas writes the index as the first CSV column by default.
results_ml.to_csv(path_or_buf='../results/results_ml.csv')