In [15]:
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt
import numpy as np

In [3]:
# Leitura do Excel e conversão das variáveis na primeira linha para strings
df = pd.read_excel("Narrative_MP_Brazil_Dataset_with_Controls.xlsx", sheet_name="IPCA")
df.iloc[0] = df.iloc[0].astype(str)

# Coerção das colunas (exceto 'Data') para numéricas
df[df.columns[1:]] = df[df.columns[1:]].apply(pd.to_numeric, errors='coerce')

# Criando a variável de data
df['date'] = pd.to_datetime(df['Data'], format='%d-%m-%Y')

# Formatando a variável de data
df['date'] = df['date'].dt.to_period('M')

# Ordenando o DataFrame pela variável de data
df = df.sort_values('date')

# Configurando a série temporal
df = df.set_index('date')

# Descartando a variável 'Data'
df = df.drop(columns=['Data'])


In [4]:
df.head()

Unnamed: 0_level_0,Selic,Selic_d,ibcbr,ibcbrsa,ibcbrsa_log,pim,pimsa,pimsa_log,pmcsa,pmcsa_log,...,fgv_ind,fgv_ind_log,fgv_serv,fgv_serv_log,logbrlem,logfci,ff4_tc,Choque,Choque_pos,Choque_neg
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2006-01,17.25,0.0,108.55,113.36,4.730569,83.65213,90.63329,4.506822,55.37375,4.014106,...,120.771282,4.793899,133.50734,4.894156,3.821372,4.608255,-0.177879,0.073583,0.073583,0.073583
2006-02,17.25,0.0,107.8,113.64,4.733036,80.20779,91.37976,4.515024,54.72103,4.002248,...,121.915021,4.803324,133.479233,4.893946,3.861958,4.609486,0.0,0.0,0.0,0.0
2006-03,16.5,-0.75,119.09,113.56,4.732331,92.399,90.76299,4.508252,54.81225,4.003914,...,121.594866,4.800695,133.933161,4.897341,3.851362,4.611407,-0.075017,-0.034759,-0.034759,-0.034759
2006-04,15.75,-0.75,112.61,114.24,4.738301,85.66672,91.32689,4.514445,55.45193,4.015517,...,119.820464,4.785994,135.860118,4.911626,3.875185,4.609333,0.018302,0.002962,0.002962,0.002962
2006-05,15.75,0.0,117.19,115.65,4.750568,95.35205,91.89632,4.520661,55.72637,4.020453,...,120.083283,4.788186,134.599264,4.902302,3.809868,4.609002,0.0,0.0,0.0,0.0


In [5]:
# Renomeando colunas
df.rename(columns={'logbrlem': 'log_brlem', 'logfci': 'log_fci', 'ipca_log': 'log_ipca',
                   'pimsa_log': 'log_pimsa', 'ibcbrsa': 'log_ibcbrsa'}, inplace=True)


# Criando variáveis de mudanças percentuais mensais (MoM)
df['D_log_ipca'] = df['log_ipca'].diff()
df['D_log_pimsa'] = df['log_pimsa'].diff()
df['D_log_brlem'] = df['log_brlem'].diff()
df['D_log_fci'] = df['log_fci'].diff()

# Criando variáveis de mudanças percentuais anuais (YoY)
df['YoY_selic'] = df['Selic'] - df['Selic'].shift(12)
df['YoY_ipca'] = df['log_ipca'] - df['log_ipca'].shift(12)
df['YoY_pimsa'] = df['log_pimsa'] - df['log_pimsa'].shift(12)

# Criando variáveis de mudanças nas somas dos últimos seis meses (SoS)
df['SoS_selic'] = df['Selic'] - df['Selic'].shift(6)
df['SoS_ipca'] = df['log_ipca'] - df['log_ipca'].shift(6)
df['SoS_pimsa'] = df['log_pimsa'] - df['log_pimsa'].shift(6)

# Criando variáveis de mudanças nas somas dos últimos três meses (QoQ)
df['QoQ_selic'] = df['Selic'] - df['Selic'].shift(3)
df['QoQ_ipca'] = df['log_ipca'] - df['log_ipca'].shift(3)
df['QoQ_pimsa'] = df['log_pimsa'] - df['log_pimsa'].shift(3)



In [29]:
# Choice of the maximum horizon for the local projections and organizing to save the results:
horizon = range(0,37)
controls_lag = 2

list_var = [df["log_ipca"], df["log_pimsa"], df["log_fci"], df["log_brlem"]]

for var in list_var:
    for month in horizon:
        pass
        



In [26]:

# Choice of the maximum horizon for the local projections and organizing to save the results:
horizon = range(0, 37)
controls_lag = 2

list_var = ["log_ipca", "log_pimsa", "log_fci", "log_brlem"]

# Adiciona as variáveis defasadas ao DataFrame
for col in list_var:
    for lag in range(1, controls_lag + 1):
        df[f"L{lag}.{col}"] = df[col].shift(lag)

# Inicializa os resultados com NaN
for var_name in list_var:
    df[f"LP_b_lhs_{var_name}"] = np.nan
    df[f"LP_se_lhs_{var_name}"] = np.nan

# Starting the regressions for each horizon
for month in horizon:
    # Criando a variável a ser utilizada nas Projeções Locais (LHS):
    df["LP_lhs"] = df["log_ipca"].shift(-month) - df["log_ipca"].shift(1)

    # Definindo a truncagem Newey-West e executando a regressão LP:
    nw_lag_truncation = month + 1
    controls = ["Choque"] + [f"L{lag}.{col}" for col in list_var for lag in range(1, controls_lag + 1)]

    # Alinha os índices antes da regressão
    df_reg = pd.concat([df["LP_lhs"].dropna(), df[["Choque"] + controls].dropna()], axis=1, join="inner")
    
    model = sm.OLS(df_reg["LP_lhs"], sm.add_constant(df_reg[["Choque"] + controls])).fit(cov_type='HAC', cov_kwds={'maxlags': nw_lag_truncation})

    # Salvando o coeficiente de interesse e os erros padrão
    df.loc[df.index[-1], [f"LP_b_lhs_{var_name}", f"LP_se_lhs_{var_name}"]] = [model.params["Choque"], model.bse["Choque"]]

# Limpeza das variáveis auxiliares:
df = df.drop(columns=["x_axis", "ci_ub", "ci_lb"])


ValueError: shape mismatch: value array of shape (2,4) could not be broadcast to indexing result of shape (2,)