In [1]:
import pandas as pd
import numpy as np
import os
from datetime import datetime
from typing import List

### Constantes

In [2]:
DIR_DATASET_DADOS_BNDES: str = os.path.join('datasets', 'dados-originais')
DIR_OUTPUT: str = os.path.join('datasets')

ARQUIVO_INDICE_ATIVIDADE_ECONOMICA = os.path.join(DIR_DATASET_DADOS_BNDES, '0.1 - Exo - IBC-Br.xls')
ARQUIVO_ESTRUTURA_TERMO_TX_JUROS = os.path.join(DIR_DATASET_DADOS_BNDES, '0.2 - Exo - ETTJ.xls')
ARQUIVO_PARAMETROS_ESTRUTURA_TERMO_TX_JUROS = os.path.join(DIR_DATASET_DADOS_BNDES, '0.3 - Exo - ETTJ - progress.xls')
ARQUIVO_INDICES_ONIBUS_CAMINHOES = os.path.join(DIR_DATASET_DADOS_BNDES, '1.1 - Endo - FINAME OeC.xls')
ARQUIVO_OUTPUT_OEC_MPME = os.path.join(DIR_OUTPUT, 'dados-gerados', 'oec_mpme.csv')

DATA_CORRENTE = datetime.today().strftime("%Y-%m-%d")
TAXA_FIXA_TIF = 0.15

### Funções 

In [3]:
def calcula_custo(row: pd.core.series.Series) -> float:    
    
    #: float, spread: float, tx_tjlp: float, tx_ji: float, tx_inflacao: float, tx_di_60: float, tx_selic: float) -> float :
    data_base: str = row['data_base']
    custo_psi: float = row['custo_psi']
    spread: float = row['spread']
    tx_tjlp: float = row['A_TJLP']
    tx_selic: float = row['tx_fixa_selic']
    tx_di_60: float = row['tx_pre_prazo_medio']
    tx_inflacao: float = row['inflacao']
    tx_ji: float = row['A_Ji']    
    
    # PSI
    if (data_base >= '2009-10-01' and data_base <= '2015-12-01'):
        return custo_psi
    # TJLP
    elif (data_base >= '2016-01-01' and data_base <= '2017-12-01'):
        return spread + tx_tjlp + TAXA_FIXA_TIF
    # TLP
    elif (data_base >= '2018-01-01' and data_base <= '2020-07-01'):
         return ( (1 + (spread + TAXA_FIXA_TIF) / 100) * ( 1 + tx_ji/100) * ( 1 + tx_inflacao/100) - 1) * 100
    # SELIC    
    elif (data_base >= '2020-08-01'):
        return tx_di_60 + tx_selic + TAXA_FIXA_TIF
    else:
         return np.nan

### Exógena - Indice de Atividade Econômica

In [4]:
df_indice_atv_economica = pd.read_excel(ARQUIVO_INDICE_ATIVIDADE_ECONOMICA)
df_indice_atv_economica = df_indice_atv_economica[['A_IBC','Ano','Mês']]
df_indice_atv_economica.rename(columns={'A_IBC': 'ind_atv_econ', 'Ano': 'ano', 'Mês': 'mes'}, inplace=True)
df_indice_atv_economica = df_indice_atv_economica.dropna()
df_indice_atv_economica['data_base'] = df_indice_atv_economica.ano.map(str) + '-' + df_indice_atv_economica.mes.map("{:02}".format) + '-01'
df_indice_atv_economica = df_indice_atv_economica[(df_indice_atv_economica.data_base >= '2009-10-01') & (df_indice_atv_economica.data_base < DATA_CORRENTE)]


In [5]:
DATA_CORRENTE

'2021-12-24'

In [6]:
df_indice_atv_economica.head()

Unnamed: 0,ind_atv_econ,ano,mes,data_base
69,129.78,2009,10,2009-10-01
70,130.01,2009,11,2009-11-01
71,131.36,2009,12,2009-12-01
72,133.5,2010,1,2010-01-01
73,135.18,2010,2,2010-02-01


---

### Exógena - Estrutura Termo Taxa Juros - Planilha 1 (Plan1 - Analítico) e Planilha 2 (Consolidado)

In [7]:
# ETTJ-Plan2
df_ettj_plan2 = pd.read_excel(ARQUIVO_ESTRUTURA_TERMO_TX_JUROS, 'Plan2')
df_ettj_plan2 = df_ettj_plan2[['A_DI_12','A_DI_24','A_DI_36', 'A_DI_60', 'A_DI_120', 'Ano', 'Mês']]
df_ettj_plan2.rename(columns={'Ano': 'ano', 'Mês': 'mes'}, inplace=True)
df_ettj_plan2 = df_ettj_plan2.dropna()
df_ettj_plan2 = df_ettj_plan2.astype({'mes': int, 'ano': int})
df_ettj_plan2['data_base'] = df_ettj_plan2.ano.map(str) + '-' + df_ettj_plan2.mes.map("{:02}".format) + '-01'
df_ettj_plan2 = df_ettj_plan2[(df_ettj_plan2.data_base >= '2009-10-01') & (df_ettj_plan2.data_base < DATA_CORRENTE)]

FileNotFoundError: [Errno 2] No such file or directory: 'datasets\\dados-originais\\0.2 - Exo - ETTJ.xls'

In [8]:
## ETTJ-Plan1
df_ettj_plan1 = pd.read_excel(ARQUIVO_ESTRUTURA_TERMO_TX_JUROS, 'Plan1')
df_ettj_plan1 = df_ettj_plan1[['A_Ji','A_TJLP','A_INFLA_FINAME_OEC_MPME','A_INFLA_FINAME_OEC_Grande','Ano', 'Mês']]
df_ettj_plan1.rename(columns={'Ano': 'ano', 'Mês': 'mes'}, inplace=True)
df_ettj_plan1 = df_ettj_plan1.astype({'mes': int, 'ano': int})
df_ettj_plan1['data_base'] = df_ettj_plan1.ano.map(str) + '-' + df_ettj_plan1.mes.map("{:02}".format) + '-01'
df_ettj_plan1 = df_ettj_plan1[(df_ettj_plan1.data_base >= '2009-10-01') & (df_ettj_plan1.data_base < DATA_CORRENTE)]

In [9]:
# ParamETTJ
df_param_ettj = pd.read_excel(ARQUIVO_PARAMETROS_ESTRUTURA_TERMO_TX_JUROS)
df_param_ettj = df_param_ettj[['A_PRE_FINAME_OEC_MPME','A_PRE_FINAME_OEC_Grande','A_PRE_12','A_PRE_24', 'A_PRE_36', 'Ano', 'Mês']]
df_param_ettj.rename(columns={'Ano': 'ano', 'Mês': 'mes'}, inplace=True)
df_param_ettj = df_param_ettj.astype({'mes': int, 'ano': int})
df_param_ettj['data_base'] = df_param_ettj.ano.map(str) + '-' + df_param_ettj.mes.map("{:02}".format) + '-01'
df_param_ettj = df_param_ettj[(df_param_ettj.data_base >= '2009-10-01') & (df_param_ettj.data_base < DATA_CORRENTE)]

In [10]:
df_param_ettj.head()

Unnamed: 0,A_PRE_FINAME_OEC_MPME,A_PRE_FINAME_OEC_Grande,A_PRE_12,A_PRE_24,A_PRE_36,ano,mes,data_base
69,12.796206,12.904365,9.969168,11.560462,12.282385,2009,10,2009-10-01
70,13.062494,13.149982,9.984419,11.687233,12.488141,2009,11,2009-11-01
71,13.233439,13.393701,10.341847,11.944138,12.657941,2009,12,2009-12-01
72,13.156129,13.268781,10.452661,11.91348,12.576683,2010,1,2010-01-01
73,12.657825,12.730024,10.542231,11.700107,12.198329,2010,2,2010-02-01


---

### Endógena - Finame Ônibus e Caminhões

In [15]:
df_finame_oec = pd.read_excel(ARQUIVO_INDICES_ONIBUS_CAMINHOES, 'Plan1')
df_finame_oec = df_finame_oec[['A_DESEM_OEC', 'A_DESEM_OEC_MPME', 'A_DESEM_OEC_Grande', 'A_SPREAD_BNDES_MPME', 'A_SPREAD_BNDES_Grande', 'A_CUSTO_BNDES_MPME','A_CUSTO_BNDES_grande', 'A_VEND_OEC', 'A_SFIXA_SELIC', 'A_APROV_OEC', 'A_APROV_OEC_MPME', 'A_APROV_OEC_Grande', 'Ano', 'Mês']]
df_finame_oec.rename(columns={'Ano': 'ano', 'Mês': 'mes'}, inplace=True)
df_finame_oec.rename(columns={'A_DESEM_OEC': 'desembolso_total', 'A_DESEM_OEC_MPME': 'desembolso_mpme', 'A_DESEM_OEC_Grande': 'desembolso_grande'}, inplace=True)
df_finame_oec.rename(columns={'A_SPREAD_BNDES_MPME': 'spread_mpme', 'A_SPREAD_BNDES_Grande': 'spread_grande', 'A_CUSTO_BNDES_MPME': 'custo_mpme'}, inplace=True)
df_finame_oec.rename(columns={'A_CUSTO_BNDES_grande': 'custo_grande', 'A_VEND_OEC': 'vendas', 'A_SFIXA_SELIC': 'tx_fixa_selic'}, inplace=True)
df_finame_oec.rename(columns={'A_APROV_OEC': 'aprovacao_total', 'A_APROV_OEC_MPME': 'aprovacao_mpme', 'A_APROV_OEC_Grande': 'aprovacao_grande'}, inplace=True)
df_finame_oec['data_base'] = df_finame_oec.ano.map(str) + '-' + df_finame_oec.mes.map("{:02}".format) + '-01'
df_finame_oec = df_finame_oec[(df_finame_oec.data_base >= '2009-10-01') & (df_finame_oec.data_base < DATA_CORRENTE)]
df_finame_oec.dropna(subset=['desembolso_total'], inplace=True)

In [16]:
df_finame_oec.tail()

Unnamed: 0,desembolso_total,desembolso_mpme,desembolso_grande,spread_mpme,spread_grande,custo_mpme,custo_grande,vendas,tx_fixa_selic,aprovacao_total,aprovacao_mpme,aprovacao_grande,ano,mes,data_base
208,624.422124,450.280997,174.141127,1.0,1.1,,,13255.0,0.45,752.622363,416.449352,336.173011,2021,5,2021-05-01
209,633.369078,355.478147,277.890931,1.0,1.1,,,12867.0,0.45,710.319712,441.292606,269.027106,2021,6,2021-06-01
210,600.554013,325.667389,274.886624,1.0,1.1,,,13049.0,0.45,700.736306,383.799488,316.936818,2021,7,2021-07-01
211,674.37389,354.907705,319.466185,1.0,1.1,,,14282.0,0.45,823.176882,498.507464,324.669418,2021,8,2021-08-01
212,623.317271,389.959495,233.357776,1.0,1.1,,,12728.0,0.45,859.089811,523.693027,335.396784,2021,9,2021-09-01


### Merge Datasets

In [17]:
df_oec = pd.merge(df_finame_oec, df_indice_atv_economica, how= 'inner',on='data_base', suffixes=('','_y'))
df_oec.drop(df_oec.filter(regex='_y$').columns.tolist(),axis=1, inplace=True)
df_oec = pd.merge(df_oec, df_ettj_plan2, how= 'inner',on='data_base', suffixes=('','_y'))
df_oec.drop(df_oec.filter(regex='_y$').columns.tolist(),axis=1, inplace=True)
df_oec = pd.merge(df_oec, df_ettj_plan1, how= 'inner',on='data_base', suffixes=('','_y'))
df_oec.drop(df_oec.filter(regex='_y$').columns.tolist(),axis=1, inplace=True)
df_oec = pd.merge(df_oec, df_param_ettj, how= 'inner',on='data_base', suffixes=('','_y'))
df_oec.drop(df_oec.filter(regex='_y$').columns.tolist(),axis=1, inplace=True)


In [18]:
df_oec.head()

Unnamed: 0,desembolso_total,desembolso_mpme,desembolso_grande,spread_mpme,spread_grande,custo_mpme,custo_grande,vendas,tx_fixa_selic,aprovacao_total,...,A_DI_120,A_Ji,A_TJLP,A_INFLA_FINAME_OEC_MPME,A_INFLA_FINAME_OEC_Grande,A_PRE_FINAME_OEC_MPME,A_PRE_FINAME_OEC_Grande,A_PRE_12,A_PRE_24,A_PRE_36
0,1767.32,1388.5,349.2,3.0,3.0,4.0,4.0,13123.0,,1817.49246,...,13.306142,,6.0,5.890385,5.890385,12.796206,12.904365,9.969168,11.560462,12.282385
1,1740.6,1339.9,362.5,3.0,3.0,4.0,4.0,13219.0,,1999.128439,...,13.574884,,6.0,5.933936,5.933936,13.062494,13.149982,9.984419,11.687233,12.488141
2,1691.54,1259.6,392.2,3.0,3.0,4.0,4.0,15104.0,,2626.210087,...,13.789337,,6.0,6.082671,6.082671,13.233439,13.393701,10.341847,11.944138,12.657941
3,2344.65,1682.7,616.7,3.0,3.0,4.0,4.0,11569.0,,2441.408982,...,13.7286,,6.0,6.072903,6.072903,13.156129,13.268781,10.452661,11.91348,12.576683
4,1631.05,1160.7,423.0,3.0,3.0,4.0,4.0,9581.0,,2460.988063,...,13.446076,,6.0,5.749791,5.749791,12.657825,12.730024,10.542231,11.700107,12.198329


In [19]:
df_oec.columns

Index(['desembolso_total', 'desembolso_mpme', 'desembolso_grande',
       'spread_mpme', 'spread_grande', 'custo_mpme', 'custo_grande', 'vendas',
       'tx_fixa_selic', 'aprovacao_total', 'aprovacao_mpme',
       'aprovacao_grande', 'ano', 'mes', 'data_base', 'ind_atv_econ',
       'A_DI_12', 'A_DI_24', 'A_DI_36', 'A_DI_60', 'A_DI_120', 'A_Ji',
       'A_TJLP', 'A_INFLA_FINAME_OEC_MPME', 'A_INFLA_FINAME_OEC_Grande',
       'A_PRE_FINAME_OEC_MPME', 'A_PRE_FINAME_OEC_Grande', 'A_PRE_12',
       'A_PRE_24', 'A_PRE_36'],
      dtype='object')

---
### MPME

In [20]:
df_oec_mpme = df_oec[['ano', 'mes', 'data_base','desembolso_mpme', 'spread_mpme', 'custo_mpme', 
                      'vendas','tx_fixa_selic', 'aprovacao_mpme',  'ind_atv_econ', 
                      'A_Ji', 'A_TJLP','A_INFLA_FINAME_OEC_MPME', 'A_PRE_FINAME_OEC_MPME',
                      'A_DI_12','A_DI_24', 'A_DI_36', 'A_DI_60', 'A_DI_120']]
df_oec_mpme.rename(columns={'desembolso_mpme': 'desembolso', 
                            'spread_mpme': 'spread', 
                            'custo_mpme': 'custo_psi', 
                            'aprovacao_mpme': 'aprovacao', 
                            'A_INFLA_FINAME_OEC_MPME': 'inflacao',
                            'A_PRE_FINAME_OEC_MPME': 'tx_pre_prazo_medio'},                            
                   inplace=True)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


In [21]:
df_oec_mpme.tail()

Unnamed: 0,ano,mes,data_base,desembolso,spread,custo_psi,vendas,tx_fixa_selic,aprovacao,ind_atv_econ,A_Ji,A_TJLP,inflacao,tx_pre_prazo_medio,A_DI_12,A_DI_24,A_DI_36,A_DI_60,A_DI_120
139,2021,5,2021-05-01,450.280997,1.0,,13255.0,0.45,416.449352,138.8,2.65,,5.519407,8.774985,5.832604,7.331494,8.088834,8.848087,9.717425
140,2021,6,2021-06-01,355.478147,1.0,,12867.0,0.45,441.292606,139.12,2.87,,5.507007,8.48522,6.43972,7.558042,8.027355,8.573779,9.514613
141,2021,7,2021-07-01,325.667389,1.0,,13049.0,0.45,383.799488,139.44,2.99,,5.456211,8.710928,6.98692,7.94097,8.345853,8.799348,9.605991
142,2021,8,2021-08-01,354.907705,1.0,,14282.0,0.45,498.507464,139.23,3.03,,4.933893,9.829825,7.998453,8.942889,9.373741,9.870852,10.580188
143,2021,9,2021-09-01,389.959495,1.0,,12728.0,0.45,523.693027,139.23,3.28,,,10.521768,8.729619,9.637906,10.057843,10.562139,11.301256


In [22]:
df_oec_mpme['custo_bndes'] = df_oec_mpme.apply(lambda row: calcula_custo(row), axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_oec_mpme['custo_bndes'] = df_oec_mpme.apply(lambda row: calcula_custo(row), axis=1)


In [23]:
df_oec_mpme.columns

Index(['ano', 'mes', 'data_base', 'desembolso', 'spread', 'custo_psi',
       'vendas', 'tx_fixa_selic', 'aprovacao', 'ind_atv_econ', 'A_Ji',
       'A_TJLP', 'inflacao', 'tx_pre_prazo_medio', 'A_DI_12', 'A_DI_24',
       'A_DI_36', 'A_DI_60', 'A_DI_120', 'custo_bndes'],
      dtype='object')

In [24]:
df_oec_mpme.drop([ 'spread','custo_psi', 'tx_fixa_selic', 'A_Ji', 'A_TJLP', 'inflacao',  ], axis=1, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


In [25]:
df_oec_mpme.head()

Unnamed: 0,ano,mes,data_base,desembolso,vendas,aprovacao,ind_atv_econ,tx_pre_prazo_medio,A_DI_12,A_DI_24,A_DI_36,A_DI_60,A_DI_120,custo_bndes
0,2009,10,2009-10-01,1388.5,13123.0,1437.057832,129.74,12.796206,9.969168,11.560462,12.282385,12.869093,13.306142,4.0
1,2009,11,2009-11-01,1339.9,13219.0,1497.107343,130.17,13.062494,9.984419,11.687233,12.488141,13.121518,13.574884,4.0
2,2009,12,2009-12-01,1259.6,15104.0,1580.255803,131.36,13.233439,10.341847,11.944138,12.657941,13.269531,13.789337,4.0
3,2010,1,2010-01-01,1682.7,11569.0,1853.163058,133.46,13.156129,10.452661,11.91348,12.576683,13.18188,13.7286,4.0
4,2010,2,2010-02-01,1160.7,9581.0,1774.880889,135.16,12.657825,10.542231,11.700107,12.198329,12.711503,13.446076,4.0


In [26]:
df_oec_mpme.to_csv(ARQUIVO_OUTPUT_OEC_MPME, index=False, sep=';', decimal=',')