In [128]:
import pandas as pd

# Load the IPCA table: one row per year, one column per month plus a yearly
# total. Values arrive as text such as "0,42%", so they are cleaned later on.
INDICE_CSV_PATH = 'files/indice.csv'
ipca = pd.read_csv(INDICE_CSV_PATH)

# Preview the first rows as the cell output.
ipca.head()

Unnamed: 0.1,Unnamed: 0,Jan,Fev,Mar,Abr,Mai,Jun,Jul,Ago,Set,Out,Nov,Dez,Ano
0,2025,"0,16%","1,31%",--,--,--,--,--,--,--,--,--,--,"1,47%"
1,2024,"0,42%","0,83%","0,16%","0,38%","0,46%","0,21%","0,38%","-0,02%","0,44%","0,56%","0,39%","0,52%","4,83%"
2,2023,"0,53%","0,84%","0,71%","0,61%","0,23%","-0,08%","0,12%","0,23%","0,26%","0,24%","0,28%","0,56%","4,62%"
3,2022,"0,54%","1,01%","1,62%","1,06%","0,47%","0,67%","-0,68%","-0,36%","-0,29%","0,59%","0,41%","0,62%","5,78%"
4,2021,"0,25%","0,86%","0,93%","0,31%","0,83%","0,53%","0,96%","0,87%","1,16%","1,25%","0,95%","0,73%","10,06%"


## Renomeando colunas e removendo o ano incompleto (2025) para facilitar o entendimento

In [None]:
# The raw CSV stores the year in an unnamed first column ("Unnamed: 0") and
# the yearly total in a column called "Ano". Swap them to meaningful names.
#
# The rename is guarded and not done in place so that re-running this cell is
# a no-op: an unconditional second rename would turn the year column ("Ano")
# into a duplicate "Variação Anual (%)" and break the filter below.
if "Unnamed: 0" in ipca.columns:
    ipca = ipca.rename(columns={"Ano": "Variação Anual (%)", "Unnamed: 0": "Ano"})

# The 2025 figures are not complete yet, so drop that year.
ipca = ipca[ipca['Ano'] < 2025].reset_index(drop=True)
ipca.head()

Unnamed: 0,Ano,Jan,Fev,Mar,Abr,Mai,Jun,Jul,Ago,Set,Out,Nov,Dez,Variação Anual (%)
0,2024,"0,42%","0,83%","0,16%","0,38%","0,46%","0,21%","0,38%","-0,02%","0,44%","0,56%","0,39%","0,52%","4,83%"
1,2023,"0,53%","0,84%","0,71%","0,61%","0,23%","-0,08%","0,12%","0,23%","0,26%","0,24%","0,28%","0,56%","4,62%"
2,2022,"0,54%","1,01%","1,62%","1,06%","0,47%","0,67%","-0,68%","-0,36%","-0,29%","0,59%","0,41%","0,62%","5,78%"
3,2021,"0,25%","0,86%","0,93%","0,31%","0,83%","0,53%","0,96%","0,87%","1,16%","1,25%","0,95%","0,73%","10,06%"
4,2020,"0,21%","0,25%","0,07%","-0,31%","-0,38%","0,26%","0,36%","0,24%","0,64%","0,86%","0,89%","1,35%","4,52%"


## Alterando os tipos de dados
### Atualmente, nossos dados estão como string (dtype `object` no pandas)

In [130]:
# Check the dtype of every column before converting; the percentage columns
# are expected to still be `object` (text) at this point.
ipca.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 25 entries, 0 to 24
Data columns (total 14 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   Ano                 25 non-null     int64 
 1   Jan                 25 non-null     object
 2   Fev                 25 non-null     object
 3   Mar                 25 non-null     object
 4   Abr                 25 non-null     object
 5   Mai                 25 non-null     object
 6   Jun                 25 non-null     object
 7   Jul                 25 non-null     object
 8   Ago                 25 non-null     object
 9   Set                 25 non-null     object
 10  Out                 25 non-null     object
 11  Nov                 25 non-null     object
 12  Dez                 25 non-null     object
 13  Variação Anual (%)  25 non-null     object
dtypes: int64(1), object(13)
memory usage: 2.9+ KB


In [131]:

def _percent_text_to_float(values: pd.Series) -> pd.Series:
    """Turn percentage text such as "0,42%" into the float 0.42.

    A column that is already numeric is only cast to float64, so this cell
    can be re-run safely (the `.str` accessor would raise on float columns).
    """
    if pd.api.types.is_numeric_dtype(values):
        return values.astype('float64')
    return (
        values
        .str.replace(',', '.', regex=False)  # decimal comma -> decimal point
        .str.replace('%', '', regex=False)   # drop the percent sign
        .astype('float64')
    )


for column in ipca.columns[1:]:  # leave the "Ano" column untouched
    ipca[column] = _percent_text_to_float(ipca[column])

ipca.head()

Unnamed: 0,Ano,Jan,Fev,Mar,Abr,Mai,Jun,Jul,Ago,Set,Out,Nov,Dez,Variação Anual (%)
0,2024,0.42,0.83,0.16,0.38,0.46,0.21,0.38,-0.02,0.44,0.56,0.39,0.52,4.83
1,2023,0.53,0.84,0.71,0.61,0.23,-0.08,0.12,0.23,0.26,0.24,0.28,0.56,4.62
2,2022,0.54,1.01,1.62,1.06,0.47,0.67,-0.68,-0.36,-0.29,0.59,0.41,0.62,5.78
3,2021,0.25,0.86,0.93,0.31,0.83,0.53,0.96,0.87,1.16,1.25,0.95,0.73,10.06
4,2020,0.21,0.25,0.07,-0.31,-0.38,0.26,0.36,0.24,0.64,0.86,0.89,1.35,4.52


In [132]:
# Portuguese month abbreviation -> zero-padded month number.
months_dict = dict(zip(
    ['Jan', 'Fev', 'Mar', 'Abr', 'Mai', 'Jun', 'Jul', 'Ago', 'Set', 'Out', 'Nov', 'Dez'],
    ['01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12'],
))

# Reshape the wide year x month table into one row per (year, month), build a
# sortable "YYYY-MM" key, keep only that key and the monthly rate, and order
# the rows chronologically.
ipca_cleaned = (
    pd.melt(
        ipca.drop(columns=['Variação Anual (%)']),
        id_vars=['Ano'],
        var_name='month',
        value_name='inflacao',
    )
    .assign(
        ano_mes=lambda melted: (
            melted['Ano'].astype(str) + '-' + melted['month'].map(months_dict)
        )
    )
    [['ano_mes', 'inflacao']]
    .sort_values(by='ano_mes')
    .reset_index(drop=True)
)

ipca_cleaned.head(20)

Unnamed: 0,ano_mes,inflacao
0,2000-01,0.62
1,2000-02,0.13
2,2000-03,0.22
3,2000-04,0.42
4,2000-05,0.01
5,2000-06,0.23
6,2000-07,1.61
7,2000-08,1.31
8,2000-09,0.23
9,2000-10,0.14


In [133]:
# Compound the monthly rates: turn each percentage into a growth factor,
# take the running product, and subtract 1 to get the accumulated rate.
# Note the result is a fraction (e.g. 0.0062), while `inflacao` is in percent.
monthly_growth_factor = 1 + ipca_cleaned['inflacao'] / 100
ipca_cleaned['inflacao_acumulada'] = monthly_growth_factor.cumprod() - 1
ipca_cleaned

Unnamed: 0,ano_mes,inflacao,inflacao_acumulada
0,2000-01,0.62,0.006200
1,2000-02,0.13,0.007508
2,2000-03,0.22,0.009725
3,2000-04,0.42,0.013965
4,2000-05,0.01,0.014067
...,...,...,...
295,2024-08,-0.02,3.385453
296,2024-09,0.44,3.404749
297,2024-10,0.56,3.429415
298,2024-11,0.39,3.446690
