In [1]:
import pandas as pd

In [2]:
# Load the raw IDHM table: one row per territory, one 'IDHM <year>' column per year.
raw_idh_path = '../data/raw/data_idh.csv'
df_idh = pd.read_csv(raw_idh_path)
df_idh.head()

Unnamed: 0,Territorialidades,IDHM 2000,IDHM 2010,IDHM 2012,IDHM 2013,IDHM 2014,IDHM 2015,IDHM 2016,IDHM 2017,IDHM 2018,IDHM 2019
0,Brasil,0.612,0.727,0.746,0.753,0.762,0.765,0.771,0.774,0.78,0.785
1,Acre,0.517,0.663,0.701,0.706,0.715,0.718,0.723,0.712,0.733,0.739
2,Alagoas,0.471,0.631,0.651,0.648,0.666,0.666,0.68,0.679,0.689,0.687
3,Amapá,0.577,0.708,0.707,0.734,0.725,0.728,0.733,0.732,0.741,0.737
4,Amazonas,0.515,0.674,0.691,0.702,0.706,0.711,0.711,0.728,0.718,0.726


In [3]:
# Strip the 'IDHM ' prefix so each year column is labelled by the year alone.
df_idh.columns = [label.replace('IDHM ', '') for label in df_idh.columns]
df_idh.head()

Unnamed: 0,Territorialidades,2000,2010,2012,2013,2014,2015,2016,2017,2018,2019
0,Brasil,0.612,0.727,0.746,0.753,0.762,0.765,0.771,0.774,0.78,0.785
1,Acre,0.517,0.663,0.701,0.706,0.715,0.718,0.723,0.712,0.733,0.739
2,Alagoas,0.471,0.631,0.651,0.648,0.666,0.666,0.68,0.679,0.689,0.687
3,Amapá,0.577,0.708,0.707,0.734,0.725,0.728,0.733,0.732,0.741,0.737
4,Amazonas,0.515,0.674,0.691,0.702,0.706,0.711,0.711,0.728,0.718,0.726


In [4]:
# Build the working table without touching df_idh: the 2000 column is dropped,
# and the two years absent from the source (2009 and 2011) are both filled
# with the 2010 value.
df_idh_updated = (
    df_idh
    .drop(columns='2000')
    .assign(**{
        '2009': lambda frame: frame['2010'],
        '2011': lambda frame: frame['2010'],
    })
)

# Order the columns by label; the year labels sort ahead of 'Territorialidades'.
df_idh_updated = df_idh_updated.reindex(columns=sorted(df_idh_updated.columns))

df_idh_updated.head()

Unnamed: 0,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,Territorialidades
0,0.727,0.727,0.727,0.746,0.753,0.762,0.765,0.771,0.774,0.78,0.785,Brasil
1,0.663,0.663,0.663,0.701,0.706,0.715,0.718,0.723,0.712,0.733,0.739,Acre
2,0.631,0.631,0.631,0.651,0.648,0.666,0.666,0.68,0.679,0.689,0.687,Alagoas
3,0.708,0.708,0.708,0.707,0.734,0.725,0.728,0.733,0.732,0.741,0.737,Amapá
4,0.674,0.674,0.674,0.691,0.702,0.706,0.711,0.711,0.728,0.718,0.726,Amazonas


In [5]:
# Full state name -> two-letter UF abbreviation.
uf_dict = {
    'Acre': 'AC',
    'Alagoas': 'AL',
    'Amapá': 'AP',
    'Amazonas': 'AM',
    'Bahia': 'BA',
    'Ceará': 'CE',
    'Distrito Federal': 'DF',
    'Espírito Santo': 'ES',
    'Goiás': 'GO',
    'Maranhão': 'MA',
    'Mato Grosso': 'MT',
    'Mato Grosso do Sul': 'MS',
    'Minas Gerais': 'MG',
    'Pará': 'PA',
    'Paraíba': 'PB',
    'Paraná': 'PR',
    'Pernambuco': 'PE',
    'Piauí': 'PI',
    'Rio de Janeiro': 'RJ',
    'Rio Grande do Norte': 'RN',
    'Rio Grande do Sul': 'RS',
    'Rondônia': 'RO',
    'Roraima': 'RR',
    'Santa Catarina': 'SC',
    'São Paulo': 'SP',
    'Sergipe': 'SE',
    'Tocantins': 'TO'
}

# Remove the national aggregate ('Brasil') by name instead of by position.
# A positional drop of the first row is not idempotent: every re-run of this
# cell would silently delete one more state. Filtering by name makes the cell
# safe to re-run and independent of the row order in the source file.
# The state names are then replaced by their UF abbreviations; .assign builds a
# new frame, so no chained-assignment warning is raised on the filtered rows.
df_idh_updated = (
    df_idh_updated
    .loc[lambda frame: frame['Territorialidades'] != 'Brasil']
    .assign(Territorialidades=lambda frame: frame['Territorialidades'].replace(uf_dict))
)

# .replace leaves unknown names untouched, so verify that every remaining
# value is a UF abbreviation before it is written out under the 'UF' column.
unmapped_names = df_idh_updated.loc[
    ~df_idh_updated['Territorialidades'].isin(list(uf_dict.values())),
    'Territorialidades',
].unique()
assert len(unmapped_names) == 0, f'Territories without a UF abbreviation: {list(unmapped_names)}'

df_idh_updated.head()

Unnamed: 0,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,Territorialidades
1,0.663,0.663,0.663,0.701,0.706,0.715,0.718,0.723,0.712,0.733,0.739,AC
2,0.631,0.631,0.631,0.651,0.648,0.666,0.666,0.68,0.679,0.689,0.687,AL
3,0.708,0.708,0.708,0.707,0.734,0.725,0.728,0.733,0.732,0.741,0.737,AP
4,0.674,0.674,0.674,0.691,0.702,0.706,0.711,0.711,0.728,0.718,0.726,AM
5,0.66,0.66,0.66,0.678,0.68,0.694,0.701,0.705,0.71,0.71,0.718,BA


In [6]:
# Reshape from wide to long: the state column is renamed to 'UF' and every
# year column becomes a row, with the year in 'ANO_REF' and its value in 'IDH'.
df_idh_final = (
    df_idh_updated
    .rename(columns={'Territorialidades': 'UF'})
    .melt(id_vars='UF', var_name='ANO_REF', value_name='IDH')
)

df_idh_final.head()

Unnamed: 0,UF,ANO_REF,IDH
0,AC,2009,0.663
1,AL,2009,0.631
2,AP,2009,0.708
3,AM,2009,0.674
4,BA,2009,0.66


In [7]:
# Write the long-format table to the interim data folder, leaving the index out of the file.
interim_idh_path = '../data/interim/idh_interim.csv'
df_idh_final.to_csv(interim_idh_path, index=False)