### Import libs

In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from pathlib import Path

### Declare URL and output path

In [2]:
# Address of the page to download. The query string is unchanged; it is only
# split across adjacent string literals so each group of parameters is readable.
URL = (
    "https://www.ipeadata.gov.br/ExibeSerieR.aspx"
    "?MAXDATA=2025&MINDATA=2012"
    "&TNIVID=2&TPAID=1"
    "&module=S&serid=2096726935&stub=1"
)

# CSV file name, relative to the working directory.
OUT_CSV = Path("indice_de_gini.csv")

### Make the request

In [3]:
# Download the page. The timeout keeps this cell from hanging indefinitely on an
# unresponsive server, and raise_for_status() turns an HTTP error response into
# an exception instead of letting an error page flow into the parser.
response = requests.get(URL, headers={"User-Agent": "Mozilla/5.0"}, timeout=30)
response.raise_for_status()
html = response.text

### Extract main data

In [12]:
# Parse the downloaded page and locate the grid table by its element id.
soup  = BeautifulSoup(html, "lxml")
table = soup.select_one("table#grd_DXMainTable")

# select_one() returns None when nothing matches; fail here with a clear message
# rather than with an AttributeError in a later cell.
if table is None:
    raise ValueError("table#grd_DXMainTable not found in the downloaded page")

In [16]:
# Display the parsed <table> element so its markup can be inspected.
table

<table border="0" cellpadding="0" cellspacing="0" class="dxgvTable" id="grd_DXMainTable" onclick="aspxGVTableClick('grd', event);" style="width:100%;border-collapse:collapse;empty-cells:show;">
<tr id="grd_DXHeadersRow0">
<td class="dxgvHeader" id="grd_col1" onmousedown="aspxGVHeaderMouseDown('grd', this, event);" style="width:25px;border-top-width:0px;border-left-width:0px;"><table border="0" cellpadding="0" cellspacing="0" style="width:100%;border-collapse:collapse;">
<tr>
<td>Sigla</td><td style="width:1px;text-align:right;"><span class="dx-vam"> </span><img alt="(Ascending)" class="dx-vam" src="img/sortUp.jpg" style="margin-left:5px;margin-right:5px;"/><img alt="[Filter]" class="dxGridView_gvHeaderFilter dxgv__hfb dx-vam" src="/DXR.axd?r=1_19-JQ0Ub" style="cursor:default;"/></td>
</tr>
</table></td><td class="dxgvHeader" id="grd_col2" onmousedown="aspxGVHeaderMouseDown('grd', this, event);" style="width:300px;border-top-width:0px;border-left-width:0px;"><table border="0" cellpaddin

In [14]:
# Year labels from the header row.
# The child combinator (">") restricts the match to the row's own cells. Each
# header cell wraps a nested layout table, and a plain descendant selector would
# also return that table's inner <td> elements, yielding duplicated and empty
# labels instead of one label per column.
header_cells = table.select("tr#grd_DXHeadersRow0 > td")[2:]   # skip the first two (non-year) columns
anos = [td.get_text(strip=True) for td in header_cells]        # expected: ['2012', …, '2024']
idx_2022 = anos.index("2022")                                  # position of the 2022 column within anos

# Data rows are the <tr> elements whose id starts with "grd_DXDataRow".
rows = table.select("tr[id^=grd_DXDataRow]")

### Convert to list of dictionaries

In [20]:
def _parse_decimal(text):
    """Convert a decimal-comma string such as "0,566" to a float.

    An empty string yields NaN, so a blank cell does not abort the whole run
    with a ValueError. Any other non-numeric text still raises ValueError.
    """
    text = text.strip()
    if not text:
        return float("nan")
    return float(text.replace(",", "."))


# Labels for the yearly value columns, in table order (2012 through 2024).
year_labels = [str(year) for year in range(2012, 2025)]
expected_cols = 2 + len(year_labels)      # UF code + state name + one cell per year

records = []
for tr in rows:
    # Cell texts, with non-breaking spaces (&nbsp;) normalised to plain spaces.
    cols = [td.get_text(strip=True).replace("\xa0", " ")
            for td in tr.find_all("td")]

    # Make sure the row has the expected number of cells; other rows are
    # skipped (raise an exception here instead if that is preferred).
    if len(cols) != expected_cols:
        continue

    sigla, estado, *values = cols

    # Key order matches the table: UF, Estado, then one entry per year.
    record = {"UF": sigla, "Estado": estado}
    record.update(
        (label, _parse_decimal(value)) for label, value in zip(year_labels, values)
    )
    records.append(record)


In [21]:
# Print the full list of parsed records for a visual check.
print(records)

[{'UF': 'AC', 'Estado': 'Acre', '2012': 0.566, '2013': 0.553, '2014': 0.529, '2015': 0.549, '2016': 0.563, '2017': 0.545, '2018': 0.558, '2019': 0.559, '2020': 0.515, '2021': 0.539, '2022': 0.523, '2023': 0.511, '2024': 0.504}, {'UF': 'AL', 'Estado': 'Alagoas', '2012': 0.503, '2013': 0.51, '2014': 0.527, '2015': 0.525, '2016': 0.523, '2017': 0.525, '2018': 0.55, '2019': 0.527, '2020': 0.51, '2021': 0.526, '2022': 0.498, '2023': 0.486, '2024': 0.518}, {'UF': 'AM', 'Estado': 'Amazonas', '2012': 0.589, '2013': 0.581, '2014': 0.562, '2015': 0.553, '2016': 0.558, '2017': 0.591, '2018': 0.544, '2019': 0.566, '2020': 0.533, '2021': 0.541, '2022': 0.509, '2023': 0.512, '2024': 0.474}, {'UF': 'AP', 'Estado': 'Amapá', '2012': 0.528, '2013': 0.513, '2014': 0.523, '2015': 0.548, '2016': 0.555, '2017': 0.589, '2018': 0.547, '2019': 0.513, '2020': 0.5, '2021': 0.53, '2022': 0.531, '2023': 0.491, '2024': 0.509}, {'UF': 'BA', 'Estado': 'Bahia', '2012': 0.563, '2013': 0.535, '2014': 0.528, '2015': 0.52

### Create Dataframe

In [22]:
# Build the frame from the parsed records, then use the "UF" column as the index.
gini_wide = pd.DataFrame(data=records)
df = gini_wide.set_index(keys="UF")

# Preview the first rows.
df.head()

Unnamed: 0_level_0,Estado,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023,2024
UF,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
AC,Acre,0.566,0.553,0.529,0.549,0.563,0.545,0.558,0.559,0.515,0.539,0.523,0.511,0.504
AL,Alagoas,0.503,0.51,0.527,0.525,0.523,0.525,0.55,0.527,0.51,0.526,0.498,0.486,0.518
AM,Amazonas,0.589,0.581,0.562,0.553,0.558,0.591,0.544,0.566,0.533,0.541,0.509,0.512,0.474
AP,Amapá,0.528,0.513,0.523,0.548,0.555,0.589,0.547,0.513,0.5,0.53,0.531,0.491,0.509
BA,Bahia,0.563,0.535,0.528,0.522,0.539,0.59,0.55,0.557,0.537,0.546,0.511,0.49,0.481


### Save Data

In [23]:
# Export the state name and the 2022 value, sorted ascending by the 2022 value.
# NOTE(review): this is an absolute, user-specific path, and the OUT_CSV constant
# declared in the configuration cell is not used here — consider pointing the
# export at a configurable location.
out_path = Path("/home/fbotero/Documents/POS/raw/gini_uf_2022.csv")

# to_csv() does not create missing directories, so make sure the target exists.
out_path.parent.mkdir(parents=True, exist_ok=True)

# float_format="%.2f" writes the values with two decimal places.
df[["Estado", "2022"]].sort_values("2022").to_csv(out_path, float_format="%.2f", encoding="utf-8")