In [302]:
from selenium import webdriver
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from bs4 import BeautifulSoup
from time import sleep
from lxml import html
import pandas as pd
import requests
import pdb

In [303]:
# Start a single headless Chrome session; every later cell drives the page
# through `navegador`. Call navegador.quit() when the notebook is finished.
chrome_flags = ("--headless=new",)

options = webdriver.ChromeOptions()
for chrome_flag in chrome_flags:
    options.add_argument(chrome_flag)

navegador = webdriver.Chrome(options=options)

In [304]:
# Load the CECAD page (tab_cad.php) that every later cell interacts with.
CECAD_TAB_URL = 'https://cecad.cidadania.gov.br/tab_cad.php'
navegador.get(CECAD_TAB_URL)

In [305]:
# Tick the "Com marcação PBF (Agosto 2023)" option.
checkbox = navegador.find_elements(By.CSS_SELECTOR, "input[value='PBF']")
# click() toggles the input, so only click when it is not selected yet;
# otherwise re-running this cell would untick the option again.
if not checkbox[0].is_selected():
    checkbox[0].click()

In [306]:
# Select the state of Ceará in the UF dropdown.
uf_selector = navegador.find_elements(By.CSS_SELECTOR, "select[name='uf_ibge']")
uf_select = Select(uf_selector[0])
uf_select.select_by_visible_text('CE - Ceará')

# Wait explicitly for the municipality dropdown to be filled instead of sleeping
# for a fixed 0.1 s, which silently races with slow responses.
# The next cell skips the first two options, so "more than two options" means at
# least one municipality is available.
# NOTE(review): assumes the dropdown holds at most two placeholder options before
# a state is chosen — confirm against the live page.
WebDriverWait(navegador, 30).until(
    lambda driver: len(
        driver.find_elements(By.CSS_SELECTOR, "select[name='p_ibge'] option")
    ) > 2
)

In [307]:
# Collect the municipality names listed in the municipality dropdown.
p_selector = navegador.find_elements(By.CSS_SELECTOR, "select[name='p_ibge']")
p_select = Select(p_selector[0])
# The first two <option> entries are skipped; only the remaining texts are kept.
municipios = [option.text for option in p_select.options][2:]

In [308]:
def municipality_selection(municipality):
    """Select one municipality in the page's municipality dropdown.

    Uses the module-level ``navegador`` driver and picks the option of the
    first ``select[name='p_ibge']`` element whose visible text equals
    ``municipality``.

    Args:
        municipality: Visible text of the option to select.
    """
    dropdown = navegador.find_elements(By.CSS_SELECTOR, "select[name='p_ibge']")[0]
    Select(dropdown).select_by_visible_text(municipality)

In [309]:
def variable_selection(variable):
    """Select one variable in the page's variable dropdown.

    Uses the module-level ``navegador`` driver and picks the option of the
    first ``select[name='var1']`` element whose visible text equals
    ``variable``.

    Args:
        variable: Visible text of the option to select.
    """
    dropdown = navegador.find_elements(By.CSS_SELECTOR, "select[name='var1']")[0]
    Select(dropdown).select_by_visible_text(variable)

In [310]:
def search_table(navegador, timeout=300):
    """Click the "%Total" button and wait until a result table is visible.

    Args:
        navegador: Selenium WebDriver showing the query page.
        timeout: Maximum number of seconds to wait for the table. The previous
            hard-coded value (99999999 s, roughly three years) made a failed
            query hang the notebook forever; a bounded wait surfaces the
            problem as a ``TimeoutException`` instead.

    Raises:
        IndexError: If fewer than two ``button.btn-success`` elements exist.
        selenium.common.exceptions.TimeoutException: If no
            ``table.table-striped`` element becomes visible within ``timeout``.

    NOTE(review): if a table from a previous query is still visible, this wait
    can return before the new result is rendered — confirm how the page swaps
    the table between queries.
    """
    success_buttons = navegador.find_elements(By.CSS_SELECTOR, "button.btn-success")
    # The second matching button is the one this scraper clicks ("%Total").
    success_buttons[1].click()

    WebDriverWait(navegador, timeout).until(
        EC.visibility_of_element_located((By.CSS_SELECTOR, "table.table-striped"))
    )

In [311]:
def get_table():
    """Read the last ``<tbody>`` on the page and return its rows as text.

    Uses the module-level ``navegador`` driver. Processing steps:

    1. Parse the ``outerHTML`` of the last ``<tbody>`` with lxml.
    2. Ignore the final two ``<tr>`` rows.
    3. For every remaining row, take its ``<th>`` cells followed by its
       ``<td>`` cells, drop the last cell, and keep the stripped text of the
       cells whose ``.text`` is not ``None``.
    4. Trim the first and last kept cell from the last row.
    5. Prefix every label of the second row with the first cell of the first
       row (``"<first cell> / <label>"``).
    6. Drop the first row.

    Returns:
        list[list[str]]: element ``0`` holds the prefixed labels and element
        ``-1`` holds the trimmed last row.

    Raises:
        IndexError: If the page has no ``<tbody>`` or the table yields fewer
            than two usable rows (same exception type as before, now with a
            descriptive message).
    """
    tbodies = navegador.find_elements(By.CSS_SELECTOR, "tbody")
    if not tbodies:
        raise IndexError("no <tbody> element found on the page")

    markup = tbodies[-1].get_attribute('outerHTML')
    tr_nodes = html.fromstring(markup).xpath('.//tr')

    parsed = []
    for tr in tr_nodes[:-2]:
        cells = tr.xpath('.//th') + tr.xpath('.//td')
        parsed.append(
            [cell.text.strip() for cell in cells[:-1] if cell.text is not None]
        )

    if len(parsed) < 2:
        raise IndexError(
            f"expected at least 2 usable table rows, found {len(parsed)}"
        )

    # Trim the last row first: when only two rows exist it is also the label row,
    # and the original order of operations must be kept.
    parsed[-1] = parsed[-1][1:-1]

    parsed[1] = [f"{parsed[0][0]} / {label}" for label in parsed[1]]

    return parsed[1:]

In [312]:
# Variables to tabulate for every municipality. Each entry is passed to
# variable_selection(), which selects it by visible text in the `var1` dropdown,
# so the strings must match the option labels on the page exactly.
metrics = [
    'Bloco 1 - Faixa da renda total da família',
    'Bloco 1 - Faixa da renda familiar per capita',
    'Bloco 1 - Recebe PBF família',
    'Bloco 2 - Calçamento em frente ao seu domicílio',
    'Bloco 2 - Situação do domicílio',
    'Bloco 2 - Espécie do domicílio',
    'Bloco 2 - Existência de banheiro',
    'Bloco 2 - Forma de abastecimento de água',
    'Bloco 2 - Forma de coleta do lixo',
    'Bloco 2 - Forma de escoamento sanitário',
    'Bloco 2 - Material predominante nas paredes externas do domicílio',
    'Bloco 2 - Material predominante no piso do domicílio',
    'Bloco 2 - Água canalizada no domicílio',
    'Bloco 2 - Tipo de iluminação',
    'Bloco 4 - Recebe PBF pessoa',
    'Bloco 4 - Pessoa com marcação de trabalho infantil',
    'Bloco 6 - Não recebe ajuda de terceiros',
    'Bloco 7 - Curso mais elevado que a pessoa frequentou',
    'Bloco 7 - Grau de instrução',
    'Bloco 7 - Pessoa frequenta escola',
    'Bloco 7 - Pessoa sabe ler e escrever',
    'Bloco 7 - Último ano e série do curso que a pessoa frequentou',
    'Bloco 8 - Função principal',
    'Bloco 8 - Trabalho remunerado nos últimos 12 meses',
    'Bloco 12 - Situação de Rua'
]

In [313]:
# Scrape every (municipality, metric) pair into one row of values per municipality.
# `data_columns` is created up front so it exists even when `municipios` is empty;
# previously it was only defined inside the first loop iteration, which left the
# DataFrame cell failing with a NameError in that case.
data_columns = ['Município']
data_values = []

for idx, municipio in enumerate(municipios):
    municipality_selection(municipio)

    municipality_values = [municipio]

    for j, metric in enumerate(metrics):
        # Progress line: municipality index, metric index.
        print(f"{idx}   {j}")

        variable_selection(metric)
        search_table(navegador)
        table = get_table()

        # Column labels are collected from the first municipality only; later
        # municipalities are assumed to yield the same categories in the same order.
        if idx == 0:
            data_columns += table[0]

        municipality_values += table[-1]

    data_values.append(municipality_values)
            

0   0
0   1
0   2
0   3
0   4
0   5
0   6
0   7
0   8
0   9
0   10
0   11
0   12
0   13
0   14
0   15
0   16
0   17
0   18
0   19
0   20
0   21
0   22
0   23
0   24
1   0
1   1
1   2
1   3
1   4
1   5
1   6
1   7
1   8
1   9
1   10
1   11
1   12
1   13
1   14
1   15
1   16
1   17
1   18
1   19
1   20
1   21
1   22
1   23
1   24
2   0
2   1
2   2
2   3
2   4
2   5
2   6
2   7
2   8
2   9
2   10
2   11
2   12
2   13
2   14
2   15
2   16
2   17
2   18
2   19
2   20
2   21
2   22
2   23
2   24
3   0
3   1
3   2
3   3
3   4
3   5
3   6
3   7
3   8
3   9
3   10
3   11
3   12
3   13
3   14
3   15
3   16
3   17
3   18
3   19
3   20
3   21
3   22
3   23
3   24
4   0
4   1
4   2
4   3
4   4
4   5
4   6
4   7
4   8
4   9
4   10
4   11
4   12
4   13
4   14
4   15
4   16
4   17
4   18
4   19
4   20
4   21
4   22
4   23
4   24
5   0
5   1
5   2
5   3
5   4
5   5
5   6
5   7
5   8
5   9
5   10
5   11
5   12
5   13
5   14
5   15
5   16
5   17
5   18
5   19
5   20
5   21
5   22
5   23
5   24
6   0
6   

In [314]:
# Build the frame, then turn decimal commas into dots and strip percent signs
# in every cell (the values remain strings).
data = pd.DataFrame(data_values, columns=data_columns).replace(
    [',', '%'], ['.', ''], regex=True
)
data

Unnamed: 0,Município,Faixa da renda total da família / Até 1 S.M.,Faixa da renda total da família / Entre 1 e 2 S.M.,Faixa da renda total da família / Entre 2 e 3 S.M.,Faixa da renda total da família / Acima de 3 S.M.,Faixa da renda familiar per capita / Pobreza 1 (até R$ 109),Faixa da renda familiar per capita / Pobreza 2 (de R$ 109 a R$ 218),Faixa da renda familiar per capita / Baixa Renda,Faixa da renda familiar per capita / Acima de 1/2 S.M.,Recebe PBF família / Não,...,Função principal / Trab. doméstico com cart. de trab. assinada,Função principal / Trabalhador não-remunerado,Função principal / Militar ou servidor público,Função principal / Empregador,Função principal / Estagiário,Função principal / Aprendiz,Trabalho remunerado nos últimos 12 meses / Sim,Trabalho remunerado nos últimos 12 meses / Não,Situação de Rua / Não,Situação de Rua / Sim
0,Abaiara,79.44,14.15,3.81,2.59,59.06,5.40,16.67,18.87,32.37,...,0.01,0.00,1.26,0.00,0.00,0.00,35.63,38.99,100.00,0.00
1,Acarape,77.88,14.49,5.15,2.47,56.87,6.15,23.22,13.76,31.65,...,0.03,0.02,2.96,0.01,0.01,0.00,18.85,52.43,99.98,0.02
2,Acaraú,86.64,10.62,2.14,0.60,69.55,6.77,15.50,8.18,18.86,...,0.01,0.07,0.84,0.00,0.00,0.00,31.90,38.93,100.00,0.00
3,Acopiara,85.75,11.82,1.75,0.68,65.21,3.97,15.61,15.21,27.59,...,0.01,0.82,1.24,0.01,0.03,0.01,33.02,43.12,99.97,0.03
4,Aiuaba,87.30,10.22,1.75,0.72,67.78,2.35,14.89,14.98,28.25,...,0.02,29.62,1.04,0.02,0.00,0.00,7.39,67.81,100.00,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
179,Uruburetama,89.85,8.19,1.39,0.57,72.74,2.76,14.50,10.01,21.65,...,0.03,0.01,0.50,0.01,0.02,0.06,28.90,41.46,99.99,0.01
180,Uruoca,81.71,14.39,3.19,0.71,62.72,3.81,20.82,12.65,29.12,...,0.03,13.34,2.70,0.03,0.00,0.00,27.08,47.47,100.00,0.00
181,Varjota,78.67,15.35,4.11,1.88,57.81,5.19,20.86,16.13,34.56,...,0.02,0.88,1.42,0.00,0.02,0.00,29.05,43.87,99.99,0.01
182,Várzea Alegre,80.34,14.03,3.36,2.27,59.81,3.88,19.72,16.60,31.28,...,0.00,0.04,0.20,0.04,0.00,0.00,9.16,67.76,99.98,0.02


In [315]:
# Write the scraped table to disk without the pandas row index.
data.to_csv(path_or_buf="cadunico_data.csv", index=False)

In [316]:
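# Debug helper (disabled): print the HTML of each candidate button.
# NOTE: `total_button` only exists inside search_table(), so this snippet needs
# its own find_elements(By.CSS_SELECTOR, "button.btn-success") call to run here.
#for i in total_button:
#    print(i.get_attribute('outerHTML'))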