In [1]:
from selenium import webdriver
import requests
import pandas as pd
import os
import platform
from bs4 import BeautifulSoup
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import ElementClickInterceptedException, NoSuchElementException
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys

import json
from requests import Session
import time

In [2]:
def get_webdriver_path():
    
    my_os = platform.system()
    
    if my_os == 'Windows':
        return os.path.abspath(os.path.join('gecko_drivers', 'win64.exe'))
    else:
        return os.path.abspath(os.path.join('gecko_drivers', 'linux64.exe'))

In [3]:
def listar_links_temas(home):
    
    with requests.get(home) as r:
        html = r.text
    
    sopa = BeautifulSoup(html)
    box_icones = sopa.find('div', {'id' : "home-box-indicadores"})
    icones = box_icones.find_all('a', {'class' : "link-padrao d-block text-center"})
    
    if home.endswith(r'/'):
        home = home[:-1]
    
    links = []
    for icone in icones:
        slug = icone.get('href')
        link = home + slug
        links.append(link)
        
    return links

In [4]:
def entrar_pagina_tema(browser, link_tema):
    
    browser.get(link_tema)

def listar_elementos_tema(browser):
    
    els = browser.find_elements_by_class_name('item-indicador-search')
    
    return els

def parsear_elemento_tema(el):
    
    desc_indi = el.get_attribute('title')
    cod_indi = el.get_attribute('cd_indicador')
    
    parsed = {
            'codigo' : cod_indi,
            'desc' : desc_indi,
            'element' : el
        }
    
    return parsed

def parsear_todos_els_tema(els):
    
    parsed_data = {}
    
    for el in els:
        data = parsear_elemento_tema(el)
        codigo = data['codigo']
        parsed_data[codigo] = data
    
    return parsed_data
        

In [5]:
def wait_aparecer_animacao(browser):


    try:
        element = WebDriverWait(browser, 10).until(
            EC.visibility_of_element_located((By.CLASS_NAME, "ring"))
        )
    except Exception as e:
        print(e)

def wait_desaparecer_animacao(browser):


    try:
        element = WebDriverWait(browser, 10).until(
            EC.invisibility_of_element_located((By.CLASS_NAME, "ring"))
        )
    except Exception as e:
        print(e)
              
              
def clickar_animacao(browser, element):
    
    try:
        element.click()
    except ElementClickInterceptedException:
        browser.execute_script("arguments[0].scrollIntoView();", element)

    #primeira wait para garantir que animacao ainda nao apareceu
    wait_desaparecer_animacao(browser)
    
    wait_aparecer_animacao(browser)
    
    wait_desaparecer_animacao(browser)


In [6]:
def wait_card_periodos(browser):
    
    element = WebDriverWait(browser, 10).until(
            EC.visibility_of_element_located((By.CLASS_NAME, "item-periodo-search"))
        )

def wait_periodo(browser, el_periodo):
    
    element = WebDriverWait(browser, 10).until(
                EC.element_to_be_clickable(el_periodo)
            )

def listar_periodos_indicador(browser):
    
    wait_card_periodos(browser)
    
    primeiro_periodo = browser.find_element_by_class_name('item-periodo-search')
    
    wait_periodo(browser, primeiro_periodo)

    periodos = browser.find_elements_by_class_name('item-periodo-search')
    
    return periodos

def back_to_top(browser):
    
    browser.switch_to.default_content()
    body = browser.find_element_by_tag_name('body')
    body.send_keys(Keys.CONTROL + Keys.HOME)
    
def back_top_reg(browser):
    
    back_to_top(browser)
    
    el = browser.find_element_by_id("badge-localidade")
    
    browser.execute_script("arguments[0].scrollIntoView();", el)


def clicar_periodos(browser, periodos):
    
    for el in periodos:
        try:
            
            wait_periodo(browser, el)
            
            el.click()

        except ElementClickInterceptedException:

            #scroll down para aparecer o item
            browser.execute_script("arguments[0].scrollIntoView();", el)

            wait_periodo(browser, el)

            el.click()
            
    back_top_reg(browser)

In [23]:
def get_arvore_reg(browser):
    
    items = browser.find_elements_by_class_name('sui-treeview-item')
    
    return items[-1]


def clicar_regionalizacao(browser):
    
    back_top_reg(browser)

    arvore_reg = get_arvore_reg(browser)
    #browser.execute_script("arguments[0].scrollIntoView();", arvore_reg)

    WebDriverWait(browser, 10).until(
                EC.element_to_be_clickable(arvore_reg)
            )
    #dois cliques abre o menu
    actionChains = ActionChains(browser)
    actionChains.double_click(arvore_reg).perform()
    
    arvore_reg.click()
    
    #um clique seleciona geral
    
    back_top_reg(browser)
    
    #botao_geral = browser.find_element_by_id('btn-select-all-localidade')
    #botao_geral.click()

In [8]:
def pesquisar(browser):
    
    botao = browser.find_element_by_id('btn-search')
    
    botao.click()
    
    wait_desaparecer_animacao(browser)
    
    wait_aparecer_animacao(browser)
    
    wait_desaparecer_animacao(browser)

In [9]:
def download_json(browser):
    
    session = Session()
    
    selenium_cookies = browser.get_cookies()
    for cookie in selenium_cookies:
        session.cookies.set(cookie["name"], cookie["value"])
        
    link_download = 'https://observasampa.prefeitura.sp.gov.br/PesquisaDeIndicadores/DownloadIndicador/.json/Indicadores'
    
    with session.get(link_download) as r:
        c = r.content
        t = c.decode('latin-1')
        t = t.encode('utf-8')
        dados = json.loads(t)
        
    return dados

In [10]:
def loop_tema(browser, link_tema, dados_retorno):
    
    entrar_pagina_tema(browser, link_tema)
    els = listar_elementos_tema(browser)
    els = parsear_todos_els_tema(els)
    
    for codigo, el in els.items():
        
        not_stale = listar_elementos_tema(browser)
        not_stale = parsear_todos_els_tema(not_stale)
        
        elemento = not_stale[codigo]['element']
        
        el.pop('element')
        
        clickar_animacao(browser, elemento)
        
        periodos = listar_periodos_indicador(browser)
        clicar_periodos(browser, periodos)
        
        clicar_regionalizacao(browser)
        
        pesquisar(browser)
        
        dados = download_json(browser)
        
        el['data'] = dados
        dados_retorno[codigo] = el
        
        entrar_pagina_tema(browser, link_tema)
        
    return els

In [11]:
browser = webdriver.Firefox(executable_path=get_webdriver_path())

  browser = webdriver.Firefox(executable_path=get_webdriver_path())


In [12]:
home = 'https://observasampa.prefeitura.sp.gov.br/'

In [13]:
browser.get(home)

In [14]:
links_temas = listar_links_temas(home)

In [24]:
for tema in links_temas[1:]:
    dados = {}
    
    loop_tema(browser, tema, dados)
    
    nom_file = links_temas[0].split('/')[-1] + '.json'
    
    with open(os.path.join('generated_data', nom_file), 'w') as f:
        json.dump(dados, f)

  els = browser.find_elements_by_class_name('item-indicador-search')
  primeiro_periodo = browser.find_element_by_class_name('item-periodo-search')
  periodos = browser.find_elements_by_class_name('item-periodo-search')
  body = browser.find_element_by_tag_name('body')
  el = browser.find_element_by_id("badge-localidade")
  items = browser.find_elements_by_class_name('sui-treeview-item')
  botao = browser.find_element_by_id('btn-search')


Message: 

Message: 

Message: 

Message: 

Message: 

Message: 



KeyboardInterrupt: 

In [None]:
browser.close()

In [None]:
back_top_reg(browser)