In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import datetime

In [2]:
def gerar_sopa_sugesotes():
    """Download the suggestions page and return it parsed as a BeautifulSoup tree.

    Returns:
        BeautifulSoup: parsed HTML of the comments page of process 116.

    Raises:
        requests.HTTPError: if the server answers with anything other than 200.
        requests.RequestException: on connection problems or timeout.
    """
    url_sugestoes = ('https://participemais.prefeitura.sp.gov.br/'
        'legislation/processes/116/topics/comments#process-list')
    # requests has no default timeout; without one a stalled server would
    # block the notebook forever.
    with requests.get(url_sugestoes, timeout=30) as r:
        # Explicit checks instead of `assert`, which is stripped under -O.
        r.raise_for_status()
        if r.status_code != 200:
            raise requests.HTTPError(
                f'Unexpected status code {r.status_code} for {url_sugestoes}',
                response=r)
        html_pg = r.text
    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed (and to avoid bs4's
    # GuessedAtParserWarning).
    sopa = BeautifulSoup(html_pg, 'html.parser')

    return sopa

In [3]:
def pegar_todas_sugestoes(sopa):
    """Return every 'margin' div found inside the legislation-comments container.

    Args:
        sopa: parsed page exposing ``find`` (BeautifulSoup-like).

    Returns:
        The result of ``find_all`` on the container: one element per
        'margin' div.
    """
    classe_container = 'small-12 medium-9 column row legislation-comments end'
    container = sopa.find('div', {'class' : classe_container})

    return container.find_all('div', {'class' : 'margin'})

In [4]:
def pegar_dados_meta_iniciativa(trecho):
    """Extract the heading, description and indicator text of one section.

    Args:
        trecho: element exposing ``find`` and ``find_all`` (BeautifulSoup-like).

    Returns:
        tuple: ``(meta_iniciativa, desc, indicador)``. ``indicador`` is the
        second paragraph with non-breaking spaces removed, or ``''`` when the
        section has a single paragraph.
    """
    cabecalho = trecho.find('div', {'class' : "comment-section no-margin-bottom"})
    # NOTE(review): paragraphs are collected from the whole `trecho`, not only
    # from the heading div - confirm that nested <p> tags cannot be picked up
    # here by mistake.
    paragrafos = trecho.find_all('p')

    titulo = cabecalho.find('h4').text.strip()
    descricao = paragrafos[0].text.strip()
    tem_indicador = len(paragrafos) > 1
    indicador = (paragrafos[1].text.strip().replace('\xa0', '')
                 if tem_indicador else '')

    return titulo, descricao, indicador

In [5]:
def div_sugestoes(trecho):
    """Return the first 'topic-comments' div inside ``trecho`` (None if absent
    when ``trecho`` follows BeautifulSoup's ``find`` semantics)."""
    bloco = trecho.find('div', {'class' : 'topic-comments'})
    return bloco

In [6]:
def pegar_qtd_sugests(sugests_div):
    """Read the suggestion count announced in the 'topic-comments-count' div.

    A label starting with "uma" (case-insensitive) counts as 1; otherwise all
    digit characters of the label are concatenated and parsed as an integer.

    Raises:
        ValueError: if the label neither starts with "uma" nor has any digit.
    """
    contador = sugests_div.find('div', {'class' : 'topic-comments-count'})
    rotulo = contador.text.strip()

    # The singular form is spelled out in words instead of using a numeral.
    if rotulo.lower().startswith('uma'):
        return 1

    return int(''.join(filter(str.isdigit, rotulo)))

In [7]:
def gerar_sugests_list(trecho):
    """Return the 'topic-vote-comment-body' divs of a section.

    Sanity check: the number of elements found must equal the count announced
    by the page for this section.

    Raises:
        AssertionError: if the two counts differ.
    """
    bloco = div_sugestoes(trecho)
    encontradas = bloco.find_all('div', {'class' : 'topic-vote-comment-body'})
    qtd_encontrada = len(encontradas)
    qtd_anunciada = pegar_qtd_sugests(bloco)
    assert qtd_encontrada == qtd_anunciada

    return encontradas

In [8]:
def categoria_sugest(sugest):
    """Return the stripped text of the suggestion's
    'inline-block topic-vote-comment-option' div."""
    seletor = {'class' : "inline-block topic-vote-comment-option"}
    return sugest.find('div', seletor).text.strip()

In [9]:
def nome_usuario(sugest):
    """Return the stripped text of the suggestion's 'user-name' span."""
    span_nome = sugest.find('span', {'class' : 'user-name'})
    nome = span_nome.text.strip()
    return nome

In [10]:
def url_usuario(sugest):
    """Return the ``href`` of the first link inside the 'user-name' span."""
    span_nome = sugest.find('span', {'class' : 'user-name'})
    link = span_nome.find('a')
    return link['href']

In [11]:
def pegar_comment(sugest):
    """Return the stripped text of the suggestion's
    'no-margin-bottom topic-vote-comment-text' paragraph."""
    seletor = {'class' : "no-margin-bottom topic-vote-comment-text"}
    paragrafo = sugest.find('p', seletor)
    return paragrafo.text.strip()

In [12]:
# Download and parse the suggestions page (this performs a network request).
sopa = gerar_sopa_sugesotes()

In [13]:
# Collect the 'margin' divs of the comments container; each one is processed
# as a section ("trecho") by the loop that follows.
sugestoes_por_trecho = pegar_todas_sugestoes(sopa)

In [14]:
# Flatten the page into one record (dict) per suggestion; the section-level
# fields are repeated on every suggestion that belongs to that section.
total_dados = []

for trecho in sugestoes_por_trecho:

    meta_ini, desc, indi = pegar_dados_meta_iniciativa(trecho)
    sugestoes = gerar_sugests_list(trecho)

    # Fields shared by all suggestions of the current section.
    campos_trecho = {'meta_iniciativa' : meta_ini,
                     'descricao' : desc,
                     'indicador' : indi}

    for sugest in sugestoes:
        dados = {**campos_trecho,
                 'categoria' : categoria_sugest(sugest),
                 'nome_usuario' : nome_usuario(sugest),
                 'url_usuario' : url_usuario(sugest),
                 'texto_sugestao' : pegar_comment(sugest)}

        total_dados.append(dados)

In [15]:
# Build the table from the accumulated records: one row per suggestion dict.
df = pd.DataFrame(total_dados)

In [16]:
# Output directory, relative to the notebook's working directory.
# Fixed: the original value '..Dados_originais/...' lacked the '/' after '..',
# so it named a folder literally called '..Dados_originais' instead of a
# folder under the parent directory.
# The trailing slash is required: the file name is appended to this string by
# plain concatenation.
path_dados = '../Dados_originais/Dados_site/'

In [17]:
# Tag the output file with the day and month of the run (no zero padding,
# no year).
hoje = datetime.datetime.today()
nom_arquivo = 'revisoes_pdm_{}_{}.xlsx'.format(hoje.day, hoje.month)

In [18]:
# Write the table to an Excel file inside the output directory; the directory
# string and the file name are joined as-is, with no separator added.
df.to_excel(f'{path_dados}{nom_arquivo}')