# Brazilian Business Review – BBR

## Bibliotecas importadas

In [1]:
import requests
from bs4 import BeautifulSoup

import pandas as pd

## Funções básicas

In [2]:
def criar_query(key_word):
    return "+".join(key_word.split())

In [3]:
def listar_links_pagina(url_search):
    page = requests.get(url_search)
    soup = BeautifulSoup(page.content, 'html.parser')
    results = soup.find_all('h3', {'class': 'media-heading'})
    a_links = []
    for item in results:
        try:
            remover_links = 'editorial' in item.find('a').text.lower()
            remover_links = remover_links or 'folha de rosto' in item.find('a').text.lower()
            remover_links = remover_links or 'dados da edição ' in item.find('a').text.lower()
            if not remover_links:
                a_links.append(item.find('a')['href'])
        except:
            pass
    return a_links

In [4]:
def listar_links(url_search_text, query, pg_num=1):
    a_links = [] 
    while True:
        url_search = url_search_text.replace('__query__', query).replace('__pg_num__', str(pg_num))
        tmp = listar_links_pagina(url_search)
        if len(tmp) != 0:
            pg_num += 1
            a_links += tmp
        else:
            break
    return a_links

In [5]:
url_search_text = 'https://www.bbronline.com.br/index.php/bbr/search/search?query=__query__&searchJournal=1&authors=&title=&abstract=&galleyFullText=&discipline=&subject=&type=&coverage=&indexTerms=&dateFromMonth=01&dateFromDay=1&dateFromYear=2012&dateToMonth=12&dateToDay=31&dateToYear=2021&orderBy=score&orderDir=desc&searchPage=__pg_num__#results'

links = listar_links(url_search_text, 'contabilidade+ambiental', pg_num=1)

link = links[0]

link


'https://www.bbronline.com.br/index.php/bbr/article/view/278'

In [6]:
links

['https://www.bbronline.com.br/index.php/bbr/article/view/278']

In [7]:
page = requests.get(link)
soup = BeautifulSoup(page.content, 'html.parser')

In [8]:
def completar_lista(lista):
    if len(lista) < 6:
        lista += (6 - len(lista)) * ['']
    return lista

In [9]:
def limpar_afiliacao(afiliacao):
    afiliacao = ' '.join(afiliacao.split())
    afiliacao = afiliacao.split(',')[0]
    afiliacao = afiliacao.split(' - ')[0]
    afiliacao = afiliacao.split('(')[0]
    afiliacao = afiliacao.split('/')[0]
    return afiliacao.strip()

In [10]:
def captura_titulo(soup):
    try:
        title = soup.find('h2', {'class': 'headings'}).text
        title = ' '.join(title.split())
    except:
        title = ''
    return {'Título': title}

In [11]:
captura_titulo(soup)

{'Título': 'The Environmental balance sheet of nations: reflections on global climate change scenarios'}

In [12]:
def captura_data(soup):
    try:
        date_ol = soup.find('ol', {'class': 'breadcrumb'})
        date_li = date_ol.find_all('li')
        date = date_li[2].find('a').text
        date = ' '.join(date.split())
    except:
        date = ''
    return {'Data de Publicação': date[-4:]}

In [13]:
captura_data(soup)

{'Data de Publicação': '2012'}

In [14]:
def captura_resumo(soup):
    try:
        panel_body = soup.find('div', {'class': 'panel-body'})
        abstract = panel_body.find('p').text
    except:
        abstract = ''
    return {'Resumo': abstract}

In [15]:
captura_resumo(soup)

{'Resumo': 'The objective of this work is to prepare environmental balance sheets of countries based on the scenarios for climate change and global warming indicated by the Intergovernmental Panel on Climate Change (IPCC)of the United Nations (UN). We consider the stock of forest resources and the residual balance between emission and capture of carbon or greenhouse gases (GHGs) estimated for each country in 2020 and 2050, according to the two editions (A1B1 and A2B2) of theSpecial Report on Emission Scenarios\xa0(SRES). The study is multidisciplinary in nature, involving concepts from the areas of climate change biology, energy, geoscience, economics and accounting. The last discipline was used to delineate the research subject and served as a method, by means of the Inquired Balance Sheet technique, to measure and classify environmental assets, liabilities and net equity. We selected a sample of seven countries, the four leading developing countries (Brazil, Russia, India and China –

In [16]:
def captura_palavras_chave(soup):
    try:
        panel_body = soup.find('div', {'class': 'panel-body'})
        kw_p = panel_body.find_all('p')[1]
        kw = kw_p.text.replace('Keyword : ', '')
        kw = ' '.join(kw.split())
        kw = kw.replace(',', '.').replace(';', '.')
    except:
        kw = ''

    return {'Palavras-chave': kw}

In [17]:
captura_palavras_chave(soup)

{'Palavras-chave': 'Balance sheet of nations. global climate change. environmental net equity Balanço das Nações. balanço contábil das nações. BCN. mudanças climáticas globais. patrimônio líquido ambiental'}

In [18]:
def captura_autores(soup):
    autores = []
    try:
        autores_div = soup.find_all('div', {'class': 'authors'})
        autores = [_.find('a').text for _ in autores_div]
        #autores = [_.text for _ in autores_a]
        #autores = [' '.join(a.text.split()) for a in autores]
    except:
        pass
    qtd_autores = len(autores)
    autores = completar_lista(autores)
    autores = [qtd_autores] + autores
    
    key_autores = [f"Autor {i}" for i in range(1, 7)]
    key_autores = ['Qtd. de Autores'] + key_autores

    return dict(zip(key_autores, autores))

In [19]:
captura_autores(soup)

{'Qtd. de Autores': 6,
 'Autor 1': 'José Roberto Kassai',
 'Autor 2': 'Rafael Feltran-Barbieri',
 'Autor 3': 'Luiz Nelson Carvalho',
 'Autor 4': 'Yara Consuelo Cintra',
 'Autor 5': 'Luís Eduardo Afonso',
 'Autor 6': 'Alexandre Foschine'}

In [20]:
def captura_afiliacao(soup):
    afiliacao = []
    try:
        authors_ul = soup.find('ul', {'class': 'authors'})
        afiliacao = authors_ul.find_all('span', {'class': 'affiliation'})
        afiliacao = [' '.join(a.text.split()) for a in afiliacao]
        afiliacao = [limpar_afiliacao(a) for a in afiliacao]
    except:
        pass
    afiliacao = completar_lista(afiliacao)
    
    key_affiliation = [f"Afiliação {i}" for i in range(1, 7)]

    return dict(zip(key_affiliation, afiliacao))

In [21]:
captura_afiliacao(soup)

{'Afiliação 1': '',
 'Afiliação 2': '',
 'Afiliação 3': '',
 'Afiliação 4': '',
 'Afiliação 5': '',
 'Afiliação 6': ''}

In [22]:
def realizar_consulta(key_words_list):
    periodico_nome_dict = {'Revista': 'Brazilian Business Review – BBR'}
    url_search_text = 'https://www.bbronline.com.br/index.php/bbr/search/search?query=__query__&searchJournal=1&authors=&title=&abstract=&galleyFullText=&discipline=&subject=&type=&coverage=&indexTerms=&dateFromMonth=01&dateFromDay=1&dateFromYear=2012&dateToMonth=12&dateToDay=31&dateToYear=2021&orderBy=score&orderDir=desc&searchPage=__pg_num__#results'
    a_links = []
    for key_word in key_words_list:
        query = criar_query(key_word)
        a_links += listar_links(url_search_text, query)
        
    resultados = []
    for link in a_links:
        link_dict = {'Url Artigo': link}
        page = requests.get(link)
        soup = BeautifulSoup(page.content, 'html.parser')
        dict_artigo = {
            **captura_data(soup),
            **captura_titulo(soup),
            **captura_resumo(soup),
            **captura_palavras_chave(soup),
            **captura_autores(soup),
            **captura_afiliacao(soup),
            **link_dict,
            **periodico_nome_dict
        }
        resultados.append(dict_artigo)
    df = pd.DataFrame(resultados)
    df['Data de Publicação'] = pd.to_datetime(df['Data de Publicação'], format='%Y-%m-%d')
    return df

## Pesquisa por artigos

In [23]:
key_words_list = ['contabilidade ambiental', 'balanço social', 'relato integrado', 'nbct 15']
df = realizar_consulta(key_words_list)
df.head(3)

Unnamed: 0,Data de Publicação,Título,Resumo,Palavras-chave,Qtd. de Autores,Autor 1,Autor 2,Autor 3,Autor 4,Autor 5,Autor 6,Afiliação 1,Afiliação 2,Afiliação 3,Afiliação 4,Afiliação 5,Afiliação 6,Url Artigo,Revista
0,2012-01-01,The Environmental balance sheet of nations: re...,The objective of this work is to prepare envir...,Balance sheet of nations. global climate chang...,6,José Roberto Kassai,Rafael Feltran-Barbieri,Luiz Nelson Carvalho,Yara Consuelo Cintra,Luís Eduardo Afonso,Alexandre Foschine,,,,,,,https://www.bbronline.com.br/index.php/bbr/art...,Brazilian Business Review – BBR
1,2012-01-01,The Environmental balance sheet of nations: re...,The objective of this work is to prepare envir...,Balance sheet of nations. global climate chang...,6,José Roberto Kassai,Rafael Feltran-Barbieri,Luiz Nelson Carvalho,Yara Consuelo Cintra,Luís Eduardo Afonso,Alexandre Foschine,,,,,,,https://www.bbronline.com.br/index.php/bbr/art...,Brazilian Business Review – BBR


In [24]:
df.to_csv('../data/BBR_FUCA.csv', index=False)

In [25]:
df

Unnamed: 0,Data de Publicação,Título,Resumo,Palavras-chave,Qtd. de Autores,Autor 1,Autor 2,Autor 3,Autor 4,Autor 5,Autor 6,Afiliação 1,Afiliação 2,Afiliação 3,Afiliação 4,Afiliação 5,Afiliação 6,Url Artigo,Revista
0,2012-01-01,The Environmental balance sheet of nations: re...,The objective of this work is to prepare envir...,Balance sheet of nations. global climate chang...,6,José Roberto Kassai,Rafael Feltran-Barbieri,Luiz Nelson Carvalho,Yara Consuelo Cintra,Luís Eduardo Afonso,Alexandre Foschine,,,,,,,https://www.bbronline.com.br/index.php/bbr/art...,Brazilian Business Review – BBR
1,2012-01-01,The Environmental balance sheet of nations: re...,The objective of this work is to prepare envir...,Balance sheet of nations. global climate chang...,6,José Roberto Kassai,Rafael Feltran-Barbieri,Luiz Nelson Carvalho,Yara Consuelo Cintra,Luís Eduardo Afonso,Alexandre Foschine,,,,,,,https://www.bbronline.com.br/index.php/bbr/art...,Brazilian Business Review – BBR
