### Importar bibliotecas necessárias

In [48]:
import pandas as pd
import requests
import json
import os

### Caminho do projeto

In [49]:
path_data = './resumos_alt/'

# Verifica os diretórios que estiverem ausentes e os cria
if not os.path.exists(path_data):
    os.makedirs(path_data)

### Entrada de dados

- Entrada com a chave de API retirada da base de dados Scopus
- Palavras-chave para a pesquisa na editora

In [50]:
# Chave pessoal, portanto, não disponibilizada
MY_API_KEY = " "
user_query = "TITLE-ABS-KEY ( 'pedrogao AND grande' ) AND PUBYEAR > 2016 AND PUBYEAR < 2024"

### Impressão do Json a ser retirado da API

- Função que permite a formatação dos dados através de suas hierarquias

In [51]:
def jprint(obj):
    text = json.dumps(obj, sort_keys=True, indent=6)
    print(text)

## Pesquisa e recolha de resultados com a API Scoupus

- É necessário acesso à API

In [52]:
headers = {
           'Accept':'application/json',
           'X-ELS-APIKey': MY_API_KEY
          }
params={
        'query' : user_query, 
        'language' : 'ENG'
       }

response = requests.get("https://api.elsevier.com/content/search/scopus", 
                        headers=headers, params=params)

print("URL de pesquisa: ",response.url)
print("Código de status: ",response.status_code)

URL de pesquisa:  https://api.elsevier.com/content/search/scopus?query=TITLE-ABS-KEY+%28+%27pedrogao+AND+grande%27+%29+AND+PUBYEAR+%3E+2016+AND+PUBYEAR+%3C+2024&language=ENG
Código de status:  200


### Normalizar os dados em forma hierarquica

In [53]:
results = response.json()
jprint(response.json())

{
      "search-results": {
            "entry": [
                  {
                        "@_fa": "true",
                        "affiliation": [
                              {
                                    "@_fa": "true",
                                    "affiliation-city": "Potenza",
                                    "affiliation-country": "Italy",
                                    "affilname": "Universit\u00e0 degli Studi della Basilicata"
                              }
                        ],
                        "article-number": "133",
                        "citedby-count": "0",
                        "dc:creator": "Leone V.",
                        "dc:identifier": "SCOPUS_ID:85154552372",
                        "dc:title": "The 2017 Extreme Wildfires Events in Portugal through the Perceptions of Volunteer and Professional Firefighters",
                        "eid": "2-s2.0-85154552372",
                        "freetoread": {
                  

In [54]:
tabela_dados = pd.json_normalize(results["search-results"],['entry'] )
tabela_dados[:1]

Unnamed: 0,@_fa,link,prism:url,dc:identifier,eid,dc:title,dc:creator,prism:publicationName,prism:eIssn,prism:volume,...,subtypeDescription,article-number,source-id,openaccess,openaccessFlag,freetoread.value,freetoreadLabel.value,prism:issn,pii,prism:isbn
0,True,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...",https://api.elsevier.com/content/abstract/scop...,SCOPUS_ID:85154552372,2-s2.0-85154552372,The 2017 Extreme Wildfires Events in Portugal ...,Leone V.,Fire,25716255,6,...,Article,133,21101020133,1,True,"[{'$': 'all'}, {'$': 'publisherfullgold'}]","[{'$': 'All Open Access'}, {'$': 'Gold'}]",,,


#### Criação de tabela com o Pandas de colunas necessária

In [55]:
df_article = tabela_dados.loc[:,('dc:title', "prism:doi", 'dc:creator')]
df_article.rename(columns= {
                            'dc:title': 'title',
                            'prism:doi': 'doi',
                            'dc:creator': 'author'
                           }, 
                           inplace=True)
df_article

Unnamed: 0,title,doi,author
0,The 2017 Extreme Wildfires Events in Portugal ...,10.3390/fire6040133,Leone V.
1,Multi-Indices Diagnosis of the Conditions That...,10.3390/fire6020056,Andrade C.
2,Field and laboratory analysis of the junction ...,10.1071/WF22161,Viegas D.X.
3,Myopia during emergency improvisation: lessons...,10.1108/MD-03-2021-0378,Cunha M.P.
4,Influence of Convectively Driven Flows in the ...,10.3390/atmos13030414,Pinto P.
5,Sub-Hourly Precipitation Extremes in Mainland ...,10.3390/cli10020028,Santos J.A.
6,The informative coverage of tragic events into...,10.15847/obsOBS15420211963,Ortiz S.L.
7,The impact on structures of the pedrógão grand...,10.3390/fire3040057,Ribeiro L.M.
8,Lightning modelling for the research of forest...,10.1016/j.atmosres.2020.104993,Couto F.T.
9,When the sender is the message: the communicat...,10.17231/comsoc.0(2020).2741,Toniolo B.P.


## Pesquisa e recolha dos resumos com os DOIs

In [56]:
abs_list = []
for item in df_article['doi']:
    response_abst = requests.get("https://api.elsevier.com/content/abstract/doi/" + str(item),
                                 headers=headers, params={   })
    
    print(response_abst.url, 'status_code:', response_abst.status_code)

    if(response_abst.status_code == 200):
        results_abst = response_abst.json()
        if 'dc:description' in results_abst["abstracts-retrieval-response"][ "coredata"]:
            abs_list.append( results_abst["abstracts-retrieval-response"][ "coredata"]['dc:description'] )
        else:
            abs_list.append("NULL")
    else:
        abs_list.append("NULL")
print(f'total coletado: {len(abs_list)}')

https://api.elsevier.com/content/abstract/doi/10.3390/fire6040133 status_code: 200
https://api.elsevier.com/content/abstract/doi/10.3390/fire6020056 status_code: 200
https://api.elsevier.com/content/abstract/doi/10.1071/WF22161 status_code: 200
https://api.elsevier.com/content/abstract/doi/10.1108/MD-03-2021-0378 status_code: 200
https://api.elsevier.com/content/abstract/doi/10.3390/atmos13030414 status_code: 200
https://api.elsevier.com/content/abstract/doi/10.3390/cli10020028 status_code: 200
https://api.elsevier.com/content/abstract/doi/10.15847/obsOBS15420211963 status_code: 200
https://api.elsevier.com/content/abstract/doi/10.3390/fire3040057 status_code: 200
https://api.elsevier.com/content/abstract/doi/10.1016/j.atmosres.2020.104993 status_code: 200
https://api.elsevier.com/content/abstract/doi/10.17231/comsoc.0(2020).2741 status_code: 200
https://api.elsevier.com/content/abstract/doi/10.5194/tc-14-3731-2020 status_code: 200
https://api.elsevier.com/content/abstract/doi/nan stat

### Concatenar os resumos com a tabela de artigos

In [57]:
abstracts= pd.DataFrame(abs_list, columns=['resumos'])
df_article = pd.concat([df_article, abstracts], axis=1)
df_article

Unnamed: 0,title,doi,author,resumos
0,The 2017 Extreme Wildfires Events in Portugal ...,10.3390/fire6040133,Leone V.,This study aimed to explore differences in the...
1,Multi-Indices Diagnosis of the Conditions That...,10.3390/fire6020056,Andrade C.,"Forest fires, though part of a natural forest ..."
2,Field and laboratory analysis of the junction ...,10.1071/WF22161,Viegas D.X.,Background: Two fire ignitions in Pedrógão Gra...
3,Myopia during emergency improvisation: lessons...,10.1108/MD-03-2021-0378,Cunha M.P.,Purpose: The purpose of this paper is to explo...
4,Influence of Convectively Driven Flows in the ...,10.3390/atmos13030414,Pinto P.,The influence of a mesoscale convective system...
5,Sub-Hourly Precipitation Extremes in Mainland ...,10.3390/cli10020028,Santos J.A.,Sub-hourly heavy precipitation events (SHHPs) ...
6,The informative coverage of tragic events into...,10.15847/obsOBS15420211963,Ortiz S.L.,"Dramatic events, natural catastrophes and, in ..."
7,The impact on structures of the pedrógão grand...,10.3390/fire3040057,Ribeiro L.M.,"On 17 June 2017, one of the most dramatic and ..."
8,Lightning modelling for the research of forest...,10.1016/j.atmosres.2020.104993,Couto F.T.,The study aims to assess the applicability of ...
9,When the sender is the message: the communicat...,10.17231/comsoc.0(2020).2741,Toniolo B.P.,The theme of this article is the official cris...


In [75]:
symbols = ['<', '>', ':', '"', '/', '\\', '|', '?', '*']
query = ''.join([x for x in user_query if x not in symbols])
query

"TITLE-ABS-KEY ( 'pedrogao AND grande' ) AND PUBYEAR  2016 AND PUBYEAR  2024"

In [76]:
df_article.to_csv(path_data + query + '.csv', index=False)