# **Getting data using Python SDK API from Elsevier Developer Portal**
This notebook illustrates downloading data using the Scopus Python API (the `elsapy` package). For details and to obtain an API key, refer to https://dev.elsevier.com/. This code can be used to
 " Get programmatic access to:
1.   citation data and abstracts from virtually all relevant scholarly journals, as indexed by Scopus, Elsevier's citation database.
2.   journals and books published by Elsevier on the ScienceDirect full-text platform.
3. engineering resources available on Engineering Village.
4. curated abstracts, indices and other metadata indexed by Embase, Elsevier's biomedical abstract and indexing database. "









In [None]:
# Install elsapy module
!pip install elsapy
# Importing packages
from elsapy.elsclient import ElsClient
from elsapy.elsprofile import ElsAuthor, ElsAffil
from elsapy.elsdoc import FullDoc, AbsDoc
from elsapy.elssearch import ElsSearch
import json
import requests
from xml.etree import ElementTree as et
import pandas as pd

Collecting elsapy
  Downloading https://files.pythonhosted.org/packages/d8/7b/934ef0e29ebc283d60ef9ae78c1f583ef5ca652144dc8215bba4de9d9fde/elsapy-0.5.0-py3-none-any.whl
Installing collected packages: elsapy
Successfully installed elsapy-0.5.0


In [None]:
# Load configuration.
# The API key can be obtained from https://dev.elsevier.com/ and is expected to be
# saved in a JSON file named config.json containing an "apikey" entry.
# The context manager closes the file even if JSON parsing fails.
with open("config.json") as con_file:
    config = json.load(con_file)
apikey = config['apikey']

In [None]:
## Create the Elsevier API client from the key loaded from the configuration file
client = ElsClient(api_key=apikey)

In [None]:
## Build a Scopus document search and run it, retrieving all results.
# The query can be adapted as needed; for the syntax see
# https://dev.elsevier.com/tecdoc_search_request.html
# Change the ISSN code to target a different journal.
doc_srch = ElsSearch(
    "ISSN(0360-5442) AND PUBYEAR > 1994 AND PUBYEAR < 1998",
    'scopus',
)
doc_srch.execute(client, get_all=True)
print(f"doc_srch has {len(doc_srch.results)} results.")

doc_srch has 383 results.


In [None]:
# Extract the PII identifier of every search result that carries one.
# Results without a 'pii' entry are skipped explicitly instead of relying on a
# bare `except`, which would also hide unrelated errors and keyboard interrupts.
pii = [
    result['pii']
    for result in doc_srch.results
    if isinstance(result, dict) and 'pii' in result
]
print("Check -  No of PII", len(pii))

Check -  No of PII 382


In [None]:
# Using the PII identifiers, loop through all articles and extract publication name,
# cover date, title, keywords and abstract into the DataFrame `df`.
# API end point + path  : https://api.elsevier.com/content/article/pii/
ARTICLE_URL = "https://api.elsevier.com/content/article/pii/"
DC_NS = "{http://purl.org/dc/elements/1.1/}"
DCTERMS_NS = "{http://purl.org/dc/terms/}"
PRISM_NS = "{http://prismstandard.org/namespaces/basic/2.0/}"
COLUMNS = ['Publication', 'Cover Date', 'Title', 'Keywords', 'Abstract', 'PII']


def _child_text(parent, tag):
    """Return the text of the first `tag` child of `parent`, or "" if missing or empty."""
    node = parent.find(tag)
    if node is None or node.text is None:
        return ""
    return node.text


def _fetch_article_record(pii_id, api_key, timeout=30):
    """Download one article by PII and return its metadata as a dict keyed by COLUMNS.

    Fields that cannot be retrieved are left as empty strings, so one missing
    element (for example no abstract) no longer blanks the whole record.
    Network failures and unparsable responses are reported and yield a record
    that contains only the PII.
    """
    record = dict.fromkeys(COLUMNS, "")
    record['PII'] = pii_id

    try:
        # The key is sent as a header rather than in the URL so that it does not
        # end up in logs or tracebacks; the timeout keeps a stalled request from
        # hanging the whole loop.
        response = requests.get(
            ARTICLE_URL + pii_id,
            headers={"X-ELS-APIKey": api_key},
            timeout=timeout,
        )
        # The metadata fields are read from the first child of the response root.
        core = et.fromstring(response.content)[0]
    except (requests.RequestException, et.ParseError, IndexError) as err:
        print("Could not retrieve PII " + pii_id + ": " + repr(err))
        return record

    if response.status_code != 200:
        print("PII " + pii_id + " returned HTTP status " + str(response.status_code))

    record['Title'] = _child_text(core, DC_NS + 'title')
    record['Cover Date'] = _child_text(core, PRISM_NS + 'coverDate')
    record['Publication'] = _child_text(core, PRISM_NS + 'publicationName')
    record['Abstract'] = _child_text(core, DC_NS + 'description')
    # Keywords are joined into one comma-separated string; empty elements are
    # skipped so they do not show up as the literal text "None".
    record['Keywords'] = ','.join(
        node.text for node in core.findall(DCTERMS_NS + 'subject') if node.text
    )
    return record


# Collect plain dicts and build the DataFrame once at the end; concatenating
# inside the loop copies the whole frame on every iteration.
records = []
i = 0
for i, pii_id in enumerate(pii, start=1):
    record = _fetch_article_record(pii_id, apikey)

    print("--------------------------------------")
    print("Publication: " + record['Publication'])
    print("Cover Date: " + record['Cover Date'])
    print("Title: " + record['Title'])
    print("Keywords: " + record['Keywords'])
    print("Abstract: " + record['Abstract'])
    print("PII: " + pii_id)
    print("--------------------------------------")

    print("Iteration:", i, "/", len(pii))
    records.append(record)

df = pd.DataFrame(records, columns=COLUMNS)


Iteration: 1 / 382
Iteration: 2 / 382
Iteration: 3 / 382
Iteration: 4 / 382
Iteration: 5 / 382
Iteration: 6 / 382
Iteration: 7 / 382
Iteration: 8 / 382
Iteration: 9 / 382
Iteration: 10 / 382
Iteration: 11 / 382
Iteration: 12 / 382
Iteration: 13 / 382
Iteration: 14 / 382
Iteration: 15 / 382
Iteration: 16 / 382
Iteration: 17 / 382
Iteration: 18 / 382
Iteration: 19 / 382
Iteration: 20 / 382
Iteration: 21 / 382
Iteration: 22 / 382
Iteration: 23 / 382
Iteration: 24 / 382
Iteration: 25 / 382
Iteration: 26 / 382
Iteration: 27 / 382
Iteration: 28 / 382
Iteration: 29 / 382
Iteration: 30 / 382
Iteration: 31 / 382
Iteration: 32 / 382
Iteration: 33 / 382
Iteration: 34 / 382
Iteration: 35 / 382
Iteration: 36 / 382
Iteration: 37 / 382
Iteration: 38 / 382
Iteration: 39 / 382
Iteration: 40 / 382
Iteration: 41 / 382
Iteration: 42 / 382
Iteration: 43 / 382
Iteration: 44 / 382
Iteration: 45 / 382
Iteration: 46 / 382
Iteration: 47 / 382
Iteration: 48 / 382
Iteration: 49 / 382
Iteration: 50 / 382
Iteration

In [None]:
# Display the extracted article metadata stored in the DataFrame `df`
df

Unnamed: 0,Publication,Cover Date,Title,Keywords,Abstract,PII
0,,,,,,360544295000240
0,Energy,1997-09-30,Modelling welfare effects of a liberalisation ...,,\n The Dutch electricity sect...,S0360544297000248
0,Energy,1997-01-31,Optimal performance of an irreversible refrige...,,\n An IRWTHS may be treated a...,S0360544296000904
0,Energy,1997-08-31,Numerical simulation of the flow and combustio...,,\n Numerical simulations of g...,S036054429700008X
0,Energy,1997-08-31,Analysis of oxygen-enriched combustion for ste...,,\n The technical feasibility ...,S0360544296001703
...,...,...,...,...,...,...
0,Energy,1995-02-28,Using customer outage costs in electricity rel...,,\n We propose an economic ele...,0360544294000639
0,Energy,1995-08-31,Diffuse solar radiation correlations: Applicat...,,\n In order to obtain the dif...,0360544295000229
0,Energy,1995-02-28,Thermodynamic analysis of the use of pressure ...,,"\n In this paper, we analyze ...",036054429400074D
0,Energy,1995-05-31,Commercialization of fuel cells,,\n This is a summary report o...,036054429500003Y


In [None]:
# Persist the results to a .csv file: rows are appended to any existing file,
# written without the index column and without a header row.
df.to_csv(
    'energy_correction.csv',
    mode='a',
    index=False,
    header=False,
)