## Use multiple Scopus Author IDs to retrieve lists of articles by author

In [4]:
#pip install config

Collecting config
  Downloading config-0.5.0.post0-py2.py3-none-any.whl (20 kB)
Installing collected packages: config
Successfully installed config-0.5.0.post0
Note: you may need to restart the kernel to use updated packages.


In [51]:
# Dependencies
import requests
import json
import pandas as pd
import numpy as np
import re
import io
from config import api_key
from collections import OrderedDict
from pandas.io.json import json_normalize  

In [52]:
#The function "load_csv_author_ids" loads a CSV you have created that has columns called: last_name, scopus_author_id.
#This CSV may also contain other information helpful to your work. The function returns a pandas data frame called 
#"multiple_authors_df".

file_path = "radiation_oncology_without_details.csv"

def load_csv_author_ids(file_path):
    # File to Load
    multiple_authors_df = file_path

    # Read the CSV file and store into Pandas DataFrame with the column Scopus Author ID as a string
    multiple_authors_df = pd.read_csv(multiple_authors_df, encoding="utf-8", dtype ={'scopus_author_id': str})

    #Change the column names to lower case with underscore for spaces
    multiple_authors_df.columns =  multiple_authors_df.columns.str.strip().str.lower().str.replace(" ", "_").str.replace("(","").str.replace(")","")
    #radiation_oncology_df.head()
    return multiple_authors_df

multiple_authors_df = load_csv_author_ids(file_path)
load_csv_author_ids(file_path)

Unnamed: 0,last_name,first_name,mi,scopus_author_id,scopus_search,unnamed:_5
0,Donnelly,Eric,D,21233377200,AU-ID(21233377200),AU-ID(21233377200) OR
1,Gentile,Michelle,S,56018970700,AU-ID(56018970700),AU-ID(56018970700) OR
2,Gius,David,R,7003610066,AU-ID(7003610066),AU-ID(7003610066) OR
3,Hayes,John,P,55313350000,AU-ID(55313350000),AU-ID(55313350000) OR
4,Kalapurakal,John,A,7003993738,AU-ID(7003993738),AU-ID(7003993738) OR
5,Kruser,Timothy,J,24448583300,AU-ID(24448583300),AU-ID(24448583300) OR
6,Mittal,Bharat,B,7102661470,AU-ID(7102661470),AU-ID(7102661470) OR
7,Mittal,Bharat,B,57207807061,AU-ID(57207807061),AU-ID(57207807061) OR
8,Sachdev,Sean,,56443683800,AU-ID(56443683800),AU-ID(56443683800) OR
9,Sathiaseelan,Vythialingam,,6701754514,AU-ID(6701754514),AU-ID(6701754514) OR


In [53]:
#List the column names in the dataframe
multiple_authors_df.columns

Index(['last_name', 'first_name', 'mi', 'scopus_author_id', 'scopus_search',
       'unnamed:_5'],
      dtype='object')

In [54]:
#Check the data type in the dataframe columns called scopus_author_id and scopus_search
multiple_authors_df.scopus_author_id.dtype
#multiple_authors_df.scopus_search.dtype

dtype('int64')

In [55]:
#The function "clean_author_id_list" takes in the multiple_authors_df and formats the "scopus_author_id" column 
#as a string, then uses the column to create a list, removes any of the "nan" values for authors that don't 
#have an ID, and finally returns a list called "cleaned_author_id_list"

def clean_author_id_list(multiple_authors_df):
    
    #Change the data type in the dataframe column called "scopus_author_id" from int64 to a string. 
    multiple_authors_df['scopus_author_id'] = multiple_authors_df['scopus_author_id'].astype(str)

    #Save the column called scopus_author_id to a list called Author_ID_List
    author_id_list = multiple_authors_df['scopus_author_id'].tolist()
    #print(author_ID_List)
    
    #Clean the Author_ID_List to remove nan
    cleaned_author_id_list = [x for x in author_id_list if str(x) != 'nan']
    #print(cleaned_author_id_list)
        
    return cleaned_author_id_list

cleaned_author_id_list = clean_author_id_list(multiple_authors_df)
clean_author_id_list(multiple_authors_df)

['21233377200',
 '56018970700',
 '7003610066',
 '55313350000',
 '7003993738',
 '24448583300',
 '7102661470',
 '57207807061',
 '56443683800',
 '6701754514',
 '36143455500',
 '24336584500',
 '7005165328',
 '453']

In [56]:
#The function "create_multiple_author_id_query" takes in the "cleaned_author_id_list" and adds the necessary syntax of
# "AU-ID(xxxxxxxxx)" that is required for searching Scopus Author IDs. The function returns 
#the "scopous_multiple_author_id_query".

def create_multiple_author_id_query(cleaned_author_id_list):

    #Add the necessary syntax to the cleaned_Author_ID_List
    scopus_mulitple_author_id_query = []
    for x in cleaned_author_id_list:
        authorID_string = "".join(("AU-ID(", x,")"))
        #print(authorID_string)
        scopus_mulitple_author_id_query.append(authorID_string)

    #print(scopus_mulitple_author_id_query)
    return scopus_mulitple_author_id_query

scopus_mulitple_author_id_query = create_multiple_author_id_query(cleaned_author_id_list)
create_multiple_author_id_query(cleaned_author_id_list)

['AU-ID(21233377200)',
 'AU-ID(56018970700)',
 'AU-ID(7003610066)',
 'AU-ID(55313350000)',
 'AU-ID(7003993738)',
 'AU-ID(24448583300)',
 'AU-ID(7102661470)',
 'AU-ID(57207807061)',
 'AU-ID(56443683800)',
 'AU-ID(6701754514)',
 'AU-ID(36143455500)',
 'AU-ID(24336584500)',
 'AU-ID(7005165328)',
 'AU-ID(453)']

In [57]:
def create_multiple_author_id_query(cleaned_author_id_list):

    #Add the necessary syntax to the cleaned_Author_ID_List
    scopus_mulitple_author_id_query = []
    for x in cleaned_author_id_list:
        authorID_string = "".join(("AU-ID(", x,")"))
        #print(authorID_string)
        scopus_mulitple_author_id_query.append(authorID_string)

    #print(scopus_mulitple_author_id_query)
    return scopus_mulitple_author_id_query

scopus_mulitple_author_id_query = create_multiple_author_id_query(cleaned_author_id_list)
create_multiple_author_id_query(cleaned_author_id_list)

#The function "get_scopus_articles" takes in the "scopus_multiple_author_id_query" and creates a necessary URL 
#for querying the Scopus API. The Scopus API key is passed in through the "headers" (see above Dependencies 
#"from config import api_key") and the config file is also referenced in the git ignore so it won't be exposed 
#on Github. The API is called and returns a response for each Scopus Author ID in the list Each response is 
#saved in a "single_author_article_dict". Each of the "single_author_article_dict" are then appended to the 
#"multiple_author_article_list". The function returns a list of dictionaries called 
#the "multiple_author_article_list". 

multiple_author_article_list = []
#multiple_author_article_dict = {}
single_author_article_dict = {}
date = "2002-2003"

In [99]:
#The function "get_scopus_articles" takes in the "scopus_multiple_author_id_query" and creates a necessary URL 
#for querying the Scopus API. The Scopus API key is passed in through the "headers" (see above Dependencies 
#"from config import api_key") and the config file is also referenced in the git ignore so it won't be exposed 
#on Github. The API is called and returns a response for each Scopus Author ID in the list Each response is 
#saved in a "single_author_article_dict". Each of the "single_author_article_dict" are then appended to the 
#"multiple_author_article_list". The function returns a list of dictionaries called 
#the "multiple_author_article_list". 

multiple_author_article_list = []
#multiple_author_article_dict = {}
single_author_article_dict = {}
date = "2019"
view = "Standard"
field = ["dc:title", "dc:description"]

def get_scopus_articles(scopus_mulitple_author_id_query):
    
    for authorid in scopus_mulitple_author_id_query:
        url = "http://api.elsevier.com/content/search/scopus?"           
        
        headers = {
             "X-ELS-APIKey": api_key,
             'Accept':'application/json'
        }
        parameters = {
            "query": 'nanosafety',
            "date": date, 
            "count": 10, 
            "field": field
        }
        
        #Make the API request 
        single_author_article_response = requests.get(url, headers=headers, params=parameters)
        #print(single_author_article_response.url)
        #print(single_author_article_response.status_code)
        
              
        #Append each single_author_article_dict response to multiple_author_article_list to create a list of dictionaries
        single_author_article_dict = single_author_article_response.json()
        #print(type(single_author_article_dict)) 
        #print(single_author_article_dict)
        multiple_author_article_list.append(single_author_article_dict.copy())
    
    return multiple_author_article_list
       
article_response = get_scopus_articles(scopus_mulitple_author_id_query)
article_response[0]
#References
#https://dev.elsevier.com/guides/ScopusSearchViews.htm
#https://stackoverflow.com/questions/53558837/python-loop-to-pull-api-data-for-iterating-urls
#https://stackoverflow.com/questions/36410800/python-3-parse-json-from-multiple-api-requests-into-a-list-and-output-to-a-fil
#https://www.pluralsight.com/guides/web-scraping-with-request-python

{'search-results': {'opensearch:totalResults': '593',
  'opensearch:startIndex': '0',
  'opensearch:itemsPerPage': '10',
  'opensearch:Query': {'@role': 'request',
   '@searchTerms': 'nanosafety',
   '@startPage': '0'},
  'link': [{'@_fa': 'true',
    '@ref': 'self',
    '@href': 'https://api.elsevier.com/content/search/scopus?start=0&count=10&query=nanosafety&date=2019&field=dc%3Atitle&field=dc%3Adescription',
    '@type': 'application/json'},
   {'@_fa': 'true',
    '@ref': 'first',
    '@href': 'https://api.elsevier.com/content/search/scopus?start=0&count=10&query=nanosafety&date=2019&field=dc%3Atitle&field=dc%3Adescription',
    '@type': 'application/json'},
   {'@_fa': 'true',
    '@ref': 'next',
    '@href': 'https://api.elsevier.com/content/search/scopus?start=10&count=10&query=nanosafety&date=2019&field=dc%3Atitle&field=dc%3Adescription',
    '@type': 'application/json'},
   {'@_fa': 'true',
    '@ref': 'last',
    '@href': 'https://api.elsevier.com/content/search/scopus?start=

In [93]:
url = "http://api.elsevier.com/content/search/scopus?" + "?format=json"
headers = {"X-ELS-APIKey": api_key, 'Accept':'application/json'}
parameters = {"query": 'nanosafety', "view": "Complete", "date": date}
article_response_2 = requests.get(url, headers=headers, params=parameters)
article_response_2.json()

{'search-results': {'opensearch:totalResults': '593',
  'opensearch:startIndex': '0',
  'opensearch:itemsPerPage': '25',
  'opensearch:Query': {'@role': 'request',
   '@searchTerms': 'nanosafety',
   '@startPage': '0'},
  'link': [{'@_fa': 'true',
    '@ref': 'self',
    '@href': 'https://api.elsevier.com/content/search/scopus?start=0&count=25&query=nanosafety&view=Complete&date=2019',
    '@type': 'application/json'},
   {'@_fa': 'true',
    '@ref': 'first',
    '@href': 'https://api.elsevier.com/content/search/scopus?start=0&count=25&query=nanosafety&view=Complete&date=2019',
    '@type': 'application/json'},
   {'@_fa': 'true',
    '@ref': 'next',
    '@href': 'https://api.elsevier.com/content/search/scopus?start=25&count=25&query=nanosafety&view=Complete&date=2019',
    '@type': 'application/json'},
   {'@_fa': 'true',
    '@ref': 'last',
    '@href': 'https://api.elsevier.com/content/search/scopus?start=568&count=25&query=nanosafety&view=Complete&date=2019',
    '@type': 'applicat

In [96]:
#The function "make_scopus_articles_df" takes in the "multiple_author_article_list" and uses json_normalize to
#flatten the json contained in the "entry" field. The function returns a dataframe called the "scopus_articles_df".

def make_scopus_articles_df(multiple_author_article_list):
    #final_list = json_normalize(multiple_author_list, meta=["search-results"], record_path=["search-results", "entry"])
    scopus_articles_df = pd.DataFrame.from_dict(pd.json_normalize(multiple_author_article_list, meta=["search-results"], record_path=["search-results", "entry"]),orient="columns")
    
    return scopus_articles_df

scopus_articles_df = make_scopus_articles_df(article_response_2.json())

title_description_df = scopus_articles_df[["dc:title", "dc:description"]]
title_description_df


Unnamed: 0,dc:title,dc:description
0,Microfluidic Synthesis of Functional Nanoparti...,This chapter focuses on microfluidic-based syn...
1,Simultaneous delivery of DNA vaccine and hydro...,The activation and maturation of dendritic cel...
2,Size-dependent tissue-specific biological effe...,Background: Understanding the in vivo size-dep...
3,Nucleic acid nanoparticles at a crossroads of ...,Vaccines and immunotherapies involve a variety...
4,Selenium modulated gut flora and promoted deco...,Introduction: Selenium plays important roles i...
5,Plasmon-Enhanced Oxidase-Like Activity and Cel...,Local surface plasmon resonance (LSPR)-enhance...
6,Cold-Responsive Nanoparticle Enables Intracell...,Conventional cryopreservation of mammalian cel...
7,Turning Challenges into Opportunities for Prom...,Systems thinking encompasses a set of skills f...
8,Organoplatinum-Substituted Polyoxometalate Inh...,Aggregated β-amyloid (Aβ) is widely considered...
9,Predominance of secondary organic aerosol to p...,Reactive oxygen species (ROS) are believed to ...
