## Preparation

In [None]:
import json
import os
import time

import pandas as pd
import requests
from dotenv import load_dotenv # type: ignore

In [None]:
# Read the Scopus API key from a local .env file (or the process environment).
load_dotenv()
apikey = os.getenv("APIKey")
if not apikey:
    # Surface the problem here instead of as an opaque HTTP error on the first request.
    print('Warning: no "APIKey" found in .env or the environment. '
          'Set it before calling the API (in Google Colab, use the cell below).')

In [None]:
# Uncomment the lines below when running in Google Colab. Set up the API key as a Colab secret first (see README.md).
# from google.colab import userdata
# apikey = userdata.get('apiKey')

## Using the Scopus API

In [2]:
# ISSN that restricts the search to a single journal
# (the results shown further down all list "Cartographic Journal").
issn = '0008-7041'

In [3]:
# Scopus advanced-search query, assembled from three adjacent literals:
# keyword filter on title/abstract/keywords, the journal ISSN, and the
# publication-year window (after 2009 and before 2024).
search_query = (
    "(TITLE-ABS-KEY((bivariate AND multivariate) OR geovis*) "
    f"AND ISSN({issn})) "
    "AND (PUBYEAR AFT 2009 AND PUBYEAR BEF 2024)"
)

In [4]:
search_query

'(TITLE-ABS-KEY((bivariate AND multivariate) OR geovis*) AND ISSN(0008-7041)) AND (PUBYEAR AFT 2009 AND PUBYEAR BEF 2024)'

### How many papers resulted from that search?

In [None]:
def count_results(query):
    """
    Count the total number of results for a given query.

    Parameters
    ----------
    query : str
        Scopus search query string.

    Returns
    -------
    int or None
        The ``opensearch:totalResults`` value reported by the API, or ``None``
        when the request fails, the API answers with a non-200 status, or the
        response does not contain a usable total.
    """
    url = "https://api.elsevier.com/content/search/scopus"
    headers = {
        "Accept": "application/json",
        "X-ELS-APIKey": apikey
    }
    params = {
        "query": query,
        "count": 0  # Set count to 0 to only get the total number of results
    }

    try:
        # Without a timeout a stalled connection would block the notebook forever.
        response = requests.get(url, headers=headers, params=params, timeout=30)
    except requests.RequestException as exc:
        print(f"Request to Scopus failed: {exc}")
        return None

    if response.status_code != 200:
        # Report the status so a bad key (401) or rate limit (429) is distinguishable.
        print(f"Scopus returned HTTP {response.status_code}")
        return None

    try:
        return int(response.json()['search-results']['opensearch:totalResults'])
    except (KeyError, TypeError, ValueError):
        # Unexpected or malformed payload: treat it like any other failure.
        return None

# Example usage: look up the hit count for the query and report the outcome.
total_results = count_results(search_query)
print(
    "Failed to fetch total results."
    if total_results is None
    else f"Total results: {total_results}"
)


Total results: 8


### Get the papers

In [None]:
def search_scopus(query, sort_by, start):
    """
    Search Scopus for a given query and return the results as a pandas DataFrame.

    Parameters
    ----------
    query : str
        Scopus search query string.
    sort_by : str
        Value sent as the ``sort`` request parameter.
    start : int
        Value sent as the ``start`` request parameter (offset of the first result).

    Returns
    -------
    tuple
        ``(df, data)`` where ``df`` holds one row per returned entry with the
        selected attributes and ``data`` is the decoded JSON response.
        ``(None, None)`` when the request fails, the API answers with a
        non-200 status, or the body is not valid JSON.

    Notes
    -----
    Every successful call overwrites ``papers.json`` with the raw response.
    """
    url = "https://api.elsevier.com/content/search/scopus"
    headers = {
        "Accept": "application/json",
        "X-ELS-APIKey": apikey
    }
    params = {
        "query": query,
        "sort": sort_by,
        "start": start
    }

    try:
        # Without a timeout a stalled connection would block the notebook forever.
        response = requests.get(url, headers=headers, params=params, timeout=30)
    except requests.RequestException as exc:
        print(f"Request to Scopus failed: {exc}")
        return None, None

    if response.status_code != 200:
        print(f"Scopus returned HTTP {response.status_code}")
        return None, None

    try:
        data = response.json()
    except ValueError:
        print("Scopus returned a body that is not valid JSON.")
        return None, None

    # Save to JSON file
    with open('papers.json', 'w', encoding='utf-8') as f:
        json.dump(data, f)

    # Attributes copied from each entry into the DataFrame (also the column order).
    fields = [
        "prism:url",
        "dc:title",
        "dc:creator",
        "prism:publicationName",
        "prism:doi",
        "citedby-count",
    ]

    # A missing 'entry' key yields an empty frame instead of a KeyError.
    entries = data.get('search-results', {}).get('entry', [])

    # NOTE(review): for empty result sets Scopus appears to return a single
    # placeholder entry carrying an "error" key; skip such entries so they do
    # not become an all-None row. Confirm against the API documentation.
    papers = [
        {field: entry.get(field) for field in fields}
        for entry in entries
        if "error" not in entry
    ]

    # Passing columns keeps the expected schema even when no papers came back.
    df = pd.DataFrame(papers, columns=fields)

    return df, data

In [None]:
# Fetch the first page (start offset 0) sorted by "date".
# search_scopus also writes the raw response to papers.json.
df, data = search_scopus(search_query, "date", 0)

In [None]:
df

Unnamed: 0,prism:url,dc:title,dc:creator,prism:publicationName,prism:doi,citedby-count
0,https://api.elsevier.com/content/abstract/scop...,Medieval Macrospace Through GIS: The Norse Wor...,Petrulevich A.,Cartographic Journal,10.1080/00087041.2019.1596341,2
1,https://api.elsevier.com/content/abstract/scop...,Atlas Design: A Usability Approach for the Dev...,Gómez Solórzano L.S.,Cartographic Journal,10.1080/00087041.2017.1393189,8
2,https://api.elsevier.com/content/abstract/scop...,Adaptive Multi-Scale Population Spatialization...,Hu L.,Cartographic Journal,10.1080/00087041.2016.1193273,9
3,https://api.elsevier.com/content/abstract/scop...,Cartography-oriented design of 3d geospatial i...,Semmo A.,Cartographic Journal,10.1080/00087041.2015.1119462,26
4,https://api.elsevier.com/content/abstract/scop...,Towards better WMS maps through the use of the...,Enescu I.I.,Cartographic Journal,10.1080/00087041.2015.1119468,4
5,https://api.elsevier.com/content/abstract/scop...,Hypercube-based visualization architecture for...,Enescu I.I.,Cartographic Journal,10.1080/00087041.2015.1119469,5
6,https://api.elsevier.com/content/abstract/scop...,How to assess visual communication of uncertai...,Kinkeldey C.,Cartographic Journal,10.1179/1743277414Y.0000000099,113
7,https://api.elsevier.com/content/abstract/scop...,IconMap-based visualisation technique and its ...,Zhang X.,Cartographic Journal,10.1179/1743277412Y.0000000016,1


In [None]:
# Page through the whole result set and keep every page, not only the last one.
PAGE_SIZE = 25        # offset step between pages; assumes the API's default page size is 25 - TODO confirm
PAUSE_SECONDS = 60    # pause between requests to deal with rate limits

start_index = 0
pages = []
while True:
    page_df, data = search_scopus(search_query, "date", start_index)
    if page_df is None:
        print("Failed to fetch papers.")
        break

    print(page_df.head())
    pages.append(page_df)

    total_results = int(data['search-results']['opensearch:totalResults'])
    print(f"Total results: {total_results}")

    start_index += PAGE_SIZE  # Increase the start index for the next page of results
    if start_index >= total_results:
        break
    time.sleep(PAUSE_SECONDS)

# Combine the collected pages; df stays None when not even the first page could be fetched.
df = pd.concat(pages, ignore_index=True) if pages else None


## As a Single Function

In [None]:
def search_all_papers(query, page_size=25, pause_seconds=60):
    """
    Search Scopus for all papers matching the query and return a pandas DataFrame containing the results.

    Parameters
    ----------
    query : str
        Scopus search query string.
    page_size : int, optional
        Number of results requested per page; also the step by which the
        ``start`` offset advances. Defaults to 25, the step this function
        has always used.
    pause_seconds : float, optional
        Seconds to wait between consecutive page requests (rate limiting).
        Defaults to 60.

    Returns
    -------
    pandas.DataFrame
        One row per paper with the selected attributes. Contains only the
        pages fetched so far if a request fails part-way through.

    Raises
    ------
    ValueError
        If ``page_size`` is smaller than 1 (the offset would never advance).

    Notes
    -----
    The raw JSON of every fetched page is written to ``all_papers.json``.
    """
    if page_size < 1:
        raise ValueError("page_size must be at least 1")

    # Loop-invariant request settings, built once.
    url = "https://api.elsevier.com/content/search/scopus"
    headers = {
        "Accept": "application/json",
        "X-ELS-APIKey": apikey
    }
    # Attributes copied from each entry into the DataFrame (also the column order).
    fields = [
        "prism:url",
        "dc:title",
        "dc:creator",
        "prism:publicationName",
        "prism:doi",
        "citedby-count",
    ]

    start_index = 0
    all_papers = []
    all_data = []

    while True:
        params = {
            "query": query,
            "sort": "date",
            "start": start_index,
            # Request exactly as many results as the offset advances by, so
            # pages neither overlap nor leave gaps.
            "count": page_size
        }

        try:
            # Without a timeout a stalled connection would block the notebook forever.
            response = requests.get(url, headers=headers, params=params, timeout=30)
        except requests.RequestException as exc:
            print(f"Failed to fetch papers. ({exc})")
            break

        if response.status_code != 200:
            print(f"Failed to fetch papers. (HTTP {response.status_code})")
            break

        try:
            data = response.json()
            total_results = int(data['search-results']['opensearch:totalResults'])
        except (KeyError, TypeError, ValueError):
            print("Failed to fetch papers. (unexpected response body)")
            break

        # NOTE(review): for empty result sets Scopus appears to return a single
        # placeholder entry carrying an "error" key; skip such entries so they
        # do not become an all-None row. Confirm against the API documentation.
        entries = data['search-results'].get('entry', [])
        papers = [
            {field: entry.get(field) for field in fields}
            for entry in entries
            if "error" not in entry
        ]

        all_papers.extend(papers)
        all_data.append(data)

        start_index += page_size  # Increase the start index for the next page of results
        if start_index >= total_results or not papers:
            # Done, or the API returned an empty page: further requests would only wait.
            break
        time.sleep(pause_seconds)  # Pause to deal with rate limits

    # Save all data to JSON file
    with open('all_papers.json', 'w', encoding='utf-8') as f:
        json.dump(all_data, f)

    # Load all papers into pandas DataFrame (columns kept even when empty)
    df = pd.DataFrame(all_papers, columns=fields)

    return df


In [None]:
# Fetch every page of results in one call.
# search_all_papers also writes the raw responses to all_papers.json.
df = search_all_papers(search_query)

In [None]:
df

Unnamed: 0,prism:url,dc:title,dc:creator,prism:publicationName,prism:doi,citedby-count
0,https://api.elsevier.com/content/abstract/scop...,Medieval Macrospace Through GIS: The Norse Wor...,Petrulevich A.,Cartographic Journal,10.1080/00087041.2019.1596341,2
1,https://api.elsevier.com/content/abstract/scop...,Atlas Design: A Usability Approach for the Dev...,Gómez Solórzano L.S.,Cartographic Journal,10.1080/00087041.2017.1393189,8
2,https://api.elsevier.com/content/abstract/scop...,Adaptive Multi-Scale Population Spatialization...,Hu L.,Cartographic Journal,10.1080/00087041.2016.1193273,9
3,https://api.elsevier.com/content/abstract/scop...,Cartography-oriented design of 3d geospatial i...,Semmo A.,Cartographic Journal,10.1080/00087041.2015.1119462,26
4,https://api.elsevier.com/content/abstract/scop...,Towards better WMS maps through the use of the...,Enescu I.I.,Cartographic Journal,10.1080/00087041.2015.1119468,4
5,https://api.elsevier.com/content/abstract/scop...,Hypercube-based visualization architecture for...,Enescu I.I.,Cartographic Journal,10.1080/00087041.2015.1119469,5
6,https://api.elsevier.com/content/abstract/scop...,How to assess visual communication of uncertai...,Kinkeldey C.,Cartographic Journal,10.1179/1743277414Y.0000000099,113
7,https://api.elsevier.com/content/abstract/scop...,IconMap-based visualisation technique and its ...,Zhang X.,Cartographic Journal,10.1179/1743277412Y.0000000016,1


In [None]:
# Save the results to papers.csv, omitting the DataFrame index column.
df.to_csv('papers.csv', index=False)

---