# Purpose

The purpose of this notebook is to retrieve data from Scopus and save it to a local directory.

# Import Dependencies

In [3]:
import pandas as pd
from pybliometrics.scopus import ScopusSearch
from datetime import datetime
import os

# Get Data

Note: You need access to Scopus in order to download this data. You may also need to be connected to a VPN (e.g. your university's VPN).

In [2]:
def get_papers(year):
    """Run the Scopus article search for one publication year.

    Builds a Scopus query for documents of type article (``DOCTYPE ( ar )``)
    whose affiliation matches ``united AND kingdom`` and whose publication
    year equals ``year``, executes it with ``ScopusSearch`` and prints the
    wall-clock time the search took.

    Parameters
    ----------
    year : int or str
        Publication year interpolated into the ``PUBYEAR`` clause of the query.

    Returns
    -------
    pandas.DataFrame
        A frame built directly from the search object's ``results`` attribute.
    """
    print(f'Searching for {year}...')

    tick = datetime.now()
    query = f'DOCTYPE ( ar )  AND  AFFIL ( united  AND kingdom )  AND  PUBYEAR  =  {year}'
    s = ScopusSearch(query)
    tock = datetime.now()

    print(f'Completed in {tock - tick}')

    # A single DataFrame construction is enough; wrapping it a second time
    # only produced an extra, identical frame.
    return pd.DataFrame(s.results)

__Obtain data for each year__

In [9]:
# Metadata columns kept in the `data` frame (abstract text is stored separately).
# Defined once, outside the loop, because it does not change per year.
columns = ['eid', 'title', 'coverDate', 'source_id', 'publicationName', 'afid', 'affilname',
           'affiliation_city', 'affiliation_country']

# Per-year frames are collected in lists and concatenated once at the end.
abstract_frames = []
data_frames = []

# range(2000, 2009) covers the years 2000 through 2008 inclusive.
for year in range(2000, 2009):

    df = get_papers(year)

    # Add abstracts to own data frame - paper identifier and abstracts only
    abstract_frames.append(df[['eid', 'description']])

    # Add other data to own data frame
    data_frames.append(df[columns])

    # for illustration, use only one year
    break

abstracts = pd.concat(abstract_frames, ignore_index=True)
data = pd.concat(data_frames, ignore_index=True)

# Save dfs
# Build the paths with os.path.join so they work on every OS (the previous
# backslash literals were Windows-only), and create the output folder if it is
# missing so the cell also runs on a fresh checkout.
output_dir = os.path.join(os.getcwd(), 'data')
os.makedirs(output_dir, exist_ok=True)
abstracts.to_csv(os.path.join(output_dir, 'abstracts.csv'))
data.to_csv(os.path.join(output_dir, 'data.csv'))

Searching for 2000...
Completed in 0:00:08.840028
