# OpenAlex Cited References
### Adapted from: https://github.com/eschares/OpenAlex-CitedReferences/tree/main, with thanks to Eric Schares, Iowa State University ([eschares.github.io](https://eschares.github.io)), and Sandra Mierz ([https://github.com/smierz](https://github.com/smierz))
---

In [6]:
# needed dependencies
import requests
import pandas as pd
import pyarrow

Create API query

In [7]:
# OpenAlex /works query: searches title and abstract for
# sonification OR "auditory display", filters on the keyword ids
# `sonification` and `auditory-display`, and sorts by descending relevance score.
# NOTE(review): this URL already carries `page=1` and `per_page=10`, while
# get_metadata_using_cursor_paging appends its own `per_page=200` and `cursor`
# parameters -- confirm which duplicate the API honours, or drop them here.
filtered_works_url = "https://api.openalex.org/works?page=1&filter=title_and_abstract.search:sonification+OR+%22auditory+display%22,keywords.id:sonification,keywords.id:auditory-display&sort=relevance_score:desc&per_page=10&mailto=ui@openalex.org"

In [8]:
def get_metadata_using_cursor_paging(openalex_url):
    """Yield the result pages of an OpenAlex query, following cursor paging.

    Parameters
    ----------
    openalex_url : str
        OpenAlex API URL to page through. ``per_page=200`` and ``cursor``
        query parameters are appended to it for every request.

    Yields
    ------
    list
        The ``results`` entry of each JSON response, one list per request.

    Raises
    ------
    requests.HTTPError
        If the API answers a request with a 4xx/5xx status.
    """
    # start the query string with '?' if the URL has none yet, else extend it
    separator = '&' if '?' in openalex_url else '?'
    # plain concatenation (not str.format) so braces in the URL cannot break it
    url_prefix = openalex_url + separator + 'per_page=200&cursor='

    # the context manager closes the session's connections once paging ends
    # (or when the generator is closed early)
    with requests.Session() as session:
        # '*' requests the first page; loop until next_cursor is empty
        cursor = '*'
        while cursor:
            # set cursor value and request page from OpenAlex
            url = url_prefix + cursor
            print(url)
            response = session.get(url)
            # fail loudly on HTTP errors instead of a KeyError on 'meta' below
            response.raise_for_status()
            page_with_results = response.json()

            # update cursor to meta.next_cursor
            cursor = page_with_results['meta']['next_cursor']

            # return page results to user to process
            yield page_with_results['results']

In [9]:
def extract_selected_fields(openalex_work):
    """Return a flat tuple of selected fields from one OpenAlex work dict.

    Venue information is read from ``host_venue`` when the work has a
    non-empty one; otherwise it is read from ``primary_location.source``
    (publisher taken from ``host_organization_name``). Venue fields that are
    unavailable are returned as ``None``.

    Parameters
    ----------
    openalex_work : dict
        A work record; must contain ``id``, ``doi``, ``publication_year``,
        ``title`` and ``referenced_works``.

    Returns
    -------
    tuple
        ``(id, doi, publication_year, title, venue display name, publisher,
        issn_l, number of referenced works)``.
    """
    venue = openalex_work.get('host_venue')
    if venue:
        publisher = venue.get('publisher')
    else:
        # no host_venue: fall back to primary_location.source; either level
        # may be missing or None, so default to an empty dict
        primary_location = openalex_work.get('primary_location') or {}
        venue = primary_location.get('source') or {}
        publisher = venue.get('host_organization_name')

    return (
        openalex_work['id'],
        openalex_work['doi'],
        openalex_work['publication_year'],
        openalex_work['title'],
        venue.get('display_name'),
        publisher,
        venue.get('issn_l'),
        len(openalex_work['referenced_works']),
    )

In [10]:
def extract_references(work):
    """Build (work id, referenced work id) pairs for one work.

    Returns a list with one tuple per entry of ``work['referenced_works']``,
    each pairing ``work['id']`` with that entry, in the original order.
    """
    citation_pairs = []
    for cited_id in work['referenced_works']:
        citation_pairs.append((work['id'], cited_id))
    return citation_pairs

In [None]:
# Relative path of a data folder. It is not referenced in the cells shown
# here -- presumably it is joined with the file names passed to
# store_in_file further down (TODO confirm).
data_folder = '../files/ISU_2021_fullyear'

def store_in_file(data, column_names, filename):
    """Write tabular data to ``filename`` as CSV or Parquet.

    Parameters
    ----------
    data : iterable of rows
        Row data handed to ``pandas.DataFrame``.
    column_names : list of str
        Column labels for the resulting frame.
    filename : str or path-like
        Destination file. A name ending in ``csv`` (case-insensitive) is
        written as CSV without the index column; any other name is written
        as Parquet.
    """
    data_in_df = pd.DataFrame(data, columns=column_names)
    # normalise to a lower-case string so pathlib.Path objects and
    # upper-case extensions such as ".CSV" select the CSV branch as well
    if str(filename).lower().endswith("csv"):
        data_in_df.to_csv(filename, index=False)
    else:
        data_in_df.to_parquet(filename)