In [95]:
import argparse
import datetime as dt
import math
import os

import ads
import requests

Code source: https://github.com/adsabs/ads-examples/blob/master/library_csv/lib_2_csv.py

In [2]:
def get_config():
    """
    Load the ADS developer key from file and build the request settings.

    The key is read from ``~/.ads/dev_key`` (surrounding whitespace is
    stripped). If the file cannot be read, a hint naming the expected path is
    printed and the 'Authorization' header is built from ``None``, so any
    request made with the returned headers carries no usable token.

    :return: the biblib base 'url' and the 'headers' to send with each request
    :rtype: dict
    """
    key_path = os.path.expanduser('~/.ads/dev_key')

    token = None
    try:
        with open(key_path) as f:
            # ~/.ads/dev_key should contain your ADS API token
            token = f.read().strip()
    except IOError:
        # Name the path that was tried so the user knows where to put the key.
        print('The script assumes you have your ADS developer token in the '
              'file: {}'.format(key_path))

    return {
        'url': 'https://api.adsabs.harvard.edu/v1/biblib',
        'headers': {
            'Authorization': 'Bearer:{}'.format(token),
            'Content-Type': 'application/json',
        }
    }

In [5]:
def get_libraries():
    """
    Fetch the meta-data of every library owned by the authenticated user.

    :return: the value of the 'libraries' key of the JSON response
    :rtype: list
    :raises ValueError: if the response body is not valid JSON; the raw
        response text is used as the error message
    """
    settings = get_config()
    endpoint = '{}/libraries'.format(settings['url'])
    response = requests.get(endpoint, headers=settings['headers'])

    # The payload lists all of our libraries, including the number of
    # documents, the name, description, id, and other meta data.
    try:
        payload = response.json()
    except ValueError:
        raise ValueError(response.text)

    return payload['libraries']

In [201]:
def get_library(library_id, num_documents, start=0, rows=25):
    """
    Get the content of a library when you know its id. As we paginate the
    requests from the private library end point for document retrieval,
    we have to repeat requests until we have all documents.

    :param library_id: identifier of the library
    :type library_id: str
    :param num_documents: number of documents in the library
    :type num_documents: int
    :param start: index of the first row to retrieve; rows before it are
        skipped
    :type start: int
    :param rows: number of rows requested per page; must be positive
    :type rows: int
    :return: the concatenated 'documents' entries of every page
    :rtype: list
    :raises ValueError: if ``rows`` is not positive, or if a response body
        is not valid JSON (the raw response text is used as the message)
    """
    if rows <= 0:
        # Otherwise the page count below would divide by zero.
        raise ValueError('rows must be a positive number, got {!r}'.format(rows))

    remaining = num_documents - start
    if remaining <= 0:
        # Nothing left to fetch: skip reading the config and any request.
        return []

    config = get_config()

    num_paginates = int(math.ceil(remaining / (1.0*rows)))

    documents = []
    for i in range(num_paginates):
        print('Pagination {} out of {}: rows:'.format(i+1, num_paginates))

        r = requests.get(
            '{}/libraries/{id}?start={start}&rows={rows}'.format(
                config['url'],
                id=library_id,
                start=start,
                rows=rows
            ),
            headers=config['headers']
        )

        # Get all the documents that are inside the library
        try:
            data = r.json()['documents']
        except ValueError:
            print('bad')
            raise ValueError(r.text)

        documents.extend(data)

        # Advance to the first row of the next page.
        start += rows

    return documents

In [155]:
if __name__ == '__main__':
    # Keep two local files in step with one ADS library:
    #   'bibcodes'    - one library document identifier per line
    #   'library.bib' - the 'bibtexabs' export of those documents

    config = get_config()

    library_id = 'JqsNRmsURGeY3xWDdb4UWg'

    r = requests.get('{}/libraries/{id}'.format(config['url'],id=library_id),
                     headers=config['headers'])
    metadata = r.json()['metadata']

    if not os.path.isfile('bibcodes'):
        # First run: download the whole library and create both files.
        library = get_library(library_id=metadata['id'], num_documents=metadata['num_documents'])
        # Skip the export request when the library is empty.
        bibtex = ads.ExportQuery(library,format='bibtexabs').execute() if library else ''
        with open('bibcodes','w') as f:
            f.writelines('{}\n'.format(bc) for bc in library)
        with open('library.bib','w') as f:
            f.write(bibtex)
    else:
        # Later runs: fetch only the rows beyond those already stored.
        # 'else' matters here: with a second independent 'if', the first run
        # would fall through into this branch right after creating 'bibcodes'.
        # NOTE(review): this assumes the library returns documents in a stable
        # order with new entries at the end - confirm against the ADS API.
        with open('bibcodes','r') as f:
            # Ignore blank lines so they do not inflate the start offset.
            current = [line.strip() for line in f if line.strip()]
        new = get_library(library_id=metadata['id'],num_documents=metadata['num_documents'],start=len(current))
        if new:
            bibtex = ads.ExportQuery(new,format='bibtexabs').execute()
            with open('bibcodes','a') as f:
                f.writelines('{}\n'.format(bc) for bc in new)
            with open('library.bib','a') as f:
                f.write('\n\n\n\n\n')
                f.write(bibtex)



Pagination 1 out of 1: rows:


In [157]:
%%bash

# Sanity check: compare the size of the BibTeX export with the bibcode list.
# The grep counts every line of library.bib that contains '@', so an '@'
# inside a field value would be counted as well.
echo "Number of entries in library.bib"
grep @ library.bib | wc -l

# Number of lines in the bibcodes file.
wc -l bibcodes

Number of entries in library.bib
     525
     525 bibcodes


In [214]:
# Fetch the meta-data record of a single library and display it.
library_id = 'JqsNRmsURGeY3xWDdb4UWg'
config = get_config()

metadata_url = '{}/libraries/{id}'.format(config['url'], id=library_id)
r = requests.get(metadata_url, headers=config['headers'])
metadata = r.json()['metadata']

metadata

{'num_documents': 499,
 'date_last_modified': '2020-12-04T07:46:28.511540',
 'permission': 'owner',
 'description': 'Paper Library',
 'public': False,
 'num_users': 1,
 'owner': 'john.lewis',
 'date_created': '2020-11-01T21:22:28.618057',
 'id': 'JqsNRmsURGeY3xWDdb4UWg',
 'name': 'Full Library'}

In [235]:
# List every response header of the last request as "name value".
for header_name, header_value in r.headers.items():
    print(header_name, header_value)

Date Fri, 04 Dec 2020 09:20:23 GMT
Content-Type application/json
Transfer-Encoding chunked
Connection keep-alive
Server openresty/1.15.8.1
Vary Accept-Encoding, Origin
Access-Control-Allow-Origin http://0.0.0.0:8000, http://adsabs.harvard.edu, http://adsx.cfa.harvard.edu:8888, http://hourly.adslabs.org, http://localhost:8000, http://ui.adsabs.harvard.edu, http://ui.adslabs.org, https://arxiv.org, https://beta.arxiv.org, https://demo.adsabs.harvard.edu, https://dev.adsabs.harvard.edu, https://devui.adsabs.harvard.edu, https://experimental-bumblebee.us-east-1.elasticbeanstalk.com, https://prod.adsabs.harvard.edu, https://staging-bumblebee.elasticbeanstalk.com, https://staging-bumblebee.us-east-1.elasticbeanstalk.com, https://ui.adsabs.harvard.edu
Access-Control-Allow-Methods DELETE, GET, OPTIONS, POST, PUT
Access-Control-Allow-Headers Accept, Authorization, Content-Type, Orcid-Authorization, X-BB-Api-Client-Version, X-CSRFToken
Access-Control-Allow-Credentials true
X-RateLimit-Limit 500


In [203]:
# Download the library with a page size of 500 rows and show how many
# documents came back.
library = get_library(
    library_id=metadata['id'],
    num_documents=metadata['num_documents'],
    rows=500,
)
len(library)

Pagination 1 out of 2: rows:
Pagination 2 out of 2: rows:


498

In [213]:
# Display the biblib base URL stored in the config dict.
config['url']

{'url': 'https://api.adsabs.harvard.edu/v1/biblib',
 'headers': {'Authorization': 'Bearer:<REDACTED>',
  'Content-Type': 'application/json'}}

In [212]:
curl -v

{}

In [200]:
# Print a right-aligned two-column table: library name and ADS id.
# The name column is as wide as the longest library name.
n = max(len(entry['name']) for entry in libraries)
row_template = '{:>%s}   {}' % n

print(row_template.format('NAME', 'ADS ID'))
print('-' * 80)
for entry in libraries:
    print(row_template.format(entry['name'], entry['id']))

                                            NAME   ADS ID
--------------------------------------------------------------------------------
                   Statistics, Methods, Software   ZvchyvILRXqYLeqMUFB5sw
                                    White Dwarfs   aKqMp4ecRnyXXRI6wk928g
                                John Arban Lewis   0VJ8J20AQaqlWlltUKzq0Q
Molecular Clouds & ISM - Observations and Theory   fLl7MqkLSeSjp5PCT0TJYg
                      CO to H2 Conversion Factor   XqnGQqcEQ2qqZMxITqpGZg
                                         history   xfqrrBfnQI-qud1QtFK5eA
                                      CO Surveys   DH545I9FRdmwU2F8nlHlVw
                                          Aliens   I9MzniPoQHOy6glFvb4kjA
                                        Pedagogy   onxH5mxHQvyswBToUvBNxg
                                     Dust to Gas   WIIJB-QyThKtqbDh6Pyr3Q
                                          Larson   6ehH_JVmTFmNtNG4Q-wNww
                                         Review