In [8]:
import crossref.restful
import arxiv

# Using crossref and arxiv APIs to get article information based on only the title

A bit hackey, but pretty cool, and should just work as long as your title is unique (and your arxiv title matches the journal title). 

In [9]:
client = arxiv.Client() 
works = crossref.restful.Works()

In [10]:
def get_record_from_paper_title(paper_title):
    record = {}
    record["title"] = paper_title
    
    # Grab the arxiv id
    search = arxiv.Search(query = paper_title, max_results = 1)
    results = client.results(search)
    for r in results:
        break
    record["arxiv_link"] = r.entry_id
    
    # Grab the paper info from crossref
    w = works.query(bibliographic=paper_title)
    for paper in w:
        break
    
    author_list = ""
    for a in paper["author"]:
        # print(a)
        author_list += a["given"]
        author_list += " " + a["family"]
        author_list += ", "
    author_list = author_list.rstrip(", ")
    record["author_list"] = author_list
    
    year = paper["created"]['date-parts'][0][0]
    record["year"] = year

    journal_ref = paper["short-container-title"][0]
    journal_ref += " " + paper["volume"]
    if "page" in paper.keys():
        journal_ref += ", " + paper["page"]
    elif "article-number" in paper.keys():
        journal_ref += ", " + paper["article-number"]    
    journal_ref += " (%d)" % year
    record["journal_ref"] = journal_ref
    record["doi"] = paper["DOI"]
    record["journal_link"] = "http://dx.doi.org/" + paper["DOI"]

    return record

In [11]:
title = "Imaging Transport Resonances in the Quantum Hall Effect"
get_record_from_paper_title(title)

{'title': 'Imaging Transport Resonances in the Quantum Hall Effect',
 'arxiv_link': 'http://arxiv.org/abs/cond-mat/0506347v2',
 'author_list': 'G. A. Steele, R. C. Ashoori, L. N. Pfeiffer, K. W. West',
 'year': 2005,
 'journal_ref': 'Phys. Rev. Lett. 95, 136804 (2005)',
 'doi': '10.1103/physrevlett.95.136804',
 'journal_link': 'http://dx.doi.org/10.1103/physrevlett.95.136804'}

In [12]:
title = "Photon-Pressure Strong-Coupling between two Superconducting Circuits"
get_record_from_paper_title(title)

{'title': 'Photon-Pressure Strong-Coupling between two Superconducting Circuits',
 'arxiv_link': 'http://arxiv.org/abs/2109.00205v2',
 'author_list': 'D. Bothner, I. C. Rodrigues, G. A. Steele',
 'year': 2020,
 'journal_ref': 'Nat. Phys. 17, 85-91 (2020)',
 'doi': '10.1038/s41567-020-0987-5',
 'journal_link': 'http://dx.doi.org/10.1038/s41567-020-0987-5'}

In [13]:
title = "Multi-Tone Microwave Locking via Real-Time Feedback"
get_record_from_paper_title(title)

{'title': 'Multi-Tone Microwave Locking via Real-Time Feedback',
 'arxiv_link': 'http://arxiv.org/abs/2304.06296v2',
 'author_list': 'J.P. van Soest, C.A. Potts, S. Peiter, A. Sanz Mora, G.A. Steele',
 'year': 2023,
 'journal_ref': 'Phys. Rev. Applied 20, 034007 (2023)',
 'doi': '10.1103/physrevapplied.20.034007',
 'journal_link': 'http://dx.doi.org/10.1103/physrevapplied.20.034007'}

### Todo

* Make it able to handle arxiv entries that have no journal article (yet)
  * Would have to pull instead article info from arxiv api
* Handle (naughty) journal articles that have no arxiv entry
  * easy, would just drop arxiv tag, or set it to None
* Maybe do some fuzzy logic to deal with papers that have slightly different titles on the arxiv and in the journal