# Convert inspire ID's into short python dictionaries for Website
by Kyle Cranmer April 14, 2019

In [1]:
import requests
import json

In [2]:
#if you are running on Binder, you will need to uncomment the next line and execute it
#!pip install pyyaml 

In [3]:
import yaml

In [4]:
recid_unpublished = 1726790 #notpublished
recid_published = 1705857 #published
recid = recid_unpublished
url = 'https://labs.inspirehep.net/api/literature/'+str(recid)

In [5]:
def summarize_record(recid):
    url = 'https://labs.inspirehep.net/api/literature/'+str(recid)
    max_authors = 5
    r = requests.get(url)
    data = r.json()['metadata']
    mini_dict = {'recid':recid}
    mini_dict.update({'title':data['titles'][0]['title']})
    if len(data['authors'])>max_authors:
        #mini_dict.update({'authors':[a['full_name'] for a in data['authors'][:max_authors]]+['et. al.']})
        mini_dict.update({'authors':"; ".join([a['full_name'] for a in data['authors'][:max_authors]]+['et. al.'])})
    else:
        mini_dict.update({'authors':[a['full_name'] for a in data['authors']]})

    if 'collaborations' in data:
        mini_dict.update({'collaboration': data['collaborations'][0]['value']})

    mini_dict.update({'arxiv_eprint': data['arxiv_eprints'][0]['value']})
    mini_dict.update({'url': 'https://arxiv.org/abs/'+data['arxiv_eprints'][0]['value']})
    mini_dict.update({'creation_date': data['legacy_creation_date']})

    if 'publication_info' in data:
        mini_dict.update({'journal_title':data['publication_info'][0]['journal_title']})
        mini_dict.update({'journal_volume':data['publication_info'][0]['journal_volume']})
        mini_dict.update({'page_start':data['publication_info'][0]['page_start']})
        mini_dict.update({'journal_year':data['publication_info'][0]['year']})
    
    if 'dois' in data:
        mini_dict.update({'doi': data['dois'][0]['value']})
    return mini_dict



In [6]:
def summarize_records(recids):
    return {'publications':[summarize_record(recid) for recid in recids]}

## example summarizing 2 individual records

In [7]:
summarize_record(recid_published)

{'recid': 1705857,
 'title': 'Measurements of fiducial and differential cross-sections of $t\\bar{t}$ production with additional heavy-flavour jets in proton-proton collisions at $\\sqrt{s}$ = 13 TeV with the ATLAS detector',
 'authors': 'Aaboud, Morad; Aad, Georges; Abbott, Brad; Abdinov, Ovsat; Abeloos, Baptiste; et. al.',
 'collaboration': 'ATLAS',
 'arxiv_eprint': '1811.12113',
 'url': 'https://arxiv.org/abs/1811.12113',
 'creation_date': '2018-11-30',
 'journal_title': 'JHEP',
 'journal_volume': '04',
 'page_start': '046',
 'journal_year': 2019,
 'doi': '10.1007/JHEP04(2019)046'}

In [8]:
summarize_record(recid_unpublished)

{'recid': 1726790,
 'title': 'Machine learning and the physical sciences',
 'authors': 'Carleo, Giuseppe; Cirac, Ignacio; Cranmer, Kyle; Daudet, Laurent; Schuld, Maria; et. al.',
 'arxiv_eprint': '1903.10563',
 'url': 'https://arxiv.org/abs/1903.10563',
 'creation_date': '2019-03-27'}

## example summarizing a list of records

In [9]:
list_of_recids = [recid_published, recid_unpublished]
print(yaml.dump(summarize_records(list_of_recids),default_flow_style=False))

publications:
- arxiv_eprint: '1811.12113'
  authors: Aaboud, Morad; Aad, Georges; Abbott, Brad; Abdinov, Ovsat; Abeloos, Baptiste;
    et. al.
  collaboration: ATLAS
  creation_date: '2018-11-30'
  doi: 10.1007/JHEP04(2019)046
  journal_title: JHEP
  journal_volume: '04'
  journal_year: 2019
  page_start: '046'
  recid: 1705857
  title: Measurements of fiducial and differential cross-sections of $t\bar{t}$ production
    with additional heavy-flavour jets in proton-proton collisions at $\sqrt{s}$ =
    13 TeV with the ATLAS detector
  url: https://arxiv.org/abs/1811.12113
- arxiv_eprint: '1903.10563'
  authors: Carleo, Giuseppe; Cirac, Ignacio; Cranmer, Kyle; Daudet, Laurent; Schuld,
    Maria; et. al.
  creation_date: '2019-03-27'
  recid: 1726790
  title: Machine learning and the physical sciences
  url: https://arxiv.org/abs/1903.10563



## Playground

In [10]:
url

'https://labs.inspirehep.net/api/literature/1726790'

In [11]:
r = requests.get(url)
r.json()

{'created': '2019-03-27T00:00:00+00:00',
 'id': 1726790,
 'links': {},
 'metadata': {'$schema': 'https://labs.inspirehep.net/schemas/records/hep.json',
  '_collections': ['Literature'],
  '_desy_bookkeeping': [{'date': '2019-04-03',
    'expert': 'L',
    'status': 'full'},
   {'date': '2019-04-07', 'expert': 'L', 'status': 'printed'}],
  '_export_to': {'HAL': True},
  'abstracts': [{'source': 'arXiv',
    'value': 'Machine learning encompasses a broad range of algorithms and modeling tools used for a vast array of data processing tasks, which has entered most scientific disciplines in recent years. We review in a selective way the recent research on the interface between machine learning and physical sciences.This includes conceptual developments in machine learning (ML) motivated by physical insights, applications of machine learning techniques to several domains in physics, and cross-fertilization between the two fields. After giving basic notion of machine learning methods and prin

In [12]:
data=r.json()

In [13]:
data.keys()

dict_keys(['created', 'id', 'links', 'metadata', 'updated'])

In [14]:
data['created']

'2019-03-27T00:00:00+00:00'

In [15]:
data['metadata'].keys()

dict_keys(['$schema', '_collections', '_desy_bookkeeping', '_export_to', 'abstracts', 'acquisition_source', 'arxiv_eprints', 'authors', 'citeable', 'control_number', 'core', 'curated', 'document_type', 'documents', 'figures', 'inspire_categories', 'keywords', 'legacy_creation_date', 'legacy_version', 'license', 'number_of_pages', 'preprint_date', 'references', 'self', 'texkeys', 'titles'])

In [16]:
data['metadata']['citeable']

True

In [17]:
data['metadata']['refereed']

KeyError: 'refereed'

In [None]:
data['metadata']['dois']

In [None]:
for title in data['metadata']['titles']:
    print(title['title'])

In [None]:
data['metadata']['_collections']

In [None]:
data['metadata']['publication_info']

In [None]:
data['metadata']['arxiv_eprints']

In [None]:
data['metadata']['arxiv_eprints'][0]['value']

In [None]:
for author in data['metadata']['authors']:
    print(author['full_name'])

In [None]:
summarize_record(recid_unpublished)

In [None]:
print(yaml.dump([summarize_record(recid)]))

In [None]:
'metadata' in data

In [None]:
data['metadata']['legacy_creation_date']

In [None]:
data['metadata']['collaborations']

In [None]:
myString = "; ".join([a['full_name'] for a in data['metadata']['authors'][:10]])
myString