In [1]:
from time import sleep
import requests
import json

In [2]:
author_ids = [
    (1745524,    'Tim'),
    (2230578,    'Leilani'),
    (143711421,  'Alex'),
]

In [3]:
endpoint = 'https://api.semanticscholar.org/graph/v1/author/{:d}?fields=papers.authors,papers.year,papers.venue,papers.title,papers.url'

In [6]:
papers = {}
# using dict to de-duplicate

for author_id, name in author_ids:
    print(f'Getting papers by {name}')
    response = requests.get(endpoint.format(author_id))
    response = response.json()
    print(f'Got {len(response["papers"]):,d} papers.')
    
    for paper in response['papers']:
        papers[ paper['paperId'] ] = paper
    
    sleep(2)

print(f'Finished. Got {len(papers):,d} papers total.')

Getting papers by Tim
Got 62 papers.
Getting papers by Leilani
Got 39 papers.
Getting papers by Alex
Got 40 papers.
Finished. Got 140 papers total.


In [50]:
def format_authors(authors):
    author_strs = []
    for author in authors:
        if (author['authorId'] is not None) and (int(author['authorId']) in author_ids):
            author_strs.append('**'+author['name']+'**')
        else:
            author_strs.append(author['name'])
            
    return ', '.join(author_strs)

In [51]:
def format_paper(paper):
    
    title_str = '### [{title}]({url})'.format(**paper)
    
    venue_str = '*{venue}* {year}'.format(**paper)
    
    authors = format_authors(paper['authors'])
    
    return f'{title_str}\n{authors}\n\n{venue_str}'

In [52]:
def get_year(paper):
    try: return int( paper['year'] )
    except:
        return 1900

In [53]:
with open('../papers.md', 'w') as f:
    for paper in sorted(papers.values(), key=get_year, reverse=True):
        f.write( format_paper(paper) )
        f.write(3*'\n')
        
print(f'Finished writing {len(papers):,d} papers to markdown.')

Finished writing 140 papers to markdown.


In [54]:
list(papers.values())[0]['authors']

[{'authorId': '123521472', 'name': 'A. Shapiro'},
 {'authorId': '150247231', 'name': 'N. Marinsek'},
 {'authorId': '4889245', 'name': 'I. Clay'},
 {'authorId': '108681884', 'name': 'B. Bradshaw'},
 {'authorId': '104226726', 'name': 'E. Ramírez'},
 {'authorId': '108127403', 'name': 'J. Min'},
 {'authorId': '3430870', 'name': 'A. Trister'},
 {'authorId': '20431833', 'name': 'Yuedong Wang'},
 {'authorId': '1745524', 'name': 'Tim Althoff'},
 {'authorId': '1748978', 'name': 'L. Foschini'}]

In [55]:
format_authors( paper['authors'] )

'Andrew Beam, M. Fiterau, Peter F. Schulam, J. Fries, Michael C. Hughes, Alexander B. Wiltschko, Jasper Snoek, N. Antropova, R. Ranganath, B. Jedynak, Tristan Naumann, Adrian V. Dalca, Tim Althoff, Shubhi Asthana, P. Tandon, J. Kandola, Alexander J. Ratner, M. Ghassemi'

In [56]:
paper

{'paperId': '850ce029a5fb48885d67f43136d602f790bfbbfe',
 'url': 'https://www.semanticscholar.org/paper/850ce029a5fb48885d67f43136d602f790bfbbfe',
 'title': 'Machine Learning for Health ( ML 4 H )-What Parts of Healthcare are Ripe for Disruption by Machine Learning Right Now ?',
 'venue': '',
 'year': None,
 'authors': [{'authorId': '143649421', 'name': 'Andrew Beam'},
  {'authorId': '2592453', 'name': 'M. Fiterau'},
  {'authorId': '145610328', 'name': 'Peter F. Schulam'},
  {'authorId': '121010486', 'name': 'J. Fries'},
  {'authorId': '2067786273', 'name': 'Michael C. Hughes'},
  {'authorId': '49398909', 'name': 'Alexander B. Wiltschko'},
  {'authorId': '144108062', 'name': 'Jasper Snoek'},
  {'authorId': '38428647', 'name': 'N. Antropova'},
  {'authorId': '2615814', 'name': 'R. Ranganath'},
  {'authorId': '2521020', 'name': 'B. Jedynak'},
  {'authorId': '40466858', 'name': 'Tristan Naumann'},
  {'authorId': '3046516', 'name': 'Adrian V. Dalca'},
  {'authorId': '1745524', 'name': 'Tim 

In [45]:
paper['authors'][1]['authorId'] is not None

False