# In[2]:
from Bio import Entrez
import pandas as pd

# In[3]:
# Contact address Biopython attaches to every Entrez request; NCBI asks
# E-utilities clients to identify themselves so they can be reached before
# being blocked for excessive use.
Entrez.email = 'geena.ildefonso@vanderbilt.edu'


def search(query, retmax=20, affiliation='vanderbilt university'):
    """Search PubMed for an author and return the matching PubMed ids.

    The term sent to PubMed is ``<query>[AU] AND <affiliation>[AD]``, i.e. an
    author-field match combined with an affiliation-field match, with the
    results sorted by relevance.

    :param query: string of format "first_name last_name"
    :param retmax: maximum number of ids to request from PubMed (default 20)
    :param affiliation: text used for the ``[AD]`` affiliation filter
    :return: comma-separated string of PubMed ids, or ``None`` when the
        response could not be parsed or no publication was found
    """
    query_string = '%s[AU] AND %s[AD]' % (query, affiliation)
    handle = Entrez.esearch(db='pubmed',
                            sort='relevance',
                            retmax=str(retmax),
                            retmode='xml',
                            term=query_string)
    try:
        results = Entrez.read(handle, validate=True)
    except AttributeError:
        print("did not find any for %s" % query)
        return None
    finally:
        # Always release the response handle, even when parsing fails.
        handle.close()

    # A missing 'Count' and a count of zero both mean there is nothing to
    # report for this author.
    if 'Count' not in results or results['Count'] == '0':
        return None

    id_list = results['IdList']
    if not id_list:
        # Return None rather than '' so callers never split an empty string
        # into a list holding one empty id.
        return None

    return ",".join(id_list)

# In[4]:
# Load the student list. The file is expected to have FirstName, LastName and
# papers as column headers; index_col=False keeps pandas from using the first
# column as the row index.
df = pd.read_csv('student_names_list.csv', index_col=False)

# Accumulates one entry per student; intended to become the papers column.
new_papers = []
for i, row in df.iterrows():
    # NOTE(review): str() turns a missing cell (NaN) into the text 'nan',
    # which would then be searched as a name - confirm the CSV has no blanks.
    first_name = str(row['FirstName'])
    last_name = str(row['LastName'])
    full_name = '%s %s' % (first_name, last_name)
    tmp_papers = search(full_name)

    # search() returns None when it finds no publications for this name.
    # Record an empty string for this student and move on to the next row.
    if tmp_papers is None:
        new_papers.append('')
        continue

    # Otherwise the result is a comma-separated string of ids; turn it into
    # a list of individual ids.
    tmp_papers = tmp_papers.split(',')