## PubMed e-search and e-fetch example<br>
See also Biopython Tutorial and Cookbook at http://biopython.org/DIST/docs/tutorial/Tutorial.html

In [29]:
from Bio import Entrez
from Bio import Medline
import pandas as pd
import time
import numpy as np
Entrez.email = "person@nih.gov" # Replace with your email

In [30]:
# return a list of the PMIDs that match your search term

def getPubMedIDs(searchstring,maxrecords):
    IDlist=[]
    if not maxrecords > 100000: #maximum possible = 100,000 records
        handle = Entrez.esearch(db="pubmed", term=searchstring, retmax = maxrecords) 
        result = Entrez.read(handle)
        IDlist= result["IdList"]
        handle.close()

    return IDlist

In [31]:
# get MEDLINE data records for each PMID and store in a dataframe
# searchPMIDlist was created by getPubMedIDs function
# this will only get the first 10000 records from your PMID list

def getPubMeddata(searchPMIDlist,dataframename):
    
    # a trick to remove duplicates from the PMID list using set and list
    searchPMIDlist = list(set(searchPMIDlist))
    
    # if there are no PMIDs in the list, return the same dataframe
    if not len(searchPMIDlist):
        return dataframename
    
    # for displaying number of records processed by nearest power of 10
    counter=0
    marker=int(10**np.floor([np.log10(len(searchPMIDlist))])[0])
    
    # get data records in MEDLINE format
    fetchhandle = Entrez.efetch(db="pubmed", id=searchPMIDlist, rettype="medline", retmode="text")
    fetchresult = Medline.parse(fetchhandle)

    # parse the dictionary of returned records
    for record in fetchresult: 
        if "PMID" not in record: # if there's no PMID in this record (rare), skip it
            continue

        PT=''
        if "PT" in record: # if there's a pub type list in this record, store it
            PT=record["PT"]
        TI=''
        if "TI" in record: # if there's a title in this record, store it
            TI=record["TI"]
        AB=''
        if "AB" in record: # if there's an abstract in this record, store it
            AB=record["AB"]

        # put the data you found into a new row in the dataframe
        # you might want to collect different data for your purposes
        dataframename = dataframename.append({'PMID': record["PMID"],
                                              'PT': PT,
                                              'Title': TI,
                                              'Abstract': AB,
                                              'AbLength': len(AB)}, ignore_index=True)
        
        # if we've processed a new batch of records, display number of records processed
        counter += 1
        if not counter % marker:
            print(counter, "records processed")
            
    time.sleep(3) # wait time between repeated fetches
    fetchhandle.close()
    
    return dataframename

In [32]:
# Search Example 1
myPMIDlist=[]
searchstring="cognition[Title]"

myPMIDlist = getPubMedIDs(searchstring, 3000)
print("Search string:", searchstring)
print("Total PMIDs found:",len(myPMIDlist))

Search string: cognition[Title]
Total PMIDs found: 3000


In [33]:
# Search Example 2
myPMIDlist=[]
searchstring="thermotherapy[Title/Abstract]"

myPMIDlist = getPubMedIDs(searchstring, 3000)
print("Search string:", searchstring)
print("Total PMIDs found:",len(myPMIDlist))

Search string: thermotherapy[Title/Abstract]
Total PMIDs found: 2318


In [34]:
# Search Example 3
myPMIDlist=[]
searchstring='PNAS[ta] AND 97[vi]'

myPMIDlist = getPubMedIDs(searchstring, 3000)
print("Search string:", searchstring)
print("Total PMIDs found:",len(myPMIDlist))

Search string: PNAS[ta] AND 97[vi]
Total PMIDs found: 2651


In [35]:
# Search Example 4
myPMIDlist=[]
searchstring='"mitochondrial dynamics"[Title]'

myPMIDlist = getPubMedIDs(searchstring, 3000)
print("Search string:", searchstring)
print("Total PMIDs found:",len(myPMIDlist))

Search string: "mitochondrial dynamics"[Title]
Total PMIDs found: 712


In [36]:
# initialize your dataframe for the citation record data
# you might want to collect different data for your purposes

columnlist=['PMID', 'PT', 'Title', 'Abstract','AbLength']
mytable = pd.DataFrame(columns=columnlist, index=None)

In [37]:
# Fetch Example

mytable = getPubMeddata(myPMIDlist, mytable) # will only get the first 10000 records from your PMID list
print('Record Table Length:',len(mytable))
mytable.head(10)

100 records processed
200 records processed
300 records processed
400 records processed
500 records processed
600 records processed
700 records processed
Record Table Length: 712


Unnamed: 0,PMID,PT,Title,Abstract,AbLength
0,19703656,"[Journal Article, Research Support, N.I.H., Ex...",From mitochondrial dynamics to arrhythmias.,The reactive oxygen species (ROS)-dependent mi...,1440
1,20875628,"[Journal Article, Research Support, Non-U.S. G...",Mitochondrial dynamics.,Mitochondrial dynamics is a key feature for th...,1030
2,21936845,"[Journal Article, Research Support, Non-U.S. G...",Mitochondrial dynamics in yeast cell death and...,Mitochondria play crucial roles in programmed ...,1081
3,24654596,"[Journal Article, Research Support, N.I.H., Ex...",Mitochondrial dynamics: biology and therapy in...,INTRODUCTION: Lung cancer mortality rates rema...,1429
4,19426874,"[Journal Article, Research Support, N.I.H., Ex...",Monitoring mitochondrial dynamics with photoac...,Mitochondria are dynamic organelles that under...,1125
5,19393022,"[Journal Article, Research Support, N.I.H., Ex...",The role of abnormal mitochondrial dynamics in...,Mitochondria play critical roles in neuronal f...,959
6,15254264,"[Journal Article, Research Support, N.I.H., Ex...",Importance of mitochondrial dynamics during me...,Opposing fission and fusion events maintain th...,1484
7,24021799,"[Journal Article, Research Support, Non-U.S. G...",Manganese induces mitochondrial dynamics impai...,Manganese (Mn) is an essential trace element d...,1340
8,17145957,"[Journal Article, Research Support, Non-U.S. G...",Orchestration of lymphocyte chemotaxis by mito...,Lymphocyte traffic is required to maintain hom...,832
9,25652199,"[Journal Article, Research Support, N.I.H., Ex...",New therapeutics to modulate mitochondrial dyn...,The processes that control the number and shap...,438
