# Getting medical abstracts using Entrez Utilities (E-Utils) API

In [None]:
pip install Bio

In [2]:
import os

import pandas as pd
from Bio import Entrez

# You need to create an account at https://www.ncbi.nlm.nih.gov/
# Provide your NCBI registered email and your API key through the environment
# variables NCBI_EMAIL and NCBI_API_KEY so the credentials are never saved
# inside the notebook.
# Documentation for Entrez Utilities (E-Utils) - https://www.ncbi.nlm.nih.gov/books/NBK25501/

# A missing or blank value becomes None (Biopython's default for both settings)
# instead of an empty string, so a blank credential is not sent to NCBI.
# NOTE(review): confirm how the installed Biopython version treats None here.
Entrez.email = os.environ.get('NCBI_EMAIL') or None  # Your email
Entrez.api_key = os.environ.get('NCBI_API_KEY') or None  # Your API key

In [3]:
"""
Using esearch method you can choose a database (e.g. pubmed), add a search term (e.g. "neuroplasticity"), set retmax (max number of returned values), etc.
"""
search_results = Entrez.read(Entrez.esearch(db="pubmed", term="neuroplasticity", retmax = 3000, reldate=365, datetype="pdat", usehistory="y", ))
count = int(search_results["Count"])
print("Found %i results" % count)

Found 3405 results


In [4]:
"""
You will get an idlist, every article has an id.
"""
idlist = search_results['IdList']
ids = ",".join(idlist)

In [5]:
"""
After esearch, use efetch method, which helps extract more article information - titles, abstracts, etc.
"""
handle = Entrez.efetch(db="pubmed", id=ids, rettype="xml", retmode="text")
records = Entrez.read(handle)

title = [pubmed_article['MedlineCitation']['Article']['Abstract']['AbstractText'][0]  if 'Article' in pubmed_article['MedlineCitation']['Article'].keys() else pubmed_article['MedlineCitation']['Article']['ArticleTitle']  for pubmed_article in records['PubmedArticle']]
abstracts = [pubmed_article['MedlineCitation']['Article']['Abstract']['AbstractText'][0]  if 'Abstract' in pubmed_article['MedlineCitation']['Article'].keys() else pubmed_article['MedlineCitation']['Article']['ArticleTitle']  for pubmed_article in records['PubmedArticle']]

In [6]:
# Combine the two parallel lists into a two-column table, one row per article.
column_data = {'Title': title, 'Abstract': abstracts}
df = pd.DataFrame(column_data)

df

Unnamed: 0,Title,Abstract
0,Interrogating the function of GABA<sub>A</sub>...,To better understand neural circuits and behav...
1,Floralozone improves cognitive impairment in v...,Vascular dementia (VD) is the second largest t...
2,"The effects of Vilazodone, YL-0919 and Vortiox...",Parkinson's disease is a neurodegenerative dis...
3,Deciphering therapeutic options for neurodegen...,Silent information regulator 1 (SIRT1) is a ni...
4,Therapeutic effect of extracellular vesicles f...,Extracellular vesicles (EVs) are biologically ...
...,...,...
2995,Challenges and opportunities in translational ...,"For decades, basic research on the underlying ..."
2996,Growth and Differentiation of Circulating Stem...,Stem cell therapy is gaining momentum as an ef...
2997,Remote Ischemic Postconditioning vs. Physical ...,There remain debates on neuroprotection and re...
2998,Working Memory for Signs with Poor Visual Reso...,Stimulus degradation adds to working memory lo...


In [7]:
# Print a summary of the frame: index range, columns, non-null counts, dtypes and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3000 entries, 0 to 2999
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   Title     3000 non-null   object
 1   Abstract  3000 non-null   object
dtypes: object(2)
memory usage: 47.0+ KB


In [8]:
# Write the DataFrame to a CSV file (path is relative to the working directory)
stem, suffix = 'abstracts', '.csv'
filename = stem + suffix
df.to_csv(path_or_buf=filename)