#### Requirements

In [2]:
from urllib.request import urlopen
from urllib.error import HTTPError
from urllib.error import URLError
import xml.etree.cElementTree as et
import xmltodict, json, os
import pandas as pd
from fpdf import FPDF

#### ESearch

In [6]:
api_key = "b30c3c67620ea99cf640ccbfb01422554c08"

base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?"

db = "pubmed"

term = "multiple+sclerosis[Title/Abstract]+AND+corpus+callosum[Title/Abstract]"

retmax = "100000"

rettype = "uilist"

url = base_url + "db=" + db + "&term=" + term + "&retmax=" + retmax + "&rettype=" + rettype + "&usehistory=y" + "&api_key=" + api_key

try:
    search_results = json.dumps(xmltodict.parse(urlopen(url).read().decode('utf-8')), indent = 4)
    with open("pubmed_search_results.json", "w") as output:
        output.write(search_results)
except HTTPError as e:
    print(e)
except URLError as e:
    print('Not found!')

In [7]:
# Set Query Key and Web Environment to use history later

with open("pubmed_search_results.json", "r") as search_results:
    search_results = json.load(search_results)

web_env = search_results["eSearchResult"]["WebEnv"]
query_key = search_results["eSearchResult"]["QueryKey"]

#### EFetch

In [8]:
base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?"

rettype = "xml"

url = base_url + "db=" + db + "&retmax=" + retmax + "&rettype=" + rettype + "&query_key=" + query_key + "&WebEnv=" + web_env  + "&api_key=" + api_key

try:
    fetch_results = json.dumps(xmltodict.parse(urlopen(url).read().decode('utf-8')), indent = 4)
    with open("pubmed_fetch_results.json", "w") as output:
        output.write(fetch_results)
except HTTPError as e:
    print(e)
except URLError as e:
    print('Not found!')

In [9]:
with open("pubmed_fetch_results.json", "r") as fetch_results:
    fetch_results = json.load(fetch_results)

IDs = [None] * len(fetch_results["PubmedArticleSet"]["PubmedArticle"])
titles = [None] * len(fetch_results["PubmedArticleSet"]["PubmedArticle"])
abstracts = [""] * len(fetch_results["PubmedArticleSet"]["PubmedArticle"])
for index, article in enumerate(fetch_results["PubmedArticleSet"]["PubmedArticle"]):
    IDs[index] = article["MedlineCitation"]["PMID"]["#text"]
    titles[index] = article["MedlineCitation"]["Article"]["ArticleTitle"]
    if "Abstract" in article["MedlineCitation"]["Article"]:
        if isinstance(article["MedlineCitation"]["Article"]["Abstract"]["AbstractText"], str):
            IDs[index] = article["MedlineCitation"]["PMID"]["#text"]
            titles[index] = article["MedlineCitation"]["Article"]["ArticleTitle"]
            abstracts[index] = article["MedlineCitation"]["Article"]["Abstract"]["AbstractText"]
        elif isinstance(article["MedlineCitation"]["Article"]["Abstract"]["AbstractText"], list):
            abstract = ""
            for item in article["MedlineCitation"]["Article"]["Abstract"]["AbstractText"]:
                if isinstance(item, dict):
                    error = 0
                    abstract += item["#text"]
                else:
                    error = 2
                    break
            if error == 2:
                print(f'Check this article manually. PMID: {article["MedlineCitation"]["PMID"]["#text"]}')
                abstracts[index] = "No abstract"
            else:
                abstracts[index] += abstract
        elif isinstance(article["MedlineCitation"]["Article"]["Abstract"]["AbstractText"], dict):
            abstracts[index] = article["MedlineCitation"]["Article"]["Abstract"]["AbstractText"]["#text"]
    else:
        abstracts[index] = "No abstract"

articles_list = {'ID': IDs, 'Title': titles, 'Abstract': abstracts}
articles_list = pd.DataFrame(articles_list)
articles_list.to_csv("pubmed_articles.csv")

Check this article manually. PMID: 27400790
Check this article manually. PMID: 26338327
Check this article manually. PMID: 26157006


#### Create PDF

In [19]:
fonts = os.listdir("fonts/")
for file in fonts:
    if file.endswith(".pkl"):
        os.remove(os.path.join("fonts/", file))

pdf = FPDF()
pdf.add_font('Roboto', style = "", fname = os.path.abspath("fonts/Roboto-Regular.ttf"), uni = True)
pdf.add_font('Roboto', style = "B", fname = os.path.abspath("fonts/Roboto-Bold.ttf"), uni = True)
pdf.set_margins(left = 20, top = 20, right = 20)

for article_number in range(1, len(IDs)):

    pdf.add_page()
    pdf.set_text_color(r = 41, g = 128, b = 185)
    pdf.set_font('Roboto', '', 10)
    pdf.multi_cell(w = 0, h = 5, txt = f"PIMD: {IDs[article_number-1]}", align = "L")

    pdf.set_text_color(r = 0, g = 0, b = 0)
    pdf.set_font('Roboto', style = 'B', size = 12)
    pdf.multi_cell(w = 0, h = 5, txt = f"{titles[article_number-1]}")

    pdf.ln(h = 2.5)

    pdf.set_font('Roboto', style = '', size = 10)
    pdf.multi_cell(w = 0, h = 5, txt = f"{abstracts[article_number-1]}")

pdf.output('pubmed_articles.pdf')


''