In [1]:
from Bio import Entrez
import csv

def _pubmed_articles(fetched):
    """Return the journal-article records from a parsed PubMed efetch result.

    Current Biopython parses a PubMed efetch response into a dict-like object
    keyed by "PubmedArticle" / "PubmedBookArticle"; very old releases returned
    a plain list of articles. Both shapes are accepted here.
    """
    if isinstance(fetched, dict):
        return fetched.get("PubmedArticle", [])
    return fetched


def _author_name(author):
    """Format one AuthorList entry as "LastName Initials" (or its collective name)."""
    if "LastName" in author:
        name_parts = (str(author["LastName"]), str(author.get("Initials", "")))
        return " ".join(part for part in name_parts if part)
    # Consortium / group authors carry CollectiveName instead of LastName.
    return str(author.get("CollectiveName", ""))


def _paper_row(pubmed_record):
    """Build one CSV row [title, authors, date, abstract, MeSH descriptors].

    Every optional field falls back to an empty string so that a single sparse
    record cannot abort the whole export.
    """
    citation = pubmed_record["MedlineCitation"]
    article = citation["Article"]

    title = article.get("ArticleTitle", "")

    author_names = (_author_name(author) for author in article.get("AuthorList", []))
    authors = ", ".join(name for name in author_names if name)

    # ArticleDate is a (possibly empty) list of Year/Month/Day mappings;
    # render the first one as YYYY-MM-DD instead of dumping its repr.
    article_dates = article.get("ArticleDate") or []
    if article_dates:
        first_date = article_dates[0]
        publication_time = "-".join(
            str(first_date[part])
            for part in ("Year", "Month", "Day")
            if first_date.get(part)
        )
    else:
        publication_time = ""

    # Structured abstracts have several AbstractText sections; keep all of them.
    abstract_sections = article.get("Abstract", {}).get("AbstractText", [])
    abstract = " ".join(str(section) for section in abstract_sections)

    # Records that are not (yet) MeSH-indexed have no MeshHeadingList.
    mesh = citation.get("MeshHeadingList", [])
    descriptors = ",".join(str(term["DescriptorName"]) for term in mesh)

    return [title, authors, publication_time, abstract, descriptors]


def pubmed_scraper(keyword, start_date, end_date, filename, max_results=None):
    """Search PubMed for a keyword within a publication-date range and save the hits as CSV.

    Args:
        keyword: Search term, combined with the date range using ``AND``.
        start_date: First publication date of the range, e.g. ``"2020/01/01"``.
        end_date: Last publication date of the range, e.g. ``"2020/08/30"``.
        filename: Path of the CSV file to write (overwritten if it exists).
        max_results: Optional cap on the number of PubMed IDs requested from
            the search (sent as ``retmax``). When ``None`` no ``retmax`` is
            sent, so the search service's own default page size applies.

    Returns:
        None. The CSV has the columns Title, Authors, Publication Time,
        Abstract and Mesh Descriptors.

    Any exception (network or SSL failures, unexpected response shape, file
    errors) is caught, reported with ``print`` and not re-raised.
    """
    # Set email address for NCBI id
    Entrez.email = "isabela.baker@uth.tmc.edu"  # Replace with applicable email

    # Define search
    search_query = f"{keyword} AND ({start_date}:{end_date}[Date - Publication])"
    search_options = {"db": "pubmed", "term": search_query, "retmode": "xml"}
    if max_results is not None:
        search_options["retmax"] = max_results

    # Number of records requested per efetch call.
    batch_size = 200

    try:
        # Perform search
        handle = Entrez.esearch(**search_options)
        try:
            record = Entrez.read(handle)
        finally:
            # Close the handle even if parsing fails.
            handle.close()

        pm_ids = [str(pm_id) for pm_id in record["IdList"]]

        # Fetch paper details in batches rather than one request per paper
        papers = []
        for start in range(0, len(pm_ids), batch_size):
            id_batch = pm_ids[start:start + batch_size]
            handle = Entrez.efetch(db="pubmed", id=",".join(id_batch), retmode="xml")
            try:
                fetched = Entrez.read(handle)
            finally:
                handle.close()
            papers.extend(_paper_row(article) for article in _pubmed_articles(fetched))

        # Save data to csv
        with open(filename, "w", newline="", encoding="utf-8") as csvfile:
            csvwriter = csv.writer(csvfile)
            csvwriter.writerow(["Title", "Authors", "Publication Time", "Abstract", "Mesh Descriptors"])
            csvwriter.writerows(papers)

        print(f"Data has been successfully written to {filename}")

    except Exception as e:
        # Deliberate best-effort boundary: report the failure instead of raising.
        print(f"An error occurred: {e}")

if __name__ == "__main__":
    # Example run: export PubMed records matching "HIV" that were published
    # between 2020/01/01 and 2020/08/30 into pubmed_data.csv.
    keyword, filename = "HIV", "pubmed_data.csv"
    start_date, end_date = "2020/01/01", "2020/08/30"

    pubmed_scraper(
        keyword=keyword,
        start_date=start_date,
        end_date=end_date,
        filename=filename,
    )


An error occurred: <urlopen error [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: self-signed certificate in certificate chain (_ssl.c:1000)>
