In [None]:
from Bio import Entrez
import xml.etree.ElementTree as ET
import csv
import json
import time

# Contact email sent along with Entrez (NCBI E-utilities) requests.
# It is left blank here: fill in a real address before running this script.
Entrez.email = ""

def search_pubmed(keyword, retstart, retmax):
    """Run one page of a PubMed search and return the parsed result.

    Args:
        keyword: Search term passed to ESearch as ``term``.
        retstart: Zero-based offset of the first result to return.
        retmax: Maximum number of IDs to return for this page.

    Returns:
        The record produced by ``Entrez.read``; callers in this file read
        its ``'IdList'`` and ``'Count'`` entries.
    """
    handle = Entrez.esearch(db="pubmed", term=keyword, retstart=retstart, retmax=retmax)
    try:
        # Close the handle even if parsing the response fails.
        return Entrez.read(handle)
    finally:
        handle.close()

def fetch_details(id_list):
    """Fetch the PubMed records for the given IDs as raw XML.

    Args:
        id_list: Iterable of PubMed ID strings; they are joined with commas
            into a single EFetch request.

    Returns:
        The unparsed XML payload exactly as read from the EFetch handle.
    """
    ids = ','.join(id_list)
    handle = Entrez.efetch(db="pubmed", id=ids, retmode="xml")
    try:
        return handle.read()
    finally:
        # The original never closed this handle, leaking one connection
        # per batch fetched.
        handle.close()

# Search terms submitted to PubMed, one query per entry (order is preserved).
keywords = [
    "Vaccine",
    "Vaccination",
    "Immunization",
    "Inoculation",
    "COVID-19 vaccine",
    "mRNA vaccine",
    "AstraZeneca",
    "Pfizer",
    "Moderna",
    "Johnson & Johnson",
    "Childhood vaccines",
    "Flu shot",
    "Vaccine myth",
    "Vaccine conspiracy",
    "Vaccine side effects",
    "Vaccine dangers",
    "Vaccine risks",
    "Adjuvant",
    "Thimerosal",
    "Mercury in vaccines",
    "Aluminum in vaccines",
    "Vaccine reaction",
    "Vaccine injury",
    "Vaccine-induced",
    "Anti-vax",
    "Vaccine mandate",
    "Vaccine passport",
    "Vaccine hesitancy",
    "Vaccine refusal",
    "Vaccine trials",
    "Vaccine approval",
    "Vaccine distribution",
    "Vaccine rollout",
]

def _element_text(element):
    """Return all text inside *element*, including text nested in inline tags.

    ``Element.text`` stops at the first child element, so a title such as
    ``Effect of <i>in vitro</i> dosing`` would be cut down to ``Effect of ``.
    Returns ``None`` when the element is missing or contains no text.
    """
    if element is None:
        return None
    text = "".join(element.itertext()).strip()
    return text or None


# One {"Keyword", "Title", "Abstract"} dict is appended per fetched article.
articles = []

# Define how many results you want per keyword (e.g., 1000)
total_results_per_keyword = 1000
batch_size = 100  # Number of results to fetch per request

for keyword in keywords:
    print(f"Searching PubMed for: {keyword}")
    total_results_fetched = 0

    while total_results_fetched < total_results_per_keyword:
        # Never ask for more than is still needed, so the per-keyword cap
        # holds even when it is not a multiple of batch_size.
        remaining = total_results_per_keyword - total_results_fetched
        search_result = search_pubmed(
            keyword, total_results_fetched, min(batch_size, remaining)
        )
        id_list = search_result['IdList']
        count = int(search_result['Count'])

        if not id_list:
            break  # Break the loop if no more results are returned

        xml_data = fetch_details(id_list)
        root = ET.fromstring(xml_data)

        for article in root.findall('.//PubmedArticle'):
            # A record without a usable title must not abort the whole run.
            article_title = _element_text(article.find('.//ArticleTitle'))
            if article_title is None:
                article_title = "No title available"

            # Structured abstracts are split over several AbstractText
            # elements; keep every non-empty section, not just the first.
            sections = [
                _element_text(section)
                for section in article.findall('.//Abstract/AbstractText')
            ]
            sections = [section for section in sections if section]
            abstract_text = " ".join(sections) if sections else "No abstract available"

            articles.append({
                "Keyword": keyword,
                "Title": article_title,
                "Abstract": abstract_text
            })

        total_results_fetched += len(id_list)

        # Delay between each batch fetch
        time.sleep(1)

        if total_results_fetched >= count:
            break  # Break the loop if all available results are fetched

    # Delay between each keyword search
    time.sleep(2)

# Write the collected articles to CSV: a header row, then one row per article.
fieldnames = ['Keyword', 'Title', 'Abstract']
with open('pubmed_articles.csv', 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(articles)

# Write the same list as a single indented JSON array.
with open('pubmed_articles.json', 'w', encoding='utf-8') as jsonfile:
    jsonfile.write(json.dumps(articles, indent=4))

print("Data saved to pubmed_articles.csv and pubmed_articles.json")



Searching PubMed for: Vaccine
Searching PubMed for: Vaccination
Searching PubMed for: Immunization
Searching PubMed for: Inoculation
Searching PubMed for: COVID-19 vaccine
Searching PubMed for: mRNA vaccine
Searching PubMed for: AstraZeneca
Searching PubMed for: Pfizer
Searching PubMed for: Moderna
Searching PubMed for: Johnson & Johnson
Searching PubMed for: Childhood vaccines
Searching PubMed for: Flu shot
Searching PubMed for: Vaccine myth
Searching PubMed for: Vaccine conspiracy
Searching PubMed for: Vaccine side effects
Searching PubMed for: Vaccine dangers
Searching PubMed for: Vaccine risks
Searching PubMed for: Adjuvant
Searching PubMed for: Thimerosal
Searching PubMed for: Mercury in vaccines
Searching PubMed for: Aluminum in vaccines
Searching PubMed for: Vaccine reaction
Searching PubMed for: Vaccine injury
Searching PubMed for: Vaccine-induced
Searching PubMed for: Anti-vax
Searching PubMed for: Vaccine mandate
Searching PubMed for: Vaccine passport
Searching PubMed for: Va