In [2]:
!pip install requests
!pip install bs4
!pip install pandas



In [4]:
import concurrent.futures
from urllib.parse import quote_plus

import pandas as pd
import requests
from bs4 import BeautifulSoup

# Ask the user what to search PubMed for.
search_term_input = input("Enter the search term: ")

# Percent-encode the term so that spaces, "&", "#", "+" and similar characters
# stay part of the query instead of corrupting the URL.
url = f"https://pubmed.ncbi.nlm.nih.gov/?term={quote_plus(search_term_input)}"

# Fetch the first result page; fail fast on a hung connection or an HTTP error
# instead of parsing an error page.
r = requests.get(url, timeout=30)
r.raise_for_status()
soup = BeautifulSoup(r.text, "html.parser")

# The pager label holds the total page count; an "of " prefix and thousands
# separators are stripped before parsing it as an integer.
# When the label is absent (e.g. the search produced no paged result list),
# fall back to a single page instead of crashing with AttributeError on None.
total_pages_label = soup.find("label", class_="of-total-pages")
if total_pages_label is None:
    max_page = 1
else:
    max_page = total_pages_label.text.strip()
    max_page = int(max_page.replace("of ", "").replace(",", ""))

# Column-oriented accumulator that the per-page results are merged into.
data = {"Name": [], "Authors": [], "Citation": [], "PMID": []}

def _first_text(container, tag, css_class):
    """Return the stripped text of the first matching tag in `container`, or "" if none."""
    node = container.find(tag, class_=css_class)
    return node.text.strip() if node is not None else ""


def scrape_page(page_num):
    """Fetch one PubMed result page and extract the fields of each listed article.

    Args:
        page_num: Page number of the search results to fetch for the
            module-level ``search_term_input``.

    Returns:
        A dict with the keys "Name", "Authors", "Citation" and "PMID", each
        mapping to a list of stripped strings. The four lists always have the
        same length and position ``i`` in every list refers to the same
        article; a field that an article does not have is recorded as "".

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # Let requests build the query string so the search term is URL-encoded.
    page_response = requests.get(
        "https://pubmed.ncbi.nlm.nih.gov/",
        params={"term": search_term_input, "page": page_num},
        timeout=30,
    )
    # Surface HTTP errors instead of silently returning an empty page.
    page_response.raise_for_status()
    page_soup = BeautifulSoup(page_response.text, "html.parser")

    page_data = {"Name": [], "Authors": [], "Citation": [], "PMID": []}

    # Extract the fields article by article. Collecting each field with an
    # independent page-wide find_all() misaligns the columns as soon as one
    # article lacks a field (e.g. a record without authors).
    for title in page_soup.find_all("a", class_="docsum-title"):
        # NOTE(review): assumes each result is wrapped in its own <article>
        # element; if there is none, the title's direct parent is searched.
        # Confirm against the current PubMed markup.
        article = title.find_parent("article")
        if article is None:
            article = title.parent

        page_data["Name"].append(title.text.strip())
        page_data["Authors"].append(
            _first_text(article, "span", "docsum-authors full-authors")
        )
        page_data["Citation"].append(
            _first_text(article, "span", "docsum-journal-citation full-journal-citation")
        )
        page_data["PMID"].append(_first_text(article, "span", "docsum-pmid"))

    return page_data

# Scrape every result page on a thread pool. Leaving the `with` block waits
# for all workers to finish, and `map` yields the page results in page order.
with concurrent.futures.ThreadPoolExecutor() as pool:
    results = pool.map(scrape_page, range(1, max_page + 1))

# Append each page's columns to the shared accumulator, one page at a time.
for page_result in results:
    for column in ("Name", "Authors", "Citation", "PMID"):
        data[column].extend(page_result[column])

# Build the final table and print it together with the search term used.
df = pd.DataFrame(data)
print(f"Below is the data scraped from PubMed, SEARCH = {search_term_input} : ")
print(df)

Enter the search term: Naturopathy
Below is the data scraped from PubMed, SEARCH = Naturopathy : 
                                                   Name  \
0                                          Naturopathy.   
1     [Naturopathy and complementary medicine in sma...   
2                    Naturopathy: a critical appraisal.   
3                                          Naturopathy.   
4                               [Water in naturopathy].   
...                                                 ...   
1675  The effects of an integrated supportive care i...   
1676  Impact of complementary oral enzyme applicatio...   
1677  Patients with Advanced or Metastasised Non-Sma...   
1678  Zinc for the prevention or treatment of acute ...   
1679  Clinical effectiveness of patient-targeted fee...   

                                                Authors  \
0                                   Smith MJ, Logan AC.   
1                               Stanossek I, Wehrend A.   
2               

In [5]:
# Bare last expression: the notebook renders the scraped DataFrame as the cell's output.
df

Unnamed: 0,Name,Authors,Citation,PMID
0,Naturopathy.,"Smith MJ, Logan AC.",Med Clin North Am. 2002 Jan;86(1):173-84. doi:...,11795088
1,[Naturopathy and complementary medicine in sma...,"Stanossek I, Wehrend A.",Tierarztl Prax Ausg K Kleintiere Heimtiere. 20...,34157762
2,Naturopathy: a critical appraisal.,Atwood KC 4th.,MedGenMed. 2003 Dec 30;5(4):39.,14745386
3,Naturopathy.,Nightingale MJ.,Lancet. 1970 Apr 25;1(7652):895. doi: 10.1016/...,4191536
4,[Water in naturopathy].,Marktl W.,Forsch Komplementarmed Klass Naturheilkd. 2003...,12853717
...,...,...,...,...
1675,The effects of an integrated supportive care i...,"Klafke N, Mahler C, von Hagens C, Uhlmann L, B...",Cancer Med. 2019 Jul;8(8):3666-3676. doi: 10.1...,31115192
1676,Impact of complementary oral enzyme applicatio...,"Beuth J, Ost B, Pakdaman A, Rethfeldt E, Bock ...",Cancer Chemother Pharmacol. 2001 Jul;47 Suppl:...,11561873
1677,Patients with Advanced or Metastasised Non-Sma...,"Schad F, Thronicke A, Hofheinz RD, Matthes H, ...",Cancers (Basel). 2024 Apr 22;16(8):1609. doi: ...,38672690
1678,Zinc for the prevention or treatment of acute ...,"Hunter J, Arentz S, Goldenberg J, Yang G, Bear...",BMJ Open. 2021 Nov 2;11(11):e047474. doi: 10.1...,34728441


In [None]:
# Write the scraped table to a CSV file (relative path, so it lands in the
# current working directory) without the DataFrame index column.
csv_path = "pubmed_scraped_data.csv"
df.to_csv(csv_path, index=False)