## NLP PubMed Miner

### 1.1 Importing Libraries

In [1]:
from Bio import Entrez
from Bio import Medline
from io import StringIO
import pandas as pd
import time

### 1.2 Set Up Entrez Parameters

In [2]:
# Contact e-mail configured on Biopython's Entrez module (presumably sent along
# with each request to NCBI — confirm against the Bio.Entrez documentation).
# NOTE(review): this is a personal address hard-coded in the notebook; replace
# it with your own before running.
Entrez.email = "deepikapratapa27@gmail.com"

### 1.3 Search and Fetch Abstracts

In [3]:
def fetch_pubmed_abstracts(query, max_records=100, retstart=0):
    """
    Fetch PubMed records matching a search query as MEDLINE-formatted text.

    Runs an Entrez ``esearch`` against the ``pubmed`` database to collect the
    matching IDs, then an ``efetch`` to download those records with
    ``rettype="medline"`` and ``retmode="text"``.

    Args:
        query: Search term passed to ``esearch`` (e.g. ``"fever AND India"``).
        max_records: Maximum number of IDs to request (``retmax``).
        retstart: Offset of the first ID to return, for paging through results.

    Returns:
        str: The raw text returned by ``efetch``, or an empty string when the
        search matched no records. The result is always a string so it can be
        handed straight to a text parser.
    """
    search_handle = Entrez.esearch(
        db="pubmed", term=query, retmax=max_records, retstart=retstart
    )
    try:
        search_result = Entrez.read(search_handle)
    finally:
        # Release the underlying connection even if parsing the reply fails.
        search_handle.close()

    id_list = search_result["IdList"]
    if not id_list:
        # Keep the return type consistent (str): an empty list here would
        # break callers that wrap the result in StringIO.
        return ""

    fetch_handle = Entrez.efetch(
        db="pubmed", id=id_list, rettype="medline", retmode="text"
    )
    try:
        return fetch_handle.read()
    finally:
        fetch_handle.close()

### 1.4 Parse Abstracts into a DataFrame

In [4]:
def parse_abstracts(raw_text):
    """
    Parse MEDLINE-formatted text into a DataFrame of structured records.

    Each parsed record contributes one row built from its ``PMID``, ``TI``,
    ``AB`` and ``DP`` fields; a field missing from a record becomes an empty
    string.

    Args:
        raw_text: MEDLINE-formatted text. Any falsy value (``""``, ``None``,
            or an empty list) is treated as "no records".

    Returns:
        pandas.DataFrame: Columns ``PMID``, ``Title``, ``Abstract``, ``Date``
        in that order. The columns are present even when there are no rows,
        so downstream column access does not fail on an empty result.
    """
    columns = ['PMID', 'Title', 'Abstract', 'Date']

    # Nothing to parse: return an empty frame with the expected schema rather
    # than passing a non-string to StringIO.
    if not raw_text:
        return pd.DataFrame(columns=columns)

    rows = [
        {
            'PMID': record.get('PMID', ''),
            'Title': record.get('TI', ''),
            'Abstract': record.get('AB', ''),
            'Date': record.get('DP', ''),
        }
        for record in Medline.parse(StringIO(raw_text))
    ]

    # Passing columns explicitly keeps the schema stable if no record parsed.
    return pd.DataFrame(rows, columns=columns)

### 1.5 Run for Term

In [5]:
# Search term: PubMed records matching both "fever" and "India".
query = "fever AND India"

# Download up to 100 matching records, then tabulate them.
raw_data = fetch_pubmed_abstracts(query=query, max_records=100)
df = parse_abstracts(raw_text=raw_data)

# Preview the first rows of the parsed table.
df.head()

Unnamed: 0,PMID,Title,Abstract,Date
0,40747162,Methicillin-Resistant Staphylococcus aureus Re...,Renal abscesses are rare in children. This rep...,2025 Jul
1,40746790,Diverse Manifestations of Central Nervous Syst...,OBJECTIVES: This study aims to evaluate and ch...,2025 Jun
2,40742524,Anti-chikungunya Activity of a Cinnamic Acid D...,Chikungunya virus (CHIKV) is an arthropod-born...,2025 Jul 31
3,40742003,Paradoxical Hemoglobin Drop Post-Transfusion: ...,"Hyperhaemolysis syndrome (HHS) is a rare, seve...",2025 Jul 31
4,40741554,Clinico-Epidemiological Profile of Dengue in C...,Background Dengue has emerged as the most comm...,2025 Jun


### 1.6 Save to CSV

In [6]:
# Persist the parsed records; index=False leaves the row index out of the file.
df.to_csv(
    path_or_buf="../Pubmed-NLP/data/pubmed_fever_india.csv",
    index=False,
)