In [1]:

"""
Script to find the newest 50 publications of an author on PubMed.
Requires: pip install biopython
"""

from Bio import Entrez
import time
import sys

def _extract_authors(article):
    """Return display names for every entry in a parsed article's AuthorList.

    Personal authors are rendered as "LastName ForeName". ForeName is optional
    in PubMed records, so Initials (or the bare last name) is used as a
    fallback instead of dropping the author. Group authors are rendered by
    their CollectiveName.
    """
    names = []
    for author in article.get('AuthorList', []):
        if 'LastName' in author:
            given = author.get('ForeName') or author.get('Initials') or ''
            names.append(f"{author['LastName']} {given}".strip())
        elif 'CollectiveName' in author:
            names.append(author['CollectiveName'])
    return names


def _format_pub_date(article):
    """Return the journal issue's publication date as text, or "Unknown date".

    PubDate holds either structured parts (Year plus optional Month/Season and
    Day) or a single free-text MedlineDate such as "2020 Jan-Feb".
    """
    date_info = article.get('Journal', {}).get('JournalIssue', {}).get('PubDate')
    if not date_info:
        return "Unknown date"

    # Join only the parts that are present so a missing Month cannot leave a
    # double space or an empty string behind.
    parts = [str(date_info[key]) for key in ('Year', 'Month', 'Season', 'Day')
             if date_info.get(key)]
    if parts:
        return " ".join(parts)

    medline_date = date_info.get('MedlineDate')
    return str(medline_date) if medline_date else "Unknown date"


def _parse_pubmed_record(record):
    """Convert one parsed PubmedArticle record into a publication dict.

    Raises:
        KeyError: If the record lacks MedlineCitation, Article or PMID.
    """
    citation = record['MedlineCitation']
    article = citation['Article']
    pmid = citation['PMID']

    return {
        'title': article.get('ArticleTitle', 'No title available'),
        'authors': _extract_authors(article),
        'journal': article.get('Journal', {}).get('Title', 'Unknown journal'),
        'date': _format_pub_date(article),
        'pmid': str(pmid),
        'url': f"https://pubmed.ncbi.nlm.nih.gov/{pmid}/"
    }


def fetch_author_publications(author_name, email, max_results=50):
    """
    Fetch the newest publications for a given author from PubMed.

    Runs an esearch for "<author_name>[Author]" sorted by publication date,
    then fetches the matching records in batches of 10 and converts each one
    into a plain dictionary. Progress is reported with print().

    Args:
        author_name (str): Author name (e.g., "Smith J" or "John Smith")
        email (str): Your email address (required by NCBI)
        max_results (int): Maximum number of results to return (default: 50)

    Returns:
        list: List of publication dictionaries with the keys 'title',
            'authors', 'journal', 'date', 'pmid' and 'url'. An empty list is
            returned when nothing matches or when any request fails (the
            error is printed, not raised). Records that cannot be parsed are
            skipped with a printed message.
    """

    # Set email for NCBI (required); note this is module-wide state in Entrez
    Entrez.email = email

    try:
        search_query = f"{author_name}[Author]"

        print(f"Searching for publications by: {author_name}")
        print(f"Query: {search_query}")

        search_handle = Entrez.esearch(
            db="pubmed",
            term=search_query,
            retmax=max_results,
            sort="pub_date",  # Sort by publication date, newest first
            retmode="xml"
        )
        # Close the handle even if parsing the response fails
        try:
            search_results = Entrez.read(search_handle)
        finally:
            search_handle.close()

        pmid_list = search_results["IdList"]
        total_found = int(search_results["Count"])

        print(f"Found {total_found} total publications")

        if not pmid_list:
            print("No publications found for this author.")
            return []

        print(f"Fetching details for newest {len(pmid_list)} publications...")

        publications = []
        batch_size = 10  # Process in batches to be nice to NCBI servers

        for start in range(0, len(pmid_list), batch_size):
            batch_pmids = pmid_list[start:start + batch_size]

            fetch_handle = Entrez.efetch(
                db="pubmed",
                id=",".join(batch_pmids),
                rettype="medline",
                retmode="xml"
            )
            try:
                records = Entrez.read(fetch_handle)
            finally:
                fetch_handle.close()

            for record in records['PubmedArticle']:
                try:
                    publications.append(_parse_pubmed_record(record))
                except KeyError as e:
                    # One malformed record should not discard the whole batch
                    print(f"Error parsing record: {e}")

            # Be respectful to NCBI servers; no need to wait after the last batch
            if start + batch_size < len(pmid_list):
                time.sleep(0.5)

        return publications

    except Exception as e:
        # Deliberate best-effort boundary: report the failure and return nothing
        print(f"Error fetching publications: {e}")
        return []

def display_publications(publications):
    """Print a numbered, human-readable listing of publications to stdout.

    Each entry shows the title, at most the first three authors (with a count
    of the remaining ones), the journal, date, PMID and URL. When the list is
    empty a single notice is printed instead.

    Args:
        publications (list): Dictionaries with the keys 'title', 'authors',
            'journal', 'date', 'pmid' and 'url'.
    """
    if not publications:
        print("No publications to display.")
        return

    rule = '=' * 80
    print(f"\n{rule}")
    print(f"FOUND {len(publications)} PUBLICATIONS")
    print(rule)

    shown = 3  # number of author names listed before summarising the rest
    for position, entry in enumerate(publications, start=1):
        print(f"\n{position}. {entry['title']}")

        authors_line = f"   Authors: {', '.join(entry['authors'][:shown])}"
        hidden = len(entry['authors']) - shown
        if hidden > 0:
            authors_line += f" ... and {hidden} others"
        print(authors_line)

        print(f"   Journal: {entry['journal']}")
        print(f"   Date: {entry['date']}")
        print(f"   PMID: {entry['pmid']}")
        print(f"   URL: {entry['url']}")

def save_to_file(publications, filename="publications.txt"):
    """Write all publications to a UTF-8 text file and report the path.

    The file starts with a count line and an 80-character rule, followed by
    one numbered entry per publication. Unlike the console listing, the full
    author list is written. An existing file of the same name is overwritten.

    Args:
        publications (list): Dictionaries with the keys 'title', 'authors',
            'journal', 'date', 'pmid' and 'url'.
        filename (str): Destination path (default: "publications.txt").
    """
    with open(filename, 'w', encoding='utf-8') as out:
        out.write(f"Found {len(publications)} publications\n")
        out.write("=" * 80 + "\n\n")

        for number, entry in enumerate(publications, start=1):
            author_text = ', '.join(entry['authors'])
            out.writelines([
                f"{number}. {entry['title']}\n",
                f"   Authors: {author_text}\n",
                f"   Journal: {entry['journal']}\n",
                f"   Date: {entry['date']}\n",
                f"   PMID: {entry['pmid']}\n",
                f"   URL: {entry['url']}\n\n",
            ])

    print(f"Publications saved to {filename}")

def main():
    """Run the search with the settings below, print the results and save them.

    Exits with status 1 if the contact email is still the placeholder value.
    The results file is only written when at least one publication was found;
    its name is the author name with spaces replaced by underscores, followed
    by "_publications.txt".
    """

    # Configuration - modify these values
    author = "Vince I. Madai"  # Change this to the author you're searching for
    contact_email = "valbuerga@gmail.com"  # REQUIRED: Change to your email
    result_limit = 50

    # Refuse to query NCBI with the untouched placeholder address
    if contact_email == "your.email@example.com":
        print("ERROR: Please set your email address in the YOUR_EMAIL variable.")
        print("This is required by NCBI's terms of service.")
        sys.exit(1)

    found = fetch_author_publications(author, contact_email, result_limit)
    display_publications(found)

    # Nothing to save when the search came back empty
    if not found:
        return

    output_name = f"{author.replace(' ', '_')}_publications.txt"
    save_to_file(found, output_name)

# Run the search only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()

Searching for publications by: Vince I. Madai
Query: Vince I. Madai[Author]
Found 64 total publications
Fetching details for newest 50 publications...

FOUND 50 PUBLICATIONS

1. Early Complications After Mild to Moderate Ischemic Stroke and Impact on 3-Month Outcome: The Multicenter Prospective Stroke Unit Plus Cohort Study.
   Authors: Sobesky Jan, Madai Vince Istvan, Zweynert Sarah ... and 14 others
   Journal: Journal of the American Heart Association
   Date: 2025 Feb 18
   PMID: 39921500
   URL: https://pubmed.ncbi.nlm.nih.gov/39921500/

2. External validation of AI-based scoring systems in the ICU: a systematic review and meta-analysis.
   Authors: Rockenschaub Patrick, Akay Ela Marie, Carlisle Benjamin Gregory ... and 6 others
   Journal: BMC medical informatics and decision making
   Date: 2025 Jan 06
   PMID: 39762808
   URL: https://pubmed.ncbi.nlm.nih.gov/39762808/

3. The authors reply.
   Authors: Rockenschaub Patrick, Madai Vince Istvan, Frey Dietmar
   Journal: Critical 