In [2]:
import requests
import json
import os

# Define the search keyword and API endpoint
keyword = "Machine Learning"
endpoint = "https://www.ebi.ac.uk/europepmc/webservices/rest/search"

# Make a request to the Europe PMC API
params = {
    'query': keyword,
    'resultType': 'core',
    'pageSize': 5,
    'format': 'json'
}
response = requests.get(endpoint, params=params)

# Check if the request was successful
if response.status_code == 200:
    data = response.json()
    articles = data.get('resultList', {}).get('result', [])
    
    # Create a directory to save the files
    if not os.path.exists('stage1_papers_downloaded'):
        os.makedirs('stage1_papers_downloaded')
    
    # Download the files
    for i, article in enumerate(articles):
        pmcid = article.get('pmcid')
        if pmcid:
            # Change 'fullTextXML' to 'fullTextPDF', 'fullTextHTML', or 'fullTextTXT' for different formats
            file_url = f"https://www.ebi.ac.uk/europepmc/webservices/rest/{pmcid}/fullTextXML"
            file_response = requests.get(file_url)
            if file_response.status_code == 200:
                file_path = f'stage1_papers_downloaded/article_{i+1}.xml'
                with open(file_path, 'wb') as file:
                    file.write(file_response.content)
                print(f"Downloaded: {file_path}")
            else:
                print(f"Failed to download file for PMCID: {pmcid}")
else:
    print("Failed to retrieve data from Europe PMC API")

Downloaded: stage1_papers_downloaded/article_5.xml
