# PubMed

In [20]:
import os

import pandas as pd
import requests
from Bio import Entrez, Medline

# Set your email for NCBI API access.
# NOTE: "your.email@example.com" is a placeholder value; replace it with a
# real contact address before running the PubMed cells below.
Entrez.email = "your.email@example.com"


# Search for articles related to a given term
def search_pubmed(query, max_results=1):
    """Search PubMed for ``query`` and return the matching PubMed IDs.

    Args:
        query: Search term, passed to ``Entrez.esearch`` as ``term``.
        max_results: Maximum number of IDs to request (sent as ``retmax``).

    Returns:
        The ``"IdList"`` entry of the parsed esearch response.
    """
    handle = Entrez.esearch(db="pubmed", term=query, retmax=max_results)
    try:
        # Parsing may raise on a malformed or error response; the handle
        # must still be released in that case.
        record = Entrez.read(handle)
    finally:
        handle.close()
    return record["IdList"]


# Fetch article details using PubMed IDs
def fetch_pubmed_details(id_list):
    """Fetch MEDLINE records for the given PubMed IDs.

    Args:
        id_list: PubMed IDs, e.g. the list returned by ``search_pubmed``.

    Returns:
        A list of the records produced by ``Medline.parse``; an empty list
        when ``id_list`` is empty.
    """
    if not id_list:
        # Nothing to look up: avoid sending a request with no IDs.
        return []

    handle = Entrez.efetch(db="pubmed", id=id_list, rettype="medline", retmode="text")
    try:
        # Materialise the records before the handle is closed, and close it
        # even if parsing fails part-way through.
        return list(Medline.parse(handle))
    finally:
        handle.close()

In [30]:
from pprint import pprint

# Fetch the records in this cell so it also runs on a fresh kernel; they were
# previously only assigned in a later cell.
records = fetch_pubmed_details(search_pubmed("COVID-19"))

# Pretty-print every fetched MEDLINE record
for medline_record in records:
    pprint(medline_record)

{'AB': 'OBJECTIVE: To examine changes in trends of the incidence and '
       'characteristics of pediatric complicated rhinosinusitis with respect '
       'to the coronavirus disease 2019 (COVID-19) pandemic. STUDY DESIGN: '
       'Retrospective cohort. SETTING: Single tertiary-care center. METHODS: A '
       "review of patients who presented to St. Louis Children's Hospital "
       '(SLCH) with complicated rhinosinusitis from 2017 to 2022 was '
       'performed. Clinical and follow-up data were analyzed in association '
       'with COVID-19. RESULTS: Eighty-three patients with complicated '
       'rhinosinusitis were identified and analyzed according to '
       'hospitalization before or after March 2020. No differences in '
       'demographic variables were found between the two groups. More patients '
       'had developmental comorbidities in the COVID-19 group (7 vs 1, P = '
       '.049). More patients with intracranial complications (55% vs 45%, P = '
       ".48) and 

In [32]:
from pprint import pprint

records = fetch_pubmed_details(search_pubmed("COVID-19"))
# The search may return no hits; fall back to an empty record instead of
# failing with IndexError on records[0].
record = records[0] if records else {}
# "FAU" holds the author names of the record; strip the commas from each name
authors = [author.replace(",", "") for author in record.get("FAU", [])]
print("Authors:", authors)

Authors: ['Bhat Amrita N', 'Wang Johnny', 'Yang Anna', 'Molter David', 'Dunsky Katherine A', 'Menezes Maithilee', 'Lieu Judith E C']


# Test: Elsevier article PDF download

In [15]:
import requests
import os
from dotenv import load_dotenv

load_dotenv()
# Credentials are read from the environment (load_dotenv() is called above so
# that values from a .env file are available to os.getenv)
API_KEY = os.getenv("ELSEVIER_API_KEY")
INST_TOKEN = os.getenv("ELSEVIER_INSTTOKEN")
DOI = "10.1016/j.profoo.2016.02.084"  # Example DOI for demonstration

# Construct the API URL
BASE_URL = "https://api.elsevier.com/content/article/doi/"
API_URL = f"{BASE_URL}{DOI}"

# Set headers
headers = {
    "X-ELS-APIKey": API_KEY,
    "X-ELS-Insttoken": INST_TOKEN,
    "Accept": "application/pdf",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36",
}

try:
    # Make the API request with redirect following. The timeout keeps the cell
    # from hanging forever on a stalled connection; a timeout is reported by
    # the RequestException handler below.
    response = requests.get(
        API_URL, headers=headers, allow_redirects=True, timeout=30
    )

    # Check if the request was successful
    if response.status_code == 200:
        # Verify that the content type is PDF. Compare only the media type so
        # that a header carrying parameters (e.g. "application/pdf;
        # charset=UTF-8") or different letter case is still accepted.
        content_type = response.headers.get("Content-Type", "")
        media_type = content_type.split(";", 1)[0].strip().lower()
        if media_type == "application/pdf":
            # Save the PDF to a file; "/" in the DOI is not valid in a filename
            filename = DOI.replace("/", "_") + ".pdf"
            with open(filename, "wb") as pdf_file:
                pdf_file.write(response.content)
            print(f"PDF downloaded successfully: {filename}")
        else:
            print(
                f"Error: Content-Type is {response.headers.get('Content-Type')}, not PDF."
            )
    else:
        print(f"Failed to download PDF. Status code: {response.status_code}")
        print(f"Response: {response.text}")
except requests.exceptions.RequestException as e:
    print(f"An error occurred: {e}")

PDF downloaded successfully: 10.1016_j.profoo.2016.02.084.pdf


# Scopus

In [None]:
def make_request(
    search_query: str, max_papers: int = 20000, max_retries: int = 5
) -> list:
    """Page through the Scopus search API and collect result entries.

    Relies on the module-level names ``requests``, ``time`` and ``api_key``
    being defined before the function is called.

    Args:
        search_query: Query string sent as the ``query`` parameter.
        max_papers: Maximum number of entries to return.
        max_retries: Maximum number of consecutive HTTP 429 responses that
            are retried (with exponential back-off) before giving up.

    Returns:
        A list of at most ``max_papers`` entries taken from
        ``data["search-results"]["entry"]`` of each page. Whatever was
        collected so far is returned when paging stops early.
    """
    retries, start, count = 0, 0, 25
    results = []
    # NOTE(review): the 20000 cap on `start` presumably mirrors a paging limit
    # of the API; confirm against the Scopus documentation.
    while len(results) < max_papers and start < 20000:
        params = {
            "query": search_query,
            "start": start,
            "count": count,
            "view": "COMPLETE",
        }

        headers = {"Accept": "application/json", "X-ELS-APIKey": api_key}

        response = requests.get(
            "https://api.elsevier.com/content/search/scopus",
            params=params,
            headers=headers,
            timeout=10,
        )
        print(f"Status code: {response.status_code}")
        if response.status_code == 200:
            # A successful page ends the current back-off sequence
            retries = 0
            data = response.json()
            entries = data.get("search-results", {}).get("entry")
            # NOTE(review): Scopus appears to report an empty result set as a
            # single entry carrying an "error" key; confirm. Such a page and a
            # truly empty page both mean there is nothing more to fetch.
            is_error_page = (
                bool(entries)
                and len(entries) == 1
                and isinstance(entries[0], dict)
                and "error" in entries[0]
            )
            if not entries or is_error_page:
                print("No results found in the response.")
                break
            results.extend(entries)
            print(f"Retrieved {len(entries)} results. Total: {len(results)}")
            start += count
        elif response.status_code == 429:
            if retries >= max_retries:
                # Stop instead of retrying forever with ever longer waits
                print(f"Received 429 {retries + 1} times in a row, giving up")
                break
            wait_time = 2**retries
            print(f"Received 429, waiting {wait_time}")
            time.sleep(wait_time)
            retries += 1
        else:
            print(f"Failed to retrieve results: {response.status_code}")
            break

    # The last page may overshoot the requested maximum
    return results[:max_papers]

In [5]:
from dotenv import load_dotenv
import requests
import os
import csv
import time
from pprint import pprint

load_dotenv()

api_key = os.getenv("ELSEVIER_API_KEY")


# --- ScienceDirect --- #
def get_request(api_key: str, query: str) -> dict:
    """Run a ScienceDirect search and return the decoded JSON response.

    Args:
        api_key: Elsevier API key, sent in the ``X-ELS-APIKey`` header.
        query: Search string sent as the ``query`` parameter.

    Returns:
        The JSON body of the response as returned by ``response.json()``.
    """
    headers = {
        "Accept": "application/json",
        "X-ELS-APIKey": api_key,
        # NOTE(review): "view" is sent as an HTTP header here, while the Scopus
        # request elsewhere in this notebook passes it as a query parameter;
        # confirm which one the API expects.
        "view": "STANDARD",
    }

    url = "https://api.elsevier.com/content/search/sciencedirect"
    params = {"query": query, "offset": 0}

    # Use the same 10 s timeout as the other search requests in this notebook
    # so a stalled connection cannot hang the cell indefinitely.
    response = requests.get(url, params=params, headers=headers, timeout=10)
    return response.json()


data = get_request(api_key, query="microbiology")

# Iterate over the result documents rather than over the top-level keys of
# the response dict.
# NOTE(review): assumes the documents live under
# data["search-results"]["entry"], as in the Scopus response; confirm.
documents = data.get("search-results", {}).get("entry", [])

if not documents:
    # No documents (e.g. a "service-error" payload): show the raw response once
    pprint(data)

for idx, doc in enumerate(documents):
    # Process each document as needed
    pprint(doc)

    # print(f"""{idx + 1}""")
    # print(f"\nPaper Link: {doc['link'][2]['@href']}")
    # print(f"\nPaper Author: {doc.get('dc:creator', '')}")
    # print(f"\nPaper DOI: {doc.get('prism:doi', '')}")
    # print(f"\nAffiliation: {doc.get('affiliation', [{}])[0].get('affilname', '')}")
    # print(f"\nPublication Name: {doc.get('prism:publicationName', '')}")
    # print(f"\nPublish Date: {doc.get('prism:coverDate', '')}")
    # print("----" * 30)
    # break

{'service-error': {'status': {'statusCode': 'AUTHORIZATION_ERROR',
                              'statusText': 'The requestor is not authorized '
                                            'to access the requested view or '
                                            'fields of the resource'}}}


In [2]:
from dotenv import load_dotenv
import requests
import os
import csv
import time
from pprint import pprint

load_dotenv()

# Elsevier API key, taken from the environment
api_key = os.environ.get("ELSEVIER_API_KEY")

# Request headers: ask for JSON and authenticate with the API key
headers = {
    "Accept": "application/json",
    "X-ELS-APIKey": api_key,
}

# Query parameters: search term and number of results to return
params = dict(query="microbiology", count=10)

response = requests.get(
    "https://api.elsevier.com/content/search/scopus",
    headers=headers,
    params=params,
    timeout=10,
)
# Last expression of the cell: display the decoded JSON body
response.json()

{'search-results': {'opensearch:totalResults': '5015857',
  'opensearch:startIndex': '0',
  'opensearch:itemsPerPage': '10',
  'opensearch:Query': {'@role': 'request',
   '@searchTerms': 'microbiology',
   '@startPage': '0'},
  'link': [{'@_fa': 'true',
    '@ref': 'self',
    '@href': 'https://api.elsevier.com/content/search/scopus?start=0&count=10&query=microbiology',
    '@type': 'application/json'},
   {'@_fa': 'true',
    '@ref': 'first',
    '@href': 'https://api.elsevier.com/content/search/scopus?start=0&count=10&query=microbiology',
    '@type': 'application/json'},
   {'@_fa': 'true',
    '@ref': 'next',
    '@href': 'https://api.elsevier.com/content/search/scopus?start=10&count=10&query=microbiology',
    '@type': 'application/json'},
   {'@_fa': 'true',
    '@ref': 'last',
    '@href': 'https://api.elsevier.com/content/search/scopus?start=4990&count=10&query=microbiology',
    '@type': 'application/json'}],
  'entry': [{'@_fa': 'true',
    'link': [{'@_fa': 'true',
      '@r