In [4]:
# prompt: https://doi.org/10.1080/00031305.2017.1380080 from this doi generate APA

import re
import requests
from bs4 import BeautifulSoup
import json


def request_url(url,method="GET",data=None):
    # เพิ่มส่วนหัว (Headers) เพื่อเลียนแบบเบราว์เซอร์
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.71 Safari/537.36"
    }
    response = None
    if method == "GET":
        response = requests.get(url, headers=headers)
    else:
        # กำหนด headers ด้วย Content-Type เป็น application/json
        headers["Content-Type"] = "application/json"

        # ส่งคำขอ POST
        response = requests.post(url, data=json.dumps(data), headers=headers)      

    # ตรวจสอบสถานะการตอบกลับ
    if response.status_code == 200:
        return response
    elif response.status_code == 403:
        print("Access is forbidden. Try modifying headers or check if the server blocks scraping.")
        response.raise_for_status() # Raise an exception for bad status codes
        return False
    elif response.status_code == 404:
        print("Page not found. Check the URL.")
        return False
    else:
        print(f"Failed to retrieve the webpage. Status code: {response.status_code}")
        print(f"Failed to retrieve the webpage. Status code: {response.reason}")
        return False

def doi_to_apa(doi):
    """
    Attempts to generate an APA citation from a DOI.  This is a heuristic approach
    and may not be perfectly accurate in all cases.  It requires online access
    to resolve the DOI to metadata.

    Args:
        doi: The DOI string (e.g., "10.1080/00031305.2017.1380080").

    Returns:
        A string containing the APA citation, or an error message if the DOI
        cannot be resolved or parsed.
    """
    try:
        

        url = f"https://doi.org/{doi}"
        # response = requests.get(url, headers=headers, allow_redirects=True)  # Add a timeout
        response = request_url(url)
        response.raise_for_status() # Raise an exception for bad status codes


        soup = BeautifulSoup(response.content, "html.parser")

        # Extract title
        title_tag = soup.find("meta", property="og:title")
        title = title_tag["content"] if title_tag else "Title not found"

        # Extract authors
        author_tags = soup.find_all("meta", property="citation_author")
        authors = [author["content"] for author in author_tags]
        if not authors:  # Fallback for different metadata formats
            author_tags = soup.find_all("meta", property="og:description")
            if author_tags:
                description = author_tags[0]["content"]
                author_match = re.search(r"by (.*?)\.", description)
                if author_match:
                    authors = [author_match.group(1)]
                else:
                    authors = ["Authors not found"]
            else:
                authors = ["Authors not found"]


        # Extract publication year and journal
        year_tag = soup.find("meta", property="citation_publication_date")
        year = year_tag["content"] if year_tag else "Year not found"


        journal_tag = soup.find("meta", property="citation_journal_title")
        journal = journal_tag["content"] if journal_tag else "Journal not found"

        # Construct APA citation (basic format, refinement may be needed)
        apa_citation = f"{', '.join(authors)}. ({year}). {title}. *{journal}*."

        return apa_citation

    except requests.exceptions.RequestException as e:
        return f"Error fetching DOI information: {e}"
    except Exception as e:  # Catch broader exceptions
        return f"An error occurred: {e}"

doi = "10.1080/00031305.2017.1380080"
apa_citation = doi_to_apa(doi)
apa_citation

Access is forbidden. Try modifying headers or check if the server blocks scraping.


'Error fetching DOI information: 403 Client Error: Forbidden for url: https://www.tandfonline.com/doi/full/10.1080/00031305.2017.1380080'

In [6]:
from habanero import Crossref

def doi_to_apa(doi):
    cr = Crossref()
    try:
        # Fetch metadata for the DOI
        result = cr.works(ids=doi)
        title = result['message']['title'][0]
        authors = result['message'].get('author', [])
        journal = result['message'].get('container-title', [""])[0]
        year = result['message']['published-print']['date-parts'][0][0]
        volume = result['message'].get('volume', "")
        issue = result['message'].get('issue', "")
        pages = result['message'].get('page', "")
        doi_url = result['message']['URL']

        # Format authors in APA style
        author_list = []
        for author in authors:
            name = f"{author['family']}, {author['given'][0]}."
            author_list.append(name)
        formatted_authors = ", ".join(author_list)
        
        # Create APA style reference
        apa_reference = (
            f"{formatted_authors} ({year}). {title}. *{journal}, {volume}*({issue}), {pages}. {doi_url}"
        )
        return apa_reference
    except Exception as e:
        return f"Error fetching DOI metadata: {e}"

# Example usage
doi = "10.1080/00031305.2017.1380080"
print(doi_to_apa(doi))


Taylor, S., Letham, B. (2018). Forecasting at Scale. *The American Statistician, 72*(1), 37-45. https://doi.org/10.1080/00031305.2017.1380080


In [7]:
from habanero import Crossref
import json

def doi_to_json(doi):
    cr = Crossref()
    try:
        # Fetch metadata for the DOI
        result = cr.works(ids=doi)
        return json.dumps(result, indent=4)  # Pretty-print JSON
    except Exception as e:
        return f"Error fetching DOI metadata: {e}"

# Example usage
doi = "10.1080/00031305.2017.1380080"
doi_metadata_json = doi_to_json(doi)
print(doi_metadata_json)


{
    "status": "ok",
    "message-type": "work",
    "message-version": "1.0.0",
    "message": {
        "indexed": {
            "date-parts": [
                [
                    2024,
                    12,
                    30
                ]
            ],
            "date-time": "2024-12-30T18:39:40Z",
            "timestamp": 1735583980859
        },
        "reference-count": 19,
        "publisher": "Informa UK Limited",
        "issue": "1",
        "content-domain": {
            "domain": [
                "www.tandfonline.com"
            ],
            "crossmark-restriction": true
        },
        "short-container-title": [
            "The American Statistician"
        ],
        "published-print": {
            "date-parts": [
                [
                    2018,
                    1,
                    2
                ]
            ]
        },
        "DOI": "10.1080/00031305.2017.1380080",
        "type": "journal-article",
        "created"