Skip to content

Commit

Permalink
refactor content negotiation
Browse files Browse the repository at this point in the history
  • Loading branch information
mfenner committed Oct 8, 2023
1 parent 0830e41 commit 5ce7e86
Show file tree
Hide file tree
Showing 3 changed files with 76 additions and 35 deletions.
27 changes: 4 additions & 23 deletions rogue_scholar_api/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,30 +158,11 @@ async def post(slug, suffix=None):
elif slug in prefixes and suffix:
doi = f"https://doi.org/{slug}/{suffix}"
if format_ in formats:
content_types = {
"bibtex": "application/x-bibtex",
"ris": "application/x-research-info-systems",
"csl": "application/vnd.citationstyles.csl+json",
"citation": f"text/x-bibliography; style={style}; locale={locale}",
}
content_type = content_types.get(format_)
metadata = get_doi_metadata_from_ra(doi, headers={"Accept": content_type})
if not metadata:
logger.warning(f"Metadata not found for {doi}")
response = get_doi_metadata_from_ra(doi, format_, style, locale)
if not response:
logger.warning("Metadata not found")
return {"error": "Metadata not found."}, 404
if format_ == "csl":
filename = f"{slug}-{suffix}.json"
elif format_ == "ris":
filename = f"{slug}-{suffix}.ris"
elif format_ == "bibtex":
filename = f"{slug}-{suffix}.bib"
else:
filename = f"{slug}-{suffix}.txt"
options = {
"Content-Type": content_type,
"Content-Disposition": f"attachment; filename={filename}",
}
return (metadata, 200, options)
return (response["data"], 200, response["options"])
else:
try:
response = (
Expand Down
46 changes: 40 additions & 6 deletions rogue_scholar_api/utils.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,47 @@
import requests
import re
from typing import Optional


def get_doi_metadata_from_ra(doi: str, headers) -> str:
"""use DOI content negotiation to get metadata in various formats"""
response = requests.get(doi, headers=headers)
response.encoding = 'UTF-8'
def sanitize_suffix(str) -> Optional[re.Match]:
    """Check that a DOI suffix contains only allowed characters.

    The allowed character set is taken from
    https://www.crossref.org/blog/dois-and-matching-regular-expressions/
    and is applied case-insensitively, as in that reference.

    Args:
        str: the DOI suffix to check (the parameter name is kept for
            backward compatibility even though it shadows the builtin).

    Returns:
        A match object spanning the whole suffix when every character is
        allowed, otherwise ``None`` (including for the empty string).
    """
    # fullmatch instead of "^...$" with re.match: "$" would also accept a
    # trailing newline, which must not pass a sanitization check.
    return re.fullmatch(r"[-._;()/:A-Z0-9]+", str, flags=re.IGNORECASE)


def get_doi_metadata_from_ra(
    doi: str, format_: str = "bibtex", style: str = "apa", locale: str = "en-US"
) -> Optional[dict]:
    """Use DOI content negotiation to get metadata in various formats.

    Args:
        doi: the DOI expressed as a URL, e.g.
            ``https://doi.org/10.53731/ybhah-9jy85``. It is requested as-is.
        format_: one of ``bibtex``, ``ris``, ``csl`` or ``citation``. Any
            other value falls back to the bibtex content type.
        style: citation style, only used when ``format_`` is ``citation``.
        locale: citation locale, only used when ``format_`` is ``citation``.

    Returns:
        ``None`` when the server answers with a status code >= 400.
        Otherwise a dict with the keys ``doi`` (the input), ``data`` (the
        stripped response text) and ``options`` (``Content-Type`` and
        ``Content-Disposition`` headers for serving the data as a download).
    """
    content_types = {
        "bibtex": "application/x-bibtex",
        "ris": "application/x-research-info-systems",
        "csl": "application/vnd.citationstyles.csl+json",
        "citation": f"text/x-bibliography; style={style}; locale={locale}",
    }
    content_type = content_types.get(format_) or "application/x-bibtex"

    # NOTE(review): no timeout is passed, so a stalled server blocks the
    # caller indefinitely -- consider adding one.
    response = requests.get(doi, headers={"Accept": content_type})
    response.encoding = "UTF-8"
    if response.status_code >= 400:
        return None

    # File extension for the download; anything unlisted is served as text.
    extensions = {"csl": "json", "ris": "ris", "bibtex": "bib"}
    ext = extensions.get(format_, "txt")

    # Drop the resolver prefix ("https://doi.org/") before building the file
    # name, otherwise it would start with "https:--doi.org-".
    basename = re.sub(r"^https?://[^/]+/", "", doi).replace("/", "-")
    options = {
        "Content-Type": content_type,
        "Content-Disposition": f"attachment; filename={basename}.{ext}",
    }
    return {"doi": doi, "data": response.text.strip(), "options": options}


# def format_metadata(meta: dict, to: str = "bibtex"):
Expand Down
38 changes: 32 additions & 6 deletions tests/test-utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,9 @@
def test_get_doi_metadata_bibtex():
"get doi metadata in bibtex format"
doi = "https://doi.org/10.53731/ybhah-9jy85"
headers = {"Accept": "application/x-bibtex"}
bibtex = get_doi_metadata_from_ra(doi, headers=headers)
result = get_doi_metadata_from_ra(doi)
assert (
bibtex
result["data"]
== """@article{Fenner_2023,
\tdoi = {10.53731/ybhah-9jy85},
\turl = {https://doi.org/10.53731%2Fybhah-9jy85},
Expand All @@ -26,9 +25,36 @@ def test_get_doi_metadata_bibtex():
def test_get_doi_metadata_citation():
    "get doi metadata as formatted citation"
    doi = "https://doi.org/10.53731/ybhah-9jy85"
    # The function takes the format name rather than raw request headers and
    # returns a dict; the citation text is under the "data" key.
    result = get_doi_metadata_from_ra(doi, format_="citation")
    assert result is not None
    assert (
        result["data"]
        == "Fenner, M. (2023). The rise of the (science) newsletter. https://doi.org/10.53731/ybhah-9jy85"
    )


# def test_sanitize_cool_suffix():
# "sanitize cool suffix"
# suffix = "sfzv4-xdb68"
# sanitized_suffix = sanitize_suffix(suffix)
# assert sanitized_suffix == "sfzv4-xdb68"


# def test_sanitize_semantic_suffix():
# "sanitize semantic suffix"
# suffix = "dini-blog.20230724"
# sanitized_suffix = sanitize_suffix(suffix)
# assert sanitized_suffix == "dini-blog.20230724"


# def test_sanitize_sici_suffix():
# "sanitize sici suffix"
# suffix = "0002-8231(199412)45:10<737:TIODIM>2.3.TX;2-M"
# sanitized_suffix = sanitize_suffix(suffix)
# assert sanitized_suffix == "0002-8231(199412)45:10<737:TIODIM>2.3.TX;2-M"


# def test_sanitize_invalid_suffix():
# "sanitize invalid suffix"
# suffix = "000 333"
# sanitized_suffix = sanitize_suffix(suffix)
# assert sanitized_suffix is None

0 comments on commit 5ce7e86

Please sign in to comment.