Skip to content

Commit

Permalink
Extend Gilda's grounder (#9)
Browse files Browse the repository at this point in the history
  • Loading branch information
cthoyt committed Feb 2, 2024
1 parent d7ee99b commit 34c08ae
Show file tree
Hide file tree
Showing 3 changed files with 81 additions and 31 deletions.
4 changes: 2 additions & 2 deletions lexica/phenotype/generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,9 @@
# TODO should there be others?
],
),
biolexica.Input(source="efo", processor="pyobo"), # TODO find subset of EFO
biolexica.Input(source="efo", processor="pyobo", ancestors=["EFO:0000408"]),
biolexica.Input(source="ncit", processor="pyobo", ancestors=["ncit:C2991"]),
# biolexica.Input(source="umls", processor="pyobo"), # TODO find subset of UMLS
# biolexica.Input(source="ncit", processor="pyobo"), # TODO find subset of NCIT
],
excludes=["doid:4"],
)
Expand Down
82 changes: 75 additions & 7 deletions src/biolexica/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import biosynonyms
import gilda
import pyobo
from curies import Reference
from gilda.grounder import load_entries_from_terms_file
from gilda.process import normalize
from pydantic import BaseModel, Field
Expand All @@ -30,6 +31,7 @@
logger = logging.getLogger(__name__)

HERE = Path(__file__).parent.resolve()
LEXICA = HERE.parent.parent.joinpath("lexica")
Processor = Literal["pyobo", "bioontologies", "biosynonyms", "gilda"]

GrounderHint = Union[gilda.Grounder, str, Path]
Expand All @@ -56,22 +58,88 @@ class Configuration(BaseModel):
URL_FMT = "https://github.com/biopragmatics/biolexica/raw/main/lexica/{key}/terms.tsv.gz"


def load_grounder(grounder: GrounderHint) -> gilda.Grounder:
class Match(BaseModel):
    """A scored grounding match, mirroring Gilda's result in biolexica terms."""

    # The grounded entity as a CURIE reference (prefix + identifier).
    reference: Reference
    # The standard name of the matched term.
    name: str
    # Gilda's match score.
    score: float

    @property
    def curie(self) -> str:
        """Get the reference's curie."""
        return self.reference.curie

    @classmethod
    def from_gilda(cls, scored_match: gilda.ScoredMatch):
        """Construct a match from a Gilda object."""
        term = scored_match.term
        reference = Reference(prefix=term.db, identifier=term.id)
        return cls(reference=reference, name=term.entry_name, score=scored_match.score)


class Grounder(gilda.Grounder):
    """Wrap a Gilda grounder with additional functionality."""

    def get_matches(
        self,
        s: str,
        context: Optional[str] = None,
        organisms: Optional[List[str]] = None,
        namespaces: Optional[List[str]] = None,
    ) -> List[Match]:
        """Get matches in Biolexica's format."""
        scored_matches = super().ground(
            s, context=context, organisms=organisms, namespaces=namespaces
        )
        return [Match.from_gilda(scored_match) for scored_match in scored_matches]

    def get_best_match(
        self,
        s: str,
        context: Optional[str] = None,
        organisms: Optional[List[str]] = None,
        namespaces: Optional[List[str]] = None,
    ) -> Optional[Match]:
        """Get the best match in Biolexica's format, or None if nothing grounds."""
        matches = self.get_matches(
            s, context=context, organisms=organisms, namespaces=namespaces
        )
        # Gilda returns matches sorted by score, so the first is the best.
        return matches[0] if matches else None


def load_grounder(grounder: GrounderHint) -> Grounder:
    """Load a gilda grounder, potentially from a remote location.

    :param grounder: One of:

        - a key for a predefined lexicon (resolved to the local ``lexica/``
          directory when running from an editable checkout, otherwise to the
          GitHub raw-file URL)
        - a URL to a Gilda terms file (downloaded to a temporary directory)
        - a local path to a Gilda terms file
        - an already-instantiated :class:`gilda.Grounder` (or subclass)
    :returns: A biolexica :class:`Grounder` wrapping the given terms.
    :raises FileNotFoundError: If a local path is given that does not exist.
    :raises TypeError: If an unhandled input type is given.
    """
    if isinstance(grounder, str):
        if grounder in PREDEFINED:
            if LEXICA.is_dir():
                # If biolexica is installed in editable mode, try looking for
                # the directory outside the package root and load the predefined
                # index directly
                grounder = LEXICA.joinpath(grounder, "terms.tsv.gz").as_posix()
            else:
                grounder = URL_FMT.format(key=grounder)
        if grounder.startswith("http"):
            with tempfile.TemporaryDirectory() as directory:
                path = Path(directory).joinpath("terms.tsv.gz")
                urlretrieve(grounder, path)  # noqa:S310
                return Grounder(path)
    if isinstance(grounder, (str, Path)):
        path = Path(grounder).resolve()
        if not path.is_file():
            raise FileNotFoundError(path)
        return Grounder(grounder)
    if isinstance(grounder, Grounder):
        return grounder
    if isinstance(grounder, gilda.Grounder):
        # Upgrade a plain gilda grounder to the biolexica subclass
        # by re-indexing its existing entries.
        return Grounder(grounder.entries)
    raise TypeError(f"Unhandled grounder type: {type(grounder)}")


def assemble_grounder(
Expand All @@ -80,15 +148,15 @@ def assemble_grounder(
*,
extra_terms: Optional[List["gilda.Term"]] = None,
include_biosynonyms: bool = True,
) -> gilda.Grounder:
) -> Grounder:
"""Assemble terms from multiple resources and load into a grounder."""
terms = assemble_terms(
configuration=configuration,
mappings=mappings,
include_biosynonyms=include_biosynonyms,
extra_terms=extra_terms,
)
grounder = gilda.Grounder(list(terms))
grounder = Grounder(list(terms))
return grounder


Expand Down
26 changes: 4 additions & 22 deletions src/biolexica/web.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,9 @@
from typing import List

import fastapi
import gilda
from curies import Reference
from fastapi import FastAPI, Request
from pydantic import BaseModel

from biolexica.api import GrounderHint, load_grounder
from biolexica.api import Grounder, GrounderHint, Match, load_grounder

__all__ = [
"run_app",
Expand All @@ -18,16 +15,8 @@
api_router = fastapi.APIRouter()


class Match(BaseModel):
"""Model a scored match from Gilda."""

reference: Reference
name: str
score: float


def run_app(grounder: GrounderHint):
    """Construct a FastAPI app from a Gilda grounder and run with :mod:`uvicorn`."""
    # Imported lazily so the web extra is only required when serving.
    import uvicorn

    uvicorn.run(get_app(grounder))
Expand All @@ -41,19 +30,12 @@ def get_app(grounder: GrounderHint) -> FastAPI:
return app


def _get_grounder(request: Request) -> Grounder:
    """Retrieve the grounder from the FastAPI application's state."""
    # NOTE(review): this returns ``request.app.state`` itself rather than an
    # attribute on it — correct only if get_app() assigns the grounder directly
    # to ``app.state``. TODO confirm against get_app (not visible here).
    return request.app.state


def _ground(request: Request, text: str) -> List[Match]:
    """Ground free text with the app's grounder, returning biolexica matches."""
    return _get_grounder(request).get_matches(text)


@api_router.get("/summarize")
Expand Down

0 comments on commit 34c08ae

Please sign in to comment.