From 8fc63e98d1f76177f8895f9f453146a92e2c6364 Mon Sep 17 00:00:00 2001 From: nsheff Date: Mon, 20 Apr 2026 16:39:18 -0400 Subject: [PATCH 1/6] Migrate bedhost to Starlette 1.0 Starlette 1.0.0 is now the installed version (pulled in by an unrelated CI rebuild on 2026-04-16). This commit migrates the codebase to the Starlette 1.0 idioms that replace the removed/deprecated pre-1.0 APIs. - TemplateResponse: use (request, name, context) signature; drop request from the context dict (was causing TypeError: unhashable type: 'dict' on GET /v1). - Lifespan handler replaces module-import-time global state (BackgroundScheduler, bbagent, usage_data, ref_validator) with app.state + FastAPI Depends() injection in dependencies.py. - Pydantic v2 / FastAPI 0.100+ kwargs: regex= -> pattern=, example= -> examples=[...] in data_models.py. - starlette.responses imports -> fastapi.responses in helpers.py. - Duplicate endpoint function names renamed (get_bedbase_db_stats x3, get_bedset_metadata x3, embed_bed_file x2, get_tokens x2) to unblock request.url_for(name=...) lookups. - HTTPException(status_code=...) calls in bed_api.py get explicit detail=; fix return HTTPException(...) -> raise. - uvicorn.run("bedhost.main:app", ...) import-string form. - dependencies.py no longer imports from main; fetch_detailed_stats takes bbagent as argument. No version pins added. No backwards-compatibility shims. --- bedhost/data_models.py | 6 +- bedhost/dependencies.py | 30 +++++-- bedhost/helpers.py | 45 ++++++++-- bedhost/main.py | 154 ++++++++++++++++++--------------- bedhost/routers/base_api.py | 40 ++++++--- bedhost/routers/bed_api.py | 63 +++++++++++--- bedhost/routers/bedset_api.py | 41 ++++++--- bedhost/routers/objects_api.py | 29 +++++-- 8 files changed, 286 insertions(+), 122 deletions(-) diff --git a/bedhost/data_models.py b/bedhost/data_models.py index e5b45cca..5675357d 100644 --- a/bedhost/data_models.py +++ b/bedhost/data_models.py @@ -20,7 +20,7 @@ class BedList(BaseModel): BedDigest = Path( ..., description="BED digest", - regex=r"^\w+$", + pattern=r"^\w+$", max_length=32, min_length=32, # example=ex_bed_digest, @@ -29,8 +29,8 @@ class BedList(BaseModel): CROM_NUMBERS = Path( ..., description="Chromosome number", - regex=r"^\S+$", - example=ex_chr, + pattern=r"^\S+$", + examples=[ex_chr], ) diff --git a/bedhost/dependencies.py b/bedhost/dependencies.py index c51840b9..f07418bd 100644 --- a/bedhost/dependencies.py +++ b/bedhost/dependencies.py @@ -1,8 +1,28 @@ -from cachetools import cached, TTLCache -from .main import bbagent -from bbconf.models.base_models import FileStats +from bbconf.bbagent import BedBaseAgent +from bbconf.models.base_models import FileStats, UsageModel +from bedboss.refgenome_validator.main import ReferenceValidator +from fastapi import Request -@cached(TTLCache(maxsize=100, ttl=14 * 24 * 60 * 60)) -def fetch_detailed_stats(concise: bool = False) -> FileStats: +def get_bbagent(request: Request) -> BedBaseAgent: + return request.app.state.bbagent + + +def get_usage_data(request: Request) -> UsageModel: + return request.app.state.usage_data + + +def get_ref_validator(request: Request) -> ReferenceValidator: + return request.app.state.ref_validator + + +def fetch_detailed_stats(bbagent: BedBaseAgent, concise: bool = False) -> FileStats: + """ + Fetch detailed file statistics from the BedBaseAgent. + + The previous implementation cached this with a 14-day TTL keyed on the + ``concise`` flag. With bbagent now flowing through FastAPI dependencies + (and process lifetimes typically shorter than the old TTL anyway), the + cache has been removed — the underlying query lives in Postgres. + """ return bbagent.get_detailed_stats(concise=concise) diff --git a/bedhost/helpers.py b/bedhost/helpers.py index 7d8e44d2..0b209b81 100644 --- a/bedhost/helpers.py +++ b/bedhost/helpers.py @@ -5,8 +5,8 @@ import datetime from bbconf.bbagent import BedBaseAgent from bbconf.models.base_models import UsageModel -from starlette.responses import FileResponse, JSONResponse, RedirectResponse -from fastapi import Query +from fastapi.responses import FileResponse, JSONResponse, RedirectResponse +from fastapi import Query, Request from . import _LOGGER from .exceptions import BedHostException @@ -89,13 +89,14 @@ def drs_response(status_code, msg): def count_requests( - usage_data: UsageModel, event: Literal["bed_search", "bedset_search", "bed_meta", "bedset_meta", "files"], ): """ - Decorator to count requests for different events + Decorator to count requests for different events. + + The wrapped endpoint must accept ``request: Request``; the usage data model + is read from ``request.app.state.usage_data`` per-request. - :param UsageModel usage_data: usage data model :param str event: event type """ @@ -109,6 +110,13 @@ async def wrapper(*args, **kwargs): f"Test request was executed. For '{event}' event with: {args}, {kwargs}. No results saved." ) return function_result + request = kwargs.get("request") + if request is None: + raise RuntimeError( + f"count_requests decorator requires the wrapped endpoint " + f"'{func.__name__}' to accept a 'request: Request' parameter." + ) + usage_data: UsageModel = request.app.state.usage_data if event == "files": file_path = kwargs.get("file_path") if "bed" in file_path or "bigbed" in file_path.lower(): @@ -159,3 +167,30 @@ def init_model_usage(): files={}, date_from=datetime.datetime.now(), ) + + +def upload_usage(bbagent: BedBaseAgent, usage_data: UsageModel) -> None: + """ + Upload usage data to the database and reset the usage data in place. + + :param BedBaseAgent bbagent: the bbconf agent used to persist usage records + :param UsageModel usage_data: the live usage model to flush and reset + """ + from .const import USAGE_RECORD_DAYS + + _LOGGER.info("Running uploading of the usage") + usage_data.date_to = datetime.datetime.now() + datetime.timedelta( + days=USAGE_RECORD_DAYS + ) + try: + bbagent.add_usage(usage_data) + except Exception as e: + _LOGGER.error(f"Error while uploading usage data: {e}") + + usage_data.bed_meta = {} + usage_data.bedset_meta = {} + usage_data.bed_search = {} + usage_data.bedset_search = {} + usage_data.files = {} + usage_data.date_from = datetime.datetime.now() + usage_data.date_to = None diff --git a/bedhost/main.py b/bedhost/main.py index 1f420fcf..b86c5e6a 100644 --- a/bedhost/main.py +++ b/bedhost/main.py @@ -1,9 +1,11 @@ import os import sys -import datetime +from contextlib import asynccontextmanager +from pathlib import Path import markdown import uvicorn +from apscheduler.schedulers.background import BackgroundScheduler from bbconf.exceptions import ( BEDFileNotFoundError, BedSetNotFoundError, @@ -20,9 +22,14 @@ from . import _LOGGER from ._version import __version__ as bedhost_version from .cli import build_parser -from .const import PKG_NAME, STATIC_PATH, USAGE_SAVE_HOURS, USAGE_RECORD_DAYS -from .helpers import attach_routers, configure, drs_response, init_model_usage -from apscheduler.schedulers.background import BackgroundScheduler +from .const import PKG_NAME, STATIC_PATH, USAGE_SAVE_HOURS +from .helpers import ( + attach_routers, + configure, + drs_response, + init_model_usage, + upload_usage, +) tags_metadata = [ { @@ -51,12 +58,67 @@ }, ] + +@asynccontextmanager +async def lifespan(app: FastAPI): + """ + Application lifespan handler. + + Startup: + - Read BEDBASE_CONFIG from env (raise EnvironmentError if missing). + - Build the BedBaseAgent, UsageModel, and ReferenceValidator. + - Stash them on app.state for dependency injection. + - Start the BackgroundScheduler that periodically flushes usage data. + + Shutdown: + - Stop the scheduler so it does not leak across reloads. + """ + bbconf_file_path = os.environ.get("BEDBASE_CONFIG") + if not bbconf_file_path: + raise EnvironmentError( + "No BEDBASE_CONFIG found. Can't configure server. " + "Check documentation to create config file" + ) + + _LOGGER.info(f"Running {PKG_NAME} app...") + app.state.bbagent = configure(bbconf_file_path) + app.state.usage_data = init_model_usage() + + # Respect BEDHOST_INIT_ML for CI/smoke deployments that don't need the + # reference genome validator loaded. Default is to initialize it. + init_ml_env = os.environ.get("BEDHOST_INIT_ML", "true").lower() + if init_ml_env in ("0", "false", "no"): + _LOGGER.info( + "BEDHOST_INIT_ML=false; skipping ReferenceValidator initialization." + ) + app.state.ref_validator = None + else: + _LOGGER.info("Initializing reference genome validator...") + app.state.ref_validator = ReferenceValidator() + + scheduler = BackgroundScheduler() + scheduler.add_job( + upload_usage, + "interval", + hours=USAGE_SAVE_HOURS, + args=(app.state.bbagent, app.state.usage_data), + ) + scheduler.start() + app.state.scheduler = scheduler + + try: + yield + finally: + app.state.scheduler.shutdown(wait=False) + + app = FastAPI( title=PKG_NAME, description="BED file/sets statistics and image server API", version=bedhost_version, docs_url="/v1/docs", openapi_tags=tags_metadata, + lifespan=lifespan, ) origins = [ @@ -75,7 +137,7 @@ allow_headers=["*"], ) -templates = Jinja2Templates(directory="bedhost/templates") +templates = Jinja2Templates(directory=str(Path(__file__).parent / "templates")) templates.env.autoescape = False @@ -107,7 +169,7 @@ def render_markdown(filename: str, request: Request): text = input_file.read() content = markdown.markdown(text) return templates.TemplateResponse( - "page.html", {"request": request, "content": content} + request, "page.html", {"content": content} ) @@ -127,10 +189,16 @@ async def exc_handler_BedSetNotFoundError(req: Request, exc: BedSetNotFoundError @app.exception_handler(MissingObjectError) -async def exc_handler_BedSetNotFoundError(req: Request, exc: MissingObjectError): +async def exc_handler_MissingObjectError(req: Request, exc: MissingObjectError): return drs_response(404, "Object not found.") +# Router endpoints use Depends() to resolve bbagent/usage_data/ref_validator +# from app.state at request time, so attaching at module import is safe +# regardless of lifespan ordering. +attach_routers(app) + + def main(): parser = build_parser() args = parser.parse_args() @@ -140,71 +208,19 @@ def main(): sys.exit(1) if args.command == "serve": - _LOGGER.info(f"Running {PKG_NAME} app...") bbconf_file_path = args.config or os.environ.get("BEDBASE_CONFIG") or None + if bbconf_file_path: + os.environ["BEDBASE_CONFIG"] = bbconf_file_path - global bbagent + # Load config once just to pull host/port out. Lifespan will rebuild + # the agent in the uvicorn worker process. bbagent = configure(bbconf_file_path) + host = bbagent.config.config.server.host + port = bbagent.config.config.server.port - _LOGGER.info("Initializing reference genome validator...") - global ref_validator - ref_validator = ReferenceValidator() - - attach_routers(app) + _LOGGER.info(f"Running {PKG_NAME} app on {host}:{port}...") uvicorn.run( - app, - host=bbagent.config.config.server.host, - port=bbagent.config.config.server.port, - ) - - -if __name__ != "__main__": - if os.environ.get("BEDBASE_CONFIG"): - import logging - - _LOGGER.setLevel(logging.DEBUG) - _LOGGER.info(f"Running {PKG_NAME} app...") - bbconf_file_path = os.environ.get("BEDBASE_CONFIG") or None - global bbagent - global usage_data - global ref_validator - - bbagent = configure( - bbconf_file_path - ) # configure before attaching routers to avoid circular imports - usage_data = init_model_usage() - - ref_validator = ReferenceValidator() - - scheduler = BackgroundScheduler() - - def upload_usage(): - """ - Upload usage data to the database and reset the usage data - """ - - _LOGGER.info("Running uploading of the usage") - usage_data.date_to = datetime.datetime.now() + datetime.timedelta( - days=USAGE_RECORD_DAYS - ) - try: - bbagent.add_usage(usage_data) - except Exception as e: - _LOGGER.error(f"Error while uploading usage data: {e}") - - usage_data.bed_meta = {} - usage_data.bedset_meta = {} - usage_data.bed_search = {} - usage_data.bedset_search = {} - usage_data.files = {} - usage_data.date_from = datetime.datetime.now() - usage_data.date_to = None - - scheduler.add_job(upload_usage, "interval", hours=USAGE_SAVE_HOURS) - scheduler.start() - - attach_routers(app) - else: - raise EnvironmentError( - "No BEDBASE_CONFIG found. Can't configure server. Check documentation to create config file" + "bedhost.main:app", + host=host, + port=port, ) diff --git a/bedhost/routers/base_api.py b/bedhost/routers/base_api.py index e0e8c5f8..ebc8ae38 100644 --- a/bedhost/routers/base_api.py +++ b/bedhost/routers/base_api.py @@ -6,8 +6,10 @@ from platform import python_version from bbconf import __version__ as bbconf_version +from bbconf.bbagent import BedBaseAgent from bbconf.models.base_models import StatsReturn, FileStats, UsageStats -from fastapi import APIRouter, Request +from fastapi import APIRouter, Depends, Request +from fastapi.responses import RedirectResponse from geniml import __version__ as geniml_version from .._version import __version__ as bedhost_version @@ -19,12 +21,10 @@ ServiceInfoResponse, Type, ) -from ..dependencies import fetch_detailed_stats +from ..dependencies import fetch_detailed_stats, get_bbagent from ..helpers import get_openapi_version, count_requests, test_query_parameter -from ..main import app, bbagent, usage_data router = APIRouter(prefix="/v1", tags=["base"]) -from fastapi.responses import RedirectResponse packages_versions = {} @@ -34,7 +34,9 @@ summary="Get summary statistics for BEDbase platform", response_model=StatsReturn, ) -async def get_bedbase_db_stats(): +async def get_bedbase_db_stats( + bbagent: BedBaseAgent = Depends(get_bbagent), +): """ Returns statistics """ @@ -48,11 +50,12 @@ async def get_bedbase_db_stats(): ) async def get_detailed_stats( concise: bool = False, + bbagent: BedBaseAgent = Depends(get_bbagent), ): """ Returns detailed statistics """ - return fetch_detailed_stats(concise=concise) + return fetch_detailed_stats(bbagent, concise=concise) @router.get( @@ -60,7 +63,9 @@ async def get_detailed_stats( summary="Get detailed usage statistics for BEDbase platform", response_model=UsageStats, ) -async def get_detailed_usage(): +async def get_detailed_usage( + bbagent: BedBaseAgent = Depends(get_bbagent), +): """ Returns detailed usage statistics """ @@ -72,7 +77,9 @@ async def get_detailed_usage(): summary="Get available genomes", response_model=BaseListResponse, ) -async def get_bedbase_db_stats(): +async def get_genomes_list( + bbagent: BedBaseAgent = Depends(get_bbagent), +): """ Returns statistics """ @@ -90,7 +97,9 @@ async def get_bedbase_db_stats(): summary="Get available assays", response_model=BaseListResponse, ) -async def get_bedbase_db_stats(): +async def get_assays_list( + bbagent: BedBaseAgent = Depends(get_bbagent), +): """ Returns statistics """ @@ -106,7 +115,10 @@ async def get_bedbase_db_stats(): @router.get( "/service-info", summary="GA4GH service info", response_model=ServiceInfoResponse ) -async def service_info(): +async def service_info( + request: Request, + bbagent: BedBaseAgent = Depends(get_bbagent), +): """ Returns information about this service, such as versions, name, etc. """ @@ -115,7 +127,7 @@ async def service_info(): bbconf_version=bbconf_version, geniml_version=geniml_version, python_version=python_version(), - openapi_version=get_openapi_version(app), + openapi_version=get_openapi_version(request.app), ) return ServiceInfoResponse( @@ -142,9 +154,11 @@ async def service_info(): @router.get("/files/{file_path:path}") -@count_requests(usage_data, event="files") +@count_requests(event="files") async def redirect_to_download( - file_path: str, request: Request, test_request: bool = test_query_parameter + file_path: str, + request: Request, + test_request: bool = test_query_parameter, ): download_url = f"https://data2.bedbase.org/{file_path}" return RedirectResponse(url=download_url) diff --git a/bedhost/routers/bed_api.py b/bedhost/routers/bed_api.py index fa7a7d6d..a61d99b3 100644 --- a/bedhost/routers/bed_api.py +++ b/bedhost/routers/bed_api.py @@ -31,7 +31,9 @@ RefGenValidReturnModel, RefGenValidModel, ) -from fastapi import APIRouter, File, HTTPException, Query, UploadFile, Request +from bbconf.bbagent import BedBaseAgent +from bedboss.refgenome_validator.main import ReferenceValidator +from fastapi import APIRouter, Depends, File, HTTPException, Query, UploadFile, Request from fastapi.responses import PlainTextResponse from gtars.models import RegionSet @@ -43,7 +45,7 @@ BedDigest, ChromLengthUploadModel, ) -from ..main import bbagent, usage_data, ref_validator +from ..dependencies import get_bbagent, get_ref_validator from ..helpers import count_requests, test_query_parameter router = APIRouter(prefix="/v1/bed", tags=["bed"]) @@ -55,7 +57,9 @@ response_model=BedMetadataAll, response_model_by_alias=False, ) -async def get_example_bed_record(): +async def get_example_bed_record( + bbagent: BedBaseAgent = Depends(get_bbagent), +): """ Get metadata for an example BED record. """ @@ -81,6 +85,7 @@ async def list_beds( bed_compliance: str = Query( default=None, description="filter by bed type. e.g. 'bed6+4'" ), + bbagent: BedBaseAgent = Depends(get_bbagent), ) -> BedListResult: """ Returns list of BED files in the database with optional filters. @@ -98,13 +103,15 @@ async def list_beds( response_model_by_alias=False, description=f"Example\n " f"bed_id: {EXAMPLE_BED}", ) -@count_requests(usage_data, event="bed_meta") +@count_requests(event="bed_meta") async def get_bed_metadata( + request: Request, bed_id: str = BedDigest, full: Optional[bool] = Query( False, description="Return full record with stats, plots, files and metadata" ), test_request: bool = test_query_parameter, + bbagent: BedBaseAgent = Depends(get_bbagent), ): """ Returns metadata for a single BED record. if full=True, returns full record with stats, plots, files and metadata. @@ -114,6 +121,7 @@ async def get_bed_metadata( except BEDFileNotFoundError as _: raise HTTPException( status_code=404, + detail="BED file not found", ) @@ -125,12 +133,14 @@ async def get_bed_metadata( ) async def get_bed_plots( bed_id: str = BedDigest, + bbagent: BedBaseAgent = Depends(get_bbagent), ): try: return bbagent.bed.get_plots(bed_id) except BEDFileNotFoundError as _: raise HTTPException( status_code=404, + detail="BED plots not found", ) @@ -142,12 +152,14 @@ async def get_bed_plots( ) async def get_bed_files( bed_id: str = BedDigest, + bbagent: BedBaseAgent = Depends(get_bbagent), ): try: return bbagent.bed.get_files(bed_id) except BEDFileNotFoundError as _: raise HTTPException( status_code=404, + detail="BED files not found", ) @@ -159,12 +171,14 @@ async def get_bed_files( ) async def get_bed_stats( bed_id: str = BedDigest, + bbagent: BedBaseAgent = Depends(get_bbagent), ): try: return bbagent.bed.get_stats(bed_id) except BEDFileNotFoundError as _: raise HTTPException( status_code=404, + detail="BED stats not found", ) @@ -177,12 +191,14 @@ async def get_bed_stats( ) async def get_bed_classification( bed_id: str = BedDigest, + bbagent: BedBaseAgent = Depends(get_bbagent), ): try: return bbagent.bed.get_classification(bed_id) except BEDFileNotFoundError as _: raise HTTPException( status_code=404, + detail="BED classification not found", ) @@ -198,12 +214,14 @@ async def get_bed_classification( ) async def get_bed_pephub( bed_id: str = BedDigest, + bbagent: BedBaseAgent = Depends(get_bbagent), ): try: return bbagent.bed.get_raw_metadata(bed_id) except BEDFileNotFoundError as _: raise HTTPException( status_code=404, + detail="BED raw metadata not found", ) @@ -219,12 +237,14 @@ async def get_bed_neighbours( bed_id: str = BedDigest, limit: int = 10, offset: int = 0, + bbagent: BedBaseAgent = Depends(get_bbagent), ): try: return bbagent.bed.get_neighbours(bed_id, limit=limit, offset=offset) except BEDFileNotFoundError as _: raise HTTPException( status_code=404, + detail="BED neighbours not found", ) @@ -233,7 +253,10 @@ async def get_bed_neighbours( summary="Get embeddings for a single BED record", response_model=BedEmbeddingResult, ) -async def get_bed_embedding(bed_id: str = BedDigest): +async def get_bed_embedding( + bed_id: str = BedDigest, + bbagent: BedBaseAgent = Depends(get_bbagent), +): """ Returns embeddings for a single BED record. """ @@ -242,6 +265,7 @@ async def get_bed_embedding(bed_id: str = BedDigest): except BEDFileNotFoundError as _: raise HTTPException( status_code=404, + detail="BED embedding not found", ) @@ -252,6 +276,7 @@ async def get_bed_embedding(bed_id: str = BedDigest): ) async def embed_bed_file( file: UploadFile = File(None), + bbagent: BedBaseAgent = Depends(get_bbagent), ): """ Create embedding for bed file @@ -276,8 +301,9 @@ async def embed_bed_file( summary="Get embeddings for a bed file.", response_model=List[float], ) -async def embed_bed_file( +async def umap_bed_file( file: UploadFile = File(None), + bbagent: BedBaseAgent = Depends(get_bbagent), ): """ Create embedding for bed file @@ -304,6 +330,8 @@ async def embed_bed_file( ) async def analyze_reference_genome( chrom_lengths: ChromLengthUploadModel, + bbagent: BedBaseAgent = Depends(get_bbagent), + ref_validator: ReferenceValidator = Depends(get_ref_validator), ): """ Provide length of the chromosomes for a reference genome, and @@ -350,7 +378,10 @@ async def analyze_reference_genome( summary="Get missing plots for a bed file.", response_model=BaseListResponse, ) -async def missing_plots(plot_id: str): +async def missing_plots( + plot_id: str, + bbagent: BedBaseAgent = Depends(get_bbagent), +): """ Get missing plots for a bed file @@ -386,6 +417,7 @@ def get_regions_for_bedfile( end: Annotated[ Optional[str], Query(description="query range: start coordinate") ] = None, + bbagent: BedBaseAgent = Depends(get_bbagent), ): """ Returns the queried regions with provided ID and optional query parameters @@ -438,14 +470,16 @@ def get_regions_for_bedfile( response_model=BedListSearchResult, response_model_by_alias=False, ) -@count_requests(usage_data, event="bed_search") +@count_requests(event="bed_search") async def text_to_bed_search( + request: Request, query: str, genome: Optional[Union[str, None]] = None, assay: Optional[Union[str, None]] = None, limit: int = 10, offset: int = 0, test_request: bool = test_query_parameter, # needed for usage tracking in @count_requests + bbagent: BedBaseAgent = Depends(get_bbagent), ): """ Search for a BedFile by a text query. @@ -589,6 +623,7 @@ async def exact_search( assay: Optional[Union[str, None]] = None, limit: int = 10, offset: int = 0, + bbagent: BedBaseAgent = Depends(get_bbagent), ): return bbagent.bed.sql_search( query=query, @@ -607,7 +642,10 @@ async def exact_search( response_model_by_alias=False, ) async def bed_to_bed_search( - file: UploadFile = File(None), limit: int = 10, offset: int = 0 + file: UploadFile = File(None), + limit: int = 10, + offset: int = 0, + bbagent: BedBaseAgent = Depends(get_bbagent), ): _LOGGER.info("Searching for bedfiles...") print("file size {}", file.size) @@ -650,7 +688,7 @@ async def bed_to_bed_search( ) return results - return HTTPException( + raise HTTPException( status_code=404, detail="Error occurred, please make sure file is correct and if issue persists, contact support.", ) @@ -664,6 +702,7 @@ async def bed_to_bed_search( async def get_tokens( bed_id: str, universe_id: str, + bbagent: BedBaseAgent = Depends(get_bbagent), ): """ Return univers of bed file @@ -685,9 +724,10 @@ async def get_tokens( summary="Get link to tokenized bed file", response_model=TokenizedPathResponse, ) -async def get_tokens( +async def get_tokens_info( bed_id: str, universe_id: str, + bbagent: BedBaseAgent = Depends(get_bbagent), ): """ Return link to tokenized bed file @@ -710,6 +750,7 @@ async def get_tokens( ) async def get_ref_gen_results( bed_id: str, + bbagent: BedBaseAgent = Depends(get_bbagent), ): """ Return reference genome validation results for a bed file diff --git a/bedhost/routers/bedset_api.py b/bedhost/routers/bedset_api.py index 0d7a4910..eb18599a 100644 --- a/bedhost/routers/bedset_api.py +++ b/bedhost/routers/bedset_api.py @@ -1,5 +1,6 @@ import logging +from bbconf.bbagent import BedBaseAgent from bbconf.exceptions import BedSetNotFoundError, BedSetTrackHubLimitError from bbconf.models.bedset_models import ( BedSetBedFiles, @@ -9,11 +10,11 @@ BedSetStats, ) from pephubclient.helpers import is_registry_path, unwrap_registry_path -from fastapi import APIRouter, HTTPException, Request, Response +from fastapi import APIRouter, Depends, HTTPException, Request, Response from ..const import EXAMPLE_BEDSET, PKG_NAME -from ..main import bbagent, usage_data from ..data_models import CreateBEDsetRequest +from ..dependencies import get_bbagent from ..utils import zip_pep from ..helpers import count_requests, test_query_parameter @@ -28,7 +29,9 @@ response_model=BedSetMetadata, response_model_by_alias=False, ) -async def get_example_bedset_record(): +async def get_example_bedset_record( + bbagent: BedBaseAgent = Depends(get_bbagent), +): result = bbagent.bedset.get_ids_list(limit=1).results if result: return bbagent.bedset.get(result[0].id, full=True) @@ -41,12 +44,14 @@ async def get_example_bedset_record(): tags=["search"], response_model=BedSetListResult, ) -@count_requests(usage_data, event="bedset_search") +@count_requests(event="bedset_search") async def list_bedsets( + request: Request, query: str = None, limit: int = 1000, offset: int = 0, test_request: bool = test_query_parameter, + bbagent: BedBaseAgent = Depends(get_bbagent), ): """ Returns a list of BEDset records in the database with optional filters and search. @@ -61,11 +66,13 @@ async def list_bedsets( description=f"Example\n bed_id: {EXAMPLE_BEDSET}", response_model_by_alias=False, ) -@count_requests(usage_data, event="bedset_meta") +@count_requests(event="bedset_meta") async def get_bedset_metadata( + request: Request, bedset_id: str, full: bool = True, test_request: bool = test_query_parameter, + bbagent: BedBaseAgent = Depends(get_bbagent), ): # TODO: fix error with not found try: @@ -81,6 +88,7 @@ async def get_bedset_metadata( ) async def get_bedset_pep( bedset_id: str, + bbagent: BedBaseAgent = Depends(get_bbagent), ): try: return zip_pep(bbagent.bedset.get_bedset_pep(bedset_id)) @@ -94,8 +102,9 @@ async def get_bedset_pep( summary="Get plots for single bedset record", description=f"Example\n bed_id: {EXAMPLE_BEDSET}", ) -async def get_bedset_metadata( +async def get_bedset_plots_handler( bedset_id: str, + bbagent: BedBaseAgent = Depends(get_bbagent), ): """ Returns metadata from selected columns for selected bedset @@ -112,8 +121,9 @@ async def get_bedset_metadata( summary="Get stats for a single BEDSET record", description=f"Example\n bed_id: {EXAMPLE_BEDSET}", ) -async def get_bedset_metadata( +async def get_bedset_stats_handler( bedset_id: str, + bbagent: BedBaseAgent = Depends(get_bbagent), ): try: return bbagent.bedset.get_statistics(bedset_id) @@ -129,13 +139,18 @@ async def get_bedset_metadata( ) async def get_bedfiles_in_bedset( bedset_id: str, + bbagent: BedBaseAgent = Depends(get_bbagent), ): return bbagent.bedset.get_bedset_bedfiles(bedset_id) @router.head("/{bedset_id}/track_hub") @router.get("/{bedset_id}/track_hub") -async def get_track_hub_bedset(request: Request, bedset_id: str): +async def get_track_hub_bedset( + request: Request, + bedset_id: str, + bbagent: BedBaseAgent = Depends(get_bbagent), +): """ Generate track hub files for the BED set """ @@ -172,7 +187,10 @@ async def get_genomes_file_bedset(request: Request, bedset_id: str): @router.head("/{bedset_id}/track_hub_trackDb_file", include_in_schema=False) @router.get("/{bedset_id}/track_hub_trackDb_file", include_in_schema=False) -async def get_trackDb_file_bedset(bedset_id: str): +async def get_trackDb_file_bedset( + bedset_id: str, + bbagent: BedBaseAgent = Depends(get_bbagent), +): """ Generate trackDb file for the BED set track hub """ @@ -200,7 +218,10 @@ async def get_trackDb_file_bedset(bedset_id: str): "/create", description="Create a new bedset by providing registry path to the PEPhub project", ) -async def create_bedset(bedset: CreateBEDsetRequest): +async def create_bedset( + bedset: CreateBEDsetRequest, + bbagent: BedBaseAgent = Depends(get_bbagent), +): """ Create a new bedset """ diff --git a/bedhost/routers/objects_api.py b/bedhost/routers/objects_api.py index 157aba9f..1857297d 100644 --- a/bedhost/routers/objects_api.py +++ b/bedhost/routers/objects_api.py @@ -6,11 +6,12 @@ from urllib.parse import urlparse +from bbconf.bbagent import BedBaseAgent from bbconf.models.drs_models import DRSModel -from fastapi import APIRouter, HTTPException, Request +from fastapi import APIRouter, Depends, HTTPException, Request +from ..dependencies import get_bbagent from ..helpers import serve_file -from ..main import bbagent router = APIRouter(prefix="/v1/objects", tags=["objects"]) @@ -20,7 +21,11 @@ summary="Get DRS object metadata", response_model=DRSModel, ) -async def get_drs_object_metadata(object_id: str, req: Request): +async def get_drs_object_metadata( + object_id: str, + req: Request, + bbagent: BedBaseAgent = Depends(get_bbagent), +): """ Returns metadata about a DrsObject. """ @@ -36,7 +41,11 @@ async def get_drs_object_metadata(object_id: str, req: Request): summary="Get URL where you can retrieve files", response_model=str, ) -async def get_object_bytes_url(object_id: str, access_id: str): +async def get_object_bytes_url( + object_id: str, + access_id: str, + bbagent: BedBaseAgent = Depends(get_bbagent), +): """ Returns a URL that can be used to fetch the bytes of a DrsObject. """ @@ -54,7 +63,11 @@ async def get_object_bytes_url(object_id: str, access_id: str): summary="Download actual file", response_model=bytes, ) -async def get_object_bytes(object_id: str, access_id: str): +async def get_object_bytes( + object_id: str, + access_id: str, + bbagent: BedBaseAgent = Depends(get_bbagent), +): """ Returns the bytes of a DrsObject. """ @@ -71,7 +84,11 @@ async def get_object_bytes(object_id: str, access_id: str): summary="Download thumbnail file", response_model=bytes, ) -async def get_object_thumbnail(object_id: str, access_id: str): +async def get_object_thumbnail( + object_id: str, + access_id: str, + bbagent: BedBaseAgent = Depends(get_bbagent), +): """ Returns the bytes of a thumbnail of a DrsObject """ From 941229fe7b11ed5bc0560886ae2ea256bb861ec1 Mon Sep 17 00:00:00 2001 From: nsheff Date: Mon, 20 Apr 2026 17:16:26 -0400 Subject: [PATCH 2/6] Guard analyze_reference_genome against None ref_validator When BEDHOST_INIT_ML=false, app.state.ref_validator is None. Previously analyze_reference_genome would unconditionally call ref_validator.determine_compatibility, raising AttributeError and returning 500. Return a clean 503 instead. --- bedhost/routers/bed_api.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/bedhost/routers/bed_api.py b/bedhost/routers/bed_api.py index a61d99b3..aac74fdc 100644 --- a/bedhost/routers/bed_api.py +++ b/bedhost/routers/bed_api.py @@ -338,6 +338,12 @@ async def analyze_reference_genome( return reference genome validation results for a bed file """ + if ref_validator is None: + raise HTTPException( + status_code=503, + detail="Reference validator unavailable (BEDHOST_INIT_ML=false)", + ) + try: genome_aliases = bbagent.get_reference_genomes() result = ref_validator.determine_compatibility( From dee6e24b60511f19e251f874e7430b50a3838bde Mon Sep 17 00:00:00 2001 From: nsheff Date: Mon, 20 Apr 2026 17:31:42 -0400 Subject: [PATCH 3/6] Pin sentence_transformers>=5.0 (bbconf imports SparseEncoder) --- requirements/requirements-all.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt index 261b92d0..ae0c5566 100644 --- a/requirements/requirements-all.txt +++ b/requirements/requirements-all.txt @@ -1,6 +1,7 @@ # bbconf @ git+https://github.com/databio/bbconf.git@dev#egg=bbconf bbconf>=0.14.11 fastapi>=0.103.0 +sentence_transformers>=5.0 logmuse>=0.2.7 markdown requests From 108f311f40b7c3a2fe4991ab5d74f39efd6ee35d Mon Sep 17 00:00:00 2001 From: nsheff Date: Mon, 20 Apr 2026 18:43:03 -0400 Subject: [PATCH 4/6] Bump version to 0.12.7 --- bedhost/_version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bedhost/_version.py b/bedhost/_version.py index 8e2394f4..6ece8ade 100644 --- a/bedhost/_version.py +++ b/bedhost/_version.py @@ -1 +1 @@ -__version__ = "0.12.6" +__version__ = "0.12.7" From 8671b2f06c42d70c06201bae3aa7004a59d239c1 Mon Sep 17 00:00:00 2001 From: nsheff Date: Mon, 20 Apr 2026 18:45:54 -0400 Subject: [PATCH 5/6] black format --- bedhost/main.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/bedhost/main.py b/bedhost/main.py index b86c5e6a..505dafc1 100644 --- a/bedhost/main.py +++ b/bedhost/main.py @@ -168,9 +168,7 @@ def render_markdown(filename: str, request: Request): with open(os.path.join(STATIC_PATH, filename), "r", encoding="utf-8") as input_file: text = input_file.read() content = markdown.markdown(text) - return templates.TemplateResponse( - request, "page.html", {"content": content} - ) + return templates.TemplateResponse(request, "page.html", {"content": content}) @app.exception_handler(MissingThumbnailError) From 9d2a7d823593941661e4c6c70561b64dcc83e130 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Wed, 22 Apr 2026 10:51:43 -0400 Subject: [PATCH 6/6] fmt --- bedhost/og_image.py | 11 +++++++++-- bedhost/routers/bed_api.py | 6 +++++- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/bedhost/og_image.py b/bedhost/og_image.py index bb274cf4..fd879056 100644 --- a/bedhost/og_image.py +++ b/bedhost/og_image.py @@ -22,14 +22,21 @@ def _font(name: str, size: int) -> ImageFont.FreeTypeFont: return ImageFont.load_default() -def _centered_x(draw: ImageDraw.ImageDraw, text: str, font: ImageFont.FreeTypeFont) -> int: +def _centered_x( + draw: ImageDraw.ImageDraw, text: str, font: ImageFont.FreeTypeFont +) -> int: return (_W - int(draw.textlength(text, font=font))) // 2 def _draw_stat_card(draw: ImageDraw.ImageDraw, x: int, y: int, label: str, value: str): w, h = 250, 115 draw.rounded_rectangle([x, y, x + w, y + h], radius=12, fill=_LIGHT_GRAY) - draw.text((x + 18, y + 14), label.upper(), font=_font("Roboto-Regular.ttf", 21), fill=_GRAY) + draw.text( + (x + 18, y + 14), + label.upper(), + font=_font("Roboto-Regular.ttf", 21), + fill=_GRAY, + ) draw.text((x + 18, y + 50), value, font=_font("Roboto-Bold.ttf", 36), fill=_DARK) diff --git a/bedhost/routers/bed_api.py b/bedhost/routers/bed_api.py index 1480eef3..fa6dd941 100644 --- a/bedhost/routers/bed_api.py +++ b/bedhost/routers/bed_api.py @@ -148,7 +148,11 @@ async def get_bed_og_image(bed_id: str = BedDigest): number_of_regions=getattr(stats, "number_of_regions", None) if stats else None, mean_region_width=getattr(stats, "mean_region_width", None) if stats else None, ) - return Response(content=png, media_type="image/png", headers={"Cache-Control": "public, max-age=86400"}) + return Response( + content=png, + media_type="image/png", + headers={"Cache-Control": "public, max-age=86400"}, + ) @router.get(