From f121ebe9f41912f3e1dde2d31f8bdfcb2c5b5248 Mon Sep 17 00:00:00 2001 From: Jeremy Stone <74574922+jstone-uw@users.noreply.github.com> Date: Wed, 18 Mar 2026 15:54:33 -0700 Subject: [PATCH 1/3] Namespaced data export with ClinVar namespaces --- src/mavedb/lib/score_sets.py | 158 ++++++++++++++++++++++++------- src/mavedb/routers/score_sets.py | 34 +++++-- 2 files changed, 151 insertions(+), 41 deletions(-) diff --git a/src/mavedb/lib/score_sets.py b/src/mavedb/lib/score_sets.py index d6fa605f..a38a3001 100644 --- a/src/mavedb/lib/score_sets.py +++ b/src/mavedb/lib/score_sets.py @@ -37,6 +37,8 @@ from mavedb.models.experiment_controlled_keyword import ExperimentControlledKeywordAssociation from mavedb.models.experiment_publication_identifier import ExperimentPublicationIdentifierAssociation from mavedb.models.experiment_set import ExperimentSet +from mavedb.models.clinical_control import ClinicalControl +from mavedb.models.clinical_control_mapped_variant import mapped_variants_clinical_controls_association_table from mavedb.models.gnomad_variant import GnomADVariant from mavedb.models.mapped_variant import MappedVariant from mavedb.models.publication_identifier import PublicationIdentifier @@ -63,6 +65,10 @@ logger = logging.getLogger(__name__) +# Pattern for ClinVar-versioned namespaces of the form "clinvar.YEAR_MONTH", +# e.g. "clinvar.2024_01" for January 2024. +CLINVAR_NS_PATTERN = re.compile(r"^clinvar\.(\d+)_(0[1-9]|1[0-2])$") + class HGVSColumns: NUCLEOTIDE: str = "hgvs_nt" # dataset.constants.hgvs_nt_column @@ -531,7 +537,7 @@ def find_publish_or_private_superseded_score_set_tail( def get_score_set_variants_as_csv( db: Session, score_set: ScoreSet, - namespaces: List[Literal["scores", "counts", "vep", "gnomad", "clingen"]], + namespaces: List[str], namespaced: Optional[bool] = None, start: Optional[int] = None, limit: Optional[int] = None, @@ -548,8 +554,10 @@ def get_score_set_variants_as_csv( The database session to use. score_set : ScoreSet The score set to get the variants from. - namespaces : List[Literal["scores", "counts", "vep", "gnomad", "clingen"]] - The namespaces for data. Now there are only scores, counts, VEP, gnomAD, and ClinGen. ClinVar will be added in the future. + namespaces : List[str] + The namespaces for data: "scores", "counts", "vep", "gnomad", "clingen", and/or + ClinVar-versioned namespaces of the form "clinvar.YEAR_MONTH" (e.g. "clinvar.2024_01" + for January 2024, which joins on db_name="ClinVar" and db_version="01_2024"). namespaced: Optional[bool] = None Whether namespace the columns or not. start : int, optional @@ -600,11 +608,26 @@ def get_score_set_variants_as_csv( namespaced_score_set_columns["gnomad"].append("gnomad_af") if "clingen" in namespaced_score_set_columns: namespaced_score_set_columns["clingen"].append("clingen_allele_id") + + # Parse ClinVar-versioned namespaces of the form "clinvar.YEAR_MONTH". + # The corresponding db_version stored in clinical_controls is "MONTH_YEAR". + clinvar_namespaces: dict[str, str] = {} # namespace -> db_version (MONTH_YEAR) + for ns in namespaces: + m = CLINVAR_NS_PATTERN.match(ns) + if m: + year, month = m.group(1), m.group(2) + db_version = f"{month}_{year}" + clinvar_namespaces[ns] = db_version + namespaced_score_set_columns[ns] = ["clinical_significance", "clinical_review_status"] + variants: Sequence[Variant] = [] mappings: Optional[list[Optional[MappedVariant]]] = None gnomad_data: Optional[list[Optional[GnomADVariant]]] = None - if "gnomad" in namespaces and include_post_mapped_hgvs: + # Mappings are needed whenever post-mapped HGVS or any ClinVar namespace is requested. + need_mappings = bool(include_post_mapped_hgvs or clinvar_namespaces) + + if "gnomad" in namespaces and need_mappings: variants_mappings_and_gnomad_query = ( select(Variant, MappedVariant, GnomADVariant) .join( @@ -640,7 +663,7 @@ def get_score_set_variants_as_csv( variants.append(variant) mappings.append(mapping) gnomad_data.append(gnomad) - elif include_post_mapped_hgvs: + elif need_mappings: variants_and_mappings_query = ( select(Variant, MappedVariant) .join( @@ -707,22 +730,68 @@ def get_score_set_variants_as_csv( if limit: variants_query = variants_query.limit(limit) variants = db.scalars(variants_query).all() + + # For each ClinVar namespace, fetch a mapping from mapped_variant_id to ClinicalControl. + clinvar_data_map: dict[str, dict[int, Optional[ClinicalControl]]] = {} + if clinvar_namespaces and mappings is not None: + mv_ids = [m.id for m in mappings if m is not None] + for ns, db_version in clinvar_namespaces.items(): + mv_to_cc: dict[int, Optional[ClinicalControl]] = {} + if mv_ids: + aliased_cc = aliased(ClinicalControl) + cc_query = ( + select( + mapped_variants_clinical_controls_association_table.c.mapped_variant_id, + aliased_cc, + ) + .join( + aliased_cc, + mapped_variants_clinical_controls_association_table.c.clinical_control_id == aliased_cc.id, + ) + .where( + and_( + mapped_variants_clinical_controls_association_table.c.mapped_variant_id.in_(mv_ids), + aliased_cc.db_name == "ClinVar", + aliased_cc.db_version == db_version, + ) + ) + ) + for mv_id, cc in db.execute(cc_query).all(): + mv_to_cc[mv_id] = cc + clinvar_data_map[ns] = mv_to_cc + + # Build per-variant ClinVar lookup (list indexed in parallel with variants). + clinvar_per_variant: Optional[list[Optional[dict[str, Optional[ClinicalControl]]]]] = None + if clinvar_namespaces and mappings is not None: + clinvar_per_variant = [] + for mapping in mappings: + row_clinvar: dict[str, Optional[ClinicalControl]] = {} + for ns, mv_to_cc in clinvar_data_map.items(): + row_clinvar[ns] = mv_to_cc.get(mapping.id) if mapping is not None else None + clinvar_per_variant.append(row_clinvar) + rows_data = variants_to_csv_rows( variants, columns=namespaced_score_set_columns, namespaced=namespaced, mappings=mappings, gnomad_data=gnomad_data, + clinvar_data_by_ns=clinvar_per_variant, ) # type: ignore - rows_columns = [ - ( - f"{namespace}.{col}" - if (namespaced and namespace not in ["core", "mavedb"]) - else (f"mavedb.{col}" if namespaced and namespace == "mavedb" else col) - ) - for namespace, cols in namespaced_score_set_columns.items() - for col in cols - ] + + rows_columns = [] + for namespace, cols in namespaced_score_set_columns.items(): + for col in cols: + if CLINVAR_NS_PATTERN.match(namespace): + # ClinVar versioned namespaces always include the full namespace prefix + # to avoid column-name collisions when multiple versions are requested. + rows_columns.append(f"{namespace}.{col}") + elif namespaced and namespace not in ["core", "mavedb"]: + rows_columns.append(f"{namespace}.{col}") + elif namespaced and namespace == "mavedb": + rows_columns.append(f"mavedb.{col}") + else: + rows_columns.append(col) if drop_na_columns: rows_data, rows_columns = drop_na_columns_from_csv_file_rows(rows_data, rows_columns) @@ -769,6 +838,7 @@ def variant_to_csv_row( columns: dict[str, list[str]], mapping: Optional[MappedVariant] = None, gnomad_data: Optional[GnomADVariant] = None, + clinvar_data_by_ns: Optional[dict[str, Optional[ClinicalControl]]] = None, namespaced: Optional[bool] = None, na_rep="NA", ) -> dict[str, Any]: @@ -787,6 +857,8 @@ def variant_to_csv_row( Mapped variant corresponding to the variant. gnomad_data : variant.models.GnomADVariant, optional gnomAD variant data corresponding to the variant. + clinvar_data_by_ns : dict[str, Optional[ClinicalControl]], optional + Per-variant ClinVar data keyed by namespace (e.g. "clinvar.2024_01"). na_rep : str String to represent null values. @@ -885,6 +957,23 @@ def variant_to_csv_row( value = na_rep key = f"clingen.{column_key}" if namespaced else column_key row[key] = value + # Handle ClinVar-versioned namespaces (e.g. "clinvar.2024_01"). + # These always use the full "namespace.column" key regardless of the namespaced flag + # to avoid collisions when multiple versions are requested. + for namespace_key, namespace_cols in columns.items(): + if not CLINVAR_NS_PATTERN.match(namespace_key): + continue + clinvar_entry = (clinvar_data_by_ns or {}).get(namespace_key) + for column_key in namespace_cols: + if column_key == "clinical_significance": + value = str(clinvar_entry.clinical_significance) if clinvar_entry else na_rep + elif column_key == "clinical_review_status": + value = str(clinvar_entry.clinical_review_status) if clinvar_entry else na_rep + else: + value = na_rep + if is_null(value): + value = na_rep + row[f"{namespace_key}.{column_key}"] = value return row @@ -893,6 +982,7 @@ def variants_to_csv_rows( columns: dict[str, list[str]], mappings: Optional[Sequence[Optional[MappedVariant]]] = None, gnomad_data: Optional[Sequence[Optional[GnomADVariant]]] = None, + clinvar_data_by_ns: Optional[Sequence[Optional[dict[str, Optional[ClinicalControl]]]]] = None, namespaced: Optional[bool] = None, na_rep="NA", ) -> Iterable[dict[str, Any]]: @@ -911,6 +1001,8 @@ def variants_to_csv_rows( List of mapped variants corresponding to the variants. gnomad_data : list[Optional[variant.models.GnomADVariant]], optional List of gnomAD variant data corresponding to the variants. + clinvar_data_by_ns : list[Optional[dict[str, Optional[ClinicalControl]]]], optional + Per-variant ClinVar data keyed by namespace (e.g. "clinvar.2024_01"). na_rep : str String to represent null values. @@ -918,26 +1010,24 @@ def variants_to_csv_rows( ------- list[dict[str, Any]] """ - if mappings is not None and gnomad_data is not None: - return map( - lambda zipped: variant_to_csv_row( - zipped[0], columns, mapping=zipped[1], gnomad_data=zipped[2], namespaced=namespaced, na_rep=na_rep - ), - zip(variants, mappings, gnomad_data), - ) - elif mappings is not None: - return map( - lambda pair: variant_to_csv_row(pair[0], columns, mapping=pair[1], namespaced=namespaced, na_rep=na_rep), - zip(variants, mappings), - ) - elif gnomad_data is not None: - return map( - lambda pair: variant_to_csv_row( - pair[0], columns, gnomad_data=pair[1], namespaced=namespaced, na_rep=na_rep - ), - zip(variants, gnomad_data), - ) - return map(lambda v: variant_to_csv_row(v, columns, namespaced=namespaced, na_rep=na_rep), variants) + n = len(variants) + _mappings: Sequence[Optional[MappedVariant]] = mappings if mappings is not None else [None] * n + _gnomad: Sequence[Optional[GnomADVariant]] = gnomad_data if gnomad_data is not None else [None] * n + _clinvar: Sequence[Optional[dict[str, Optional[ClinicalControl]]]] = ( + clinvar_data_by_ns if clinvar_data_by_ns is not None else [None] * n + ) + return map( + lambda t: variant_to_csv_row( + t[0], + columns, + mapping=t[1], + gnomad_data=t[2], + clinvar_data_by_ns=t[3], + namespaced=namespaced, + na_rep=na_rep, + ), + zip(variants, _mappings, _gnomad, _clinvar), + ) def find_meta_analyses_for_score_sets(db: Session, urns: list[str]) -> list[ScoreSet]: diff --git a/src/mavedb/routers/score_sets.py b/src/mavedb/routers/score_sets.py index 7376ca4b..1f1bf6d6 100644 --- a/src/mavedb/routers/score_sets.py +++ b/src/mavedb/routers/score_sets.py @@ -48,6 +48,7 @@ from mavedb.lib.permissions import Action, assert_permission, has_permission from mavedb.lib.score_calibrations import create_score_calibration from mavedb.lib.score_sets import ( + CLINVAR_NS_PATTERN, csv_data_to_df, fetch_score_set_search_filter_options, find_meta_analyses_for_experiment_sets, @@ -714,8 +715,13 @@ def get_score_set_variants_csv( urn: str, start: int = Query(default=None, description="Start index for pagination"), limit: int = Query(default=None, description="Maximum number of variants to return"), - namespaces: List[Literal["scores", "counts", "vep", "gnomad", "clingen"]] = Query( - default=["scores"], description="One or more data types to include: scores, counts, ClinGen, gnomAD, VEP" + namespaces: List[str] = Query( + default=["scores"], + description=( + 'One or more data types to include: "scores", "counts", "vep", "gnomad", "clingen", ' + 'and/or ClinVar-versioned namespaces of the form "clinvar.YEAR_MONTH" ' + '(e.g. "clinvar.2024_01" for January 2024).' + ), ), drop_na_columns: Optional[bool] = None, include_custom_columns: Optional[bool] = None, @@ -729,9 +735,6 @@ def get_score_set_variants_csv( This differs from get_score_set_scores_csv() in that it returns only the HGVS columns, score column, and mapped HGVS string. - TODO (https://github.com/VariantEffect/mavedb-api/issues/446) We may add another function for ClinVar and gnomAD. - export endpoint, with options governing which columns to include. - Parameters __________ urn : str @@ -740,9 +743,11 @@ def get_score_set_variants_csv( The index to start from. If None, starts from the beginning. limit : Optional[int] The maximum number of variants to return. If None, returns all variants. - namespaces: List[Literal["scores", "counts", "vep", "gnomad", "clingen"]] + namespaces: List[str] The namespaces of all columns except for accession, hgvs_nt, hgvs_pro, and hgvs_splice. - We may add ClinVar in the future. + Supported values: "scores", "counts", "vep", "gnomad", "clingen", and ClinVar-versioned + namespaces of the form "clinvar.YEAR_MONTH" (e.g. "clinvar.2024_01" for January 2024). + Multiple ClinVar namespaces with different YEAR_MONTH values may be requested simultaneously. drop_na_columns : bool, optional Whether to drop columns that contain only NA values. Defaults to False. db : Session @@ -772,6 +777,21 @@ def get_score_set_variants_csv( logger.info(msg="Could not fetch scores with non-positive limit.", extra=logging_context()) raise HTTPException(status_code=422, detail="Limit must be positive") + _VALID_STATIC_NAMESPACES = {"scores", "counts", "vep", "gnomad", "clingen"} + invalid_namespaces = [ + ns for ns in namespaces if ns not in _VALID_STATIC_NAMESPACES and not CLINVAR_NS_PATTERN.match(ns) + ] + if invalid_namespaces: + raise HTTPException( + status_code=422, + detail=( + f"Invalid namespace(s): {invalid_namespaces}. " + 'Each namespace must be one of "scores", "counts", "vep", "gnomad", "clingen", ' + 'or a ClinVar-versioned namespace of the form "clinvar.YEAR_MM" ' + '(e.g. "clinvar.2024_01" for January 2024).' + ), + ) + score_set = db.query(ScoreSet).filter(ScoreSet.urn == urn).first() if not score_set: logger.info(msg="Could not fetch the requested scores; No such score set exists.", extra=logging_context()) From 06bd147e951fe82805ded585bf6bfb709ca89c68 Mon Sep 17 00:00:00 2001 From: Jeremy Stone <74574922+jstone-uw@users.noreply.github.com> Date: Wed, 18 Mar 2026 15:54:56 -0700 Subject: [PATCH 2/3] Unit tests for ClinVar namespaces in variant data CSV export --- tests/helpers/util/score_set.py | 17 +++++ tests/routers/test_score_set.py | 124 ++++++++++++++++++++++++++++++++ 2 files changed, 141 insertions(+) diff --git a/tests/helpers/util/score_set.py b/tests/helpers/util/score_set.py index b2a8b2c6..1fb61772 100644 --- a/tests/helpers/util/score_set.py +++ b/tests/helpers/util/score_set.py @@ -225,6 +225,23 @@ def link_clinical_controls_to_mapped_variants(db, score_set): db.commit() +def link_clinvar_control_to_mapped_variant(db, score_set): + """Link the seeded ClinVar clinical control (id=1) to the first mapped variant of a score set.""" + mapped_variants = db.scalars( + select(MappedVariantDbModel) + .join(VariantDbModel) + .join(ScoreSetDbModel) + .where(ScoreSetDbModel.urn == score_set["urn"]) + ).all() + + mapped_variants[0].clinical_controls.append( + db.scalar(select(ClinicalControlDbModel).where(ClinicalControlDbModel.id == 1)) + ) + + db.add(mapped_variants[0]) + db.commit() + + def link_gnomad_variants_to_mapped_variants(db, score_set): mapped_variants = db.scalars( select(MappedVariantDbModel) diff --git a/tests/routers/test_score_set.py b/tests/routers/test_score_set.py index f412b16a..ab5f7142 100644 --- a/tests/routers/test_score_set.py +++ b/tests/routers/test_score_set.py @@ -37,6 +37,7 @@ TEST_BIORXIV_IDENTIFIER, TEST_BRNICH_SCORE_CALIBRATION_CLASS_BASED, TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED, + TEST_CLINVAR_CONTROL, TEST_CROSSREF_IDENTIFIER, TEST_EXPERIMENT_WITH_KEYWORD, TEST_GNOMAD_DATA_VERSION, @@ -76,6 +77,7 @@ create_seq_score_set_with_mapped_variants, create_seq_score_set_with_variants, link_clinical_controls_to_mapped_variants, + link_clinvar_control_to_mapped_variant, link_gnomad_variants_to_mapped_variants, publish_score_set, ) @@ -3154,6 +3156,128 @@ def test_download_gnomad_file_in_variant_data_path(session, data_provider, clien assert "gnomad.gnomad_af" in reader.fieldnames +def test_download_clinvar_namespace_in_variant_data_path(session, data_provider, client, setup_router_db, data_files): + """ClinVar namespace returns clinical_significance and clinical_review_status columns with correct values.""" + # The ClinVar control seeded in setup_router_db has db_version="11_2024", mapping to namespace clinvar.2024_11. + clinvar_namespace = "clinvar.2024_11" + experiment = create_experiment(client) + score_set = create_seq_score_set_with_mapped_variants( + client, session, data_provider, experiment["urn"], data_files / "scores.csv" + ) + link_clinvar_control_to_mapped_variant(session, score_set) + + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set = publish_score_set(client, score_set["urn"]) + worker_queue.assert_called_once() + + response = client.get( + f"/api/v1/score-sets/{published_score_set['urn']}/variants/data" + f"?namespaces={clinvar_namespace}&drop_na_columns=false" + ) + assert response.status_code == 200 + reader = csv.DictReader(StringIO(response.text)) + assert f"{clinvar_namespace}.clinical_significance" in reader.fieldnames + assert f"{clinvar_namespace}.clinical_review_status" in reader.fieldnames + + rows = list(reader) + # The first variant is linked to the ClinVar control; check its values. + assert rows[0][f"{clinvar_namespace}.clinical_significance"] == TEST_CLINVAR_CONTROL["clinical_significance"] + assert rows[0][f"{clinvar_namespace}.clinical_review_status"] == TEST_CLINVAR_CONTROL["clinical_review_status"] + # Other variants have no linked control for this version; they should be NA. + assert all(row[f"{clinvar_namespace}.clinical_significance"] == "NA" for row in rows[1:]) + assert all(row[f"{clinvar_namespace}.clinical_review_status"] == "NA" for row in rows[1:]) + + +def test_download_clinvar_namespace_with_no_matching_version( + session, data_provider, client, setup_router_db, data_files +): + """When no controls match the requested ClinVar version, all rows return NA.""" + # clinvar.2023_01 does not match the seeded control (11_2024), so all rows should be NA. + clinvar_namespace = "clinvar.2023_01" + experiment = create_experiment(client) + score_set = create_seq_score_set_with_mapped_variants( + client, session, data_provider, experiment["urn"], data_files / "scores.csv" + ) + link_clinvar_control_to_mapped_variant(session, score_set) + + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set = publish_score_set(client, score_set["urn"]) + worker_queue.assert_called_once() + + response = client.get( + f"/api/v1/score-sets/{published_score_set['urn']}/variants/data" + f"?namespaces={clinvar_namespace}&drop_na_columns=false" + ) + assert response.status_code == 200 + reader = csv.DictReader(StringIO(response.text)) + assert f"{clinvar_namespace}.clinical_significance" in reader.fieldnames + assert f"{clinvar_namespace}.clinical_review_status" in reader.fieldnames + + rows = list(reader) + assert all(row[f"{clinvar_namespace}.clinical_significance"] == "NA" for row in rows) + assert all(row[f"{clinvar_namespace}.clinical_review_status"] == "NA" for row in rows) + + +def test_download_multiple_clinvar_namespaces_in_variant_data_path( + session, data_provider, client, setup_router_db, data_files +): + """Multiple ClinVar namespaces produce distinct column sets; only the matching version has real data.""" + matching_ns = "clinvar.2024_11" # matches db_version="11_2024" seeded in setup_router_db + non_matching_ns = "clinvar.2023_01" # no controls with this version + experiment = create_experiment(client) + score_set = create_seq_score_set_with_mapped_variants( + client, session, data_provider, experiment["urn"], data_files / "scores.csv" + ) + link_clinvar_control_to_mapped_variant(session, score_set) + + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set = publish_score_set(client, score_set["urn"]) + worker_queue.assert_called_once() + + response = client.get( + f"/api/v1/score-sets/{published_score_set['urn']}/variants/data" + f"?namespaces={matching_ns}&namespaces={non_matching_ns}&drop_na_columns=false" + ) + assert response.status_code == 200 + reader = csv.DictReader(StringIO(response.text)) + fieldnames = reader.fieldnames + # Both namespaces produce columns. + assert f"{matching_ns}.clinical_significance" in fieldnames + assert f"{matching_ns}.clinical_review_status" in fieldnames + assert f"{non_matching_ns}.clinical_significance" in fieldnames + assert f"{non_matching_ns}.clinical_review_status" in fieldnames + + rows = list(reader) + # Matching version: first variant has data. + assert rows[0][f"{matching_ns}.clinical_significance"] == TEST_CLINVAR_CONTROL["clinical_significance"] + assert rows[0][f"{matching_ns}.clinical_review_status"] == TEST_CLINVAR_CONTROL["clinical_review_status"] + # Non-matching version: all rows are NA. + assert all(row[f"{non_matching_ns}.clinical_significance"] == "NA" for row in rows) + assert all(row[f"{non_matching_ns}.clinical_review_status"] == "NA" for row in rows) + + +def test_invalid_clinvar_namespace_returns_422(client, setup_router_db, data_files): + """A clinvar namespace with an out-of-range month (13) is rejected with 422.""" + experiment = create_experiment(client) + score_set = create_seq_score_set(client, experiment["urn"]) + + response = client.get( + f"/api/v1/score-sets/{score_set['urn']}/variants/data?namespaces=clinvar.2024_13" + ) + assert response.status_code == 422 + + +def test_unrecognized_namespace_returns_422(client, setup_router_db, data_files): + """An entirely unrecognized namespace string is rejected with 422.""" + experiment = create_experiment(client) + score_set = create_seq_score_set(client, experiment["urn"]) + + response = client.get( + f"/api/v1/score-sets/{score_set['urn']}/variants/data?namespaces=unknown_namespace" + ) + assert response.status_code == 422 + + ######################################################################################################################## # Fetching clinical controls and control options for a score set ######################################################################################################################## From 77338e9973d24cdae863d782f23e8228996649d8 Mon Sep 17 00:00:00 2001 From: Jeremy Stone <74574922+jstone-uw@users.noreply.github.com> Date: Wed, 18 Mar 2026 16:02:50 -0700 Subject: [PATCH 3/3] Linting and formatting --- src/mavedb/lib/score_sets.py | 2 +- src/mavedb/lib/validation/transform.py | 6 +- src/mavedb/routers/score_sets.py | 2 +- tests/helpers/util/score_set.py | 12 ++-- tests/routers/test_experiments.py | 76 +++++++++++++++----------- tests/routers/test_score_set.py | 8 +-- 6 files changed, 56 insertions(+), 50 deletions(-) diff --git a/src/mavedb/lib/score_sets.py b/src/mavedb/lib/score_sets.py index a38a3001..35c041dd 100644 --- a/src/mavedb/lib/score_sets.py +++ b/src/mavedb/lib/score_sets.py @@ -4,7 +4,7 @@ import re from collections import Counter from operator import attrgetter -from typing import TYPE_CHECKING, Any, BinaryIO, Iterable, List, Literal, Optional, Sequence +from typing import TYPE_CHECKING, Any, BinaryIO, Iterable, List, Optional, Sequence import numpy as np import pandas as pd diff --git a/src/mavedb/lib/validation/transform.py b/src/mavedb/lib/validation/transform.py index 76529588..32c2dff1 100644 --- a/src/mavedb/lib/validation/transform.py +++ b/src/mavedb/lib/validation/transform.py @@ -35,9 +35,9 @@ def transform_score_set_list_to_urn_list( return [score_set.urn for score_set in score_sets] else: return [ - score_set.urn - for score_set in score_sets - if score_set.superseding_score_set is None or score_set.superseding_score_set.published_date is None + score_set.urn + for score_set in score_sets + if score_set.superseding_score_set is None or score_set.superseding_score_set.published_date is None ] diff --git a/src/mavedb/routers/score_sets.py b/src/mavedb/routers/score_sets.py index 1f1bf6d6..d426278d 100644 --- a/src/mavedb/routers/score_sets.py +++ b/src/mavedb/routers/score_sets.py @@ -2,7 +2,7 @@ import logging import time from datetime import date, datetime -from typing import Any, List, Literal, Optional, Sequence, TypedDict, Union +from typing import Any, List, Optional, Sequence, TypedDict, Union import numpy as np import pandas as pd diff --git a/tests/helpers/util/score_set.py b/tests/helpers/util/score_set.py index 1fb61772..b6d7801a 100644 --- a/tests/helpers/util/score_set.py +++ b/tests/helpers/util/score_set.py @@ -165,9 +165,9 @@ def create_seq_score_set_with_variants( count_columns_metadata_json_path, ) - assert score_set["numVariants"] == 3, ( - f"Could not create sequence based score set with variants within experiment {experiment_urn}" - ) + assert ( + score_set["numVariants"] == 3 + ), f"Could not create sequence based score set with variants within experiment {experiment_urn}" jsonschema.validate(instance=score_set, schema=ScoreSet.model_json_schema()) return score_set @@ -196,9 +196,9 @@ def create_acc_score_set_with_variants( count_columns_metadata_json_path, ) - assert score_set["numVariants"] == 3, ( - f"Could not create sequence based score set with variants within experiment {experiment_urn}" - ) + assert ( + score_set["numVariants"] == 3 + ), f"Could not create sequence based score set with variants within experiment {experiment_urn}" jsonschema.validate(instance=score_set, schema=ScoreSet.model_json_schema()) return score_set diff --git a/tests/routers/test_experiments.py b/tests/routers/test_experiments.py index 1a04ed6a..2b6be3b5 100644 --- a/tests/routers/test_experiments.py +++ b/tests/routers/test_experiments.py @@ -363,10 +363,9 @@ def test_cannot_create_experiment_that_keywords_has_endogenous_without_method_me assert response.status_code == 422 response_data = response.json() assert ( - response_data["detail"] - == "If 'Variant Library Creation Method' is 'Endogenous locus library method', " - "both 'Endogenous Locus Library Method System' and 'Endogenous Locus Library Method Mechanism' " - "must be present." + response_data["detail"] == "If 'Variant Library Creation Method' is 'Endogenous locus library method', " + "both 'Endogenous Locus Library Method System' and 'Endogenous Locus Library Method Mechanism' " + "must be present." ) @@ -401,10 +400,9 @@ def test_cannot_create_experiment_that_keywords_has_endogenous_without_method_sy assert response.status_code == 422 response_data = response.json() assert ( - response_data["detail"] - == "If 'Variant Library Creation Method' is 'Endogenous locus library method', " - "both 'Endogenous Locus Library Method System' and 'Endogenous Locus Library Method Mechanism' " - "must be present." + response_data["detail"] == "If 'Variant Library Creation Method' is 'Endogenous locus library method', " + "both 'Endogenous Locus Library Method System' and 'Endogenous Locus Library Method Mechanism' " + "must be present." ) @@ -478,10 +476,9 @@ def test_cannot_create_experiment_that_keywords_has_in_vitro_without_method_syst assert response.status_code == 422 response_data = response.json() assert ( - response_data["detail"] - == "If 'Variant Library Creation Method' is 'In vitro construct library method', " - "both 'In Vitro Construct Library Method System' and 'In Vitro Construct Library Method Mechanism' " - "must be present." + response_data["detail"] == "If 'Variant Library Creation Method' is 'In vitro construct library method', " + "both 'In Vitro Construct Library Method System' and 'In Vitro Construct Library Method Mechanism' " + "must be present." ) @@ -516,10 +513,9 @@ def test_cannot_create_experiment_that_keywords_has_in_vitro_without_method_mech assert response.status_code == 422 response_data = response.json() assert ( - response_data["detail"] - == "If 'Variant Library Creation Method' is 'In vitro construct library method', " - "both 'In Vitro Construct Library Method System' and 'In Vitro Construct Library Method Mechanism' " - "must be present." + response_data["detail"] == "If 'Variant Library Creation Method' is 'In vitro construct library method', " + "both 'In Vitro Construct Library Method System' and 'In Vitro Construct Library Method Mechanism' " + "must be present." ) @@ -717,23 +713,28 @@ def test_update_experiment_keywords(session, client, setup_router_db): assert response.status_code == 200 experiment = response.json() experiment_post_payload = experiment.copy() - experiment_post_payload.update({"keywords": [ + experiment_post_payload.update( { - "keyword": { - "key": "Phenotypic Assay Profiling Strategy", - "label": "Shotgun sequencing", - "special": False, - "description": "Description" - }, - "description": "Details of phenotypic assay profiling strategy", - }, - - ]}) + "keywords": [ + { + "keyword": { + "key": "Phenotypic Assay Profiling Strategy", + "label": "Shotgun sequencing", + "special": False, + "description": "Description", + }, + "description": "Details of phenotypic assay profiling strategy", + }, + ] + } + ) updated_response = client.put(f"/api/v1/experiments/{experiment['urn']}", json=experiment_post_payload) assert updated_response.status_code == 200 updated_experiment = updated_response.json() updated_expected_response = deepcopy(TEST_EXPERIMENT_WITH_UPDATE_KEYWORD_RESPONSE) - updated_expected_response.update({"urn": updated_experiment["urn"], "experimentSetUrn": updated_experiment["experimentSetUrn"]}) + updated_expected_response.update( + {"urn": updated_experiment["urn"], "experimentSetUrn": updated_experiment["experimentSetUrn"]} + ) assert sorted(updated_expected_response.keys()) == sorted(updated_experiment.keys()) for key in updated_experiment: assert (key, updated_expected_response[key]) == (key, updated_experiment[key]) @@ -745,12 +746,21 @@ def test_update_experiment_keywords_case_insensitive(session, client, setup_rout experiment = create_experiment(client) experiment_post_payload = experiment.copy() # Test database has Delivery Method. The updating keyword's key is delivery method. - experiment_post_payload.update({"keywords": [ + experiment_post_payload.update( { - "keyword": {"key": "delivery method", "label": "Other", "special": False, "description": "Description"}, - "description": "Details of delivery method", - }, - ]}) + "keywords": [ + { + "keyword": { + "key": "delivery method", + "label": "Other", + "special": False, + "description": "Description", + }, + "description": "Details of delivery method", + }, + ] + } + ) response = client.put(f"/api/v1/experiments/{experiment['urn']}", json=experiment_post_payload) response_data = response.json() expected_response = deepcopy(TEST_EXPERIMENT_WITH_KEYWORD_RESPONSE) diff --git a/tests/routers/test_score_set.py b/tests/routers/test_score_set.py index ab5f7142..c33184a0 100644 --- a/tests/routers/test_score_set.py +++ b/tests/routers/test_score_set.py @@ -3261,9 +3261,7 @@ def test_invalid_clinvar_namespace_returns_422(client, setup_router_db, data_fil experiment = create_experiment(client) score_set = create_seq_score_set(client, experiment["urn"]) - response = client.get( - f"/api/v1/score-sets/{score_set['urn']}/variants/data?namespaces=clinvar.2024_13" - ) + response = client.get(f"/api/v1/score-sets/{score_set['urn']}/variants/data?namespaces=clinvar.2024_13") assert response.status_code == 422 @@ -3272,9 +3270,7 @@ def test_unrecognized_namespace_returns_422(client, setup_router_db, data_files) experiment = create_experiment(client) score_set = create_seq_score_set(client, experiment["urn"]) - response = client.get( - f"/api/v1/score-sets/{score_set['urn']}/variants/data?namespaces=unknown_namespace" - ) + response = client.get(f"/api/v1/score-sets/{score_set['urn']}/variants/data?namespaces=unknown_namespace") assert response.status_code == 422