Merge pull request #473 from bento-platform/lint/imports

lint: imports, dependencies, modern type hints, exports
bento-platform · Jan 30, 2024 · 2b56389 · 2b56389
2 parents 37462f6 + ccf85ea
commit 2b56389
Show file tree

Hide file tree

Showing 30 changed files with 188 additions and 171 deletions.
diff --git a/chord_metadata_service/chord/export/cbioportal.py b/chord_metadata_service/chord/export/cbioportal.py
@@ -1,6 +1,6 @@
 import logging
 import csv
-from typing import Callable, Dict, Optional, TextIO
+from typing import Callable, TextIO
 import re
 
 from django.db.models import F
@@ -130,7 +130,7 @@ def study_export_meta(dataset: Dataset, file_handle: TextIO) -> None:
     Study meta data file generation
     """
 
-    lines: Dict[str, str] = {
+    lines: dict[str, str] = {
         "type_of_cancer": "mixed",  # TODO: find if this information is available. !IMPORTANT! uses Oncotree codes
         "cancer_study_identifier": str(dataset.identifier),
         "name": dataset.title,
@@ -156,7 +156,7 @@ def clinical_meta_export(study_id: str, datatype: str, file_handle: TextIO):
     Clinical Metadata files generation (samples or patients)
     """
 
-    lines: Dict[str, str] = {
+    lines: dict[str, str] = {
         "cancer_study_identifier": study_id,
         "genetic_alteration_type": "CLINICAL",
     }
@@ -338,7 +338,7 @@ class CbioportalClinicalHeaderGenerator:
 
     fields_mapping = {}
 
-    def __init__(self, mappings: Optional[dict] = None):
+    def __init__(self, mappings: dict | None = None):
         self.fields_mapping = mappings or {}
 
     def make_header(self, fields: list):

diff --git a/chord_metadata_service/chord/ingest/__init__.py b/chord_metadata_service/chord/ingest/__init__.py
@@ -5,13 +5,13 @@
 from .phenopackets import ingest_phenopacket_workflow
 from .readsets import ingest_readset_workflow
 
-from typing import Callable, Dict
+from typing import Callable
 
 __all__ = [
     "WORKFLOW_INGEST_FUNCTION_MAP",
 ]
 
-WORKFLOW_INGEST_FUNCTION_MAP: Dict[str, Callable] = {
+WORKFLOW_INGEST_FUNCTION_MAP: dict[str, Callable] = {
     wm.WORKFLOW_EXPERIMENTS_JSON: ingest_experiments_workflow,
     wm.WORKFLOW_PHENOPACKETS_JSON: ingest_phenopacket_workflow,
     wm.WORKFLOW_FHIR_JSON: ingest_fhir_workflow,

diff --git a/chord_metadata_service/chord/ingest/constants.py b/chord_metadata_service/chord/ingest/constants.py
@@ -5,7 +5,8 @@
 from chord_metadata_service.chord.ingest.phenopackets import ingest_phenopacket_workflow, validate_phenopacket
 
 __all__ = [
-    "DATA_TYPE_TO_VALIDATOR_FN"
+    "DATA_TYPE_TO_VALIDATOR_FN",
+    "DATA_TYPE_TO_INGESTION_FN",
 ]
 
 

diff --git a/chord_metadata_service/chord/ingest/experiments.py b/chord_metadata_service/chord/ingest/experiments.py
@@ -7,8 +7,6 @@
 from chord_metadata_service.experiments.schemas import EXPERIMENT_SCHEMA, EXPERIMENT_RESULT_SCHEMA
 from chord_metadata_service.phenopackets import models as pm
 
-from typing import Optional
-
 from .logger import logger
 from .resources import ingest_resource
 from .schema import schema_validation
@@ -54,7 +52,7 @@ def create_experiment_result(er: dict) -> em.ExperimentResult:
     return er_obj
 
 
-def validate_experiment(experiment_data, idx: Optional[int] = None) -> None:
+def validate_experiment(experiment_data, idx: int | None = None) -> None:
     # Validate experiment data against experiments schema.
     validation = schema_validation(experiment_data, EXPERIMENT_SCHEMA)
     if not validation:
@@ -68,7 +66,7 @@ def ingest_experiment(
     experiment_data: dict,
     dataset_id: str,
     validate: bool = True,
-    idx: Optional[int] = None,
+    idx: int | None = None,
 ) -> em.Experiment:
     """Ingests a single experiment."""
 
@@ -95,7 +93,7 @@ def ingest_experiment(
     instrument = experiment_data.get("instrument", {})
     extra_properties = experiment_data.get("extra_properties", {})
 
-    biosample: Optional[pm.Biosample] = None
+    biosample: pm.Biosample | None = None
 
     # get existing biosample id
     if biosample_id is not None:
@@ -204,5 +202,5 @@ def ingest_derived_experiment_results(json_data: list[dict]) -> list[em.Experime
 # The table_id is required to fit the bento_ingest.schema.json in bento_lib,
 # but it is unused. It can be set to any valid table_id or to one of the override
 # values defined in view_ingest.py
-def ingest_maf_derived_from_vcf_workflow(json_data, dataset_id: str) -> list[em.ExperimentResult]:
+def ingest_maf_derived_from_vcf_workflow(json_data, _dataset_id: str) -> list[em.ExperimentResult]:
     return ingest_derived_experiment_results(json_data)
diff --git a/chord_metadata_service/chord/ingest/fhir.py b/chord_metadata_service/chord/ingest/fhir.py
@@ -10,14 +10,14 @@
 from .logger import logger
 from .utils import get_output_or_raise, workflow_file_output_to_path
 
-from typing import Callable, Dict
+from typing import Callable
 
 __all__ = [
     "ingest_fhir_workflow",
 ]
 
 
-file_id_to_ingest_fn: Dict[str, Callable] = {
+file_id_to_ingest_fn: dict[str, Callable] = {
     "patients": ingest_patients,
     "observations": ingest_observations,
     "conditions": ingest_conditions,

diff --git a/chord_metadata_service/chord/ingest/phenopackets.py b/chord_metadata_service/chord/ingest/phenopackets.py
@@ -17,14 +17,14 @@
 from .schema import schema_validation
 from .utils import map_if_list, query_and_check_nulls
 from .logger import logger
-from typing import Any, Dict, Iterable, Optional, Union, Callable, TypeVar
+from typing import Any, Callable, Iterable, TypeVar
 from django.db.models import Model
 
 # Generic TypeVar for django db models
 T = TypeVar('T', bound=Model)
 
 
-def _get_or_create_opt(key: str, data: dict, create_func: Callable[..., T]) -> Optional[T]:
+def _get_or_create_opt(key: str, data: dict, create_func: Callable[..., T]) -> T | None:
     """
     Helper function to get or create DB objects if a key is in a dict
     """
@@ -34,7 +34,7 @@ def _get_or_create_opt(key: str, data: dict, create_func: Callable[..., T]) -> O
     return obj
 
 
-def _clean_extra_properties(extra_properties: dict) -> Dict:
+def _clean_extra_properties(extra_properties: dict[str, Any]) -> dict[str, Any]:
     """
     Removes computed properties from an extra_properties dictionary.
     Computed extra_properties start with "__" and should never be ingested.
@@ -82,7 +82,7 @@ def get_or_create_phenotypic_feature(pf: dict) -> pm.PhenotypicFeature:
 
 def validate_phenopacket(phenopacket_data: dict[str, Any],
                          schema: dict = PHENOPACKET_SCHEMA,
-                         idx: Optional[int] = None) -> None:
+                         idx: int | None = None) -> None:
     # Validate phenopacket data against phenopackets schema.
     # validation = schema_validation(phenopacket_data, PHENOPACKET_SCHEMA)
     validation = schema_validation(phenopacket_data, schema, registry=VRS_REF_REGISTRY)
@@ -106,8 +106,8 @@ def update_or_create_subject(subject: dict) -> pm.Individual:
 
     # --------------------------------------------------------------------------------------------------------------
 
-    age_numeric_value: Optional[Decimal] = None
-    age_unit_value: Optional[str] = None
+    age_numeric_value: Decimal | None = None
+    age_unit_value: str | None = None
     if "time_at_last_encounter" in subject:
         age_numeric_value, age_unit_value = time_element_to_years(subject["time_at_last_encounter"])
 
@@ -318,7 +318,7 @@ def ingest_phenopacket(phenopacket_data: dict[str, Any],
                        dataset_id: str,
                        json_schema: dict = PHENOPACKET_SCHEMA,
                        validate: bool = True,
-                       idx: Optional[int] = None) -> pm.Phenopacket:
+                       idx: int | None = None) -> pm.Phenopacket:
     """Ingests a single phenopacket."""
 
     if validate:
@@ -372,7 +372,7 @@ def ingest_phenopacket(phenopacket_data: dict[str, Any],
     # If there's a subject attached to the phenopacket, create it
     # - or, if it already exists, *update* the extra properties if needed.
     #   This is one of the few cases of 'updating' something that exists in Katsu.
-    subject_obj: Optional[pm.Individual] = None
+    subject_obj: pm.Individual | None = None
     if subject:  # we have a dictionary of subject data in the phenopacket
         subject_obj = update_or_create_subject(subject)
 
@@ -423,7 +423,7 @@ def ingest_phenopacket(phenopacket_data: dict[str, Any],
     return phenopacket
 
 
-def ingest_phenopacket_workflow(json_data, dataset_id) -> Union[list[pm.Phenopacket], pm.Phenopacket]:
+def ingest_phenopacket_workflow(json_data, dataset_id) -> list[pm.Phenopacket] | pm.Phenopacket:
     project_id = Project.objects.get(datasets=dataset_id)
     project_schemas: Iterable[ExtensionSchemaDict] = ProjectJsonSchema.objects.filter(project_id=project_id).values(
         "json_schema",
@@ -432,7 +432,7 @@ def ingest_phenopacket_workflow(json_data, dataset_id) -> Union[list[pm.Phenopac
     )
 
     # Map with key:schema_type and value:json_schema
-    extension_schemas: Dict[str, ExtensionSchemaDict] = {
+    extension_schemas: dict[str, ExtensionSchemaDict] = {
         proj_schema["schema_type"].lower(): proj_schema
         for proj_schema in project_schemas
     }

diff --git a/chord_metadata_service/chord/ingest/readsets.py b/chord_metadata_service/chord/ingest/readsets.py
@@ -6,7 +6,7 @@
 # it can be any existing table_id which can be validated
 # the workflow only performs copying files over to the DRS
 # TODO: make a workflow to deposit files on DRS
-def ingest_readset_workflow(workflow_outputs, dataset_id):
+def ingest_readset_workflow(workflow_outputs, _dataset_id):
     logger.info(f"Current workflow outputs : {workflow_outputs}")
     for readset_file in get_output_or_raise(workflow_outputs, "readset_files"):
         with workflow_file_output_to_path(readset_file) as readset_file_path:

diff --git a/chord_metadata_service/chord/tests/test_export_cbio.py b/chord_metadata_service/chord/tests/test_export_cbio.py
@@ -1,5 +1,5 @@
 import io
-from typing import Dict, TextIO
+from typing import TextIO
 from os import walk, path
 
 from django.db.models import F
@@ -57,7 +57,7 @@ def setUp(self) -> None:
         self.exp_res = ExperimentResult.objects.all()
 
     @staticmethod
-    def stream_to_dict(output: TextIO) -> Dict[str, str]:
+    def stream_to_dict(output: TextIO) -> dict[str, str]:
         """
         Utility function. Parses cBioPortal metadata text files (lines of
         key/value pairs separated by `: `) in a dictionary structure.

diff --git a/chord_metadata_service/chord/views_search.py b/chord_metadata_service/chord/views_search.py
@@ -18,7 +18,7 @@
 from rest_framework.response import Response
 from rest_framework import status
 
-from typing import Callable, Dict, Optional, Tuple, Union
+from typing import Callable
 from chord_metadata_service.chord.permissions import OverrideOrSuperUserOnly, ReadOnly
 
 from chord_metadata_service.logger import logger
@@ -207,7 +207,7 @@ def phenopacket_query_results(query, params, options=None):
             .prefetch_related(*PHENOPACKET_PREFETCH)
 
 
-QUERY_RESULTS_FN: Dict[str, Callable] = {
+QUERY_RESULTS_FN: dict[str, Callable] = {
     DATA_TYPE_EXPERIMENT: experiment_query_results,
     DATA_TYPE_PHENOPACKET: phenopacket_query_results,
 }
@@ -503,7 +503,7 @@ def get_chord_search_parameters(request, data_type=None):
 def chord_dataset_search(
         search_params,
         dataset_id, start,
-        internal=False) -> Tuple[Union[None, bool, list], Optional[str]]:
+        internal=False) -> tuple[bool | list | None, str | None]:
     """
     Performs a search based on a psycopg2 object and parameters and restricted
     to a given table.

diff --git a/chord_metadata_service/cleanup/remove.py b/chord_metadata_service/cleanup/remove.py
@@ -1,5 +1,5 @@
 from django.db.models import Model
-from typing import Set, Type, Union
+from typing import Type
 
 from ..logger import logger
 from ..utils import build_id_set
@@ -10,7 +10,7 @@
 ]
 
 
-async def remove_items(model: Type[Model], to_remove: Set[Union[int, str, None]], name_plural: str) -> int:
+async def remove_items(model: Type[Model], to_remove: set[int | str | None], name_plural: str) -> int:
     n_to_remove = len(to_remove)
 
     if n_to_remove:
@@ -22,7 +22,7 @@ async def remove_items(model: Type[Model], to_remove: Set[Union[int, str, None]]
     return n_to_remove
 
 
-async def remove_not_referenced(model: Type[Model], references: Set[Union[int, str, None]], name_plural: str) -> int:
+async def remove_not_referenced(model: Type[Model], references: set[int | str | None], name_plural: str) -> int:
     objs_referenced = references.copy()
 
     # Remove null from set

diff --git a/chord_metadata_service/experiments/api_views.py b/chord_metadata_service/experiments/api_views.py
@@ -1,5 +1,6 @@
 from django_filters.rest_framework import DjangoFilterBackend
-from rest_framework import viewsets, mixins
+from drf_spectacular.utils import extend_schema, inline_serializer
+from rest_framework import mixins, serializers, status, viewsets
 from rest_framework.settings import api_settings
 from rest_framework.decorators import api_view, permission_classes
 from rest_framework.permissions import AllowAny
@@ -10,8 +11,6 @@
 from .schemas import EXPERIMENT_SCHEMA
 from .filters import ExperimentFilter, ExperimentResultFilter
 from chord_metadata_service.restapi.pagination import LargeResultsSetPagination, BatchResultsSetPagination
-from drf_spectacular.utils import extend_schema, inline_serializer
-from rest_framework import serializers, status
 
 
 from chord_metadata_service.restapi.api_renderers import (

diff --git a/chord_metadata_service/experiments/serializers.py b/chord_metadata_service/experiments/serializers.py
@@ -1,6 +1,6 @@
+from chord_metadata_service.patients.models import Individual
 from chord_metadata_service.restapi.serializers import GenericSerializer
 from .models import Experiment, ExperimentResult, Instrument
-from chord_metadata_service.patients.models import Individual
 
 
 __all__ = ["ExperimentSerializer", "ExperimentResultSerializer", "InstrumentSerializer"]

diff --git a/chord_metadata_service/patients/models.py b/chord_metadata_service/patients/models.py
@@ -1,4 +1,3 @@
-from typing import Optional
 from django.apps import apps
 from django.db import models
 from django.db.models import JSONField
@@ -29,7 +28,7 @@ class Individual(BaseExtraProperties, BaseTimeStamp, IndexableMixin):
     def schema_type(self) -> SchemaType:
         return SchemaType.INDIVIDUAL
 
-    def get_project_id(self) -> Optional[str]:
+    def get_project_id(self) -> str | None:
         if not self.phenopackets.count():
             # Need to wait for phenopacket to exist
             return None

diff --git a/chord_metadata_service/patients/schemas.py b/chord_metadata_service/patients/schemas.py
@@ -1,4 +1,4 @@
-from chord_metadata_service.restapi.schema_utils import DATE_TIME, DRAFT_07, SCHEMA_TYPES, array_of, base_type, \
+from chord_metadata_service.restapi.schema_utils import DATE_TIME, DRAFT_07, SchemaTypes, array_of, base_type, \
     enum_of, tag_ids_and_describe, get_schema_app_id, sub_schema_uri
 from chord_metadata_service.restapi.schemas import ONTOLOGY_CLASS, EXTRA_PROPERTIES_SCHEMA, TIME_ELEMENT_SCHEMA
 from pathlib import Path
@@ -16,7 +16,7 @@
         "status": enum_of(["UNKNOWN_STATUS", "ALIVE", "DECEASED"]),
         "time_of_death": TIME_ELEMENT_SCHEMA,
         "cause_of_death": ONTOLOGY_CLASS,
-        "survival_time_in_days": base_type(SCHEMA_TYPES.INTEGER)
+        "survival_time_in_days": base_type(SchemaTypes.INTEGER)
     },
     "required": ["status"]
 }, VITAL_STATUS)
@@ -28,8 +28,8 @@
     "type": "object",
     "properties": {
         # Phenopacket V2 Individual fields
-        "id": base_type(SCHEMA_TYPES.STRING, description="Unique researcher-specified identifier for the individual."),
-        "alternate_ids": array_of(base_type(SCHEMA_TYPES.STRING)),
+        "id": base_type(SchemaTypes.STRING, description="Unique researcher-specified identifier for the individual."),
+        "alternate_ids": array_of(base_type(SchemaTypes.STRING)),
         "date_of_birth": DATE_TIME,
         "time_at_last_encounter": TIME_ELEMENT_SCHEMA,
         "vital_status": VITAL_STATUS_SCHEMA,

diff --git a/chord_metadata_service/patients/tests/constants.py b/chord_metadata_service/patients/tests/constants.py
@@ -1,4 +1,3 @@
-from typing import Tuple, Optional
 import uuid
 import random
 from datetime import date, timedelta
@@ -90,8 +89,8 @@ def generate_date_in_range(lower_year: int, upper_year: int):
 INDIVIDUAL_2_CSV = "patient:2,MALE,1967-01-01,human,UNKNOWN_KARYOTYPE,P50Y,,--IGNORE--,--IGNORE--"
 
 
-def generate_valid_individual(age=None, age_range=None, gen_random_age: Optional[Tuple[int, int]] = None,
-                              date_of_consent_range: Tuple[int, int] = (2020, 2023)):
+def generate_valid_individual(age=None, age_range=None, gen_random_age: tuple[int, int] | None = None,
+                              date_of_consent_range: tuple[int, int] = (2020, 2023)):
     if age and age_range:
         raise ValueError("Cannot use 'age' and 'age_range' simultaneously for Individual.time_at_last_encounter.")
 

diff --git a/chord_metadata_service/patients/values.py b/chord_metadata_service/patients/values.py
@@ -1,5 +1,4 @@
 from abc import ABC
-from typing import Tuple
 
 
 __all__ = [
@@ -10,7 +9,7 @@
 
 
 class ValuesCollection(ABC):
-    values: Tuple[str, ...] = ()
+    values: tuple[str, ...] = ()
 
     @classmethod
     def as_django_values(cls):

diff --git a/chord_metadata_service/phenopackets/models.py b/chord_metadata_service/phenopackets/models.py
@@ -1,4 +1,3 @@
-from typing import Optional
 from django.apps import apps
 from django.db import models
 from django.core.exceptions import ValidationError, ObjectDoesNotExist
@@ -196,7 +195,7 @@ def get_sample_tissue_data(self):
     def schema_type(self) -> SchemaType:
         return SchemaType.BIOSAMPLE
 
-    def get_project_id(self) -> Optional[str]:
+    def get_project_id(self) -> str | None:
         model = apps.get_model("phenopackets.Phenopacket")
         if len(phenopackets := model.objects.filter(biosamples__id=self.id)) < 1:
             return None
@@ -424,7 +423,7 @@ class Meta:
     def schema_type(self) -> SchemaType:
         return SchemaType.PHENOPACKET
 
-    def get_project_id(self) -> Optional[str]:
+    def get_project_id(self) -> str | None:
         model = apps.get_model("chord.Project")
         try:
             project = model.objects.get(datasets=self.dataset)