Skip to content

Commit

Permalink
Merge pull request #473 from bento-platform/lint/imports
Browse files Browse the repository at this point in the history
lint: imports, dependencies, modern type hints, exports
  • Loading branch information
davidlougheed committed Jan 30, 2024
2 parents 37462f6 + ccf85ea commit 2b56389
Show file tree
Hide file tree
Showing 30 changed files with 188 additions and 171 deletions.
8 changes: 4 additions & 4 deletions chord_metadata_service/chord/export/cbioportal.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import logging
import csv
from typing import Callable, Dict, Optional, TextIO
from typing import Callable, TextIO
import re

from django.db.models import F
Expand Down Expand Up @@ -130,7 +130,7 @@ def study_export_meta(dataset: Dataset, file_handle: TextIO) -> None:
Study meta data file generation
"""

lines: Dict[str, str] = {
lines: dict[str, str] = {
"type_of_cancer": "mixed", # TODO: find if this information is available. !IMPORTANT! uses Oncotree codes
"cancer_study_identifier": str(dataset.identifier),
"name": dataset.title,
Expand All @@ -156,7 +156,7 @@ def clinical_meta_export(study_id: str, datatype: str, file_handle: TextIO):
Clinical Metadata files generation (samples or patients)
"""

lines: Dict[str, str] = {
lines: dict[str, str] = {
"cancer_study_identifier": study_id,
"genetic_alteration_type": "CLINICAL",
}
Expand Down Expand Up @@ -338,7 +338,7 @@ class CbioportalClinicalHeaderGenerator:

fields_mapping = {}

def __init__(self, mappings: Optional[dict] = None):
def __init__(self, mappings: dict | None = None):
self.fields_mapping = mappings or {}

def make_header(self, fields: list):
Expand Down
4 changes: 2 additions & 2 deletions chord_metadata_service/chord/ingest/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@
from .phenopackets import ingest_phenopacket_workflow
from .readsets import ingest_readset_workflow

from typing import Callable, Dict
from typing import Callable

__all__ = [
"WORKFLOW_INGEST_FUNCTION_MAP",
]

WORKFLOW_INGEST_FUNCTION_MAP: Dict[str, Callable] = {
WORKFLOW_INGEST_FUNCTION_MAP: dict[str, Callable] = {
wm.WORKFLOW_EXPERIMENTS_JSON: ingest_experiments_workflow,
wm.WORKFLOW_PHENOPACKETS_JSON: ingest_phenopacket_workflow,
wm.WORKFLOW_FHIR_JSON: ingest_fhir_workflow,
Expand Down
3 changes: 2 additions & 1 deletion chord_metadata_service/chord/ingest/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
from chord_metadata_service.chord.ingest.phenopackets import ingest_phenopacket_workflow, validate_phenopacket

__all__ = [
"DATA_TYPE_TO_VALIDATOR_FN"
"DATA_TYPE_TO_VALIDATOR_FN",
"DATA_TYPE_TO_INGESTION_FN",
]


Expand Down
10 changes: 4 additions & 6 deletions chord_metadata_service/chord/ingest/experiments.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@
from chord_metadata_service.experiments.schemas import EXPERIMENT_SCHEMA, EXPERIMENT_RESULT_SCHEMA
from chord_metadata_service.phenopackets import models as pm

from typing import Optional

from .logger import logger
from .resources import ingest_resource
from .schema import schema_validation
Expand Down Expand Up @@ -54,7 +52,7 @@ def create_experiment_result(er: dict) -> em.ExperimentResult:
return er_obj


def validate_experiment(experiment_data, idx: Optional[int] = None) -> None:
def validate_experiment(experiment_data, idx: int | None = None) -> None:
# Validate experiment data against experiments schema.
validation = schema_validation(experiment_data, EXPERIMENT_SCHEMA)
if not validation:
Expand All @@ -68,7 +66,7 @@ def ingest_experiment(
experiment_data: dict,
dataset_id: str,
validate: bool = True,
idx: Optional[int] = None,
idx: int | None = None,
) -> em.Experiment:
"""Ingests a single experiment."""

Expand All @@ -95,7 +93,7 @@ def ingest_experiment(
instrument = experiment_data.get("instrument", {})
extra_properties = experiment_data.get("extra_properties", {})

biosample: Optional[pm.Biosample] = None
biosample: pm.Biosample | None = None

# get existing biosample id
if biosample_id is not None:
Expand Down Expand Up @@ -204,5 +202,5 @@ def ingest_derived_experiment_results(json_data: list[dict]) -> list[em.Experime
# The table_id is required to fit the bento_ingest.schema.json in bento_lib,
# but it is unused. It can be set to any valid table_id or to one of the override
# values defined in view_ingest.py
def ingest_maf_derived_from_vcf_workflow(json_data, dataset_id: str) -> list[em.ExperimentResult]:
def ingest_maf_derived_from_vcf_workflow(json_data, _dataset_id: str) -> list[em.ExperimentResult]:
return ingest_derived_experiment_results(json_data)
4 changes: 2 additions & 2 deletions chord_metadata_service/chord/ingest/fhir.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,14 @@
from .logger import logger
from .utils import get_output_or_raise, workflow_file_output_to_path

from typing import Callable, Dict
from typing import Callable

__all__ = [
"ingest_fhir_workflow",
]


file_id_to_ingest_fn: Dict[str, Callable] = {
file_id_to_ingest_fn: dict[str, Callable] = {
"patients": ingest_patients,
"observations": ingest_observations,
"conditions": ingest_conditions,
Expand Down
20 changes: 10 additions & 10 deletions chord_metadata_service/chord/ingest/phenopackets.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,14 @@
from .schema import schema_validation
from .utils import map_if_list, query_and_check_nulls
from .logger import logger
from typing import Any, Dict, Iterable, Optional, Union, Callable, TypeVar
from typing import Any, Callable, Iterable, TypeVar
from django.db.models import Model

# Generic TypeVar for django db models
T = TypeVar('T', bound=Model)


def _get_or_create_opt(key: str, data: dict, create_func: Callable[..., T]) -> Optional[T]:
def _get_or_create_opt(key: str, data: dict, create_func: Callable[..., T]) -> T | None:
"""
Helper function to get or create DB objects if a key is in a dict
"""
Expand All @@ -34,7 +34,7 @@ def _get_or_create_opt(key: str, data: dict, create_func: Callable[..., T]) -> O
return obj


def _clean_extra_properties(extra_properties: dict) -> Dict:
def _clean_extra_properties(extra_properties: dict[str, Any]) -> dict[str, Any]:
"""
Removes computed properties from an extra_properties dictionary.
Computed extra_properties start with "__" and should never be ingested.
Expand Down Expand Up @@ -82,7 +82,7 @@ def get_or_create_phenotypic_feature(pf: dict) -> pm.PhenotypicFeature:

def validate_phenopacket(phenopacket_data: dict[str, Any],
schema: dict = PHENOPACKET_SCHEMA,
idx: Optional[int] = None) -> None:
idx: int | None = None) -> None:
# Validate phenopacket data against phenopackets schema.
# validation = schema_validation(phenopacket_data, PHENOPACKET_SCHEMA)
validation = schema_validation(phenopacket_data, schema, registry=VRS_REF_REGISTRY)
Expand All @@ -106,8 +106,8 @@ def update_or_create_subject(subject: dict) -> pm.Individual:

# --------------------------------------------------------------------------------------------------------------

age_numeric_value: Optional[Decimal] = None
age_unit_value: Optional[str] = None
age_numeric_value: Decimal | None = None
age_unit_value: str | None = None
if "time_at_last_encounter" in subject:
age_numeric_value, age_unit_value = time_element_to_years(subject["time_at_last_encounter"])

Expand Down Expand Up @@ -318,7 +318,7 @@ def ingest_phenopacket(phenopacket_data: dict[str, Any],
dataset_id: str,
json_schema: dict = PHENOPACKET_SCHEMA,
validate: bool = True,
idx: Optional[int] = None) -> pm.Phenopacket:
idx: int | None = None) -> pm.Phenopacket:
"""Ingests a single phenopacket."""

if validate:
Expand Down Expand Up @@ -372,7 +372,7 @@ def ingest_phenopacket(phenopacket_data: dict[str, Any],
# If there's a subject attached to the phenopacket, create it
# - or, if it already exists, *update* the extra properties if needed.
# This is one of the few cases of 'updating' something that exists in Katsu.
subject_obj: Optional[pm.Individual] = None
subject_obj: pm.Individual | None = None
if subject: # we have a dictionary of subject data in the phenopacket
subject_obj = update_or_create_subject(subject)

Expand Down Expand Up @@ -423,7 +423,7 @@ def ingest_phenopacket(phenopacket_data: dict[str, Any],
return phenopacket


def ingest_phenopacket_workflow(json_data, dataset_id) -> Union[list[pm.Phenopacket], pm.Phenopacket]:
def ingest_phenopacket_workflow(json_data, dataset_id) -> list[pm.Phenopacket] | pm.Phenopacket:
project_id = Project.objects.get(datasets=dataset_id)
project_schemas: Iterable[ExtensionSchemaDict] = ProjectJsonSchema.objects.filter(project_id=project_id).values(
"json_schema",
Expand All @@ -432,7 +432,7 @@ def ingest_phenopacket_workflow(json_data, dataset_id) -> Union[list[pm.Phenopac
)

# Map with key:schema_type and value:json_schema
extension_schemas: Dict[str, ExtensionSchemaDict] = {
extension_schemas: dict[str, ExtensionSchemaDict] = {
proj_schema["schema_type"].lower(): proj_schema
for proj_schema in project_schemas
}
Expand Down
2 changes: 1 addition & 1 deletion chord_metadata_service/chord/ingest/readsets.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
# it can be any existing table_id which can be validated
# the workflow only performs copying files over to the DRS
# TODO: make a workflow to deposit files on DRS
def ingest_readset_workflow(workflow_outputs, dataset_id):
def ingest_readset_workflow(workflow_outputs, _dataset_id):
logger.info(f"Current workflow outputs : {workflow_outputs}")
for readset_file in get_output_or_raise(workflow_outputs, "readset_files"):
with workflow_file_output_to_path(readset_file) as readset_file_path:
Expand Down
4 changes: 2 additions & 2 deletions chord_metadata_service/chord/tests/test_export_cbio.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import io
from typing import Dict, TextIO
from typing import TextIO
from os import walk, path

from django.db.models import F
Expand Down Expand Up @@ -57,7 +57,7 @@ def setUp(self) -> None:
self.exp_res = ExperimentResult.objects.all()

@staticmethod
def stream_to_dict(output: TextIO) -> Dict[str, str]:
def stream_to_dict(output: TextIO) -> dict[str, str]:
"""
Utility function. Parses cBioPortal metadata text files (lines of
key/value pairs separated by `: `) into a dictionary structure.
Expand Down
6 changes: 3 additions & 3 deletions chord_metadata_service/chord/views_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from rest_framework.response import Response
from rest_framework import status

from typing import Callable, Dict, Optional, Tuple, Union
from typing import Callable
from chord_metadata_service.chord.permissions import OverrideOrSuperUserOnly, ReadOnly

from chord_metadata_service.logger import logger
Expand Down Expand Up @@ -207,7 +207,7 @@ def phenopacket_query_results(query, params, options=None):
.prefetch_related(*PHENOPACKET_PREFETCH)


QUERY_RESULTS_FN: Dict[str, Callable] = {
QUERY_RESULTS_FN: dict[str, Callable] = {
DATA_TYPE_EXPERIMENT: experiment_query_results,
DATA_TYPE_PHENOPACKET: phenopacket_query_results,
}
Expand Down Expand Up @@ -503,7 +503,7 @@ def get_chord_search_parameters(request, data_type=None):
def chord_dataset_search(
search_params,
dataset_id, start,
internal=False) -> Tuple[Union[None, bool, list], Optional[str]]:
internal=False) -> tuple[bool | list | None, str | None]:
"""
Performs a search based on a psycopg2 object and parameters and restricted
to a given table.
Expand Down
6 changes: 3 additions & 3 deletions chord_metadata_service/cleanup/remove.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from django.db.models import Model
from typing import Set, Type, Union
from typing import Type

from ..logger import logger
from ..utils import build_id_set
Expand All @@ -10,7 +10,7 @@
]


async def remove_items(model: Type[Model], to_remove: Set[Union[int, str, None]], name_plural: str) -> int:
async def remove_items(model: Type[Model], to_remove: set[int | str | None], name_plural: str) -> int:
n_to_remove = len(to_remove)

if n_to_remove:
Expand All @@ -22,7 +22,7 @@ async def remove_items(model: Type[Model], to_remove: Set[Union[int, str, None]]
return n_to_remove


async def remove_not_referenced(model: Type[Model], references: Set[Union[int, str, None]], name_plural: str) -> int:
async def remove_not_referenced(model: Type[Model], references: set[int | str | None], name_plural: str) -> int:
objs_referenced = references.copy()

# Remove null from set
Expand Down
5 changes: 2 additions & 3 deletions chord_metadata_service/experiments/api_views.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from django_filters.rest_framework import DjangoFilterBackend
from rest_framework import viewsets, mixins
from drf_spectacular.utils import extend_schema, inline_serializer
from rest_framework import mixins, serializers, status, viewsets
from rest_framework.settings import api_settings
from rest_framework.decorators import api_view, permission_classes
from rest_framework.permissions import AllowAny
Expand All @@ -10,8 +11,6 @@
from .schemas import EXPERIMENT_SCHEMA
from .filters import ExperimentFilter, ExperimentResultFilter
from chord_metadata_service.restapi.pagination import LargeResultsSetPagination, BatchResultsSetPagination
from drf_spectacular.utils import extend_schema, inline_serializer
from rest_framework import serializers, status


from chord_metadata_service.restapi.api_renderers import (
Expand Down
2 changes: 1 addition & 1 deletion chord_metadata_service/experiments/serializers.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from chord_metadata_service.patients.models import Individual
from chord_metadata_service.restapi.serializers import GenericSerializer
from .models import Experiment, ExperimentResult, Instrument
from chord_metadata_service.patients.models import Individual


__all__ = ["ExperimentSerializer", "ExperimentResultSerializer", "InstrumentSerializer"]
Expand Down
3 changes: 1 addition & 2 deletions chord_metadata_service/patients/models.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from typing import Optional
from django.apps import apps
from django.db import models
from django.db.models import JSONField
Expand Down Expand Up @@ -29,7 +28,7 @@ class Individual(BaseExtraProperties, BaseTimeStamp, IndexableMixin):
def schema_type(self) -> SchemaType:
return SchemaType.INDIVIDUAL

def get_project_id(self) -> Optional[str]:
def get_project_id(self) -> str | None:
if not self.phenopackets.count():
# Need to wait for phenopacket to exist
return None
Expand Down
8 changes: 4 additions & 4 deletions chord_metadata_service/patients/schemas.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from chord_metadata_service.restapi.schema_utils import DATE_TIME, DRAFT_07, SCHEMA_TYPES, array_of, base_type, \
from chord_metadata_service.restapi.schema_utils import DATE_TIME, DRAFT_07, SchemaTypes, array_of, base_type, \
enum_of, tag_ids_and_describe, get_schema_app_id, sub_schema_uri
from chord_metadata_service.restapi.schemas import ONTOLOGY_CLASS, EXTRA_PROPERTIES_SCHEMA, TIME_ELEMENT_SCHEMA
from pathlib import Path
Expand All @@ -16,7 +16,7 @@
"status": enum_of(["UNKNOWN_STATUS", "ALIVE", "DECEASED"]),
"time_of_death": TIME_ELEMENT_SCHEMA,
"cause_of_death": ONTOLOGY_CLASS,
"survival_time_in_days": base_type(SCHEMA_TYPES.INTEGER)
"survival_time_in_days": base_type(SchemaTypes.INTEGER)
},
"required": ["status"]
}, VITAL_STATUS)
Expand All @@ -28,8 +28,8 @@
"type": "object",
"properties": {
# Phenopacket V2 Individual fields
"id": base_type(SCHEMA_TYPES.STRING, description="Unique researcher-specified identifier for the individual."),
"alternate_ids": array_of(base_type(SCHEMA_TYPES.STRING)),
"id": base_type(SchemaTypes.STRING, description="Unique researcher-specified identifier for the individual."),
"alternate_ids": array_of(base_type(SchemaTypes.STRING)),
"date_of_birth": DATE_TIME,
"time_at_last_encounter": TIME_ELEMENT_SCHEMA,
"vital_status": VITAL_STATUS_SCHEMA,
Expand Down
5 changes: 2 additions & 3 deletions chord_metadata_service/patients/tests/constants.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from typing import Tuple, Optional
import uuid
import random
from datetime import date, timedelta
Expand Down Expand Up @@ -90,8 +89,8 @@ def generate_date_in_range(lower_year: int, upper_year: int):
INDIVIDUAL_2_CSV = "patient:2,MALE,1967-01-01,human,UNKNOWN_KARYOTYPE,P50Y,,--IGNORE--,--IGNORE--"


def generate_valid_individual(age=None, age_range=None, gen_random_age: Optional[Tuple[int, int]] = None,
date_of_consent_range: Tuple[int, int] = (2020, 2023)):
def generate_valid_individual(age=None, age_range=None, gen_random_age: tuple[int, int] | None = None,
date_of_consent_range: tuple[int, int] = (2020, 2023)):
if age and age_range:
raise ValueError("Cannot use 'age' and 'age_range' simultaneously for Individual.time_at_last_encounter.")

Expand Down
3 changes: 1 addition & 2 deletions chord_metadata_service/patients/values.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
from abc import ABC
from typing import Tuple


__all__ = [
Expand All @@ -10,7 +9,7 @@


class ValuesCollection(ABC):
values: Tuple[str, ...] = ()
values: tuple[str, ...] = ()

@classmethod
def as_django_values(cls):
Expand Down
5 changes: 2 additions & 3 deletions chord_metadata_service/phenopackets/models.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from typing import Optional
from django.apps import apps
from django.db import models
from django.core.exceptions import ValidationError, ObjectDoesNotExist
Expand Down Expand Up @@ -196,7 +195,7 @@ def get_sample_tissue_data(self):
def schema_type(self) -> SchemaType:
return SchemaType.BIOSAMPLE

def get_project_id(self) -> Optional[str]:
def get_project_id(self) -> str | None:
model = apps.get_model("phenopackets.Phenopacket")
if len(phenopackets := model.objects.filter(biosamples__id=self.id)) < 1:
return None
Expand Down Expand Up @@ -424,7 +423,7 @@ class Meta:
def schema_type(self) -> SchemaType:
return SchemaType.PHENOPACKET

def get_project_id(self) -> Optional[str]:
def get_project_id(self) -> str | None:
model = apps.get_model("chord.Project")
try:
project = model.objects.get(datasets=self.dataset)
Expand Down

0 comments on commit 2b56389

Please sign in to comment.