Skip to content

Commit

Permalink
Merge pull request #449 from bento-platform/features/remove-mcode
Browse files Browse the repository at this point in the history
chore: remove mcode models and references
  • Loading branch information
v-rocheleau committed Oct 25, 2023
2 parents bdf4b01 + 6ba8ec2 commit 2817fd8
Show file tree
Hide file tree
Showing 54 changed files with 184 additions and 174,501 deletions.
13 changes: 5 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,25 +46,22 @@ CANARIE funded initial development of the Katsu Metadata service under the CHORD
Katsu Metadata Service is a service to store epigenomic metadata.

1. Patients service handles anonymized individual’s data (individual id, sex, age or date of birth)
* Data model: aggregated profile from GA4GH Phenopackets Individual, FHIR Patient and mCODE Patient.
* Data model: aggregated profile from GA4GH Phenopackets Individual and FHIR Patient.

2. Phenopackets service handles phenotypic and clinical data
* Data model: [GA4GH Phenopackets schema](https://github.com/phenopackets/phenopacket-schema)

3. mCode service handles patient's oncology related data.
* Data model: [mCODE data elements](https://mcodeinitiative.org/)

4. Experiments service handles experiment related data.
3. Experiments service handles experiment-related data.
* Data model: derived from
[IHEC Metadata Experiment](https://github.com/IHEC/ihec-ecosystems/blob/master/docs/metadata/2.0/Ihec_metadata_specification.md#experiments)

5. Resources service handles metadata about ontologies used for data annotation.
4. Resources service handles metadata about ontologies used for data annotation.
* Data model: derived from Phenopackets Resource profile

6. CHORD service handles metadata about dataset, has relation to phenopackets (one dataset can have many phenopackets)
5. CHORD service handles metadata about datasets, which are related to phenopackets (one dataset can have many phenopackets)
* Data model: [DATS](https://github.com/datatagsuite) + [GA4GH DUO](https://github.com/EBISPOT/DUO)

7. Rest api service handles all generic functionality shared among other services
6. REST API service handles all generic functionality shared among other services


## REST API highlights
Expand Down
10 changes: 0 additions & 10 deletions chord_metadata_service/chord/data_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,12 @@

from chord_metadata_service.experiments.search_schemas import EXPERIMENT_SEARCH_SCHEMA
from chord_metadata_service.phenopackets.search_schemas import PHENOPACKET_SEARCH_SCHEMA
# from chord_metadata_service.mcode.schemas import MCODE_SCHEMA
from chord_metadata_service.experiments.schemas import EXPERIMENT_RESULT_SCHEMA

__all__ = [
"DATA_TYPE_EXPERIMENT",
"DATA_TYPE_EXPERIMENT_RESULT",
"DATA_TYPE_PHENOPACKET",
"DATA_TYPE_MCODEPACKET",
"DATA_TYPE_READSET",
"DATA_TYPES",
]
Expand All @@ -18,7 +16,6 @@
DATA_TYPE_EXPERIMENT = "experiment"
DATA_TYPE_EXPERIMENT_RESULT = "experiment_result"
DATA_TYPE_PHENOPACKET = "phenopacket"
DATA_TYPE_MCODEPACKET = "mcodepacket"
DATA_TYPE_READSET = "readset"

DATA_TYPES = {
Expand All @@ -38,13 +35,6 @@
"type": "object", # TODO
},
},
# De-listed 2022-12-08 - David L
# DATA_TYPE_MCODEPACKET: {
# "schema": MCODE_SCHEMA,
# "metadata_schema": {
# "type": "object", # TODO
# }
# },
DATA_TYPE_READSET: {
"label": "Readsets",
"queryable": False,
Expand Down
3 changes: 0 additions & 3 deletions chord_metadata_service/chord/ingest/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

from .experiments import ingest_experiments_workflow, ingest_maf_derived_from_vcf_workflow
from .fhir import ingest_fhir_workflow
from .mcode import ingest_mcode_fhir_workflow, ingest_mcode_workflow
from .phenopackets import ingest_phenopacket_workflow
from .readsets import ingest_readset_workflow

Expand All @@ -16,8 +15,6 @@
wm.WORKFLOW_EXPERIMENTS_JSON: ingest_experiments_workflow,
wm.WORKFLOW_PHENOPACKETS_JSON: ingest_phenopacket_workflow,
wm.WORKFLOW_FHIR_JSON: ingest_fhir_workflow,
wm.WORKFLOW_MCODE_FHIR_JSON: ingest_mcode_fhir_workflow,
wm.WORKFLOW_MCODE_JSON: ingest_mcode_workflow,
wm.WORKFLOW_READSET: ingest_readset_workflow,
wm.WORKFLOW_MAF_DERIVED_FROM_VCF_JSON: ingest_maf_derived_from_vcf_workflow,
}
2 changes: 0 additions & 2 deletions chord_metadata_service/chord/ingest/phenopackets.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,6 @@ def update_or_create_subject(subject: dict) -> pm.Individual:
id=subject["id"],
# if left out/null, karyotypic_sex defaults to UNKNOWN_KARYOTYPE
karyotypic_sex=subject.get("karyotypic_sex") or KaryotypicSex.UNKNOWN_KARYOTYPE,
race=subject.get("race", ""),
ethnicity=subject.get("ethnicity", ""),
age_numeric=age_numeric_value,
age_unit=age_unit_value if age_unit_value else "",
extra_properties=existing_extra_properties,
Expand Down

This file was deleted.

2 changes: 0 additions & 2 deletions chord_metadata_service/chord/migrations/0006_v4_0_0.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,6 @@

class Migration(migrations.Migration):

replaces = [('chord', '0006_remove_tableownership_dataset'), ('chord', '0007_delete_table_delete_tableownership')]

dependencies = [
('phenopackets', '0005_v4_0_0'),
('chord', '0005_v3_0_0'),
Expand Down

This file was deleted.

1 change: 0 additions & 1 deletion chord_metadata_service/chord/tests/test_api_data_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ def test_data_type_list(self):
self.assertEqual(len(c), len(DATA_TYPES))
ids = [dt["id"] for dt in c]
self.assertIn(DATA_TYPE_EXPERIMENT, ids)
# self.assertIn(DATA_TYPE_MCODEPACKET, ids)
self.assertIn(DATA_TYPE_PHENOPACKET, ids)
self.assertIn(DATA_TYPE_READSET, ids)
self.assertIn(DATA_TYPE_EXPERIMENT_RESULT, ids)
Expand Down
2 changes: 0 additions & 2 deletions chord_metadata_service/chord/views_data_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,12 @@
from chord_metadata_service.cleanup import run_all_cleanup
from chord_metadata_service.experiments.models import Experiment, ExperimentResult
from chord_metadata_service.logger import logger
from chord_metadata_service.mcode.models import MCodePacket
from chord_metadata_service.phenopackets.models import Phenopacket

from . import data_types as dt

QUERYSET_FN: Dict[str, Callable] = {
dt.DATA_TYPE_EXPERIMENT: lambda dataset_id: Experiment.objects.filter(dataset_id=dataset_id),
dt.DATA_TYPE_MCODEPACKET: lambda dataset_id: MCodePacket.objects.filter(dataset_id=dataset_id),
dt.DATA_TYPE_PHENOPACKET: lambda dataset_id: Phenopacket.objects.filter(dataset_id=dataset_id),
dt.DATA_TYPE_EXPERIMENT_RESULT: lambda dataset_id: ExperimentResult.objects.filter(
experiment__dataset_id=dataset_id),
Expand Down
36 changes: 4 additions & 32 deletions chord_metadata_service/chord/views_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,6 @@
from chord_metadata_service.experiments.models import Experiment
from chord_metadata_service.experiments.serializers import ExperimentSerializer

from chord_metadata_service.mcode.models import MCodePacket
from chord_metadata_service.mcode.serializers import MCodePacketSerializer

from chord_metadata_service.metadata.elastic import es

Expand All @@ -40,7 +38,7 @@
from chord_metadata_service.phenopackets.models import Phenopacket, Biosample, Variant
from chord_metadata_service.phenopackets.serializers import PhenopacketSerializer

from .data_types import DATA_TYPE_EXPERIMENT, DATA_TYPE_MCODEPACKET, DATA_TYPE_PHENOPACKET, DATA_TYPES
from .data_types import DATA_TYPE_EXPERIMENT, DATA_TYPE_PHENOPACKET, DATA_TYPES
from .models import Dataset

from collections import defaultdict
Expand All @@ -59,15 +57,6 @@ def experiment_dataset_summary(dataset):
}


def mcodepacket_dataset_summary(dataset):
mcodepackets = MCodePacket.objects.filter(dataset=dataset) # TODO

return {
"count": mcodepackets.count(),
"data_type_specific": {}, # TODO
}


def variant_dataset_summary(dataset):
variants = Variant.objects.filter(dataset=dataset)

Expand Down Expand Up @@ -158,20 +147,6 @@ def experiment_query_results(query, params, options=None):
.prefetch_related(*EXPERIMENT_PREFETCH)


def mcodepacket_query_results(query, params, options=None):
# TODO: possibly a quite inefficient way of doing things...
# TODO: select_related / prefetch_related for instant performance boost!
queryset = MCodePacket.objects.filter(
id__in=data_type_results(query, params, "id")
)

output_format = options.get("output") if options else None
if output_format == OUTPUT_FORMAT_VALUES_LIST:
return get_values_list(queryset, options)

return queryset


def get_biosamples_with_experiment_details(subject_ids):
"""
The function returns a queryset where each entry represents a biosample obtained from a subject, along with
Expand Down Expand Up @@ -244,13 +219,11 @@ def phenopacket_query_results(query, params, options=None):

QUERY_RESULTS_FN: Dict[str, Callable] = {
DATA_TYPE_EXPERIMENT: experiment_query_results,
DATA_TYPE_MCODEPACKET: mcodepacket_query_results,
DATA_TYPE_PHENOPACKET: phenopacket_query_results,
}

QUERY_RESULT_SERIALIZERS = {
DATA_TYPE_EXPERIMENT: ExperimentSerializer,
DATA_TYPE_MCODEPACKET: MCodePacketSerializer,
DATA_TYPE_PHENOPACKET: PhenopacketSerializer,
}

Expand All @@ -260,7 +233,7 @@ def search(request, internal_data=False):
Generic function that takes a request object containing the following parameters:
- query: a Bento specific string representation of a query. e.g.
["#eq", ["#resolve", "experiment_results", "[item]", "file_format"], "VCF"]
- data_type: one of "experiment", "mcode", "phenopacket"
- data_type: one of "experiment", "phenopacket"
If internal_data is False, this function returns the IDs of the tables where matches
are found.
If internal_data is True, this function returns matches grouped by their
Expand Down Expand Up @@ -361,7 +334,7 @@ def chord_private_search(request):
- query: a Bento specific object representing a query e.g.:
["#eq", ["#resolve", "experiment_results", "[item]", "file_format"], "VCF"]
Note: for GET method, it must be encoded as a JSON string.
- data_type: one of "phenopackets"/"experiments"/"mcodepackets"
- data_type: one of "phenopacket"/"experiment"
- optional parameters:
see chord_private_table_search
Expand Down Expand Up @@ -489,7 +462,7 @@ def get_chord_search_parameters(request, data_type=None):
- request: DRF Request object. See `chord_private_table_search` for
details of the possible values. Note that the "output" parameter is not
implemented for this search.
- data_type: optional argument. Can be "experiment"/"phenopacket"/"mcodepacket"
- data_type: optional argument. Can be "experiment"/"phenopacket"
This value is provided for the chord searches that are restricted to
a specific table (values inferred from the table properties)
- returns:
Expand Down Expand Up @@ -622,5 +595,4 @@ def dataset_summary(request: HttpRequest, dataset_id: str):
return Response({
DATA_TYPE_PHENOPACKET: phenopacket_dataset_summary(dataset=dataset),
DATA_TYPE_EXPERIMENT: experiment_dataset_summary(dataset=dataset),
DATA_TYPE_MCODEPACKET: mcodepacket_dataset_summary(dataset=dataset),
})
24 changes: 0 additions & 24 deletions chord_metadata_service/chord/workflows/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@
"WORKFLOW_PHENOPACKETS_JSON",
"WORKFLOW_EXPERIMENTS_JSON",
"WORKFLOW_FHIR_JSON",
"WORKFLOW_MCODE_FHIR_JSON",
"WORKFLOW_MCODE_JSON",
"WORKFLOW_READSET",
"WORKFLOW_MAF_DERIVED_FROM_VCF_JSON",
"WORKFLOW_VCF2MAF",
Expand All @@ -22,7 +20,6 @@
DATA_TYPE_EXPERIMENT,
DATA_TYPE_EXPERIMENT_RESULT,
DATA_TYPE_PHENOPACKET,
DATA_TYPE_MCODEPACKET,
DATA_TYPE_READSET,
)

Expand All @@ -31,8 +28,6 @@
WORKFLOW_PHENOPACKETS_JSON = "phenopackets_json"
WORKFLOW_EXPERIMENTS_JSON = "experiments_json"
WORKFLOW_FHIR_JSON = "fhir_json"
WORKFLOW_MCODE_FHIR_JSON = "mcode_fhir_json"
WORKFLOW_MCODE_JSON = "mcode_json"
WORKFLOW_READSET = "readset"
WORKFLOW_MAF_DERIVED_FROM_VCF_JSON = "maf_derived_from_vcf_json"
WORKFLOW_VCF2MAF = "vcf2maf"
Expand Down Expand Up @@ -130,25 +125,6 @@ def json_file_output(id_: str, output_name: Optional[str] = None):

]
},
WORKFLOW_MCODE_FHIR_JSON: {
"name": "MCODE FHIR Resources JSON",
"description": "This ingestion workflow will validate and import a mCODE FHIR 4.0. schema-compatible "
"JSON document, and convert it to the Bento metadata service's internal mCODE-based "
"data model.",
"data_type": DATA_TYPE_MCODEPACKET,
"file": "mcode_fhir_json.wdl",
"inputs": [KATSU_URL_INPUT, json_file_input("json_document")],
"outputs": [json_file_output("json_document", "ingest.json")],
},
WORKFLOW_MCODE_JSON: {
"name": "MCODE Resources JSON",
"description": "This ingestion workflow will validate and import the Bento metadata service's "
"internal mCODE-based JSON document",
"data_type": DATA_TYPE_MCODEPACKET,
"file": "mcode_json.wdl",
"inputs": [KATSU_URL_INPUT, json_file_input("json_document")],
"outputs": [json_file_output("json_document", "ingest.json")],
},
WORKFLOW_READSET: {
"name": "Readset",
"description": "This workflow will copy readset files over to DRS.",
Expand Down
21 changes: 0 additions & 21 deletions chord_metadata_service/chord/workflows/wdls/mcode_fhir_json.wdl

This file was deleted.

21 changes: 0 additions & 21 deletions chord_metadata_service/chord/workflows/wdls/mcode_json.wdl

This file was deleted.

0 comments on commit 2817fd8

Please sign in to comment.